docs: Update local branch from upstream

Merge remote branch from commit f1fd00da78

Signed-off-by: Ychau Wang <wangyongchao.bj@inspur.com>
Yongchao Wang 2020-08-27 17:50:44 +08:00 committed by Ychau Wang
commit 00bd04f923
177 changed files with 2349 additions and 3878 deletions

View File

@ -0,0 +1,73 @@
name: Commit Message Check
on:
pull_request:
types:
- opened
- reopened
- synchronize
env:
error_msg: |+
See the document below for help on formatting commits for the project.
https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md#patch-forma
jobs:
commit-message-check:
runs-on: ubuntu-latest
name: Commit Message Check
steps:
- name: Get PR Commits
id: 'get-pr-commits'
uses: tim-actions/get-pr-commits@v1.0.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: DCO Check
uses: tim-actions/dco@2fd0504dc0d27b33f542867c300c60840c6dcb20
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
- name: Commit Body Missing Check
if: ${{ success() || failure() }}
uses: tim-actions/commit-body-check@v1.0.2
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
- name: Check Subject Line Length
if: ${{ success() || failure() }}
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
pattern: '^.{0,75}(\n.*)*$'
error: 'Subject too long (max 75)'
post_error: ${{ env.error_msg }}
- name: Check Body Line Length
if: ${{ success() || failure() }}
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
pattern: '^.+(\n.{0,72})*$|^.+\n\s*[^a-zA-Z\s\n]|^.+\n\S+$'
error: 'Body line too long (max 72)'
post_error: ${{ env.error_msg }}
- name: Check Fixes
if: ${{ success() || failure() }}
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
pattern: '\s*Fixes\s*:?\s*(#\d+|github\.com\/kata-containers\/[a-z-.]*#\d+)'
flags: 'i'
error: 'No "Fixes" found'
post_error: ${{ env.error_msg }}
one_pass_all_pass: 'true'
- name: Check Subsystem
if: ${{ success() || failure() }}
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}
pattern: '^[\s\t]*[^:\s\t]+[\s\t]*:'
error: 'Failed to find subsystem in subject'
post_error: ${{ env.error_msg }}
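For reference, a commit message of the following shape would satisfy all of the checks above (subsystem, issue number, and author are illustrative):

```
docs: Fix broken links in the developer guide

Update the guide so that all links point at the new monorepo
layout rather than the archived repositories.

Fixes: #1234

Signed-off-by: Jane Doe <jane@example.com>
```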

View File

@ -1,22 +0,0 @@
name: DCO check
on:
pull_request:
types:
- opened
- reopened
- synchronize
jobs:
dco_check_job:
runs-on: ubuntu-latest
name: DCO Check
steps:
- name: Get PR Commits
id: 'get-pr-commits'
uses: tim-actions/get-pr-commits@ed97a21c3f83c3417e67a4733ea76887293a2c8f
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: DCO Check
uses: tim-actions/dco@2fd0504dc0d27b33f542867c300c60840c6dcb20
with:
commits: ${{ steps.get-pr-commits.outputs.commits }}

View File

@ -8,9 +8,8 @@
* [Kata Containers-developed components](#kata-containers-developed-components)
* [Agent](#agent)
* [KSM throttler](#ksm-throttler)
* [Proxy](#proxy)
* [Runtime](#runtime)
* [Shim](#shim)
* [Trace forwarder](#trace-forwarder)
* [Additional](#additional)
* [Hypervisor](#hypervisor)
* [Kernel](#kernel)
@ -75,26 +74,12 @@ The [`kata-ksm-throttler`](https://github.com/kata-containers/ksm-throttler)
is an optional utility that monitors containers and deduplicates memory to
maximize container density on a host.
##### Proxy
The [`kata-proxy`](https://github.com/kata-containers/proxy) is a process that
runs on the host and co-ordinates access to the agent running inside the
virtual machine.
##### Runtime
The [`kata-runtime`](src/runtime/README.md) is usually
invoked by a container manager and provides high-level verbs to manage
containers.
##### Shim
The [`kata-shim`](https://github.com/kata-containers/shim) is a process that
runs on the host. It acts as though it is the workload (which actually runs
inside the virtual machine). This shim is required to be compliant with the
expectations of the [OCI runtime
specification](https://github.com/opencontainers/runtime-spec).
##### Trace forwarder
The [`kata-trace-forwarder`](src/trace-forwarder) is a component only used

View File

@ -1 +1 @@
2.0.0-alpha2
2.0.0-alpha3

View File

@ -13,7 +13,6 @@
* [journald rate limiting](#journald-rate-limiting)
* [`systemd-journald` suppressing messages](#systemd-journald-suppressing-messages)
* [Disabling `systemd-journald` rate limiting](#disabling-systemd-journald-rate-limiting)
* [Build and install Kata proxy](#build-and-install-kata-proxy)
* [Build and install Kata shim](#build-and-install-kata-shim)
* [Create and install rootfs and initrd image](#create-and-install-rootfs-and-initrd-image)
* [Build a custom Kata agent - OPTIONAL](#build-a-custom-kata-agent---optional)
@ -243,13 +242,6 @@ Restart `systemd-journald` for the changes to take effect:
$ sudo systemctl restart systemd-journald
```
# Build and install Kata proxy
```
$ go get -d -u github.com/kata-containers/proxy
$ cd $GOPATH/src/github.com/kata-containers/proxy && make && sudo make install
```
# Build and install Kata shim
```
@ -378,11 +370,11 @@ $ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers-initrd.
# Install guest kernel images
You can build and install the guest kernel image as shown [here](https://github.com/kata-containers/packaging/tree/master/kernel#build-kata-containers-kernel).
You can build and install the guest kernel image as shown [here](../tools/packaging/kernel/README.md#build-kata-containers-kernel).
# Install a hypervisor
When setting up Kata using a [packaged installation method](https://github.com/kata-containers/documentation/tree/master/install#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
When setting up Kata using a [packaged installation method](install/README.md#installing-on-a-linux-system), the `qemu-lite` hypervisor is installed automatically. For other installation methods, you will need to manually install a suitable hypervisor.
## Build a custom QEMU
@ -447,14 +439,14 @@ Refer to to the [Run Kata Containers with Kubernetes](how-to/run-kata-with-k8s.m
If you are unable to create a Kata Container, first ensure you have
[enabled full debug](#enable-full-debug)
before attempting to create a container. Then run the
[`kata-collect-data.sh`](https://github.com/kata-containers/runtime/blob/master/data/kata-collect-data.sh.in)
[`kata-collect-data.sh`](../src/runtime/data/kata-collect-data.sh.in)
script and paste its output directly into a
[GitHub issue](https://github.com/kata-containers/kata-containers/issues/new).
> **Note:**
>
> The `kata-collect-data.sh` script is built from the
> [runtime](https://github.com/kata-containers/runtime) repository.
> [runtime](../src/runtime) repository.
To perform analysis on Kata logs, use the
[`kata-log-parser`](https://github.com/kata-containers/tests/tree/master/cmd/log-parser)
@ -507,7 +499,7 @@ the following steps (using rootfs or initrd image).
> additional packages in the rootfs and add `agent.debug_console` to kernel parameters in the runtime
> config file. This tells the Kata agent to launch the console directly.
>
> Once these steps are taken you can connect to the virtual machine using the [debug console](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#connect-to-the-virtual-machine-using-the-debug-console).
> Once these steps are taken you can connect to the virtual machine using the [debug console](Developer-Guide.md#connect-to-the-virtual-machine-using-the-debug-console).
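As a sketch, the kernel parameter can be added through the `kernel_params` option of the hypervisor section in the runtime config file (merge it with any parameters already set there):

```toml
[hypervisor.qemu]
# Tell the Kata agent to launch the debug console directly
kernel_params = "agent.debug_console"
```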
### Create a custom image containing a shell
@ -571,7 +563,7 @@ $ sudo install -o root -g root -m 0640 kata-containers.img "/usr/share/kata-cont
```
Next, modify the `image=` values in the `[hypervisor.qemu]` section of the
[configuration file](https://github.com/kata-containers/runtime#configuration)
[configuration file](../src/runtime/README.md#configuration)
to specify the full path to the image name specified in the previous code
section. Alternatively, recreate the symbolic link so it points to
the new debug image:
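A sketch of the symbolic link approach, following the same pattern as the initrd example earlier in this guide (`$image` holds the name of the debug image installed above):

```
$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img)
```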

View File

@ -39,7 +39,7 @@ Some of these limitations have potential solutions, whereas others exist
due to fundamental architectural differences generally related to the
use of VMs.
The [Kata Container runtime](https://github.com/kata-containers/runtime)
The [Kata Container runtime](../src/runtime)
launches each container within its own hardware isolated VM, and each VM has
its own kernel. Due to this higher degree of isolation, certain container
capabilities cannot be supported or are implicitly enabled through the VM.
@ -78,7 +78,7 @@ The following link shows the latest list of limitations:
If you would like to work on resolving a limitation, please refer to the
[contributors guide](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md).
If you wish to raise an issue for a new limitation, either
[raise an issue directly on the runtime](https://github.com/kata-containers/runtime/issues/new)
[raise an issue directly on the runtime](https://github.com/kata-containers/kata-containers/issues/new)
or see the
[project table of contents](https://github.com/kata-containers/kata-containers)
for advice on which repository to raise the issue against.
@ -270,11 +270,6 @@ The following examples outline some of the various areas constraints can be appl
This can be achieved by specifying particular hypervisor configuration options.
- Constrain the [shim](https://github.com/kata-containers/shim) process.
This process represents the container workload running inside the VM.
- Constrain the [proxy](https://github.com/kata-containers/proxy) process.
Note that in some circumstances it might be necessary to apply particular constraints
to more than one of the previous areas to achieve the desired level of isolation and resource control.

View File

@ -69,7 +69,7 @@
We make use of [GitHub actions](https://github.com/features/actions) in this [file](https://github.com/kata-containers/kata-containers/blob/master/.github/workflows/main.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto-triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository.
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/runtime/releases).
Check the [actions status page](https://github.com/kata-containers/kata-containers/actions) to verify all steps in the actions workflow have completed successfully. On success, a static tarball containing Kata release artifacts will be uploaded to the [Release page](https://github.com/kata-containers/kata-containers/releases).
### Create OBS Packages

View File

@ -68,7 +68,7 @@ $ for container in $(sudo docker ps -q); do sudo docker stop $container; done
The automatic migration of
[Clear Containers configuration](https://github.com/clearcontainers/runtime#configuration) to
[Kata Containers configuration](https://github.com/kata-containers/runtime#configuration) is
[Kata Containers configuration](../src/runtime/README.md#configuration) is
not supported.
If you have made changes to your Clear Containers configuration, you should
@ -111,7 +111,7 @@ $ sudo rm /etc/systemd/system/docker.service.d/clear-containers.conf
## Install Kata Containers
Follow one of the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
Follow one of the [installation guides](install).
## Create a Kata Container
@ -126,12 +126,12 @@ not configured to use the same container root storage. Currently, runV defaults
defaults to `/var/run/kata-containers`.
Now, to upgrade from runV you need to do a fresh install of Kata Containers by following one of
the [installation guides](https://github.com/kata-containers/documentation/tree/master/install).
the [installation guides](install).
# Upgrade Kata Containers
As shown in the
[installation instructions](https://github.com/kata-containers/documentation/blob/master/install),
[installation instructions](install),
Kata Containers provide binaries for popular distributions in their native
packaging formats. This allows Kata Containers to be upgraded using the
standard package management tools for your distribution.
@ -150,7 +150,7 @@ Since the official assets are packaged, they are automatically upgraded when
new package versions are published.
> **Warning**: Note that if you use custom assets (by modifying the
> [Kata Runtime configuration > file](https://github.com/kata-containers/runtime/#configuration)),
> [Kata Runtime configuration > file](../src/runtime/README.md#configuration)),
> it is your responsibility to ensure they are updated as necessary.
### Guest kernel
@ -159,7 +159,7 @@ The `kata-linux-container` package contains a Linux\* kernel based on the
latest vanilla version of the
[long-term kernel](https://www.kernel.org/)
plus a small number of
[patches](https://github.com/kata-containers/packaging/tree/master/kernel).
[patches](../tools/packaging/kernel).
The `Longterm` branch is only updated with
[important bug fixes](https://www.kernel.org/category/releases.html)
@ -174,7 +174,7 @@ The `kata-containers-image` package is updated only when critical updates are
available for the packages used to create it, such as:
- systemd
- [Kata Containers Agent](https://github.com/kata-containers/agent)
- [Kata Containers Agent](../src/agent)
### Determining asset versions

View File

@ -1,7 +1,6 @@
# Kata Containers and VSOCKs
- [Introduction](#introduction)
- [proxy communication diagram](#proxy-communication-diagram)
- [VSOCK communication diagram](#vsock-communication-diagram)
- [System requirements](#system-requirements)
- [Advantages of using VSOCKs](#advantages-of-using-vsocks)
@ -16,46 +15,10 @@ processes in the virtual machine can read/write data from/to a serial port
device and the processes in the host can read/write data from/to a Unix socket.
Most GNU/Linux distributions have support for serial ports, making it the most
portable solution. However, the serial link limits read/write access to one
process at a time. To deal with this limitation, the resources (serial port and
Unix socket) must be multiplexed. In Kata Containers those resources are
multiplexed using [`kata-proxy`][2] and [Yamux][3]; the following diagram shows
how this is implemented.
### proxy communication diagram
```
.----------------------.
| .------------------. |
| | .-----. .-----. | |
| | |cont1| |cont2| | |
| | `-----' `-----' | |
| | \ / | |
| | .---------. | |
| | | agent | | |
| | `---------' | |
| | | | |
| | .-----------. | |
| |POD |serial port| | |
| `----|-----------|-' |
| | socket | |
| `-----------' |
| | |
| .-------. |
| | proxy | |
| `-------' |
| | |
| .------./ \.------. |
| | shim | | shim | |
| `------' `------' |
| Host |
`----------------------'
```
A newer, simpler method is [VSOCKs][4], which can accept connections from
multiple clients and does not require multiplexers ([`kata-proxy`][2] and
[Yamux][3]). The following diagram shows how it's implemented in Kata Containers.
process at a time.
A newer, simpler method is [VSOCKs][1], which can accept connections from
multiple clients. The following diagram shows how it's implemented in Kata Containers.
### VSOCK communication diagram
@ -95,6 +58,7 @@ The Kata Containers version must be greater than or equal to 1.2.0 and `use_vsoc
must be set to `true` in the runtime [configuration file][1].
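As a minimal sketch, the relevant setting lives in the hypervisor section of that file (section name as used by the QEMU configuration):

```toml
[hypervisor.qemu]
use_vsock = true
```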
### With VMWare guest
To use Kata Containers with VSOCKs in a VMWare guest environment, first stop the `vmware-tools` service and unload the VMWare Linux kernel module.
```
sudo systemctl stop vmware-tools
@ -107,28 +71,25 @@ sudo modprobe -i vhost_vsock
### High density
Using a proxy for multiplexing the connections between the VM and the host uses
4.5MB per [POD][5]. In a high density deployment this could add up to GBs of
4.5MB per [POD][2]. In a high density deployment this could add up to GBs of
memory that could have been used to host more PODs. When we talk about density,
every kilobyte matters and it might be the deciding factor between running another
POD or not. For example, if you have 500 PODs running on a server, the same
amount of [`kata-proxy`][2] processes will be running and consuming around
amount of [`kata-proxy`][3] processes will be running and consuming around
2250MB of RAM. Before deciding not to use VSOCKs, you should ask
yourself: how many more containers could run with the RAM consumed by the
Kata proxies?
### Reliability
[`kata-proxy`][2] is in charge of multiplexing the connections between virtual
[`kata-proxy`][3] is in charge of multiplexing the connections between virtual
machine and host processes; if it dies, all connections are broken. For example,
if you have a [POD][5] with 10 containers running, if `kata-proxy` dies it would
if you have a [POD][2] with 10 containers running, if `kata-proxy` dies it would
be impossible to contact your containers, though they would still be running.
Since communication via VSOCKs is direct, the only way to lose communication
with the containers is if the VM itself or the [shim][6] dies; if this happens,
with the containers is if the VM itself or the `containerd-shim-kata-v2` dies; if this happens,
the containers are removed automatically.
[1]: https://github.com/kata-containers/runtime#configuration
[2]: https://github.com/kata-containers/proxy
[3]: https://github.com/hashicorp/yamux
[4]: https://wiki.qemu.org/Features/VirtioVsock
[5]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods
[6]: https://github.com/kata-containers/shim
[1]: https://wiki.qemu.org/Features/VirtioVsock
[2]: ./vcpu-handling.md#virtual-cpus-and-kubernetes-pods
[3]: https://github.com/kata-containers/proxy

View File

@ -17,8 +17,6 @@
* [exec](#exec)
* [kill](#kill)
* [delete](#delete)
* [Proxy](#proxy)
* [Shim](#shim)
* [Networking](#networking)
* [Storage](#storage)
* [Kubernetes Support](#kubernetes-support)
@ -37,7 +35,7 @@ This is an architectural overview of Kata Containers, based on the 1.5.0 release
The two primary deliverables of the Kata Containers project are a container runtime
and a CRI friendly shim. There is also a CRI friendly library API behind them.
The [Kata Containers runtime (`kata-runtime`)](https://github.com/kata-containers/runtime)
The [Kata Containers runtime (`kata-runtime`)](../../src/runtime)
is compatible with the [OCI](https://github.com/opencontainers) [runtime specification](https://github.com/opencontainers/runtime-spec)
and therefore works seamlessly with the
[Docker\* Engine](https://www.docker.com/products/docker-engine) pluggable runtime
@ -52,7 +50,7 @@ the Docker engine or `kubelet` (Kubernetes) creates respectively.
![Docker and Kata Containers](arch-images/docker-kata.png)
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
The [`containerd-shim-kata-v2` (shown as `shimv2` from this point onwards)](../../src/runtime/containerd-shim-v2)
is another Kata Containers entrypoint, which
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI compatible containers with one shim (the `shimv2`) per Pod instead
@ -62,7 +60,7 @@ of `2N+1` shims (a `containerd-shim` and a `kata-shim` for each container and th
![Kubernetes integration with shimv2](arch-images/shimv2.svg)
The container process is then spawned by
[agent](https://github.com/kata-containers/agent), an agent process running
[agent](../../src/agent), an agent process running
as a daemon inside the virtual machine. `kata-agent` runs a gRPC server in
the guest using a VIRTIO serial or VSOCK interface which QEMU exposes as a socket
file on the host. `kata-runtime` uses a gRPC protocol to communicate with
@ -72,30 +70,7 @@ stderr, stdin) between the containers and the management engines (e.g. Docker Engine
For any given container, both the init process and all potentially executed
commands within that container, together with their related I/O streams, need
to go through the VIRTIO serial or VSOCK interface exported by QEMU.
In the VIRTIO serial case, a [Kata Containers
proxy (`kata-proxy`)](https://github.com/kata-containers/proxy) instance is
launched for each virtual machine to handle multiplexing and demultiplexing
those commands and streams.
On the host, each container process's removal is handled by a reaper in the higher
layers of the container stack. In the case of Docker or containerd it is handled by `containerd-shim`.
In the case of CRI-O it is handled by `conmon`. For clarity, for the remainder
of this document the term "container process reaper" will be used to refer to
either reaper. As Kata Containers processes run inside their own virtual machines,
the container process reaper cannot monitor, control
or reap them. `kata-runtime` fixes that issue by creating an [additional shim process
(`kata-shim`)](https://github.com/kata-containers/shim) between the container process
reaper and `kata-proxy`. A `kata-shim` instance will both forward signals and `stdin`
streams to the container process on the guest and pass the container `stdout`
and `stderr` streams back up the stack to the CRI shim or Docker via the container process
reaper. `kata-runtime` creates a `kata-shim` daemon for each container and for each
OCI command received to run within an already running container (for example,
`docker exec`).
Since Kata Containers version 1.5, the newly introduced `shimv2` has integrated the
functionality of the reaper, the `kata-runtime`, the `kata-shim`, and the `kata-proxy`.
As a result, none of the additional processes listed previously are spawned.
to go through the VSOCK interface exported by QEMU.
The container workload, that is, the actual OCI bundle rootfs, is exported from the
host to the virtual machine. In the case where a block-based graph driver is
@ -155,7 +130,7 @@ The only service running in the context of the initrd is the [Agent](#agent) as
## Agent
[`kata-agent`](https://github.com/kata-containers/agent) is a process running in the
[`kata-agent`](../../src/agent) is a process running in the
guest as a supervisor for managing containers and processes running within
those containers.
@ -164,12 +139,7 @@ run several containers per VM to support container engines that require multiple
containers running inside a pod. In the case of docker, `kata-runtime` creates a
single container per pod.
`kata-agent` communicates with the other Kata components over gRPC.
It also runs a [`yamux`](https://github.com/hashicorp/yamux) server on the same gRPC URL.
The `kata-agent` makes use of [`libcontainer`](https://github.com/opencontainers/runc/tree/master/libcontainer)
to manage the lifecycle of the container. This way the `kata-agent` reuses most
of the code used by [`runc`](https://github.com/opencontainers/runc).
`kata-agent` communicates with the other Kata components over ttRPC.
### Agent gRPC protocol
@ -199,7 +169,7 @@ Most users will not need to modify the configuration file.
The file is well commented and provides a few "knobs" that can be used to modify
the behavior of the runtime.
The configuration file is also used to enable runtime [debug output](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#enable-full-debug).
The configuration file is also used to enable runtime [debug output](../Developer-Guide.md#enable-full-debug).
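For example, debug output can be switched on with a setting of the following shape (a sketch; the same `enable_debug` knob appears in several sections of the file, such as the hypervisor and agent sections):

```toml
[runtime]
enable_debug = true
```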
### Significant OCI commands
@ -324,57 +294,6 @@ process representing this container process.
4. Communicate with `kata-agent` (connecting the proxy) to remove the container configuration from the VM.
5. Return container status.
## Proxy
Communication with the VM can be achieved by either `virtio-serial` or, if the host
kernel is newer than v4.8, a virtual socket (`vsock`). The default is `virtio-serial`.
The VM will likely be running multiple container processes. In the event `virtio-serial`
is used, the I/O streams associated with each process need to be multiplexed and demultiplexed on the host. On systems with `vsock` support, this component becomes optional.
`kata-proxy` is a process offering access to the VM [`kata-agent`](https://github.com/kata-containers/agent)
to multiple `kata-shim` and `kata-runtime` clients associated with the VM. Its
main role is to route the I/O streams and signals between each `kata-shim`
instance and the `kata-agent`.
`kata-proxy` connects to `kata-agent` on a Unix domain socket that `kata-runtime` provides
while spawning `kata-proxy`.
`kata-proxy` uses [`yamux`](https://github.com/hashicorp/yamux) to multiplex gRPC
requests on its connection to the `kata-agent`.
When the proxy type is configured as `proxyBuiltIn`, no separate process is
spawned to proxy gRPC connections. Instead, a built-in Yamux gRPC dialer is used to connect
directly to `kata-agent`. This is used by the CRI container runtime server `frakti`, which
calls directly into `kata-runtime`.
## Shim
A container process reaper, such as Docker's `containerd-shim` or CRI-O's `conmon`,
is designed around the assumption that it can monitor and reap the actual container
process. As the container process reaper runs on the host, it cannot directly
monitor a process running within a virtual machine. At most it can see the QEMU
process, but that is not enough. With Kata Containers, `kata-shim` acts as the
container process that the container process reaper can monitor. Therefore
`kata-shim` needs to handle all container I/O streams (`stdout`, `stdin` and `stderr`)
and forward all signals the container process reaper decides to send to the container
process.
`kata-shim` has implicit knowledge of which VM agent will handle those streams
and signals and thus acts as an encapsulation layer between the container process
reaper and the `kata-agent`. `kata-shim`:
- Connects to `kata-proxy` on a Unix domain socket. The socket URL is passed from
`kata-runtime` to `kata-shim` when the former spawns the latter along with a
`containerID` and `execID`. The `containerID` and `execID` are used to identify
the true container process that the shim process will be shadowing or representing.
- Forwards the standard input stream from the container process reaper into
`kata-proxy` using the gRPC `WriteStdin` API.
- Reads the standard output/error from the container process.
- Forwards signals it receives from the container process reaper to `kata-proxy`
using the `SignalProcessRequest` API.
- Monitors terminal changes and forwards them to `kata-proxy` using the gRPC `TtyWinResize`
API.
## Networking
Containers will typically live in their own, possibly shared, networking namespace.
@ -534,13 +453,13 @@ pod creation request from a container one.
### Containerd
As of Kata Containers 1.5, using `shimv2` with containerd 1.2.0 or above is the preferred
way to run Kata Containers with Kubernetes ([see the howto](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
way to run Kata Containers with Kubernetes ([see the howto](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
CRI-O will catch up soon ([`kubernetes-sigs/cri-o#2024`](https://github.com/kubernetes-sigs/cri-o/issues/2024)).
Refer to the following how-to guides:
- [How to use Kata Containers and Containerd](/how-to/containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)
- [How to use Kata Containers and Containerd](../how-to/containerd-kata.md)
- [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)
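As a quick illustration, pointing containerd at the Kata `shimv2` typically amounts to a runtime entry of the following shape in containerd's `config.toml` (a sketch; the exact plugin path depends on your containerd version, so treat the how-to above as authoritative):

```toml
[plugins.cri.containerd.runtimes.kata]
  # Containerd Runtime V2 shim for Kata Containers
  runtime_type = "io.containerd.kata.v2"
```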
### CRI-O
@ -587,7 +506,7 @@ with a Kubernetes pod:
#### Mixing VM based and namespace based runtimes
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](/how-to/containerd-kata.md#kubernetes-runtimeclass)
> **Note:** Since Kubernetes 1.12, the [`Kubernetes RuntimeClass`](../how-to/containerd-kata.md#kubernetes-runtimeclass)
> has been supported and the user can specify runtime without the non-standardized annotations.
One interesting evolution of the CRI-O support for `kata-runtime` is the ability

View File

@ -51,7 +51,7 @@ Kata Containers introduces a non-negligible overhead for running a sandbox (pod)
2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](https://github.com/kata-containers/runtime#configuration)
the `SandboxCgroupOnly` flag within the Kata Containers [configuration](../../src/runtime/README.md#configuration)
file.
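As a sketch, the flag appears in its `snake_case` form under the runtime section of that file:

```toml
[runtime]
sandbox_cgroup_only = true
```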
## `SandboxCgroupOnly` enabled

View File

@ -170,6 +170,6 @@ docker run --cpus 4 -ti debian bash -c "nproc; cat /sys/fs/cgroup/cpu,cpuacct/cp
[2]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/
[4]: https://docs.docker.com/engine/reference/commandline/update/
[5]: https://github.com/kata-containers/agent
[6]: https://github.com/kata-containers/runtime
[7]: https://github.com/kata-containers/runtime#configuration
[5]: ../../src/agent
[6]: ../../src/runtime
[7]: ../../src/runtime/README.md#configuration

View File

@ -57,7 +57,7 @@ use `RuntimeClass` instead of the deprecated annotations.
### Containerd Runtime V2 API: Shim V2 API
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](https://github.com/kata-containers/runtime/tree/master/containerd-shim-v2)
The [`containerd-shim-kata-v2` (short as `shimv2` in this documentation)](../../src/runtime/containerd-shim-v2)
implements the [Containerd Runtime V2 (Shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2) for Kata.
With `shimv2`, Kubernetes can launch Pod and OCI-compatible containers with one shim per Pod. Prior to `shimv2`, `2N+1`
shims (i.e. a `containerd-shim` and a `kata-shim` for each container and the Pod sandbox itself) and no standalone `kata-proxy`
@ -72,7 +72,7 @@ is implemented in Kata Containers v1.5.0.
### Install Kata Containers
Follow the instructions to [install Kata Containers](https://github.com/kata-containers/documentation/blob/master/install/README.md).
Follow the instructions to [install Kata Containers](../install/README.md).
### Install containerd with CRI plugin

View File

@ -33,7 +33,7 @@ also applies to the Kata `shimv2` runtime. Differences pertaining to Kata `shim
Kata generates logs. The logs can come from numerous parts of the Kata stack (the runtime, proxy, shim
and even the agent). By default the logs
[go to the system journal](https://github.com/kata-containers/runtime#logging),
[go to the system journal](../../src/runtime/README.md#logging),
but they can also be configured to be stored in files.
The default log format is [`logfmt` structured logging](https://brandur.org/logfmt), but it can be switched to
@ -256,7 +256,7 @@ directly from Kata, that should make overall import and processing of the log en
There are potentially two things we can do with Kata here:
- Get Kata to [output its logs in `JSON` format](https://github.com/kata-containers/runtime#logging) rather
- Get Kata to [output its logs in `JSON` format](../../src/runtime/README.md#logging) rather
than `logfmt`.
- Get Kata to log directly into a file, rather than via the system journal. This would allow us to not need
to parse the systemd format files, and capture the Kata log lines directly. It would also avoid Fluentd

View File

@ -103,6 +103,6 @@ spec:
> **Note**: To pass annotations to Kata containers, [CRI must be configured correctly](how-to-set-sandbox-config-kata.md#cri-configuration)
[1]: https://github.com/kata-containers/runtime
[2]: https://github.com/kata-containers/agent
[1]: ../../src/runtime
[2]: ../../src/agent
[3]: https://kubernetes.io/docs/concepts/workloads/pods/pod/

View File

@ -177,7 +177,7 @@ $ sudo -E kubectl taint nodes --all node-role.kubernetes.io/master-
By default, all pods are created with the default runtime configured in CRI containerd plugin.
If a pod has the `io.kubernetes.cri.untrusted-workload` annotation set to `"true"`, the CRI plugin runs the pod with the
[Kata Containers runtime](https://github.com/kata-containers/runtime/blob/master/README.md).
[Kata Containers runtime](../../src/runtime/README.md).
- Create an untrusted pod configuration
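A minimal sketch of such a pod configuration (pod name and image are illustrative):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: untrusted-demo
  annotations:
    io.kubernetes.cri.untrusted-workload: "true"
spec:
  containers:
  - name: demo
    image: busybox
    command: ["sleep", "3600"]
```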

View File

@ -49,7 +49,7 @@ This document requires the presence of the ACRN hypervisor and Kata Containers o
$ sudo sed -i "s/$kernel_img/bzImage/g" /mnt/loader/entries/$conf_file
$ sync && sudo umount /mnt && sudo reboot
```
- Kata Containers installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md) steps.
- Kata Containers installation: Automated installation does not seem to be supported for Clear Linux, so please use [manual installation](../Developer-Guide.md) steps.
> **Note:** Create rootfs image and not initrd image.
@ -82,7 +82,7 @@ $ sudo systemctl daemon-reload
$ sudo systemctl restart docker
```
4. Configure [Docker](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#update-the-docker-systemd-unit-file) to use `kata-runtime`.
4. Configure [Docker](../Developer-Guide.md#update-the-docker-systemd-unit-file) to use `kata-runtime`.
## Configure Kata Containers with ACRN

View File

@ -19,7 +19,7 @@ Kata Containers relies by default on the QEMU hypervisor in order to spawn the v
This document describes how to run Kata Containers with NEMU, first by explaining how to download, build and install it. Then it walks through the steps needed to update your Kata Containers configuration in order to run with NEMU.
## Pre-requisites
This document requires Kata Containers to be [installed](https://github.com/kata-containers/documentation/blob/master/install/README.md) on your system.
This document requires Kata Containers to be [installed](../install/README.md) on your system.
Also, it's worth noting that NEMU only supports the `x86_64` and `aarch64` architectures.

View File

@ -25,14 +25,14 @@ This document describes how to get Kata Containers to work with virtio-fs.
## Install Kata Containers with virtio-fs support
The Kata Containers NEMU configuration, the NEMU VMM and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/runtime/releases) artifacts starting with the 1.7 release. While the feature is experimental, distribution packages are not supported, but installation is available through [`kata-deploy`](https://github.com/kata-containers/packaging/tree/master/kata-deploy).
The Kata Containers NEMU configuration, the NEMU VMM and the `virtiofs` daemon are available in the [Kata Container release](https://github.com/kata-containers/kata-containers/releases) artifacts starting with the 1.7 release. While the feature is experimental, distribution packages are not supported, but installation is available through [`kata-deploy`](../../tools/packaging/kata-deploy).
Install the latest release of Kata as follows:
```
docker run --runtime=runc -v /opt/kata:/opt/kata -v /var/run/dbus:/var/run/dbus -v /run/systemd:/run/systemd -v /etc/docker:/etc/docker -it katadocker/kata-deploy kata-deploy-docker install
```
This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-nemu`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start).
This will place the Kata release artifacts in `/opt/kata`, and update Docker's configuration to include a runtime target, `kata-nemu`. Learn more about `kata-deploy` and how to use `kata-deploy` in Kubernetes [here](../../tools/packaging/kata-deploy/README.md#kubernetes-quick-start).
## Run a Kata Container utilizing virtio-fs

View File

@ -75,5 +75,5 @@ See the example config below:
privileged_without_host_devices = true
```
- [Kata Containers with CRI-O](https://github.com/kata-containers/documentation/blob/master/how-to/run-kata-with-k8s.md#cri-o)
- [Kata Containers with CRI-O](../how-to/run-kata-with-k8s.md#cri-o)

View File

@ -14,7 +14,7 @@
* [Run a Kubernetes pod with Kata Containers](#run-a-kubernetes-pod-with-kata-containers)
## Prerequisites
This guide requires Kata Containers to be available on your system, installable by following [this guide](../install/README.md).
This guide requires Kata Containers available on your system, install-able by following [this guide](../install/README.md).
## Install a CRI implementation
@ -28,7 +28,7 @@ After choosing one CRI implementation, you must make the appropriate configurati
to ensure it integrates with Kata Containers.
Kata Containers 1.5 introduced the `shimv2` for containerd 1.2.0, reducing the components
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
required to spawn pods and containers, and this is the preferred way to run Kata Containers with Kubernetes ([as documented here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers)).
An equivalent shim implementation for CRI-O is planned.
@ -78,7 +78,7 @@ a runtime to be used when the workload cannot be trusted and a higher level of s
is required. An additional flag can be used to let CRI-O know if a workload
should be considered _trusted_ or _untrusted_ by default.
For further details, see the documentation
[here](https://github.com/kata-containers/documentation/blob/master/design/architecture.md#mixing-vm-based-and-namespace-based-runtimes).
[here](../design/architecture.md#mixing-vm-based-and-namespace-based-runtimes).
```toml
# runtime is the OCI compatible runtime used for trusted container workloads.
@ -132,7 +132,7 @@ to properly install it.
To customize containerd to select Kata Containers runtime, follow our
"Configure containerd to use Kata Containers" internal documentation
[here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
[here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-containerd-to-use-kata-containers).
## Install Kubernetes
@ -160,7 +160,7 @@ Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-tim
Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
```
For more information about containerd see the "Configure Kubelet to use containerd"
documentation [here](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd).
documentation [here](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#configure-kubelet-to-use-containerd).
## Run a Kubernetes pod with Kata Containers

View File

@ -48,7 +48,7 @@ as the proxy starts.
### Kata and Kubernetes
Follow the [instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
Follow the [instructions](../install/README.md)
to get Kata Containers properly installed and configured with Kubernetes.
You can choose between CRI-O and CRI-containerd, both are supported
through this document.

View File

@ -10,7 +10,7 @@
VMCache is a new function that creates VMs as caches before they are used.
It helps speed up new container creation.
The function consists of a server and some clients communicating
through Unix socket. The protocol is gRPC in [`protocols/cache/cache.proto`](https://github.com/kata-containers/runtime/blob/master/protocols/cache/cache.proto).
through Unix socket. The protocol is gRPC in [`protocols/cache/cache.proto`](../../src/runtime/protocols/cache/cache.proto).
The VMCache server will create some VMs and cache them via the factory cache.
It will convert a VM to the gRPC format and transport it when it is
requested by clients.
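As a sketch, enabling the cache factory amounts to something like the following in the runtime `configuration.toml`, after which the server is started with `kata-runtime factory init` (option and sub-command names as used by Kata 1.x; see the enablement section below):

```toml
[factory]
# Number of cached VMs the VMCache server keeps ready; 0 disables VMCache
vm_cache_number = 3
```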
@ -21,9 +21,9 @@ a new sandbox.
### How is this different to VM templating
Both [VM templating](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md) and VMCache help speed up new container creation.
Both [VM templating](../how-to/what-is-vm-templating-and-how-do-I-use-it.md) and VMCache help speed up new container creation.
When VM templating is enabled, new VMs are created by cloning from a pre-created template VM, and they will share the same initramfs, kernel and agent memory in readonly mode. So it saves a lot of memory if there are many Kata Containers running on the same host.
VMCache is not vulnerable to the [shared memory CVE](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-templating-and-how-do-I-use-it.md#what-are-the-cons) because the VMs do not share memory.
VMCache is not vulnerable to the [shared memory CVE](../how-to/what-is-vm-templating-and-how-do-I-use-it.md#what-are-the-cons) because the VMs do not share memory.
### How to enable VMCache

View File

@ -8,7 +8,7 @@ same initramfs, kernel and agent memory in readonly mode. It is very
much like a process fork done by the kernel but here we *fork* VMs.
### How is this different from VMCache
Both [VMCache](https://github.com/kata-containers/documentation/blob/master/how-to/what-is-vm-cache-and-how-do-I-use-it.md) and VM templating help speed up new container creation.
Both [VMCache](../how-to/what-is-vm-cache-and-how-do-I-use-it.md) and VM templating help speed up new container creation.
When VMCache is enabled, new VMs are created by the VMCache server, so it is not vulnerable to the shared memory CVE because the VMs do not share memory.
VM templating saves a lot of memory if there are many Kata Containers running on the same host.
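For reference, a sketch of turning on templating in the runtime `configuration.toml` (option name as used by Kata 1.x; verify against your installed file):

```toml
[factory]
# Create new VMs as clones of a pre-created template VM
enable_template = true
```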

View File

@ -18,7 +18,7 @@ in a system configured to run Kata Containers.
## Prerequisites
Kata Containers requires nested virtualization or bare metal.
See the
[hardware requirements](https://github.com/kata-containers/runtime/blob/master/README.md#hardware-requirements)
[hardware requirements](../../src/runtime/README.md#hardware-requirements)
to see if your system is capable of running Kata Containers.
## Packaged installation methods
@ -78,7 +78,7 @@ Manual installation instructions are available for [these distributions](#suppor
3. Install a supported container manager.
4. Configure the container manager to use `kata-runtime` as the default OCI runtime. Or, for Kata Containers 1.5.0 or above, configure the
`io.containerd.kata.v2` to be the runtime shim (see [containerd runtime v2 (shim API)](https://github.com/containerd/containerd/tree/master/runtime/v2)
and [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)).
and [How to use Kata Containers and CRI (containerd plugin) with Kubernetes](../how-to/how-to-use-k8s-with-cri-containerd-and-kata.md)).
> **Notes on upgrading**:
> - If you are installing Kata Containers on a system that already has Clear Containers or `runv` installed,
@ -87,7 +87,7 @@ Manual installation instructions are available for [these distributions](#suppor
> **Notes on releases**:
> - [This download server](http://download.opensuse.org/repositories/home:/katacontainers:/releases:/)
> hosts the Kata Containers packages built by OBS for all the supported architectures.
> Packages are available for the latest and stable releases (more info [here](https://github.com/kata-containers/documentation/blob/master/Stable-Branch-Strategy.md)).
> Packages are available for the latest and stable releases (more info [here](../Stable-Branch-Strategy.md)).
>
> - The following guides apply to the latest Kata Containers release
> (a.k.a. `master` release).
@ -124,4 +124,4 @@ versions. This is not recommended for normal users.
## Further information
* The [upgrading document](../Upgrading.md).
* The [developer guide](../Developer-Guide.md).
* The [runtime documentation](https://github.com/kata-containers/runtime/blob/master/README.md).
* The [runtime documentation](../../src/runtime/README.md).

View File

@ -137,4 +137,4 @@ Go onto the next step.
The process for installing Kata itself on bare metal is identical to that of a virtualization-enabled VM.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).

View File

@ -15,4 +15,4 @@ Create a new virtual machine with:
## Set up with distribution specific quick start
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
Follow distribution specific [install guides](../install/README.md#supported-distributions).

View File

@ -14,4 +14,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/centos-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -19,4 +19,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/debian-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -24,7 +24,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -37,7 +37,7 @@ a. `sysVinit`
```
b. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -26,7 +26,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -23,7 +23,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. Specify the runtime options in `/etc/sysconfig/docker` (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ DOCKER_SYSCONFIG=/etc/sysconfig/docker

View File

@ -25,7 +25,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -23,7 +23,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -28,7 +28,7 @@
2. Configure Docker to use Kata Containers by default with **ONE** of the following methods:
1. systemd (this is the default and is applied automatically if you select the
[automatic installation](https://github.com/kata-containers/documentation/tree/master/install#automatic-installation) option)
[automatic installation](../../install/README.md#automatic-installation) option)
```bash
$ sudo mkdir -p /etc/systemd/system/docker.service.d/

View File

@ -14,4 +14,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/fedora-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -101,7 +101,7 @@ If this fails, ensure you created your instance from the correct image and that
The process for installing Kata itself on a virtualization-enabled VM is identical to that for bare metal.
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](https://github.com/kata-containers/documentation/blob/master/install/README.md).
For detailed information to install Kata on your distribution of choice, see the [Kata Containers installation user guides](../install/README.md).
## Create a Kata-enabled Image

View File

@ -54,7 +54,7 @@ to enable nested virtualization can be found on the
[KVM Nested Guests page](https://www.linux-kvm.org/page/Nested_Guests)
Alternatively, and for other architectures, the Kata Containers built-in
[`kata-check`](https://github.com/kata-containers/runtime#hardware-requirements)
[`kata-check`](../../src/runtime/README.md#hardware-requirements)
command can be used *inside Minikube* once Kata has been installed, to check for compatibility.
## Setting up Minikube

View File

@ -13,4 +13,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/rhel-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -12,4 +12,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/sles-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -14,4 +14,4 @@
2. Decide which container manager to use and select the corresponding link that follows:
- [Docker](docker/ubuntu-docker-install.md)
- [Kubernetes](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#run-kata-containers-with-kubernetes)
- [Kubernetes](../Developer-Guide.md#run-kata-containers-with-kubernetes)

View File

@ -13,4 +13,4 @@ with v2). The recommended machine type for container workloads is `v2-highcpu`
## Set up with distribution specific quick start
Follow distribution specific [install guides](https://github.com/kata-containers/documentation/tree/master/install#supported-distributions).
Follow distribution specific [install guides](../install/README.md#supported-distributions).

View File

@ -55,7 +55,7 @@ line.
## Install and configure Kata Containers
To use this feature, you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
Follow the [Kata Containers setup instructions](../install/README.md)
to install the latest version of Kata.
In order to pass a GPU to a Kata Container, you need to enable the `hotplug_vfio_on_root_bus`
@ -82,12 +82,12 @@ CONFIG_DRM_I915_USERPTR=y
```
Build the Kata Containers kernel with the previous config options, using the instructions
described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
For further details on building and installing guest kernels, see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
described in [Building Kata Containers kernel](../../tools/packaging/kernel).
For further details on building and installing guest kernels, see [the developer guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Intel GPU:
```
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -g intel -f setup

View File

@ -72,7 +72,7 @@ Your host kernel needs to be booted with `intel_iommu=on` on the kernel command
## Install and configure Kata Containers
To use non-large BAR devices (for example, Nvidia Tesla T4), you need Kata version 1.3.0 or above.
Follow the [Kata Containers setup instructions](https://github.com/kata-containers/documentation/blob/master/install/README.md)
Follow the [Kata Containers setup instructions](../install/README.md)
to install the latest version of Kata.
The following configuration in the Kata `configuration.toml` file as shown below can work:
@ -131,13 +131,13 @@ It is worth checking that it is not enabled in your kernel configuration to prev
Build the Kata Containers kernel with the previous config options,
using the instructions described in [Building Kata Containers kernel](https://github.com/kata-containers/packaging/tree/master/kernel).
using the instructions described in [Building Kata Containers kernel](../../tools/packaging/kernel).
For further details on building and installing guest kernels,
see [the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#install-guest-kernel-images).
see [the developer guide](../Developer-Guide.md#install-guest-kernel-images).
There is an easy way to build a guest kernel that supports Nvidia GPU:
```
## Build guest kernel with https://github.com/kata-containers/packaging/tree/master/kernel
## Build guest kernel with ../../tools/packaging/kernel
# Prepare (download guest kernel source, generate .config)
$ ./build-kernel.sh -v 4.19.86 -g nvidia -f setup

View File

@ -194,9 +194,9 @@ $ ls -la /sys/bus/pci/drivers/vfio-pci
This example automatically uses the latest kernel supported by Kata. It
follows the instructions from the
[packaging kernel repository](https://github.com/kata-containers/packaging/tree/master/kernel)
[packaging kernel repository](../../tools/packaging/kernel)
and uses the latest Kata kernel
[config](https://github.com/kata-containers/packaging/tree/master/kernel/configs).
[config](../../tools/packaging/kernel/configs).
There are some patches that must be installed as well, which the
`build-kernel.sh` script should automatically apply. If you are using a
different kernel version, then you might need to manually apply them. Since

View File

@ -184,7 +184,7 @@ used for vhost-user devices.
The base directory for vhost-user devices is a configurable value,
with the default being `/var/run/kata-containers/vhost-user`. It can be
configured by parameter `vhost_user_store_path` in [Kata TOML configuration file](https://github.com/kata-containers/runtime/blob/master/README.md#configuration).
configured by parameter `vhost_user_store_path` in [Kata TOML configuration file](../../src/runtime/README.md#configuration).
Currently, the vhost-user storage device is not enabled by default, so
the user should enable it explicitly inside the Kata TOML configuration file.
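
For illustration, a minimal Rust sketch (hypothetical helper names, not the runtime's actual code) of how such a configurable store path with a documented default can be resolved:

```rust
// Illustrative only: fall back to the documented default when the
// `vhost_user_store_path` TOML option is unset or empty.
const DEFAULT_VHOST_USER_STORE_PATH: &str = "/var/run/kata-containers/vhost-user";

fn vhost_user_store_path(configured: Option<&str>) -> &str {
    match configured {
        Some(p) if !p.is_empty() => p,
        _ => DEFAULT_VHOST_USER_STORE_PATH,
    }
}

fn main() {
    assert_eq!(vhost_user_store_path(None), DEFAULT_VHOST_USER_STORE_PATH);
    assert_eq!(vhost_user_store_path(Some("/run/vus")), "/run/vus");
}
```
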

View File

@ -10,7 +10,7 @@ Currently, the instructions are based on the following links:
- https://docs.openstack.org/zun/latest/admin/clear-containers.html
- https://github.com/kata-containers/documentation/blob/master/install/ubuntu-installation-guide.md
- ../install/ubuntu-installation-guide.md
## Install Git to use with DevStack
@ -54,7 +54,7 @@ $ zun delete test
## Install Kata Containers
Follow [these instructions](https://github.com/kata-containers/documentation/blob/master/install/ubuntu-installation-guide.md)
Follow [these instructions](../install/ubuntu-installation-guide.md)
to install the Kata Containers components.
## Update Docker with new Kata Containers runtime

src/agent/Cargo.lock (generated)
View File

@ -30,6 +30,18 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b585a98a234c46fc563103e9278c9391fde1f4e6850334da895d27edb9580f62"
[[package]]
name = "arrayref"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
[[package]]
name = "arrayvec"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
[[package]]
name = "autocfg"
version = "1.0.0"
@ -49,12 +61,29 @@ dependencies = [
"rustc-demangle",
]
[[package]]
name = "base64"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "blake2b_simd"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a"
dependencies = [
"arrayref",
"arrayvec",
"constant_time_eq",
]
[[package]]
name = "byteorder"
version = "1.3.4"
@ -95,6 +124,12 @@ dependencies = [
"time",
]
[[package]]
name = "constant_time_eq"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "crc32fast"
version = "1.2.0"
@ -125,6 +160,26 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "dirs"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "142995ed02755914747cc6ca76fc7e4583cd18578746716d0508ea6ed558b9ff"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-sys"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e93d7f5705de3e49895a2b5e0b8855a1c27f080192ae9c32a6432d50741a57a"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "errno"
version = "0.2.5"
@ -531,6 +586,17 @@ version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
[[package]]
name = "redox_users"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431"
dependencies = [
"getrandom",
"redox_syscall",
"rust-argon2",
]
[[package]]
name = "regex"
version = "1.3.7"
@ -564,6 +630,18 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cabe4fa914dec5870285fa7f71f602645da47c486e68486d2b4ceb4a343e90ac"
[[package]]
name = "rust-argon2"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017"
dependencies = [
"base64",
"blake2b_simd",
"constant_time_eq",
"crossbeam-utils",
]
[[package]]
name = "rustc-demangle"
version = "0.1.16"
@ -575,6 +653,7 @@ name = "rustjail"
version = "0.1.0"
dependencies = [
"caps",
"dirs",
"error-chain",
"lazy_static",
"libc",

View File

@ -38,10 +38,6 @@ After that, we drafted the initial code here, and any contributions are welcome.
## Getting Started
### Dependencies
The `rust-agent` depends on [`grpc-rs`](https://github.com/pingcap/grpc-rs) by PingCAP. However, the upstream `grpc-rs` and [gRPC](https://github.com/grpc/grpc) need some changes to be used here, which may take some time to land. Therefore, we created a temporary fork of `grpc-rs` here:
- https://github.com/alipay/grpc-rs/tree/rust_agent
### Build from Source
The rust-agent needs to be built with Rust nightly and statically linked with musl.
```bash
@ -52,8 +48,8 @@ cargo build --target x86_64-unknown-linux-musl --release
```
## Run Kata CI with rust-agent
* Firstly, install kata as noted by ["how to install Kata"](https://github.com/kata-containers/documentation/blob/master/install/README.md)
* Secondly, build your own kata initrd/image following the steps in ["how to build your own initrd/image"](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#create-and-install-rootfs-and-initrd-image).
* Firstly, install kata as noted by ["how to install Kata"](../../docs/install/README.md)
* Secondly, build your own kata initrd/image following the steps in ["how to build your own initrd/image"](../../docs/Developer-Guide.md#create-and-install-rootfs-and-initrd-image).
Note: Please use your rust agent instead of the go agent when building your initrd/image.
* Clone the kata ci test cases from: https://github.com/kata-containers/tests.git, and then run the cri test with:

View File

@ -23,3 +23,4 @@ slog-scope = "4.1.2"
scan_fmt = "0.2"
regex = "1.1"
path-absolutize = "1.2.0"
dirs = "3.0.1"

View File

@ -3,6 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use dirs;
use lazy_static;
use oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec};
use serde_json;
@ -37,6 +38,7 @@ use protocols::agent::StatsContainerResponse;
use nix::errno::Errno;
use nix::fcntl::{self, OFlag};
use nix::fcntl::{FcntlArg, FdFlag};
use nix::mount::MntFlags;
use nix::pty;
use nix::sched::{self, CloneFlags};
use nix::sys::signal::{self, Signal};
@ -66,6 +68,7 @@ const CRFD_FD: &str = "CRFD_FD";
const CWFD_FD: &str = "CWFD_FD";
const CLOG_FD: &str = "CLOG_FD";
const FIFO_FD: &str = "FIFO_FD";
const HOME_ENV_KEY: &str = "HOME";
#[derive(PartialEq, Clone, Copy)]
pub enum Status {
@ -150,7 +153,7 @@ lazy_static! {
r#type: "c".to_string(),
major: 1,
minor: 3,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
@ -159,7 +162,7 @@ lazy_static! {
r#type: "c".to_string(),
major: 1,
minor: 5,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
@ -168,7 +171,7 @@ lazy_static! {
r#type: String::from("c"),
major: 1,
minor: 7,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
@ -177,7 +180,7 @@ lazy_static! {
r#type: "c".to_string(),
major: 5,
minor: 0,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
@ -186,7 +189,7 @@ lazy_static! {
r#type: "c".to_string(),
major: 1,
minor: 9,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
@ -195,7 +198,7 @@ lazy_static! {
r#type: "c".to_string(),
major: 1,
minor: 8,
file_mode: Some(0o066),
file_mode: Some(0o666),
uid: Some(0xffffffff),
gid: Some(0xffffffff),
});
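
The mode fix in the hunks above is easy to misread, so here is a one-line illustration (octal permission digits are owner, group, other; `0o066` left the owner of `/dev/null` and friends with no access at all, while `0o666` grants read/write to everyone):

```rust
fn main() {
    assert_eq!(0o066 & 0o600, 0); // owner had neither read nor write
    assert_eq!(0o666 & 0o600, 0o600); // owner can now read and write
}
```
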
@ -605,6 +608,13 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
env::set_var(v[0], v[1]);
}
// set the "HOME" env getting from "/etc/passwd"
if env::var_os(HOME_ENV_KEY).is_none() {
if let Some(home_dir) = dirs::home_dir() {
env::set_var(HOME_ENV_KEY, home_dir);
}
}
let exec_file = Path::new(&args[0]);
log_child!(cfd_log, "process command: {:?}", &args);
if !exec_file.exists() {
@ -963,6 +973,10 @@ impl BaseContainer for LinuxContainer {
}
self.status.transition(Status::STOPPED);
nix::mount::umount2(
spec.root.as_ref().unwrap().path.as_str(),
MntFlags::MNT_DETACH,
)?;
fs::remove_dir_all(&self.root)?;
Ok(())
}
@ -1495,13 +1509,22 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
// state.push_str("\n");
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
match unistd::fork()? {
ForkResult::Parent { child: _ch } => {
let buf = read_sync(rfd)?;
let buf_array: [u8; 4] = [buf[0], buf[1], buf[2], buf[3]];
let status: i32 = i32::from_be_bytes(buf_array);
defer!({
let _ = unistd::close(rfd);
let _ = unistd::close(wfd);
});
info!(logger, "hook child: {}", _ch);
match unistd::fork()? {
ForkResult::Parent { child } => {
let buf = read_sync(rfd)?;
let status = if buf.len() == 4 {
let buf_array: [u8; 4] = [buf[0], buf[1], buf[2], buf[3]];
i32::from_be_bytes(buf_array)
} else {
-libc::EPIPE
};
info!(logger, "hook child: {} status: {}", child, status);
// let _ = wait::waitpid(_ch,
// Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL));
@ -1630,7 +1653,11 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
};
handle.join().unwrap();
let _ = write_sync(wfd, status, "");
let _ = write_sync(
wfd,
SYNC_DATA,
std::str::from_utf8(&status.to_be_bytes()).unwrap_or_default(),
);
// let _ = wait::waitpid(Pid::from_raw(pid),
// Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL));
std::process::exit(0);
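
To make the new status hand-off concrete, here is a standalone sketch (std-only, hypothetical helper names) of the encoding the hook child and parent agree on: the child serializes an `i32` as four big-endian bytes, and the parent treats anything shorter as a broken pipe and substitutes `-EPIPE`:

```rust
fn encode_status(status: i32) -> [u8; 4] {
    status.to_be_bytes()
}

fn decode_status(buf: &[u8]) -> i32 {
    const EPIPE: i32 = 32; // value of libc::EPIPE on Linux
    if buf.len() == 4 {
        i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]])
    } else {
        -EPIPE
    }
}

fn main() {
    let wire = encode_status(7);
    assert_eq!(decode_status(&wire), 7);
    assert_eq!(decode_status(&wire[..2]), -32); // short read maps to -EPIPE
}
```
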

View File

@ -32,6 +32,7 @@ use crate::log_child;
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
#[derive(std::fmt::Debug)]
pub struct Info {
id: i32,
parent: i32,
@ -51,9 +52,12 @@ const MOUNTINFOFORMAT: &'static str = "{d} {d} {d}:{d} {} {} {} {}";
lazy_static! {
static ref PROPAGATION: HashMap<&'static str, MsFlags> = {
let mut m = HashMap::new();
m.insert("shared", MsFlags::MS_SHARED | MsFlags::MS_REC);
m.insert("private", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
m.insert("slave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
m.insert("shared", MsFlags::MS_SHARED);
m.insert("rshared", MsFlags::MS_SHARED | MsFlags::MS_REC);
m.insert("private", MsFlags::MS_PRIVATE);
m.insert("rprivate", MsFlags::MS_PRIVATE | MsFlags::MS_REC);
m.insert("slave", MsFlags::MS_SLAVE);
m.insert("rslave", MsFlags::MS_SLAVE | MsFlags::MS_REC);
m
};
static ref OPTIONS: HashMap<&'static str, (bool, MsFlags)> = {
@ -121,6 +125,9 @@ pub fn init_rootfs(
let rootfs = root.to_str().unwrap();
mount::mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
rootfs_parent_mount_private(rootfs)?;
mount::mount(
Some(rootfs),
rootfs,
@ -142,6 +149,18 @@ pub fn init_rootfs(
}
mount_from(cfd_log, &m, &rootfs, flags, &data, "")?;
// bind mount won't change mount options, we need to remount to make mount options
// effective.
// first check that we have non-default options required before attempting a
// remount
if m.r#type == "bind" {
for o in &m.options {
if let Some(fl) = PROPAGATION.get(o.as_str()) {
let dest = format!("{}{}", &rootfs, &m.destination);
mount::mount(None::<&str>, dest.as_str(), None::<&str>, *fl, None::<&str>)?;
}
}
}
}
}
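
For context, the corrected propagation table reduces to the standalone sketch below (plain constants mirroring `<linux/mount.h>` stand in for `nix::mount::MsFlags` so it runs by itself). The point of the change is that bare `shared`/`private`/`slave` no longer imply `MS_REC`; only the `r`-prefixed variants apply recursively:

```rust
use std::collections::HashMap;

const MS_REC: u64 = 1 << 14;
const MS_PRIVATE: u64 = 1 << 18;
const MS_SLAVE: u64 = 1 << 19;
const MS_SHARED: u64 = 1 << 20;

fn propagation_table() -> HashMap<&'static str, u64> {
    let mut m = HashMap::new();
    m.insert("shared", MS_SHARED);
    m.insert("rshared", MS_SHARED | MS_REC);
    m.insert("private", MS_PRIVATE);
    m.insert("rprivate", MS_PRIVATE | MS_REC);
    m.insert("slave", MS_SLAVE);
    m.insert("rslave", MS_SLAVE | MS_REC);
    m
}

fn main() {
    let t = propagation_table();
    assert_eq!(t["rslave"], t["slave"] | MS_REC); // only the r-variant recurses
}
```
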
@ -262,18 +281,71 @@ fn mount_cgroups(
Ok(())
}
pub fn pivot_rootfs<P: ?Sized + NixPath>(path: &P) -> Result<()> {
pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<()> {
let oldroot = fcntl::open("/", OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty())?;
defer!(unistd::close(oldroot).unwrap());
let newroot = fcntl::open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty())?;
defer!(unistd::close(newroot).unwrap());
unistd::pivot_root(path, path)?;
mount::umount2("/", MntFlags::MNT_DETACH)?;
// Change to the new root so that the pivot_root actually acts on it.
unistd::fchdir(newroot)?;
unistd::pivot_root(".", ".").chain_err(|| format!("failed to pivot_root on {:?}", path))?;
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
unistd::fchdir(oldroot)?;
// Make oldroot rslave to make sure our unmounts don't propagate to the
// host. We don't use rprivate because this is known to cause issues due
// to races where we still have a reference to a mount while a process in
the host namespace is trying to operate on something it thinks has no
// mounts (devicemapper in particular).
mount::mount(
Some("none"),
".",
Some(""),
MsFlags::MS_SLAVE | MsFlags::MS_REC,
Some(""),
)?;
// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
mount::umount2(".", MntFlags::MNT_DETACH).chain_err(|| "failed to do umount2")?;
// Switch back to our shiny new root.
unistd::chdir("/")?;
stat::umask(Mode::from_bits_truncate(0o022));
Ok(())
}
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
let mount_infos = parse_mount_table()?;
let mut max_len = 0;
let mut mount_point = String::from("");
let mut options = String::from("");
for i in mount_infos {
if path.starts_with(&i.mount_point) && i.mount_point.len() > max_len {
max_len = i.mount_point.len();
mount_point = i.mount_point;
options = i.optional;
}
}
if options.contains("shared:") {
mount::mount(
None::<&str>,
mount_point.as_str(),
None::<&str>,
MsFlags::MS_PRIVATE,
None::<&str>,
)?;
}
Ok(())
}
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
fn parse_mount_table() -> Result<Vec<Info>> {

View File

@ -130,10 +130,8 @@ fn create_extended_pipe(flags: OFlag, pipe_size: i32) -> Result<(RawFd, RawFd)>
#[cfg(test)]
mod tests {
use crate::process::create_extended_pipe;
use nix::fcntl::{fcntl, FcntlArg, OFlag};
use super::*;
use std::fs;
use std::os::unix::io::RawFd;
fn get_pipe_max_size() -> i32 {
fs::read_to_string("/proc/sys/fs/pipe-max-size")
@ -158,4 +156,29 @@ mod tests {
let actual_size = get_pipe_size(w);
assert_eq!(max_size, actual_size);
}
#[test]
fn test_process() {
let id = "abc123rgb";
let init = true;
let process = Process::new(
&Logger::root(slog::Discard, o!("source" => "unit-test")),
&OCIProcess::default(),
id,
init,
32,
);
let mut process = process.unwrap();
assert_eq!(process.exec_id, id);
assert_eq!(process.init, init);
// -1 by default
assert_eq!(process.pid, -1);
assert!(process.wait().is_err());
// signal to every process in the process
// group of the calling process.
process.pid = 0;
assert!(process.signal(Signal::SIGCONT).is_ok());
}
}

View File

@ -28,6 +28,7 @@ extern crate slog;
#[macro_use]
extern crate netlink;
use crate::netlink::{RtnlHandle, NETLINK_ROUTE};
use nix::fcntl::{self, OFlag};
use nix::sys::socket::{self, AddressFamily, SockAddr, SockFlag, SockType};
use nix::sys::wait::{self, WaitStatus};
@ -46,7 +47,7 @@ use std::os::unix::io::AsRawFd;
use std::path::Path;
use std::sync::mpsc::{self, Sender};
use std::sync::{Arc, Mutex, RwLock};
use std::{io, thread};
use std::{io, thread, thread::JoinHandle};
use unistd::Pid;
mod config;
@ -83,9 +84,7 @@ lazy_static! {
Arc::new(RwLock::new(config::agentConfig::new()));
}
use std::mem::MaybeUninit;
fn announce(logger: &Logger) {
fn announce(logger: &Logger, config: &agentConfig) {
let commit = match env::var("VERSION_COMMIT") {
Ok(s) => s,
Err(_) => String::from(""),
@ -99,11 +98,25 @@ fn announce(logger: &Logger) {
"agent-version" => version::AGENT_VERSION,
"api-version" => version::API_VERSION,
"config" => format!("{:?}", config),
);
}
fn main() -> Result<()> {
let args: Vec<String> = env::args().collect();
if args.len() == 2 && args[1] == "--version" {
println!(
"{} version {} (api version: {}, commit version: {}, type: rust)",
NAME,
version::AGENT_VERSION,
version::API_VERSION,
env::var("VERSION_COMMIT").unwrap_or("unknown".to_string())
);
exit(0);
}
if args.len() == 2 && args[1] == "init" {
rustjail::container::init_child();
exit(0);
@ -121,7 +134,8 @@ fn main() -> Result<()> {
let agentConfig = AGENT_CONFIG.clone();
if unistd::getpid() == Pid::from_raw(1) {
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
// since this logger would be dropped and its writer would
// be closed out of this code block.
@ -173,44 +187,57 @@ fn main() -> Result<()> {
// Recreate a logger with the log level get from "/proc/cmdline".
let logger = logging::create_logger(NAME, "agent", config.log_level, writer);
announce(&logger);
if args.len() == 2 && args[1] == "--version" {
// force logger to flush
drop(logger);
exit(0);
}
announce(&logger, &config);
// This "unused" variable is required as it enables the global (and crucially static) logger,
// which is required to satisfy the lifetime constraints of the auto-generated gRPC code.
let _guard = slog_scope::set_global_logger(logger.new(o!("subsystem" => "rpc")));
start_sandbox(&logger, &config, init_mode)?;
let _ = log_handle.join();
Ok(())
}
fn start_sandbox(logger: &Logger, config: &agentConfig, init_mode: bool) -> Result<()> {
let shells = SHELLS.clone();
let debug_console_vport = config.debug_console_vport as u32;
let shell_handle = if config.debug_console {
let mut shell_handle: Option<JoinHandle<()>> = None;
if config.debug_console {
let thread_logger = logger.clone();
thread::spawn(move || {
let shells = shells.lock().unwrap();
let result = setup_debug_console(shells.to_vec(), debug_console_vport);
if result.is_err() {
// Report error, but don't fail
warn!(thread_logger, "failed to setup debug console";
let builder = thread::Builder::new();
let handle = builder
.spawn(move || {
let shells = shells.lock().unwrap();
let result = setup_debug_console(shells.to_vec(), debug_console_vport);
if result.is_err() {
// Report error, but don't fail
warn!(thread_logger, "failed to setup debug console";
"error" => format!("{}", result.unwrap_err()));
}
})
} else {
unsafe { MaybeUninit::zeroed().assume_init() }
};
}
})
.map_err(|e| format!("{:?}", e))?;
shell_handle = Some(handle);
}
// Initialize unique sandbox structure.
let s = Sandbox::new(&logger).map_err(|e| {
let mut s = Sandbox::new(&logger).map_err(|e| {
error!(logger, "Failed to create sandbox with error: {:?}", e);
e
})?;
if init_mode {
let mut rtnl = RtnlHandle::new(NETLINK_ROUTE, 0).unwrap();
rtnl.handle_localhost()?;
s.rtnl = Some(rtnl);
}
let sandbox = Arc::new(Mutex::new(s));
setup_signal_handler(&logger, sandbox.clone()).unwrap();
@ -222,38 +249,16 @@ fn main() -> Result<()> {
//vsock:///dev/vsock, port
let mut server = rpc::start(sandbox.clone(), VSOCK_ADDR, VSOCK_PORT);
/*
let _ = fs::remove_file("/tmp/testagent");
let _ = fs::remove_dir_all("/run/agent");
let mut server = grpc::start(sandbox.clone(), "unix:///tmp/testagent", 1);
*/
let handle = thread::spawn(move || {
// info!("Press ENTER to exit...");
// let _ = io::stdin().read(&mut [0]).unwrap();
// thread::sleep(Duration::from_secs(3000));
let _ = rx.recv().unwrap();
});
// receive something from destroy_sandbox here?
// or in the thread above? It depends on whether grpc requests
// are run in another thread or in the main thread?
// let _ = rx.wait();
let _ = server.start().unwrap();
handle.join().unwrap();
let _ = rx.recv().map_err(|e| format!("{:?}", e));
server.shutdown();
let _ = log_handle.join();
if config.debug_console {
shell_handle.join().unwrap();
if let Some(handle) = shell_handle {
handle.join().map_err(|e| format!("{:?}", e))?;
}
let _ = fs::remove_file("/tmp/testagent");
Ok(())
}
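
The shutdown pattern adopted above, reduced to a minimal illustrative sketch: an optional `JoinHandle`, populated only when the feature is enabled, replaces the removed `MaybeUninit::zeroed().assume_init()` trick, which fabricated an invalid handle and was undefined behavior:

```rust
use std::thread::{self, JoinHandle};

fn run(debug_console: bool) -> Result<(), String> {
    let mut shell_handle: Option<JoinHandle<()>> = None;
    if debug_console {
        let handle = thread::Builder::new()
            .spawn(|| { /* serve the debug console here */ })
            .map_err(|e| format!("{:?}", e))?;
        shell_handle = Some(handle);
    }
    // ... main agent work would happen here ...
    if let Some(handle) = shell_handle {
        handle.join().map_err(|e| format!("{:?}", e))?;
    }
    Ok(())
}

fn main() {
    run(true).unwrap();
}
```
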

View File

@ -3,10 +3,11 @@
// SPDX-License-Identifier: Apache-2.0
//
use std::path::Path;
use std::sync::{Arc, Mutex};
use ttrpc;
use oci::{LinuxNamespace, Spec};
use oci::{LinuxNamespace, Root, Spec};
use protobuf::{RepeatedField, SingularPtrField};
use protocols::agent::{
AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, ListProcessesResponse,
@ -25,6 +26,7 @@ use rustjail::process::Process;
use rustjail::specconv::CreateOpts;
use nix::errno::Errno;
use nix::mount::MsFlags;
use nix::sys::signal::Signal;
use nix::sys::stat;
use nix::unistd::{self, Pid};
@ -33,7 +35,7 @@ use rustjail::process::ProcessOperations;
use crate::device::{add_devices, rescan_pci_bus, update_device_cgroup};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, remove_mounts, STORAGEHANDLERLIST};
use crate::mount::{add_storages, remove_mounts, BareMount, STORAGEHANDLERLIST};
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
use crate::random;
use crate::sandbox::Sandbox;
@ -127,9 +129,12 @@ impl agentService {
// Add the root partition to the device cgroup to prevent access
update_device_cgroup(&mut oci)?;
// Append guest hooks
append_guest_hooks(&s, &mut oci);
// write spec to bundle path, hooks might
// read ocispec
let olddir = setup_bundle(&oci)?;
let olddir = setup_bundle(&cid, &mut oci)?;
// restore the cwd for kata-agent process.
defer!(unistd::chdir(&olddir).unwrap());
@ -1082,6 +1087,15 @@ impl protocols::agent_ttrpc::AgentService for agentService {
s.hostname = req.hostname.clone();
s.running = true;
if !req.guest_hook_path.is_empty() {
if let Err(e) = s.add_hooks(&req.guest_hook_path) {
error!(
sl!(),
"add guest hook {} failed: {:?}", req.guest_hook_path, e
);
}
}
if req.sandbox_id.len() > 0 {
s.id = req.sandbox_id.clone();
}
@ -1521,6 +1535,18 @@ fn update_container_namespaces(
Ok(())
}
fn append_guest_hooks(s: &Sandbox, oci: &mut Spec) {
if s.hooks.is_none() {
return;
}
let guest_hooks = s.hooks.as_ref().unwrap();
let mut hooks = oci.hooks.take().unwrap_or_default();
hooks.prestart.append(&mut guest_hooks.prestart.clone());
hooks.poststart.append(&mut guest_hooks.poststart.clone());
hooks.poststop.append(&mut guest_hooks.poststop.clone());
oci.hooks = Some(hooks);
}
// Check is the container process installed the
// handler for specific signal.
fn is_signal_handled(pid: pid_t, signum: u32) -> bool {
@ -1644,26 +1670,46 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
Ok(())
}
fn setup_bundle(spec: &Spec) -> Result<PathBuf> {
// Setup container bundle under CONTAINER_BASE, which is cleaned up
// before removing a container.
// - bundle path is /<CONTAINER_BASE>/<cid>/
// - config.json at /<CONTAINER_BASE>/<cid>/config.json
// - container rootfs bind mounted at /<CONTAINER_BASE>/<cid>/rootfs
// - modify container spec root to point to /<CONTAINER_BASE>/<cid>/rootfs
fn setup_bundle(cid: &str, spec: &mut Spec) -> Result<PathBuf> {
if spec.root.is_none() {
return Err(nix::Error::Sys(Errno::EINVAL).into());
}
let root = spec.root.as_ref().unwrap().path.as_str();
let spec_root = spec.root.as_ref().unwrap();
let rootfs = fs::canonicalize(root)?;
let bundle_path = rootfs.parent().unwrap().to_str().unwrap();
let bundle_path = Path::new(CONTAINER_BASE).join(cid);
let config_path = bundle_path.clone().join("config.json");
let rootfs_path = bundle_path.clone().join("rootfs");
let config = format!("{}/{}", bundle_path, "config.json");
fs::create_dir_all(&rootfs_path)?;
BareMount::new(
&spec_root.path,
rootfs_path.to_str().unwrap(),
"bind",
MsFlags::MS_BIND,
"",
&sl!(),
)
.mount()?;
spec.root = Some(Root {
path: rootfs_path.to_str().unwrap().to_owned(),
readonly: spec_root.readonly,
});
info!(
sl!(),
"{:?}",
spec.process.as_ref().unwrap().console_size.as_ref()
);
let _ = spec.save(config.as_str());
let _ = spec.save(config_path.to_str().unwrap());
let olddir = unistd::getcwd().chain_err(|| "cannot getcwd")?;
unistd::chdir(bundle_path)?;
unistd::chdir(bundle_path.to_str().unwrap())?;
Ok(olddir)
}
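
The bundle layout documented in the comment above, as a small illustrative sketch (the `/run/kata-containers` value for `CONTAINER_BASE` is an assumption for demonstration, not necessarily the agent's constant):

```rust
use std::path::{Path, PathBuf};

const CONTAINER_BASE: &str = "/run/kata-containers"; // assumed for illustration

fn bundle_paths(cid: &str) -> (PathBuf, PathBuf, PathBuf) {
    let bundle = Path::new(CONTAINER_BASE).join(cid); // /<CONTAINER_BASE>/<cid>/
    let config = bundle.join("config.json"); // spec written here
    let rootfs = bundle.join("rootfs"); // container rootfs bind-mounted here
    (bundle, config, rootfs)
}

fn main() {
    let (bundle, config, rootfs) = bundle_paths("abc123");
    println!("{:?} {:?} {:?}", bundle, config, rootfs);
}
```
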
@ -1713,6 +1759,7 @@ fn load_kernel_module(module: &protocols::agent::KernelModule) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use oci::{Hook, Hooks};
#[test]
fn test_load_kernel_module() {
@ -1734,4 +1781,22 @@ mod tests {
let result = load_kernel_module(&m);
assert!(result.is_ok(), "load module should success");
}
#[test]
fn test_append_guest_hooks() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
s.hooks = Some(Hooks {
prestart: vec![Hook {
path: "foo".to_string(),
..Default::default()
}],
..Default::default()
});
let mut oci = Spec {
..Default::default()
};
append_guest_hooks(&s, &mut oci);
assert_eq!(s.hooks, oci.hooks);
}
}

View File

@ -11,7 +11,7 @@ use crate::namespace::NSTYPEPID;
use crate::network::Network;
use libc::pid_t;
use netlink::{RtnlHandle, NETLINK_ROUTE};
use oci::LinuxNamespace;
use oci::{Hook, Hooks};
use protocols::agent::OnlineCPUMemRequest;
use regex::Regex;
use rustjail::cgroups;
@ -22,6 +22,8 @@ use rustjail::process::Process;
use slog::Logger;
use std::collections::HashMap;
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::sync::mpsc::Sender;
#[derive(Debug)]
@ -42,6 +44,7 @@ pub struct Sandbox {
pub no_pivot_root: bool,
pub sender: Option<Sender<i32>>,
pub rtnl: Option<RtnlHandle>,
pub hooks: Option<Hooks>,
}
impl Sandbox {
@ -66,6 +69,7 @@ impl Sandbox {
no_pivot_root: fs_type.eq(TYPEROOTFS),
sender: None,
rtnl: Some(RtnlHandle::new(NETLINK_ROUTE, 0).unwrap()),
hooks: None,
})
}
@ -261,6 +265,57 @@ impl Sandbox {
Ok(())
}
pub fn add_hooks(&mut self, dir: &str) -> Result<()> {
let mut hooks = Hooks::default();
if let Ok(hook) = self.find_hooks(dir, "prestart") {
hooks.prestart = hook;
}
if let Ok(hook) = self.find_hooks(dir, "poststart") {
hooks.poststart = hook;
}
if let Ok(hook) = self.find_hooks(dir, "poststop") {
hooks.poststop = hook;
}
self.hooks = Some(hooks);
Ok(())
}
fn find_hooks(&self, hook_path: &str, hook_type: &str) -> Result<Vec<Hook>> {
let mut hooks = Vec::new();
for entry in fs::read_dir(Path::new(hook_path).join(hook_type))? {
let entry = entry?;
// Reject non-files, symlinks, and non-executable files
if !entry.file_type()?.is_file()
|| entry.file_type()?.is_symlink()
|| entry.metadata()?.permissions().mode() & 0o777 & 0o111 == 0
{
continue;
}
let name = entry.file_name();
let hook = Hook {
path: Path::new(hook_path)
.join(hook_type)
.join(&name)
.to_str()
.unwrap()
.to_owned(),
args: vec![name.to_str().unwrap().to_owned(), hook_type.to_owned()],
..Default::default()
};
info!(
self.logger,
"found {} hook {:?} mode {:o}",
hook_type,
hook,
entry.metadata()?.permissions().mode()
);
hooks.push(hook);
}
Ok(hooks)
}
}
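
The hook filter in `find_hooks` boils down to a single mode test; a tiny sketch for clarity (a candidate is accepted only if at least one execute bit is set):

```rust
fn is_executable(mode: u32) -> bool {
    mode & 0o111 != 0 // any of the owner/group/other execute bits
}

fn main() {
    assert!(is_executable(0o755));
    assert!(is_executable(0o100)); // owner-execute alone is enough
    assert!(!is_executable(0o644));
}
```
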
fn online_resources(logger: &Logger, path: &str, pattern: &str, num: i32) -> Result<i32> {
@ -315,6 +370,8 @@ mod tests {
use rustjail::container::LinuxContainer;
use rustjail::specconv::CreateOpts;
use slog::Logger;
use std::fs::{self, File};
use std::os::unix::fs::PermissionsExt;
use tempfile::Builder;
fn bind_mount(src: &str, dst: &str, logger: &Logger) -> Result<(), rustjail::errors::Error> {
@ -596,4 +653,26 @@ mod tests {
let ns_path = format!("/proc/{}/ns/pid", test_pid);
assert_eq!(s.sandbox_pidns.unwrap().path, ns_path);
}
#[test]
fn add_guest_hooks() {
let logger = slog::Logger::root(slog::Discard, o!());
let mut s = Sandbox::new(&logger).unwrap();
let tmpdir = Builder::new().tempdir().unwrap();
let tmpdir_path = tmpdir.path().to_str().unwrap();
assert!(fs::create_dir_all(tmpdir.path().join("prestart")).is_ok());
assert!(fs::create_dir_all(tmpdir.path().join("poststop")).is_ok());
let file = File::create(tmpdir.path().join("prestart").join("prestart.sh")).unwrap();
let mut perm = file.metadata().unwrap().permissions();
perm.set_mode(0o777);
assert!(file.set_permissions(perm).is_ok());
assert!(File::create(tmpdir.path().join("poststop").join("poststop.sh")).is_ok());
assert!(s.add_hooks(tmpdir_path).is_ok());
assert!(s.hooks.is_some());
assert!(s.hooks.as_ref().unwrap().prestart.len() == 1);
assert!(s.hooks.as_ref().unwrap().poststart.is_empty());
assert!(s.hooks.as_ref().unwrap().poststop.is_empty());
}
}

View File

@ -88,11 +88,11 @@ available for various operating systems.
## Quick start for developers
See the
[developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md).
[developer guide](../../docs/Developer-Guide.md).
## Architecture overview
See the [architecture overview](https://github.com/kata-containers/documentation/blob/master/design/architecture.md)
See the [architecture overview](../../docs/design/architecture.md)
for details on the Kata Containers design.
## Configuration
@ -174,12 +174,12 @@ $ sudo journalctl -t kata
## Debugging
See the
[debugging section of the developer guide](https://github.com/kata-containers/documentation/blob/master/Developer-Guide.md#troubleshoot-kata-containers).
[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers).
## Limitations
See the
[limitations file](https://github.com/kata-containers/documentation/blob/master/Limitations.md)
[limitations file](../../docs/Limitations.md)
for further details.
## Community
@ -195,7 +195,7 @@ See [how to reach the community](https://github.com/kata-containers/community/bl
See the
[project table of contents](https://github.com/kata-containers/kata-containers)
and the
[documentation repository](https://github.com/kata-containers/documentation).
[documentation repository](../../docs).
## Additional packages

View File

@ -16,3 +16,6 @@ QEMUCMD := qemu-system-aarch64
FCCMD := firecracker
# Firecracker's jailer binary name
FCJAILERCMD := jailer
# cloud-hypervisor binary name
CLHCMD := cloud-hypervisor

View File

@ -101,24 +101,6 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_ACRN@"
# but it will not abort container execution.
#guest_hook_path = "/usr/share/oci/hooks"
[shim.@PROJECT_TYPE@]
path = "@SHIMPATH@"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
#enable_debug = true
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)

View File

@ -99,25 +99,6 @@ block_device_driver = "virtio-blk"
# Default false
#enable_debug = true
[shim.@PROJECT_TYPE@]
path = "@SHIMPATH@"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
#enable_debug = true
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)

View File

@ -217,24 +217,6 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_FC@"
# Default false
#enable_template = true
[shim.@PROJECT_TYPE@]
path = "@SHIMPATH@"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
#enable_debug = true
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)

View File

@ -309,24 +309,6 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Default /var/run/kata-containers/cache.sock
#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
[shim.@PROJECT_TYPE@]
path = "@SHIMPATH@"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
#enable_debug = true
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)

View File

@ -13,7 +13,6 @@
[hypervisor.qemu]
path = "@QEMUPATH@"
kernel = "@KERNELPATH@"
initrd = "@INITRDPATH@"
image = "@IMAGEPATH@"
machine_type = "@MACHINETYPE@"
@ -333,24 +332,6 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Default /var/run/kata-containers/cache.sock
#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
[shim.@PROJECT_TYPE@]
path = "@SHIMPATH@"
# If enabled, shim messages will be sent to the system log
# (default: disabled)
#enable_debug = true
# If enabled, the shim will create opentracing.io traces and spans.
# (See https://www.jaegertracing.io/docs/getting-started).
#
# Note: By default, the shim runs in a separate network namespace. Therefore,
# to allow it to send trace details to the Jaeger agent running on the host,
# it is necessary to set 'disable_new_netns=true' so that it runs in the host
# network namespace.
#
# (default: disabled)
#enable_tracing = true
[agent.@PROJECT_TYPE@]
# If enabled, make the agent display debug-level messages.
# (default: disabled)

View File

@ -9,6 +9,7 @@ import (
"encoding/json"
"errors"
"os"
"runtime"
"strings"
"github.com/BurntSushi/toml"
@ -18,6 +19,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/procfs"
"github.com/urfave/cli"
)
@ -25,7 +27,7 @@ import (
//
// XXX: Increment for every change to the output format
// (meaning any change to the EnvInfo type).
const formatVersion = "1.0.24"
const formatVersion = "1.0.25"
// MetaInfo stores information on the format of the output itself
type MetaInfo struct {
@ -53,6 +55,14 @@ type ImageInfo struct {
type CPUInfo struct {
Vendor string
Model string
CPUs int
}
// MemoryInfo stores host memory details
type MemoryInfo struct {
Total uint64
Free uint64
Available uint64
}
// RuntimeConfigInfo stores runtime config details.
@ -123,6 +133,7 @@ type HostInfo struct {
Architecture string
Distro DistroInfo
CPU CPUInfo
Memory MemoryInfo
VMContainerCapable bool
SupportVSocks bool
}
@ -222,15 +233,19 @@ func getHostInfo() (HostInfo, error) {
hostCPU := CPUInfo{
Vendor: cpuVendor,
Model: cpuModel,
CPUs: runtime.NumCPU(),
}
supportVSocks, _ := vcUtils.SupportsVsocks()
memoryInfo := getMemoryInfo()
host := HostInfo{
Kernel: hostKernelVersion,
Architecture: arch,
Distro: hostDistro,
CPU: hostCPU,
Memory: memoryInfo,
VMContainerCapable: hostVMContainerCapable,
SupportVSocks: supportVSocks,
}
@ -238,6 +253,24 @@ func getHostInfo() (HostInfo, error) {
return host, nil
}
func getMemoryInfo() MemoryInfo {
fs, err := procfs.NewDefaultFS()
if err != nil {
return MemoryInfo{}
}
mi, err := fs.Meminfo()
if err != nil {
return MemoryInfo{}
}
return MemoryInfo{
Total: mi.MemTotal,
Free: mi.MemFree,
Available: mi.MemAvailable,
}
}
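
The same idea as `getMemoryInfo`, sketched standalone in Rust for illustration (the Go code above relies on the `procfs` package; `/proc/meminfo` reports these fields in kB):

```rust
use std::fs;

#[derive(Debug, Default)]
struct MemoryInfo {
    total: u64,     // MemTotal, kB
    free: u64,      // MemFree, kB
    available: u64, // MemAvailable, kB
}

fn read_meminfo() -> std::io::Result<MemoryInfo> {
    let text = fs::read_to_string("/proc/meminfo")?;
    let mut mi = MemoryInfo::default();
    for line in text.lines() {
        let mut parts = line.split_whitespace();
        match (parts.next(), parts.next()) {
            (Some("MemTotal:"), Some(v)) => mi.total = v.parse().unwrap_or(0),
            (Some("MemFree:"), Some(v)) => mi.free = v.parse().unwrap_or(0),
            (Some("MemAvailable:"), Some(v)) => mi.available = v.parse().unwrap_or(0),
            _ => {}
        }
    }
    Ok(mi)
}

fn main() {
    println!("{:?}", read_meminfo().unwrap_or_default());
}
```
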
func getNetmonInfo(config oci.RuntimeConfig) NetmonInfo {
netmonConfig := config.NetmonConfig

View File

@ -50,6 +50,9 @@ func TestEnvGetEnvInfoSetsCPUType(t *testing.T) {
env, err := getEnvInfo(configFile, config)
assert.NoError(err)
// Free/Available are changing
expectedEnv.Host.Memory = env.Host.Memory
assert.Equal(expectedEnv, env)
assert.NotEmpty(archRequiredCPUFlags)

View File

@ -45,6 +45,9 @@ func testEnvGetEnvInfoSetsCPUTypeGeneric(t *testing.T) {
env, err := getEnvInfo(configFile, config)
assert.NoError(err)
// Free/Available are changing
expectedEnv.Host.Memory = env.Host.Memory
assert.Equal(expectedEnv, env)
assert.Equal(archRequiredCPUFlags, savedArchRequiredCPUFlags)

View File

@ -14,6 +14,7 @@ import (
"os"
"path"
"path/filepath"
"runtime"
goruntime "runtime"
"strings"
"testing"
@ -272,6 +273,10 @@ VERSION_ID="%s"
expectedHostDetails.CPU.Model = "v8"
}
// set CPU num.
// will not set memory info, because memory values change between runs.
expectedHostDetails.CPU.CPUs = runtime.NumCPU()
return expectedHostDetails, nil
}
@ -391,7 +396,16 @@ func TestEnvGetHostInfo(t *testing.T) {
host, err := getHostInfo()
assert.NoError(t, err)
// Free/Available are changing
expectedHostDetails.Memory = host.Memory
assert.Equal(t, expectedHostDetails, host)
// check CPU cores and memory info
assert.Equal(t, true, host.CPU.CPUs > 0)
assert.Equal(t, true, host.Memory.Total > 0)
assert.Equal(t, true, host.Memory.Free > 0)
assert.Equal(t, true, host.Memory.Available > 0)
}
func TestEnvGetHostInfoNoProcCPUInfo(t *testing.T) {
@ -470,6 +484,9 @@ func TestEnvGetEnvInfo(t *testing.T) {
env, err := getEnvInfo(configFile, config)
assert.NoError(t, err)
// Free/Available are changing
expectedEnv.Host.Memory = env.Host.Memory
assert.Equal(t, expectedEnv, env)
}
}
@ -495,6 +512,9 @@ func TestEnvGetEnvInfoNoHypervisorVersion(t *testing.T) {
env, err := getEnvInfo(configFile, config)
assert.NoError(err)
// Free/Available are changing
expectedEnv.Host.Memory = env.Host.Memory
assert.Equal(expectedEnv, env)
}

View File

@ -6,6 +6,7 @@
package containerdshim
import (
"io"
"time"
"github.com/containerd/containerd/api/types/task"
@ -17,23 +18,25 @@ import (
)
type container struct {
s *service
ttyio *ttyIO
spec *specs.Spec
exitTime time.Time
execs map[string]*exec
exitIOch chan struct{}
exitCh chan uint32
id string
stdin string
stdout string
stderr string
bundle string
cType vc.ContainerType
exit uint32
status task.Status
terminal bool
mounted bool
s *service
ttyio *ttyIO
spec *specs.Spec
exitTime time.Time
execs map[string]*exec
exitIOch chan struct{}
stdinPipe io.WriteCloser
stdinCloser chan struct{}
exitCh chan uint32
id string
stdin string
stdout string
stderr string
bundle string
cType vc.ContainerType
exit uint32
status task.Status
terminal bool
mounted bool
}
func newContainer(s *service, r *taskAPI.CreateTaskRequest, containerType vc.ContainerType, spec *specs.Spec, mounted bool) (*container, error) {
@ -47,20 +50,21 @@ func newContainer(s *service, r *taskAPI.CreateTaskRequest, containerType vc.Con
}
c := &container{
s: s,
spec: spec,
id: r.ID,
bundle: r.Bundle,
stdin: r.Stdin,
stdout: r.Stdout,
stderr: r.Stderr,
terminal: r.Terminal,
cType: containerType,
execs: make(map[string]*exec),
status: task.StatusCreated,
exitIOch: make(chan struct{}),
exitCh: make(chan uint32, 1),
mounted: mounted,
s: s,
spec: spec,
id: r.ID,
bundle: r.Bundle,
stdin: r.Stdin,
stdout: r.Stdout,
stderr: r.Stderr,
terminal: r.Terminal,
cType: containerType,
execs: make(map[string]*exec),
status: task.StatusCreated,
exitIOch: make(chan struct{}),
exitCh: make(chan uint32, 1),
stdinCloser: make(chan struct{}),
mounted: mounted,
}
return c, nil
}

View File

@ -19,7 +19,6 @@ import (
"github.com/containerd/typeurl"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
// only register the proto type
_ "github.com/containerd/containerd/runtime/linux/runctypes"
@ -72,7 +71,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
defer func() {
if err != nil && rootFs.Mounted {
if err2 := mount.UnmountAll(rootfs, 0); err2 != nil {
logrus.WithError(err2).Warn("failed to cleanup rootfs mount")
shimLog.WithField("container-type", containerType).WithError(err2).Warn("failed to cleanup rootfs mount")
}
}
}()
@ -102,7 +101,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
defer func() {
if err != nil && rootFs.Mounted {
if err2 := mount.UnmountAll(rootfs, 0); err2 != nil {
logrus.WithError(err2).Warn("failed to cleanup rootfs mount")
shimLog.WithField("container-type", containerType).WithError(err2).Warn("failed to cleanup rootfs mount")
}
}
}()

View File

@ -12,8 +12,6 @@ import (
"github.com/containerd/containerd/mount"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/sirupsen/logrus"
)
func deleteContainer(ctx context.Context, s *service, c *container) error {
@ -42,7 +40,7 @@ func deleteContainer(ctx context.Context, s *service, c *container) error {
if c.mounted {
rootfs := path.Join(c.bundle, "rootfs")
if err := mount.UnmountAll(rootfs, 0); err != nil {
logrus.WithError(err).Warn("failed to cleanup rootfs mount")
shimLog.WithError(err).Warn("failed to cleanup rootfs mount")
}
}

View File

@ -7,6 +7,7 @@ package containerdshim
import (
"fmt"
"io"
"strings"
"time"
@ -32,6 +33,9 @@ type exec struct {
exitIOch chan struct{}
exitCh chan uint32
stdinCloser chan struct{}
stdinPipe io.WriteCloser
exitTime time.Time
}
@ -108,13 +112,14 @@ func newExec(c *container, stdin, stdout, stderr string, terminal bool, jspec *g
}
exec := &exec{
container: c,
cmds: cmds,
tty: tty,
exitCode: exitCode255,
exitIOch: make(chan struct{}),
exitCh: make(chan uint32, 1),
status: task.StatusCreated,
container: c,
cmds: cmds,
tty: tty,
exitCode: exitCode255,
exitIOch: make(chan struct{}),
stdinCloser: make(chan struct{}),
exitCh: make(chan uint32, 1),
status: task.StatusCreated,
}
return exec, nil

View File

@ -58,9 +58,15 @@ var (
// concrete virtcontainer implementation
var vci vc.VC = &vc.VCImpl{}
// shimLog is logger for shim package
var shimLog = logrus.WithField("source", "containerd-kata-shim-v2")
// New returns a new shim service that can be used via GRPC
func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shim, error) {
logger := logrus.WithField("ID", id)
shimLog = shimLog.WithFields(logrus.Fields{
"sandbox": id,
"pid": os.Getpid(),
})
// Discard the log before shim init its log output. Otherwise
// it will output into stdio, from which containerd would like
// to get the shim's socket address.
@ -69,8 +75,8 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi
if !opts.Debug {
logrus.SetLevel(logrus.WarnLevel)
}
vci.SetLogger(ctx, logger)
katautils.SetLogger(ctx, logger, logger.Logger.Level)
vci.SetLogger(ctx, shimLog)
katautils.SetLogger(ctx, shimLog, shimLog.Logger.Level)
ctx, cancel := context.WithCancel(ctx)
@ -226,7 +232,7 @@ func (s *service) forward(publisher events.Publisher) {
err := publisher.Publish(ctx, getTopic(e), e)
cancel()
if err != nil {
logrus.WithError(err).Error("post event")
shimLog.WithError(err).Error("post event")
}
}
}
@ -269,7 +275,7 @@ func getTopic(e interface{}) string {
case *eventstypes.TaskCheckpointed:
return cdruntime.TaskCheckpointedEventTopic
default:
logrus.Warnf("no topic for type %#v", e)
shimLog.WithField("event-type", e).Warn("no topic for event type")
}
return cdruntime.TaskUnknownTopic
}
@ -684,7 +690,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
// and return directly.
if signum == syscall.SIGKILL || signum == syscall.SIGTERM {
if c.status == task.StatusStopped {
logrus.WithField("sandbox", s.sandbox.ID()).WithField("Container", c.id).Debug("Container has already been stopped")
shimLog.WithField("sandbox", s.sandbox.ID()).WithField("container", c.id).Debug("Container has already been stopped")
return empty, nil
}
}
@ -697,10 +703,10 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
}
processID = execs.id
if processID == "" {
logrus.WithFields(logrus.Fields{
shimLog.WithFields(logrus.Fields{
"sandbox": s.sandbox.ID(),
"Container": c.id,
"ExecID": r.ExecID,
"container": c.id,
"exec-id": r.ExecID,
}).Debug("Id of exec process to be signalled is empty")
return empty, errors.New("The exec process does not exist")
}
@ -747,19 +753,23 @@ func (s *service) CloseIO(ctx context.Context, r *taskAPI.CloseIORequest) (_ *pt
return nil, err
}
tty := c.ttyio
stdin := c.stdinPipe
stdinCloser := c.stdinCloser
if r.ExecID != "" {
execs, err := c.getExec(r.ExecID)
if err != nil {
return nil, err
}
tty = execs.ttyio
stdin = execs.stdinPipe
stdinCloser = execs.stdinCloser
}
if tty != nil && tty.Stdin != nil {
if err := tty.Stdin.Close(); err != nil {
return nil, errors.Wrap(err, "close stdin")
}
// wait until the stdin io copy has terminated, otherwise
// some content would not be forwarded to the process.
<-stdinCloser
if err := stdin.Close(); err != nil {
return nil, errors.Wrap(err, "close stdin")
}
return empty, nil
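
A standalone Rust sketch of the synchronization introduced here (illustrative only): the io-copy worker signals "stdin fully forwarded" by closing a channel, the analogue of `close(stdinCloser)` in the Go code, and `CloseIO` blocks on that signal before closing the process's stdin:

```rust
use std::io::Read;
use std::sync::mpsc;
use std::thread;

fn main() {
    let (done_tx, done_rx) = mpsc::channel::<()>();
    let copier = thread::spawn(move || {
        let mut sink = Vec::new();
        let _ = std::io::empty().read_to_end(&mut sink); // stands in for the stdin copy loop
        drop(done_tx); // signal completion by closing the channel
    });
    let _ = done_rx.recv(); // returns Err once the sender is dropped
    // only now is it safe to close the process's stdin
    copier.join().unwrap();
}
```
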

View File

@ -24,8 +24,6 @@ import (
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/sirupsen/logrus"
"google.golang.org/grpc/codes"
mutils "github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
@ -33,6 +31,7 @@ import (
var (
ifSupportAgentMetricsAPI = true
shimMgtLog = shimLog.WithField("subsystem", "shim-management")
)
// serveMetrics handle /metrics requests
@ -65,9 +64,9 @@ func (s *service) serveMetrics(w http.ResponseWriter, r *http.Request) {
// get metrics from agent
agentMetrics, err := s.sandbox.GetAgentMetrics()
if err != nil {
logrus.WithError(err).Error("failed GetAgentMetrics")
shimMgtLog.WithError(err).Error("failed GetAgentMetrics")
if isGRPCErrorCode(codes.NotFound, err) {
logrus.Warn("metrics API not supportted by this agent.")
shimMgtLog.Warn("metrics API not supportted by this agent.")
ifSupportAgentMetricsAPI = false
return
}
@ -119,23 +118,23 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec
// metrics socket will under sandbox's bundle path
metricsAddress, err := socketAddress(ctx, s.id)
if err != nil {
logrus.Errorf("failed to create socket address: %s", err.Error())
shimMgtLog.WithError(err).Error("failed to create socket address")
return
}
listener, err := cdshim.NewSocket(metricsAddress)
if err != nil {
logrus.Errorf("failed to create listener: %s", err.Error())
shimMgtLog.WithError(err).Error("failed to create listener")
return
}
// write metrics address to filesystem
if err := cdshim.WriteAddress("monitor_address", metricsAddress); err != nil {
logrus.Errorf("failed to write metrics address: %s", err.Error())
shimMgtLog.WithError(err).Errorf("failed to write metrics address")
return
}
logrus.Info("kata monitor inited")
shimMgtLog.Info("kata management inited")
// bind handler
m := http.NewServeMux()

View File

@ -62,17 +62,22 @@ func startContainer(ctx context.Context, s *service, c *container) error {
return err
}
c.stdinPipe = stdin
if c.stdin != "" || c.stdout != "" || c.stderr != "" {
tty, err := newTtyIO(ctx, c.stdin, c.stdout, c.stderr, c.terminal)
if err != nil {
return err
}
c.ttyio = tty
go ioCopy(c.exitIOch, tty, stdin, stdout, stderr)
go ioCopy(c.exitIOch, c.stdinCloser, tty, stdin, stdout, stderr)
} else {
//close the io exit channel, since there is no io for this container,
//otherwise the following wait goroutine will hang on this channel.
close(c.exitIOch)
//close the stdin closer channel to notify that it's safe to close the process's
// io.
close(c.stdinCloser)
}
go wait(s, c, "")
@ -111,13 +116,16 @@ func startExec(ctx context.Context, s *service, containerID, execID string) (*ex
if err != nil {
return nil, err
}
execs.stdinPipe = stdin
tty, err := newTtyIO(ctx, execs.tty.stdin, execs.tty.stdout, execs.tty.stderr, execs.tty.terminal)
if err != nil {
return nil, err
}
execs.ttyio = tty
go ioCopy(execs.exitIOch, tty, stdin, stdout, stderr)
go ioCopy(execs.exitIOch, execs.stdinCloser, tty, stdin, stdout, stderr)
go wait(s, c, execID)

View File

@ -85,7 +85,7 @@ func newTtyIO(ctx context.Context, stdin, stdout, stderr string, console bool) (
return ttyIO, nil
}
func ioCopy(exitch chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
func ioCopy(exitch, stdinCloser chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPipe, stderrPipe io.Reader) {
var wg sync.WaitGroup
var closeOnce sync.Once
@ -95,6 +95,8 @@ func ioCopy(exitch chan struct{}, tty *ttyIO, stdinPipe io.WriteCloser, stdoutPi
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(stdinPipe, tty.Stdin, *p)
// notify that we can close the process's io safely.
close(stdinCloser)
wg.Done()
}()
}

View File

@ -19,7 +19,6 @@ import (
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
"github.com/sirupsen/logrus"
)
func cReap(s *service, status int, id, execid string, exitat time.Time) {
@ -33,18 +32,18 @@ func cReap(s *service, status int, id, execid string, exitat time.Time) {
}
func cleanupContainer(ctx context.Context, sid, cid, bundlePath string) error {
logrus.WithField("Service", "Cleanup").WithField("container", cid).Info("Cleanup container")
shimLog.WithField("service", "cleanup").WithField("container", cid).Info("Cleanup container")
err := vci.CleanupContainer(ctx, sid, cid, true)
if err != nil {
logrus.WithError(err).WithField("container", cid).Warn("failed to cleanup container")
shimLog.WithError(err).WithField("container", cid).Warn("failed to cleanup container")
return err
}
rootfs := filepath.Join(bundlePath, "rootfs")
if err := mount.UnmountAll(rootfs, 0); err != nil {
logrus.WithError(err).WithField("container", cid).Warn("failed to cleanup container rootfs")
shimLog.WithError(err).WithField("container", cid).Warn("failed to cleanup container rootfs")
return err
}

View File

@ -40,7 +40,7 @@ func wait(s *service, c *container, execID string) (int32, error) {
ret, err := s.sandbox.WaitProcess(c.id, processID)
if err != nil {
logrus.WithError(err).WithFields(logrus.Fields{
shimLog.WithError(err).WithFields(logrus.Fields{
"container": c.id,
"pid": processID,
}).Error("Wait for process failed")
@ -61,15 +61,15 @@ func wait(s *service, c *container, execID string) (int32, error) {
s.monitor <- nil
}
if err = s.sandbox.Stop(true); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to stop sandbox")
shimLog.WithField("sandbox", s.sandbox.ID()).Error("failed to stop sandbox")
}
if err = s.sandbox.Delete(); err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
shimLog.WithField("sandbox", s.sandbox.ID()).Error("failed to delete sandbox")
}
} else {
if _, err = s.sandbox.StopContainer(c.id, false); err != nil {
logrus.WithError(err).WithField("container", c.id).Warn("stop container failed")
shimLog.WithError(err).WithField("container", c.id).Warn("stop container failed")
}
}
c.status = task.StatusStopped
@ -105,14 +105,14 @@ func watchSandbox(s *service) {
s.mu.Lock()
defer s.mu.Unlock()
// sandbox malfunctioning, cleanup as much as we can
logrus.WithError(err).Warn("sandbox stopped unexpectedly")
shimLog.WithError(err).Warn("sandbox stopped unexpectedly")
err = s.sandbox.Stop(true)
if err != nil {
logrus.WithError(err).Warn("stop sandbox failed")
shimLog.WithError(err).Warn("stop sandbox failed")
}
err = s.sandbox.Delete()
if err != nil {
logrus.WithError(err).Warn("delete sandbox failed")
shimLog.WithError(err).Warn("delete sandbox failed")
}
for _, c := range s.containers {
@ -120,9 +120,9 @@ func watchSandbox(s *service) {
continue
}
rootfs := path.Join(c.bundle, "rootfs")
logrus.WithField("rootfs", rootfs).WithField("id", c.id).Debug("container umount rootfs")
shimLog.WithField("rootfs", rootfs).WithField("container", c.id).Debug("container umount rootfs")
if err := mount.UnmountAll(rootfs, 0); err != nil {
logrus.WithError(err).Warn("failed to cleanup rootfs mount")
shimLog.WithError(err).Warn("failed to cleanup rootfs mount")
}
}
@ -142,7 +142,7 @@ func watchOOMEvents(ctx context.Context, s *service) {
default:
containerID, err := s.sandbox.GetOOMEvent()
if err != nil {
logrus.WithField("sandbox", s.sandbox.ID()).WithError(err).Warn("failed to get OOM event from sandbox")
shimLog.WithError(err).Warn("failed to get OOM event from sandbox")
// If the GetOOMEvent call is not implemented, then the agent is most likely an older version,
// stop attempting to get OOM events.
// for rust agent, the response code is not found

View File

@ -307,17 +307,6 @@ show_runtime_log_details()
end_section
}
show_shim_log_details()
{
local title="Shim logs"
subheading "$title"
start_section "$title"
find_system_journal_problems "shim" "@PROJECT_TYPE@-shim"
end_section
}
show_throttler_log_details()
{
local title="Throttler logs"
@ -336,7 +325,6 @@ show_log_details()
heading "$title"
show_runtime_log_details
show_shim_log_details
show_throttler_log_details
show_containerd_shimv2_log_details
@ -366,7 +354,6 @@ show_package_versions()
for project in @PROJECT_TYPE@
do
pattern+="|${project}-runtime"
pattern+="|${project}-shim"
pattern+="|${project}-ksm-throttler"
pattern+="|${project}-containers-image"
done

View File

@ -23,7 +23,6 @@ require (
github.com/docker/distribution v2.7.1+incompatible // indirect
github.com/docker/docker v1.13.1 // indirect
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
github.com/docker/go-units v0.3.3
github.com/go-ini/ini v1.28.2
github.com/go-openapi/errors v0.18.0
github.com/go-openapi/runtime v0.18.0
@ -33,10 +32,8 @@ require (
github.com/gogo/googleapis v1.4.0 // indirect
github.com/gogo/protobuf v1.3.1
github.com/hashicorp/go-multierror v1.0.0
github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b
github.com/intel/govmm v0.0.0-20200825065022-6042f6033126
github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
github.com/mitchellh/mapstructure v1.1.2
github.com/opencontainers/image-spec v1.0.1 // indirect
github.com/opencontainers/runc v1.0.0-rc9.0.20200102164712-2b52db75279c
github.com/opencontainers/runtime-spec v1.0.2-0.20190408193819-a1b50f621a48

View File

@ -161,13 +161,15 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d h1:W+SIwDdl3+jXWeidYySAgzytE3piq6GumXeBjFBG67c=
github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b h1:QqUb1HVk0Nb9zyzvIkMmhI7DP5gzyWPx/6md21M52U0=
github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
github.com/intel/govmm v0.0.0-20200724170648-af9e34b91ae9 h1:GSRnVLRNweZV3f8Vo3vtMlPsuzvpj57Gj7Y7TkGVO6U=
github.com/intel/govmm v0.0.0-20200724170648-af9e34b91ae9/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
github.com/intel/govmm v0.0.0-20200728135209-6c3315ba8a42 h1:Yu3/MlZl/kKE0Ipgio/KVorMIrjeHTVOG4+9WAddgOQ=
github.com/intel/govmm v0.0.0-20200728135209-6c3315ba8a42/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
github.com/intel/govmm v0.0.0-20200825065022-6042f6033126 h1:yltaUdR0Vitnn/FEfy+JWbJ+oGhMAPP/3S7ja9S5yso=
github.com/intel/govmm v0.0.0-20200825065022-6042f6033126/go.mod h1:QKGWoQtjvkvFtzP6ybiM3lxUHqf83Sv3oLqyELUKH4g=
github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=

View File

@ -1,23 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test

View File

@ -1,362 +0,0 @@
Mozilla Public License, version 2.0
1. Definitions
1.1. "Contributor"
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. "Incompatible With Secondary Licenses"
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the terms of
a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in a
separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible, whether
at the time of the initial grant or subsequently, any and all of the
rights conveyed by this License.
1.10. "Modifications"
means any of the following:
a. any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the License,
by the making, using, selling, offering for sale, having made, import,
or transfer of either its Contributions or its Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, "control" means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights to
grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter the
recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty, or
limitations of liability) contained within the Source Code Form of the
Covered Software, except that You may alter any license notices to the
extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute,
judicial order, or regulation then You must: (a) comply with the terms of
this License to the maximum extent possible; and (b) describe the
limitations and the code they affect. Such description must be placed in a
text file included with all distributions of the Covered Software under
this License. Except to the extent prohibited by statute or regulation,
such description must be sufficiently detailed for a recipient of ordinary
skill to be able to understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing
basis, if such Contributor fails to notify You of the non-compliance by
some reasonable means prior to 60 days after You have come back into
compliance. Moreover, Your grants from a particular Contributor are
reinstated on an ongoing basis if such Contributor notifies You of the
non-compliance by some reasonable means, this is the first time You have
received notice of non-compliance with this License from such
Contributor, and You become compliant prior to 30 days after Your receipt
of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an "as is" basis,
without warranty of any kind, either expressed, implied, or statutory,
including, without limitation, warranties that the Covered Software is free
of defects, merchantable, fit for a particular purpose or non-infringing.
The entire risk as to the quality and performance of the Covered Software
is with You. Should any Covered Software prove defective in any respect,
You (not any Contributor) assume the cost of any necessary servicing,
repair, or correction. This disclaimer of warranty constitutes an essential
part of this License. No use of any Covered Software is authorized under
this License except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from
such party's negligence to the extent applicable law prohibits such
limitation. Some jurisdictions do not allow the exclusion or limitation of
incidental or consequential damages, so this exclusion and limitation may
not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts
of a jurisdiction where the defendant maintains its principal place of
business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions. Nothing
in this Section shall prevent a party's ability to bring cross-claims or
counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides that
the language of a contract shall be construed against the drafter shall not
be used to construe this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses If You choose to distribute Source Code Form that is
Incompatible With Secondary Licenses under the terms of this version of
the License, the notice described in Exhibit B of this License must be
attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file,
then You may include the notice in a location (such as a LICENSE file in a
relevant directory) where a recipient would be likely to look for such a
notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
This Source Code Form is "Incompatible
With Secondary Licenses", as defined by
the Mozilla Public License, v. 2.0.

View File

@ -1,86 +0,0 @@
# Yamux
Yamux (Yet another Multiplexer) is a multiplexing library for Golang.
It relies on an underlying connection to provide reliability
and ordering, such as TCP or Unix domain sockets, and provides
stream-oriented multiplexing. It is inspired by SPDY but is not
interoperable with it.
Yamux features include:
* Bi-directional streams
* Streams can be opened by either client or server
* Useful for NAT traversal
* Server-side push support
* Flow control
* Avoid starvation
* Back-pressure to prevent overwhelming a receiver
* Keep Alives
* Enables persistent connections over a load balancer
* Efficient
* Enables thousands of logical streams with low overhead
## Documentation
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/yamux).
## Specification
The full specification for Yamux is provided in the `spec.md` file.
It can be used as a guide to implementors of interoperable libraries.
## Usage
Using Yamux is remarkably simple:
```go
func client() {
    // Get a TCP connection
    conn, err := net.Dial(...)
    if err != nil {
        panic(err)
    }

    // Setup client side of yamux
    session, err := yamux.Client(conn, nil)
    if err != nil {
        panic(err)
    }

    // Open a new stream
    stream, err := session.Open()
    if err != nil {
        panic(err)
    }

    // Stream implements net.Conn
    stream.Write([]byte("ping"))
}

func server() {
    // Accept a TCP connection
    conn, err := listener.Accept()
    if err != nil {
        panic(err)
    }

    // Setup server side of yamux
    session, err := yamux.Server(conn, nil)
    if err != nil {
        panic(err)
    }

    // Accept a stream
    stream, err := session.Accept()
    if err != nil {
        panic(err)
    }

    // Listen for a message
    buf := make([]byte, 4)
    stream.Read(buf)
}
```

View File

@ -1,60 +0,0 @@
package yamux
import (
"fmt"
"net"
)
// hasAddr is used to get the address from the underlying connection
type hasAddr interface {
LocalAddr() net.Addr
RemoteAddr() net.Addr
}
// yamuxAddr is used when we cannot get the underlying address
type yamuxAddr struct {
Addr string
}
func (*yamuxAddr) Network() string {
return "yamux"
}
func (y *yamuxAddr) String() string {
return fmt.Sprintf("yamux:%s", y.Addr)
}
// Addr is used to get the address of the listener.
func (s *Session) Addr() net.Addr {
return s.LocalAddr()
}
// LocalAddr is used to get the local address of the
// underlying connection.
func (s *Session) LocalAddr() net.Addr {
addr, ok := s.conn.(hasAddr)
if !ok {
return &yamuxAddr{"local"}
}
return addr.LocalAddr()
}
// RemoteAddr is used to get the address of remote end
// of the underlying connection
func (s *Session) RemoteAddr() net.Addr {
addr, ok := s.conn.(hasAddr)
if !ok {
return &yamuxAddr{"remote"}
}
return addr.RemoteAddr()
}
// LocalAddr returns the local address
func (s *Stream) LocalAddr() net.Addr {
return s.session.LocalAddr()
}
// RemoteAddr returns the remote address
func (s *Stream) RemoteAddr() net.Addr {
return s.session.RemoteAddr()
}

View File

@ -1,157 +0,0 @@
package yamux
import (
"encoding/binary"
"fmt"
)
var (
// ErrInvalidVersion means we received a frame with an
// invalid version
ErrInvalidVersion = fmt.Errorf("invalid protocol version")
// ErrInvalidMsgType means we received a frame with an
// invalid message type
ErrInvalidMsgType = fmt.Errorf("invalid msg type")
// ErrSessionShutdown is used if there is a shutdown during
// an operation
ErrSessionShutdown = fmt.Errorf("session shutdown")
// ErrStreamsExhausted is returned if we have no more
// stream ids to issue
ErrStreamsExhausted = fmt.Errorf("streams exhausted")
// ErrDuplicateStream is used if a duplicate stream is
// opened inbound
ErrDuplicateStream = fmt.Errorf("duplicate stream initiated")
// ErrRecvWindowExceeded indicates the window was exceeded
ErrRecvWindowExceeded = fmt.Errorf("recv window exceeded")
// ErrTimeout is used when we reach an IO deadline
ErrTimeout = fmt.Errorf("i/o deadline reached")
// ErrStreamClosed is returned when using a closed stream
ErrStreamClosed = fmt.Errorf("stream closed")
// ErrUnexpectedFlag is set when we get an unexpected flag
ErrUnexpectedFlag = fmt.Errorf("unexpected flag")
// ErrRemoteGoAway is used when we get a go away from the other side
ErrRemoteGoAway = fmt.Errorf("remote end is not accepting connections")
// ErrConnectionReset is sent if a stream is reset. This can happen
// if the backlog is exceeded, or if there was a remote GoAway.
ErrConnectionReset = fmt.Errorf("connection reset")
// ErrConnectionWriteTimeout indicates that we hit the "safety valve"
// timeout writing to the underlying stream connection.
ErrConnectionWriteTimeout = fmt.Errorf("connection write timeout")
// ErrKeepAliveTimeout is sent if a missed keepalive caused the stream close
ErrKeepAliveTimeout = fmt.Errorf("keepalive timeout")
)
const (
// protoVersion is the only version we support
protoVersion uint8 = 0
)
const (
// Data is used for data frames. They are followed
// by length bytes worth of payload.
typeData uint8 = iota
// WindowUpdate is used to change the window of
// a given stream. The length indicates the delta
// update to the window.
typeWindowUpdate
// Ping is sent as a keep-alive or to measure
// the RTT. The StreamID and Length value are echoed
// back in the response.
typePing
// GoAway is sent to terminate a session. The StreamID
// should be 0 and the length is an error code.
typeGoAway
)
const (
// SYN is sent to signal a new stream. May
// be sent with a data payload
flagSYN uint16 = 1 << iota
// ACK is sent to acknowledge a new stream. May
// be sent with a data payload
flagACK
// FIN is sent to half-close the given stream.
// May be sent with a data payload.
flagFIN
// RST is used to hard close a given stream.
flagRST
)
const (
// initialStreamWindow is the initial stream window size
initialStreamWindow uint32 = 256 * 1024
)
const (
// goAwayNormal is sent on a normal termination
goAwayNormal uint32 = iota
// goAwayProtoErr sent on a protocol error
goAwayProtoErr
// goAwayInternalErr sent on an internal error
goAwayInternalErr
)
const (
sizeOfVersion = 1
sizeOfType = 1
sizeOfFlags = 2
sizeOfStreamID = 4
sizeOfLength = 4
headerSize = sizeOfVersion + sizeOfType + sizeOfFlags +
sizeOfStreamID + sizeOfLength
)
type header []byte
func (h header) Version() uint8 {
return h[0]
}
func (h header) MsgType() uint8 {
return h[1]
}
func (h header) Flags() uint16 {
return binary.BigEndian.Uint16(h[2:4])
}
func (h header) StreamID() uint32 {
return binary.BigEndian.Uint32(h[4:8])
}
func (h header) Length() uint32 {
return binary.BigEndian.Uint32(h[8:12])
}
func (h header) String() string {
return fmt.Sprintf("Vsn:%d Type:%d Flags:%d StreamID:%d Length:%d",
h.Version(), h.MsgType(), h.Flags(), h.StreamID(), h.Length())
}
func (h header) encode(msgType uint8, flags uint16, streamID uint32, length uint32) {
h[0] = protoVersion
h[1] = msgType
binary.BigEndian.PutUint16(h[2:4], flags)
binary.BigEndian.PutUint32(h[4:8], streamID)
binary.BigEndian.PutUint32(h[8:12], length)
}

View File

@ -1 +0,0 @@
module github.com/hashicorp/yamux

View File

@ -1,98 +0,0 @@
package yamux
import (
"fmt"
"io"
"log"
"os"
"time"
)
// Config is used to tune the Yamux session
type Config struct {
// AcceptBacklog is used to limit how many streams may be
// waiting an accept.
AcceptBacklog int
// EnableKeepAlive is used to send periodic keep-alive
// messages using a ping.
EnableKeepAlive bool
// KeepAliveInterval is how often to perform the keep alive
KeepAliveInterval time.Duration
// ConnectionWriteTimeout is meant to be a "safety valve" timeout after
// which we will suspect a problem with the underlying connection and
// close it. This is only applied to writes, where there's generally
// an expectation that things will move along quickly.
ConnectionWriteTimeout time.Duration
// MaxStreamWindowSize is used to control the maximum
// window size that we allow for a stream.
MaxStreamWindowSize uint32
// LogOutput is used to control the log destination. Either Logger or
// LogOutput can be set, not both.
LogOutput io.Writer
// Logger is used to pass in the logger to be used. Either Logger or
// LogOutput can be set, not both.
Logger *log.Logger
}
// DefaultConfig is used to return a default configuration
func DefaultConfig() *Config {
return &Config{
AcceptBacklog: 256,
EnableKeepAlive: true,
KeepAliveInterval: 30 * time.Second,
ConnectionWriteTimeout: 10 * time.Second,
MaxStreamWindowSize: initialStreamWindow,
LogOutput: os.Stderr,
}
}
// VerifyConfig is used to verify the sanity of configuration
func VerifyConfig(config *Config) error {
if config.AcceptBacklog <= 0 {
return fmt.Errorf("backlog must be positive")
}
if config.KeepAliveInterval == 0 {
return fmt.Errorf("keep-alive interval must be positive")
}
if config.MaxStreamWindowSize < initialStreamWindow {
return fmt.Errorf("MaxStreamWindowSize must be larger than %d", initialStreamWindow)
}
if config.LogOutput != nil && config.Logger != nil {
return fmt.Errorf("both Logger and LogOutput may not be set, select one")
} else if config.LogOutput == nil && config.Logger == nil {
return fmt.Errorf("one of Logger or LogOutput must be set, select one")
}
return nil
}
// Server is used to initialize a new server-side connection.
// There must be at most one server-side connection. If a nil config is
// provided, the DefaultConfig will be used.
func Server(conn io.ReadWriteCloser, config *Config) (*Session, error) {
if config == nil {
config = DefaultConfig()
}
if err := VerifyConfig(config); err != nil {
return nil, err
}
return newSession(config, conn, false), nil
}
// Client is used to initialize a new client-side connection.
// There must be at most one client-side connection.
func Client(conn io.ReadWriteCloser, config *Config) (*Session, error) {
if config == nil {
config = DefaultConfig()
}
if err := VerifyConfig(config); err != nil {
return nil, err
}
return newSession(config, conn, true), nil
}

View File

@ -1,653 +0,0 @@
package yamux
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"log"
"math"
"net"
"strings"
"sync"
"sync/atomic"
"time"
)
// Session is used to wrap a reliable ordered connection and to
// multiplex it into multiple streams.
type Session struct {
// remoteGoAway indicates the remote side does
// not want further connections. Must be first for alignment.
remoteGoAway int32
// localGoAway indicates that we should stop
// accepting further connections. Must be first for alignment.
localGoAway int32
// nextStreamID is the next stream we should
// send. This depends if we are a client/server.
nextStreamID uint32
// config holds our configuration
config *Config
// logger is used for our logs
logger *log.Logger
// conn is the underlying connection
conn io.ReadWriteCloser
// bufRead is a buffered reader
bufRead *bufio.Reader
// pings is used to track inflight pings
pings map[uint32]chan struct{}
pingID uint32
pingLock sync.Mutex
// streams maps a stream id to a stream, and inflight has an entry
// for any outgoing stream that has not yet been established. Both are
// protected by streamLock.
streams map[uint32]*Stream
inflight map[uint32]struct{}
streamLock sync.Mutex
// synCh acts like a semaphore. It is sized to the AcceptBacklog which
// is assumed to be symmetric between the client and server. This allows
// the client to avoid exceeding the backlog and instead blocks the open.
synCh chan struct{}
// acceptCh is used to pass ready streams to the client
acceptCh chan *Stream
// sendCh is used to mark a stream as ready to send,
// or to send a header out directly.
sendCh chan sendReady
// recvDoneCh is closed when recv() exits to avoid a race
// between stream registration and stream shutdown
recvDoneCh chan struct{}
// shutdown is used to safely close a session
shutdown bool
shutdownErr error
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
// sendReady is used to either mark a stream as ready
// or to directly send a header
type sendReady struct {
Hdr []byte
Body io.Reader
Err chan error
}
// newSession is used to construct a new session
func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session {
logger := config.Logger
if logger == nil {
logger = log.New(config.LogOutput, "", log.LstdFlags)
}
s := &Session{
config: config,
logger: logger,
conn: conn,
bufRead: bufio.NewReader(conn),
pings: make(map[uint32]chan struct{}),
streams: make(map[uint32]*Stream),
inflight: make(map[uint32]struct{}),
synCh: make(chan struct{}, config.AcceptBacklog),
acceptCh: make(chan *Stream, config.AcceptBacklog),
sendCh: make(chan sendReady, 64),
recvDoneCh: make(chan struct{}),
shutdownCh: make(chan struct{}),
}
if client {
s.nextStreamID = 1
} else {
s.nextStreamID = 2
}
go s.recv()
go s.send()
if config.EnableKeepAlive {
go s.keepalive()
}
return s
}
// IsClosed does a safe check to see if we have shutdown
func (s *Session) IsClosed() bool {
select {
case <-s.shutdownCh:
return true
default:
return false
}
}
// CloseChan returns a read-only channel which is closed as
// soon as the session is closed.
func (s *Session) CloseChan() <-chan struct{} {
return s.shutdownCh
}
// NumStreams returns the number of currently open streams
func (s *Session) NumStreams() int {
s.streamLock.Lock()
num := len(s.streams)
s.streamLock.Unlock()
return num
}
// Open is used to create a new stream as a net.Conn
func (s *Session) Open() (net.Conn, error) {
conn, err := s.OpenStream()
if err != nil {
return nil, err
}
return conn, nil
}
// OpenStream is used to create a new stream
func (s *Session) OpenStream() (*Stream, error) {
if s.IsClosed() {
return nil, ErrSessionShutdown
}
if atomic.LoadInt32(&s.remoteGoAway) == 1 {
return nil, ErrRemoteGoAway
}
// Block if we have too many inflight SYNs
select {
case s.synCh <- struct{}{}:
case <-s.shutdownCh:
return nil, ErrSessionShutdown
}
GET_ID:
// Get an ID, and check for stream exhaustion
id := atomic.LoadUint32(&s.nextStreamID)
if id >= math.MaxUint32-1 {
return nil, ErrStreamsExhausted
}
if !atomic.CompareAndSwapUint32(&s.nextStreamID, id, id+2) {
goto GET_ID
}
// Register the stream
stream := newStream(s, id, streamInit)
s.streamLock.Lock()
s.streams[id] = stream
s.inflight[id] = struct{}{}
s.streamLock.Unlock()
// Send the window update to create
if err := stream.sendWindowUpdate(); err != nil {
select {
case <-s.synCh:
default:
s.logger.Printf("[ERR] yamux: aborted stream open without inflight syn semaphore")
}
return nil, err
}
return stream, nil
}
// Accept is used to block until the next available stream
// is ready to be accepted.
func (s *Session) Accept() (net.Conn, error) {
conn, err := s.AcceptStream()
if err != nil {
return nil, err
}
return conn, err
}
// AcceptStream is used to block until the next available stream
// is ready to be accepted.
func (s *Session) AcceptStream() (*Stream, error) {
select {
case stream := <-s.acceptCh:
if err := stream.sendWindowUpdate(); err != nil {
return nil, err
}
return stream, nil
case <-s.shutdownCh:
return nil, s.shutdownErr
}
}
// Close is used to close the session and all streams.
// Attempts to send a GoAway before closing the connection.
func (s *Session) Close() error {
s.shutdownLock.Lock()
defer s.shutdownLock.Unlock()
if s.shutdown {
return nil
}
s.shutdown = true
if s.shutdownErr == nil {
s.shutdownErr = ErrSessionShutdown
}
close(s.shutdownCh)
s.conn.Close()
<-s.recvDoneCh
s.streamLock.Lock()
defer s.streamLock.Unlock()
for _, stream := range s.streams {
stream.forceClose()
}
return nil
}
// exitErr is used to handle an error that is causing the
// session to terminate.
func (s *Session) exitErr(err error) {
s.shutdownLock.Lock()
if s.shutdownErr == nil {
s.shutdownErr = err
}
s.shutdownLock.Unlock()
s.Close()
}
// GoAway can be used to prevent accepting further
// connections. It does not close the underlying conn.
func (s *Session) GoAway() error {
return s.waitForSend(s.goAway(goAwayNormal), nil)
}
// goAway is used to send a goAway message
func (s *Session) goAway(reason uint32) header {
atomic.SwapInt32(&s.localGoAway, 1)
hdr := header(make([]byte, headerSize))
hdr.encode(typeGoAway, 0, 0, reason)
return hdr
}
// Ping is used to measure the RTT response time
func (s *Session) Ping() (time.Duration, error) {
// Get a channel for the ping
ch := make(chan struct{})
// Get a new ping id, mark as pending
s.pingLock.Lock()
id := s.pingID
s.pingID++
s.pings[id] = ch
s.pingLock.Unlock()
// Send the ping request
hdr := header(make([]byte, headerSize))
hdr.encode(typePing, flagSYN, 0, id)
if err := s.waitForSend(hdr, nil); err != nil {
return 0, err
}
// Wait for a response
start := time.Now()
select {
case <-ch:
case <-time.After(s.config.ConnectionWriteTimeout):
s.pingLock.Lock()
delete(s.pings, id) // Ignore it if a response comes later.
s.pingLock.Unlock()
return 0, ErrTimeout
case <-s.shutdownCh:
return 0, ErrSessionShutdown
}
// Compute the RTT
return time.Now().Sub(start), nil
}
// keepalive is a long running goroutine that periodically does
// a ping to keep the connection alive.
func (s *Session) keepalive() {
for {
select {
case <-time.After(s.config.KeepAliveInterval):
_, err := s.Ping()
if err != nil {
if err != ErrSessionShutdown {
s.logger.Printf("[ERR] yamux: keepalive failed: %v", err)
s.exitErr(ErrKeepAliveTimeout)
}
return
}
case <-s.shutdownCh:
return
}
}
}
// waitForSend waits to send a header, checking for a potential shutdown
func (s *Session) waitForSend(hdr header, body io.Reader) error {
errCh := make(chan error, 1)
return s.waitForSendErr(hdr, body, errCh)
}
// waitForSendErr waits to send a header with optional data, checking for a
// potential shutdown. Since there's the expectation that sends can happen
// in a timely manner, we enforce the connection write timeout here.
func (s *Session) waitForSendErr(hdr header, body io.Reader, errCh chan error) error {
t := timerPool.Get()
timer := t.(*time.Timer)
timer.Reset(s.config.ConnectionWriteTimeout)
defer func() {
timer.Stop()
select {
case <-timer.C:
default:
}
timerPool.Put(t)
}()
ready := sendReady{Hdr: hdr, Body: body, Err: errCh}
select {
case s.sendCh <- ready:
case <-s.shutdownCh:
return ErrSessionShutdown
case <-timer.C:
return ErrConnectionWriteTimeout
}
select {
case err := <-errCh:
return err
case <-s.shutdownCh:
return ErrSessionShutdown
case <-timer.C:
return ErrConnectionWriteTimeout
}
}
// sendNoWait does a send without waiting. Since there's the expectation that
// the send happens right here, we enforce the connection write timeout if we
// can't queue the header to be sent.
func (s *Session) sendNoWait(hdr header) error {
t := timerPool.Get()
timer := t.(*time.Timer)
timer.Reset(s.config.ConnectionWriteTimeout)
defer func() {
timer.Stop()
select {
case <-timer.C:
default:
}
timerPool.Put(t)
}()
select {
case s.sendCh <- sendReady{Hdr: hdr}:
return nil
case <-s.shutdownCh:
return ErrSessionShutdown
case <-timer.C:
return ErrConnectionWriteTimeout
}
}
// send is a long running goroutine that sends data
func (s *Session) send() {
for {
select {
case ready := <-s.sendCh:
// Send a header if ready
if ready.Hdr != nil {
sent := 0
for sent < len(ready.Hdr) {
n, err := s.conn.Write(ready.Hdr[sent:])
if err != nil {
s.logger.Printf("[ERR] yamux: Failed to write header: %v", err)
asyncSendErr(ready.Err, err)
s.exitErr(err)
return
}
sent += n
}
}
// Send data from a body if given
if ready.Body != nil {
_, err := io.Copy(s.conn, ready.Body)
if err != nil {
s.logger.Printf("[ERR] yamux: Failed to write body: %v", err)
asyncSendErr(ready.Err, err)
s.exitErr(err)
return
}
}
// No error, successful send
asyncSendErr(ready.Err, nil)
case <-s.shutdownCh:
return
}
}
}
// recv is a long running goroutine that accepts new data
func (s *Session) recv() {
if err := s.recvLoop(); err != nil {
s.exitErr(err)
}
}
// Ensure that the index of the handler (typeData/typeWindowUpdate/etc) matches the message type
var (
handlers = []func(*Session, header) error{
typeData: (*Session).handleStreamMessage,
typeWindowUpdate: (*Session).handleStreamMessage,
typePing: (*Session).handlePing,
typeGoAway: (*Session).handleGoAway,
}
)
// recvLoop continues to receive data until a fatal error is encountered
func (s *Session) recvLoop() error {
defer close(s.recvDoneCh)
hdr := header(make([]byte, headerSize))
for {
// Read the header
if _, err := io.ReadFull(s.bufRead, hdr); err != nil {
if err != io.EOF && !strings.Contains(err.Error(), "closed") && !strings.Contains(err.Error(), "reset by peer") {
s.logger.Printf("[ERR] yamux: Failed to read header: %v", err)
}
return err
}
// Verify the version
if hdr.Version() != protoVersion {
s.logger.Printf("[ERR] yamux: Invalid protocol version: %d", hdr.Version())
return ErrInvalidVersion
}
mt := hdr.MsgType()
if mt < typeData || mt > typeGoAway {
return ErrInvalidMsgType
}
if err := handlers[mt](s, hdr); err != nil {
return err
}
}
}
// handleStreamMessage handles either a data or window update frame
func (s *Session) handleStreamMessage(hdr header) error {
// Check for a new stream creation
id := hdr.StreamID()
flags := hdr.Flags()
if flags&flagSYN == flagSYN {
if err := s.incomingStream(id); err != nil {
return err
}
}
// Get the stream
s.streamLock.Lock()
stream := s.streams[id]
s.streamLock.Unlock()
// If we do not have a stream, likely we sent a RST
if stream == nil {
// Drain any data on the wire
if hdr.MsgType() == typeData && hdr.Length() > 0 {
s.logger.Printf("[WARN] yamux: Discarding data for stream: %d", id)
if _, err := io.CopyN(ioutil.Discard, s.bufRead, int64(hdr.Length())); err != nil {
s.logger.Printf("[ERR] yamux: Failed to discard data: %v", err)
return nil
}
} else {
s.logger.Printf("[WARN] yamux: frame for missing stream: %v", hdr)
}
return nil
}
// Check if this is a window update
if hdr.MsgType() == typeWindowUpdate {
if err := stream.incrSendWindow(hdr, flags); err != nil {
if sendErr := s.sendNoWait(s.goAway(goAwayProtoErr)); sendErr != nil {
s.logger.Printf("[WARN] yamux: failed to send go away: %v", sendErr)
}
return err
}
return nil
}
// Read the new data
if err := stream.readData(hdr, flags, s.bufRead); err != nil {
if sendErr := s.sendNoWait(s.goAway(goAwayProtoErr)); sendErr != nil {
s.logger.Printf("[WARN] yamux: failed to send go away: %v", sendErr)
}
return err
}
return nil
}
// handlePing is invoked for a typePing frame
func (s *Session) handlePing(hdr header) error {
flags := hdr.Flags()
pingID := hdr.Length()
// Check if this is a query, respond back in a separate context so we
// don't interfere with the receiving thread blocking for the write.
if flags&flagSYN == flagSYN {
go func() {
hdr := header(make([]byte, headerSize))
hdr.encode(typePing, flagACK, 0, pingID)
if err := s.sendNoWait(hdr); err != nil {
s.logger.Printf("[WARN] yamux: failed to send ping reply: %v", err)
}
}()
return nil
}
// Handle a response
s.pingLock.Lock()
ch := s.pings[pingID]
if ch != nil {
delete(s.pings, pingID)
close(ch)
}
s.pingLock.Unlock()
return nil
}
// handleGoAway is invoked for a typeGoAway frame
func (s *Session) handleGoAway(hdr header) error {
code := hdr.Length()
switch code {
case goAwayNormal:
atomic.SwapInt32(&s.remoteGoAway, 1)
case goAwayProtoErr:
s.logger.Printf("[ERR] yamux: received protocol error go away")
return fmt.Errorf("yamux protocol error")
case goAwayInternalErr:
s.logger.Printf("[ERR] yamux: received internal error go away")
return fmt.Errorf("remote yamux internal error")
default:
s.logger.Printf("[ERR] yamux: received unexpected go away")
return fmt.Errorf("unexpected go away received")
}
return nil
}
// incomingStream is used to create a new incoming stream
func (s *Session) incomingStream(id uint32) error {
// Reject immediately if we are doing a go away
if atomic.LoadInt32(&s.localGoAway) == 1 {
hdr := header(make([]byte, headerSize))
hdr.encode(typeWindowUpdate, flagRST, id, 0)
return s.sendNoWait(hdr)
}
// Allocate a new stream
stream := newStream(s, id, streamSYNReceived)
s.streamLock.Lock()
defer s.streamLock.Unlock()
// Check if stream already exists
if _, ok := s.streams[id]; ok {
s.logger.Printf("[ERR] yamux: duplicate stream declared")
if sendErr := s.sendNoWait(s.goAway(goAwayProtoErr)); sendErr != nil {
s.logger.Printf("[WARN] yamux: failed to send go away: %v", sendErr)
}
return ErrDuplicateStream
}
// Register the stream
s.streams[id] = stream
// Check if we've exceeded the backlog
select {
case s.acceptCh <- stream:
return nil
default:
// Backlog exceeded! RST the stream
s.logger.Printf("[WARN] yamux: backlog exceeded, forcing connection reset")
delete(s.streams, id)
stream.sendHdr.encode(typeWindowUpdate, flagRST, id, 0)
return s.sendNoWait(stream.sendHdr)
}
}
// closeStream is used to close a stream once both sides have
// issued a close. If there was an in-flight SYN and the stream
// was not yet established, then this will give the credit back.
func (s *Session) closeStream(id uint32) {
s.streamLock.Lock()
if _, ok := s.inflight[id]; ok {
select {
case <-s.synCh:
default:
s.logger.Printf("[ERR] yamux: SYN tracking out of sync")
}
}
delete(s.streams, id)
s.streamLock.Unlock()
}
// establishStream is used to mark a stream that was in the
// SYN Sent state as established.
func (s *Session) establishStream(id uint32) {
s.streamLock.Lock()
if _, ok := s.inflight[id]; ok {
delete(s.inflight, id)
} else {
s.logger.Printf("[ERR] yamux: established stream without inflight SYN (no tracking entry)")
}
select {
case <-s.synCh:
default:
s.logger.Printf("[ERR] yamux: established stream without inflight SYN (didn't have semaphore)")
}
s.streamLock.Unlock()
}

View File

@ -1,140 +0,0 @@
# Specification
We use this document to detail the internal specification of Yamux.
It serves both as a guide for implementing Yamux and as a reference
for building alternative interoperable libraries.
# Framing
Yamux uses a streaming connection underneath, but imposes a message
framing so that it can be shared between many logical streams. Each
frame contains a header like:
* Version (8 bits)
* Type (8 bits)
* Flags (16 bits)
* StreamID (32 bits)
* Length (32 bits)
This means that each header has a 12 byte overhead.
All fields are encoded in network order (big endian).
Each field is described below:
## Version Field
The version field is used for future backward compatibility. At the
current time, the field is always set to 0, to indicate the initial
version.
## Type Field
The type field indicates the frame's message type. The following
message types are supported:
* 0x0 Data - Used to transmit data. May transmit zero length payloads
depending on the flags.
* 0x1 Window Update - Used to update the sender's receive window size.
This is used to implement per-session flow control.
* 0x2 Ping - Used to measure RTT. It can also be used to heart-beat
and do keep-alives over TCP.
* 0x3 Go Away - Used to close a session.
## Flag Field
The flags field is used to provide additional information related
to the message type. The following flags are supported:
* 0x1 SYN - Signals the start of a new stream. May be sent with a data or
window update message. Also sent with a ping to indicate outbound.
* 0x2 ACK - Acknowledges the start of a new stream. May be sent with a data
or window update message. Also sent with a ping to indicate response.
* 0x4 FIN - Performs a half-close of a stream. May be sent with a data
message or window update.
* 0x8 RST - Reset a stream immediately. May be sent with a data or
window update message.
## StreamID Field
The StreamID field is used to identify the logical stream the frame
is addressing. The client side should use odd IDs, and the server even IDs.
This prevents any collisions. Additionally, the 0 ID is reserved to represent
the session.
Both Ping and Go Away messages should always use the 0 StreamID.
## Length Field
The meaning of the length field depends on the message type:
* Data - provides the length of bytes following the header
* Window update - provides a delta update to the window size
* Ping - Contains an opaque value, echoed back
* Go Away - Contains an error code
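To make the layout concrete, here is a minimal Go sketch that packs a header per this layout (it mirrors the `encode` helper in `const.go`; the frame values are illustrative):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// encodeHeader packs the 12 byte frame header described above:
// version (1 byte), type (1 byte), flags (2 bytes), StreamID (4 bytes)
// and Length (4 bytes), all in network (big endian) order.
func encodeHeader(msgType uint8, flags uint16, streamID, length uint32) []byte {
	hdr := make([]byte, 12)
	hdr[0] = 0 // protocol version, always 0 at present
	hdr[1] = msgType
	binary.BigEndian.PutUint16(hdr[2:4], flags)
	binary.BigEndian.PutUint32(hdr[4:8], streamID)
	binary.BigEndian.PutUint32(hdr[8:12], length)
	return hdr
}

func main() {
	// A Window Update (0x1) carrying the SYN flag (0x1) to open stream 1.
	fmt.Printf("% x\n", encodeHeader(0x1, 0x1, 1, 0))
}
```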
# Message Flow
There is no explicit connection setup, as Yamux relies on an underlying
transport to be provided. However, there is a distinction between client
and server side of the connection.
## Opening a stream
To open a stream, an initial data or window update frame is sent
with a new StreamID. The SYN flag should be set to signal a new stream.
The receiver must then reply with either a data or window update frame
with the StreamID along with the ACK flag to accept the stream or with
the RST flag to reject the stream.
Because we are relying on the reliable stream underneath, a connection
can begin sending data once the SYN flag is sent. The corresponding
ACK does not need to be received. This is particularly well suited
for an RPC system where a client wants to open a stream and immediately
fire a request without waiting for the RTT of the ACK.
This does introduce the possibility of a connection being rejected
after data has been sent already. This is a slight semantic difference
from TCP, where the connection cannot be refused after it is opened.
Clients should be prepared to handle this by checking for an error
that indicates a RST was received.
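For example, a typical exchange to open a stream and send data on it might look like this (stream ID 1 is illustrative; any unused odd ID works for a client):
* Client -> Server: Window Update, Flags=SYN, StreamID=1, Length=0
* Server -> Client: Window Update, Flags=ACK, StreamID=1, Length=0
* Client -> Server: Data, Flags=0, StreamID=1, Length=5, followed by 5 payload bytes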
## Closing a stream
To close a stream, either side sends a data or window update frame
along with the FIN flag. This does a half-close indicating the sender
will send no further data.
Once both sides have closed the connection, the stream is closed.
Alternatively, if an error occurs, the RST flag can be used to
hard close a stream immediately.
## Flow Control
Yamux initially starts each stream with a 256KB window size.
There is no window size for the session.
To prevent the streams from stalling, window update frames should be
sent regularly. Yamux can be configured to provide a larger limit for
window sizes. Both sides assume the initial 256KB window, but can
immediately send a window update as part of the SYN/ACK indicating a
larger window.
Both sides should track the number of bytes sent in Data frames
only, as those are the only bytes counted against the window size.
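To make the update rule concrete, here is a minimal Go sketch of the delta computation performed by `sendWindowUpdate` in `stream.go` (it ignores the flags short-circuit and assumes the default 256KB maximum window):

```go
package main

import "fmt"

func main() {
	const max = uint32(256 * 1024) // default MaxStreamWindowSize

	recvWindow := uint32(56 * 1024) // remaining window: 200KB already consumed
	bufLen := uint32(0)             // bytes still sitting in the receive buffer

	// Grow the window back toward max, minus whatever is still buffered.
	delta := (max - bufLen) - recvWindow // 200KB here
	if delta >= max/2 {
		fmt.Printf("send window update, delta=%d\n", delta)
	} else {
		fmt.Println("delta too small, skip the update")
	}
}
```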
## Session termination
When a session is being terminated, the Go Away message should
be sent. The Length should be set to one of the following to
provide an error code:
* 0x0 Normal termination
* 0x1 Protocol error
* 0x2 Internal error

View File

@ -1,470 +0,0 @@
package yamux
import (
"bytes"
"io"
"sync"
"sync/atomic"
"time"
)
type streamState int
const (
streamInit streamState = iota
streamSYNSent
streamSYNReceived
streamEstablished
streamLocalClose
streamRemoteClose
streamClosed
streamReset
)
// Stream is used to represent a logical stream
// within a session.
type Stream struct {
recvWindow uint32
sendWindow uint32
id uint32
session *Session
state streamState
stateLock sync.Mutex
recvBuf *bytes.Buffer
recvLock sync.Mutex
controlHdr header
controlErr chan error
controlHdrLock sync.Mutex
sendHdr header
sendErr chan error
sendLock sync.Mutex
recvNotifyCh chan struct{}
sendNotifyCh chan struct{}
readDeadline atomic.Value // time.Time
writeDeadline atomic.Value // time.Time
}
// newStream is used to construct a new stream within
// a given session for an ID
func newStream(session *Session, id uint32, state streamState) *Stream {
s := &Stream{
id: id,
session: session,
state: state,
controlHdr: header(make([]byte, headerSize)),
controlErr: make(chan error, 1),
sendHdr: header(make([]byte, headerSize)),
sendErr: make(chan error, 1),
recvWindow: initialStreamWindow,
sendWindow: initialStreamWindow,
recvNotifyCh: make(chan struct{}, 1),
sendNotifyCh: make(chan struct{}, 1),
}
s.readDeadline.Store(time.Time{})
s.writeDeadline.Store(time.Time{})
return s
}
// Session returns the associated stream session
func (s *Stream) Session() *Session {
return s.session
}
// StreamID returns the ID of this stream
func (s *Stream) StreamID() uint32 {
return s.id
}
// Read is used to read from the stream
func (s *Stream) Read(b []byte) (n int, err error) {
defer asyncNotify(s.recvNotifyCh)
START:
s.stateLock.Lock()
switch s.state {
case streamLocalClose:
fallthrough
case streamRemoteClose:
fallthrough
case streamClosed:
s.recvLock.Lock()
if s.recvBuf == nil || s.recvBuf.Len() == 0 {
s.recvLock.Unlock()
s.stateLock.Unlock()
return 0, io.EOF
}
s.recvLock.Unlock()
case streamReset:
s.stateLock.Unlock()
return 0, ErrConnectionReset
}
s.stateLock.Unlock()
// If there is no data available, block
s.recvLock.Lock()
if s.recvBuf == nil || s.recvBuf.Len() == 0 {
s.recvLock.Unlock()
goto WAIT
}
// Read any bytes
n, _ = s.recvBuf.Read(b)
s.recvLock.Unlock()
// Send a window update potentially
err = s.sendWindowUpdate()
return n, err
WAIT:
var timeout <-chan time.Time
var timer *time.Timer
readDeadline := s.readDeadline.Load().(time.Time)
if !readDeadline.IsZero() {
delay := readDeadline.Sub(time.Now())
timer = time.NewTimer(delay)
timeout = timer.C
}
select {
case <-s.recvNotifyCh:
if timer != nil {
timer.Stop()
}
goto START
case <-timeout:
return 0, ErrTimeout
}
}
// Write is used to write to the stream
func (s *Stream) Write(b []byte) (n int, err error) {
s.sendLock.Lock()
defer s.sendLock.Unlock()
total := 0
for total < len(b) {
n, err := s.write(b[total:])
total += n
if err != nil {
return total, err
}
}
return total, nil
}
// write is used to write to the stream, may return on
// a short write.
func (s *Stream) write(b []byte) (n int, err error) {
var flags uint16
var max uint32
var body io.Reader
START:
s.stateLock.Lock()
switch s.state {
case streamLocalClose:
fallthrough
case streamClosed:
s.stateLock.Unlock()
return 0, ErrStreamClosed
case streamReset:
s.stateLock.Unlock()
return 0, ErrConnectionReset
}
s.stateLock.Unlock()
// If there is no data available, block
window := atomic.LoadUint32(&s.sendWindow)
if window == 0 {
goto WAIT
}
// Determine the flags if any
flags = s.sendFlags()
// Send up to our send window
max = min(window, uint32(len(b)))
body = bytes.NewReader(b[:max])
// Send the header
s.sendHdr.encode(typeData, flags, s.id, max)
if err = s.session.waitForSendErr(s.sendHdr, body, s.sendErr); err != nil {
return 0, err
}
// Reduce our send window
atomic.AddUint32(&s.sendWindow, ^uint32(max-1))
// Unlock
return int(max), err
WAIT:
var timeout <-chan time.Time
writeDeadline := s.writeDeadline.Load().(time.Time)
if !writeDeadline.IsZero() {
delay := writeDeadline.Sub(time.Now())
timeout = time.After(delay)
}
select {
case <-s.sendNotifyCh:
goto START
case <-timeout:
return 0, ErrTimeout
}
return 0, nil
}
// sendFlags determines any flags that are appropriate
// based on the current stream state
func (s *Stream) sendFlags() uint16 {
s.stateLock.Lock()
defer s.stateLock.Unlock()
var flags uint16
switch s.state {
case streamInit:
flags |= flagSYN
s.state = streamSYNSent
case streamSYNReceived:
flags |= flagACK
s.state = streamEstablished
}
return flags
}
// sendWindowUpdate potentially sends a window update enabling
// further writes to take place. Must be invoked with the lock.
func (s *Stream) sendWindowUpdate() error {
s.controlHdrLock.Lock()
defer s.controlHdrLock.Unlock()
// Determine the delta update
max := s.session.config.MaxStreamWindowSize
var bufLen uint32
s.recvLock.Lock()
if s.recvBuf != nil {
bufLen = uint32(s.recvBuf.Len())
}
delta := (max - bufLen) - s.recvWindow
// Determine the flags if any
flags := s.sendFlags()
// Check if we can omit the update
if delta < (max/2) && flags == 0 {
s.recvLock.Unlock()
return nil
}
// Update our window
s.recvWindow += delta
s.recvLock.Unlock()
// Send the header
s.controlHdr.encode(typeWindowUpdate, flags, s.id, delta)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
return err
}
return nil
}
// sendClose is used to send a FIN
func (s *Stream) sendClose() error {
s.controlHdrLock.Lock()
defer s.controlHdrLock.Unlock()
flags := s.sendFlags()
flags |= flagFIN
s.controlHdr.encode(typeWindowUpdate, flags, s.id, 0)
if err := s.session.waitForSendErr(s.controlHdr, nil, s.controlErr); err != nil {
return err
}
return nil
}
// Close is used to close the stream
func (s *Stream) Close() error {
closeStream := false
s.stateLock.Lock()
switch s.state {
// Opened means we need to signal a close
case streamSYNSent:
fallthrough
case streamSYNReceived:
fallthrough
case streamEstablished:
s.state = streamLocalClose
goto SEND_CLOSE
case streamLocalClose:
case streamRemoteClose:
s.state = streamClosed
closeStream = true
goto SEND_CLOSE
case streamClosed:
case streamReset:
default:
panic("unhandled state")
}
s.stateLock.Unlock()
return nil
SEND_CLOSE:
s.stateLock.Unlock()
s.sendClose()
s.notifyWaiting()
if closeStream {
s.session.closeStream(s.id)
}
return nil
}
// forceClose is used for when the session is exiting
func (s *Stream) forceClose() {
s.stateLock.Lock()
s.state = streamClosed
s.stateLock.Unlock()
s.notifyWaiting()
}
// processFlags is used to update the state of the stream
// based on set flags, if any. Lock must be held
func (s *Stream) processFlags(flags uint16) error {
// Close the stream without holding the state lock
closeStream := false
defer func() {
if closeStream {
s.session.closeStream(s.id)
}
}()
s.stateLock.Lock()
defer s.stateLock.Unlock()
if flags&flagACK == flagACK {
if s.state == streamSYNSent {
s.state = streamEstablished
}
s.session.establishStream(s.id)
}
if flags&flagFIN == flagFIN {
switch s.state {
case streamSYNSent:
fallthrough
case streamSYNReceived:
fallthrough
case streamEstablished:
s.state = streamRemoteClose
s.notifyWaiting()
case streamLocalClose:
s.state = streamClosed
closeStream = true
s.notifyWaiting()
default:
s.session.logger.Printf("[ERR] yamux: unexpected FIN flag in state %d", s.state)
return ErrUnexpectedFlag
}
}
if flags&flagRST == flagRST {
s.state = streamReset
closeStream = true
s.notifyWaiting()
}
return nil
}
// notifyWaiting notifies all the waiting channels
func (s *Stream) notifyWaiting() {
asyncNotify(s.recvNotifyCh)
asyncNotify(s.sendNotifyCh)
}
// incrSendWindow updates the size of our send window
func (s *Stream) incrSendWindow(hdr header, flags uint16) error {
if err := s.processFlags(flags); err != nil {
return err
}
// Increase window, unblock a sender
atomic.AddUint32(&s.sendWindow, hdr.Length())
asyncNotify(s.sendNotifyCh)
return nil
}
// readData is used to handle a data frame
func (s *Stream) readData(hdr header, flags uint16, conn io.Reader) error {
if err := s.processFlags(flags); err != nil {
return err
}
// Check that our recv window is not exceeded
length := hdr.Length()
if length == 0 {
return nil
}
// Wrap in a limited reader
conn = &io.LimitedReader{R: conn, N: int64(length)}
// Copy into buffer
s.recvLock.Lock()
if length > s.recvWindow {
s.session.logger.Printf("[ERR] yamux: receive window exceeded (stream: %d, remain: %d, recv: %d)", s.id, s.recvWindow, length)
return ErrRecvWindowExceeded
}
if s.recvBuf == nil {
// Allocate the receive buffer just-in-time to fit the full data frame.
// This way we can read in the whole packet without further allocations.
s.recvBuf = bytes.NewBuffer(make([]byte, 0, length))
}
if _, err := io.Copy(s.recvBuf, conn); err != nil {
s.session.logger.Printf("[ERR] yamux: Failed to read stream data: %v", err)
s.recvLock.Unlock()
return err
}
// Decrement the receive window
s.recvWindow -= length
s.recvLock.Unlock()
// Unblock any readers
asyncNotify(s.recvNotifyCh)
return nil
}
// SetDeadline sets the read and write deadlines
func (s *Stream) SetDeadline(t time.Time) error {
if err := s.SetReadDeadline(t); err != nil {
return err
}
if err := s.SetWriteDeadline(t); err != nil {
return err
}
return nil
}
// SetReadDeadline sets the deadline for future Read calls.
func (s *Stream) SetReadDeadline(t time.Time) error {
s.readDeadline.Store(t)
return nil
}
// SetWriteDeadline sets the deadline for future Write calls
func (s *Stream) SetWriteDeadline(t time.Time) error {
s.writeDeadline.Store(t)
return nil
}
// Shrink is used to compact the amount of buffers utilized
// This is useful when using Yamux in a connection pool to reduce
// the idle memory utilization.
func (s *Stream) Shrink() {
s.recvLock.Lock()
if s.recvBuf != nil && s.recvBuf.Len() == 0 {
s.recvBuf = nil
}
s.recvLock.Unlock()
}

View File

@ -1,43 +0,0 @@
package yamux
import (
"sync"
"time"
)
var (
timerPool = &sync.Pool{
New: func() interface{} {
timer := time.NewTimer(time.Hour * 1e6)
timer.Stop()
return timer
},
}
)
// asyncSendErr is used to try an async send of an error
func asyncSendErr(ch chan error, err error) {
if ch == nil {
return
}
select {
case ch <- err:
default:
}
}
// asyncNotify is used to signal a waiting goroutine
func asyncNotify(ch chan struct{}) {
select {
case ch <- struct{}{}:
default:
}
}
// min computes the minimum of two values
func min(a, b uint32) uint32 {
if a < b {
return a
}
return b
}

View File

@ -123,6 +123,9 @@ const (
// VfioCCW is the vfio driver with CCW transport.
VfioCCW DeviceDriver = "vfio-ccw"
// VfioAP is the vfio driver with AP transport.
VfioAP DeviceDriver = "vfio-ap"
// VHostVSockPCI is a generic Vsock vhost device with PCI transport.
VHostVSockPCI DeviceDriver = "vhost-vsock-pci"
@ -288,6 +291,23 @@ func (object Object) QemuParams(config *Config) []string {
return qemuParams
}
// Virtio9PMultidev describes the filesystem behaviour used to deal
// with multiple devices being shared with a 9p export.
type Virtio9PMultidev string
const (
// Remap shares multiple devices with only one export.
Remap Virtio9PMultidev = "remap"
// Warn assumes that only one device is shared by the same export.
// Only a warning message is logged (once) by qemu on host side.
// This is the default behaviour.
Warn Virtio9PMultidev = "warn"
// Forbid is like "warn" but also denies access to additional devices on the guest.
Forbid Virtio9PMultidev = "forbid"
)
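// Illustrative example (a sketch, not part of this change): setting
// Multidev to Remap on an FSDevice makes QemuParams emit
// ",multidevs=remap" on the -fsdev argument, e.g.
//   -fsdev local,id=fs0,path=/shared,security_model=none,multidevs=remap
// (the id and path above are hypothetical), so several host devices can
// be shared through a single 9p export.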
// FSDriver represents a qemu filesystem driver.
type FSDriver string
@ -350,6 +370,10 @@ type FSDevice struct {
// Transport is the virtio transport for this device.
Transport VirtioTransport
// Multidev is the filesystem behaviour to deal
// with multiple devices being shared with a 9p export
Multidev Virtio9PMultidev
}
// Virtio9PTransport is a map of the virtio-9p device name that corresponds
@ -385,6 +409,9 @@ func (fsdev FSDevice) QemuParams(config *Config) []string {
deviceParams = append(deviceParams, fmt.Sprintf(",romfile=%s", fsdev.ROMFile))
}
if fsdev.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf(",devno=%s", fsdev.DevNo))
}
@ -393,6 +420,10 @@ func (fsdev FSDevice) QemuParams(config *Config) []string {
fsParams = append(fsParams, fmt.Sprintf(",path=%s", fsdev.Path))
fsParams = append(fsParams, fmt.Sprintf(",security_model=%s", fsdev.SecurityModel))
if fsdev.Multidev != "" {
fsParams = append(fsParams, fmt.Sprintf(",multidevs=%s", fsdev.Multidev))
}
qemuParams = append(qemuParams, "-device")
qemuParams = append(qemuParams, strings.Join(deviceParams, ""))
@ -512,6 +543,9 @@ func (cdev CharDevice) QemuParams(config *Config) []string {
}
if cdev.Driver == VirtioSerial && cdev.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf(",devno=%s", cdev.DevNo))
}
@ -779,6 +813,9 @@ func (netdev NetDevice) QemuDeviceParams(config *Config) []string {
}
if netdev.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf(",devno=%s", netdev.DevNo))
}
@ -912,6 +949,9 @@ func (dev SerialDevice) QemuParams(config *Config) []string {
}
if dev.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf(",devno=%s", dev.DevNo))
}
@ -1503,6 +1543,9 @@ func (scsiCon SCSIController) QemuParams(config *Config) []string {
}
if scsiCon.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
devParams = append(devParams, ",iommu_platform=on")
}
devParams = append(devParams, fmt.Sprintf("devno=%s", scsiCon.DevNo))
}
@ -1685,6 +1728,9 @@ func (vsock VSOCKDevice) QemuParams(config *Config) []string {
}
if vsock.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf(",devno=%s", vsock.DevNo))
}
@ -1755,6 +1801,9 @@ func (v RngDevice) QemuParams(config *Config) []string {
}
if v.Transport.isVirtioCCW(config) {
if config.Knobs.IOMMUPlatform {
deviceParams = append(deviceParams, ",iommu_platform=on")
}
deviceParams = append(deviceParams, fmt.Sprintf("devno=%s", v.DevNo))
}
@ -2097,6 +2146,12 @@ type Knobs struct {
// Realtime will enable realtime QEMU
Realtime bool
// Exit instead of rebooting
NoReboot bool
// IOMMUPlatform will enable IOMMU for supported devices
IOMMUPlatform bool
}
// IOThread allows IO to be performed on a separate thread.
@ -2432,6 +2487,10 @@ func (config *Config) appendKnobs() {
config.qemuParams = append(config.qemuParams, "-nographic")
}
if config.Knobs.NoReboot {
config.qemuParams = append(config.qemuParams, "--no-reboot")
}
if config.Knobs.Daemonize {
config.qemuParams = append(config.qemuParams, "-daemonize")
}
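A small sketch of the two new knobs together (illustrative only; appendKnobs is unexported and is normally invoked while the package assembles qemuParams):

```Go
config := &Config{
	Knobs: Knobs{
		NoReboot:      true, // QEMU exits instead of rebooting the guest
		IOMMUPlatform: true, // emits iommu_platform=on for supported virtio devices
	},
}
config.appendKnobs()
// config.qemuParams now contains "--no-reboot".
```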

View File

@ -281,7 +281,7 @@ func (q *QMP) readLoop(fromVMCh chan<- []byte) {
fromVMCh <- sendLine
}
q.cfg.Logger.Infof("sanner return error: %v", scanner.Err())
q.cfg.Logger.Infof("scanner return error: %v", scanner.Err())
close(fromVMCh)
}
@ -1217,6 +1217,15 @@ func (q *QMP) ExecutePCIVFIOMediatedDeviceAdd(ctx context.Context, devID, sysfsd
return q.executeCommand(ctx, "device_add", args, nil)
}
// ExecuteAPVFIOMediatedDeviceAdd adds a VFIO mediated AP device to a QEMU instance using the device_add command.
func (q *QMP) ExecuteAPVFIOMediatedDeviceAdd(ctx context.Context, sysfsdev string) error {
args := map[string]interface{}{
"driver": VfioAP,
"sysfsdev": sysfsdev,
}
return q.executeCommand(ctx, "device_add", args, nil)
}
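A hedged usage sketch of the new helper; the mdev UUID path below is hypothetical:

```Go
sysfsdev := "/sys/devices/vfio_ap/matrix/11111111-2222-3333-4444-555555555555"
if err := q.ExecuteAPVFIOMediatedDeviceAdd(ctx, sysfsdev); err != nil {
	return err
}
// The QMP message sent is approximately:
//   {"execute":"device_add","arguments":{"driver":"vfio-ap","sysfsdev":"<path>"}}
```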
// isSocketIDSupported returns whether the cpu driver supports the socket id option
func isSocketIDSupported(driver string) bool {
if driver == "host-s390x-cpu" || driver == "host-powerpc64-cpu" {

View File

@ -158,7 +158,6 @@ github.com/docker/docker/pkg/stringid
## explicit
github.com/docker/go-events
# github.com/docker/go-units v0.3.3
## explicit
github.com/docker/go-units
# github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8
github.com/globalsign/mgo/bson
@ -223,10 +222,7 @@ github.com/hashicorp/errwrap
# github.com/hashicorp/go-multierror v1.0.0
## explicit
github.com/hashicorp/go-multierror
# github.com/hashicorp/yamux v0.0.0-20190923154419-df201c70410d
## explicit
github.com/hashicorp/yamux
# github.com/intel/govmm v0.0.0-20200602145448-7cc469641b7b
# github.com/intel/govmm v0.0.0-20200825065022-6042f6033126
## explicit
github.com/intel/govmm/qemu
# github.com/konsorten/go-windows-terminal-sequences v1.0.1
@ -241,7 +237,6 @@ github.com/matttproud/golang_protobuf_extensions/pbutil
## explicit
github.com/mdlayher/vsock
# github.com/mitchellh/mapstructure v1.1.2
## explicit
github.com/mitchellh/mapstructure
# github.com/opencontainers/go-digest v1.0.0-rc1
github.com/opencontainers/go-digest

View File

@ -10,8 +10,6 @@ VC_BIN_DIR := $(BIN_DIR)/virtcontainers/bin
TEST_BIN_DIR := $(VC_BIN_DIR)/test
HOOK_DIR := hook/mock
HOOK_BIN := hook
KATA_SHIM_DIR := shim/mock/kata-shim
KATA_SHIM_BIN := kata-shim
MK_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
GOBUILD_FLAGS := -mod=vendor
@ -35,10 +33,7 @@ build:
hook:
$(QUIET_GOBUILD)go build $(GOBUILD_FLAGS) -o $(HOOK_DIR)/$@ $(HOOK_DIR)/*.go
kata-shim:
$(QUIET_GOBUILD)go build $(GOBUILD_FLAGS) -o $(KATA_SHIM_DIR)/$@ $(KATA_SHIM_DIR)/*.go
binaries: hook kata-shim
binaries: hook
#
# Tests
@ -51,7 +46,6 @@ check-go-static:
check-go-test:
bash $(MK_DIR)/../.ci/go-test.sh \
$(TEST_BIN_DIR)/$(KATA_SHIM_BIN) \
$(TEST_BIN_DIR)/$(HOOK_BIN)
#
@ -70,7 +64,6 @@ install:
@mkdir -p $(VC_BIN_DIR)
@mkdir -p $(TEST_BIN_DIR)
$(call INSTALL_TEST_EXEC,$(HOOK_DIR)/$(HOOK_BIN))
$(call INSTALL_TEST_EXEC,$(KATA_SHIM_DIR)/$(KATA_SHIM_BIN))
#
# Uninstall
@ -86,7 +79,6 @@ endef
uninstall:
$(call UNINSTALL_TEST_EXEC,$(HOOK_BIN))
$(call UNINSTALL_TEST_EXEC,$(KATA_SHIM_BIN))
#
# Clean
@ -99,7 +91,6 @@ $(shell test -e "$(1)" && test "$(1)" != "/" && echo "$(1)")
endef
CLEAN_FILES += $(HOOK_DIR)/$(HOOK_BIN)
CLEAN_FILES += $(SHIM_DIR)/$(KATA_SHIM_BIN)
clean:
rm -f $(foreach f,$(CLEAN_FILES),$(call FILE_SAFE_TO_REMOVE,$(f)))
@ -108,7 +99,6 @@ clean:
all \
build \
hook \
shim \
binaries \
check \
check-go-static \

View File

@ -7,19 +7,13 @@ package virtcontainers
import (
"context"
"os"
"runtime"
"syscall"
deviceApi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api"
deviceConfig "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist"
pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci"
vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/types"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
specs "github.com/opencontainers/runtime-spec/specs-go"
opentracing "github.com/opentracing/opentracing-go"
"github.com/sirupsen/logrus"
)
@ -138,766 +132,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
return s, nil
}
// DeleteSandbox is the virtcontainers sandbox deletion entry point.
// DeleteSandbox will stop an already running container and then delete it.
func DeleteSandbox(ctx context.Context, sandboxID string) (VCSandbox, error) {
span, ctx := trace(ctx, "DeleteSandbox")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
// Fetch the sandbox from storage and create it.
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
// Delete it.
if err := s.Delete(); err != nil {
return nil, err
}
return s, nil
}
// FetchSandbox is the virtcontainers sandbox fetching entry point.
// FetchSandbox will locate and connect to an existing sandbox and
// return the sandbox structure. The caller is responsible for calling
// VCSandbox.Release() after done with it.
func FetchSandbox(ctx context.Context, sandboxID string) (VCSandbox, error) {
span, ctx := trace(ctx, "FetchSandbox")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
// Fetch the sandbox from storage and create it.
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s, nil
}
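A minimal sketch of the fetch/release contract described above (assuming the package is imported as vc):

```Go
sandbox, err := vc.FetchSandbox(ctx, sandboxID)
if err != nil {
	return err
}
// The caller owns the reference and must release it when done.
defer sandbox.Release()
```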
// StartSandbox is the virtcontainers sandbox starting entry point.
// StartSandbox will talk to the given hypervisor to start an existing
// sandbox and all its containers.
// It returns the sandbox ID.
func StartSandbox(ctx context.Context, sandboxID string) (VCSandbox, error) {
span, ctx := trace(ctx, "StartSandbox")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
// Fetch the sandbox from storage and create it.
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
// Start it
err = s.Start()
if err != nil {
return nil, err
}
if err = s.storeSandbox(); err != nil {
return nil, err
}
return s, nil
}
// StopSandbox is the virtcontainers sandbox stopping entry point.
// StopSandbox will talk to the given agent to stop an existing sandbox and destroy all containers within that sandbox.
func StopSandbox(ctx context.Context, sandboxID string, force bool) (VCSandbox, error) {
span, ctx := trace(ctx, "StopSandbox")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandbox
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
// Fetch the sandbox from storage and create it.
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
// Stop it.
err = s.Stop(force)
if err != nil {
return nil, err
}
if err = s.storeSandbox(); err != nil {
return nil, err
}
return s, nil
}
// RunSandbox is the virtcontainers sandbox running entry point.
// RunSandbox creates a sandbox and its containers, then starts them.
func RunSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (VCSandbox, error) {
span, ctx := trace(ctx, "RunSandbox")
defer span.Finish()
// Create the sandbox
s, err := createSandboxFromConfig(ctx, sandboxConfig, factory)
if err != nil {
return nil, err
}
unlock, err := rwLockSandbox(s.id)
if err != nil {
return nil, err
}
defer unlock()
// Start the sandbox
err = s.Start()
if err != nil {
return nil, err
}
return s, nil
}
// ListSandbox is the virtcontainers sandbox listing entry point.
func ListSandbox(ctx context.Context) ([]SandboxStatus, error) {
span, ctx := trace(ctx, "ListSandbox")
defer span.Finish()
store, err := persist.GetDriver()
if err != nil {
return []SandboxStatus{}, err
}
dir, err := os.Open(store.RunStoragePath())
if err != nil {
if os.IsNotExist(err) {
// A missing sandbox directory is not an error
return []SandboxStatus{}, nil
}
return []SandboxStatus{}, err
}
defer dir.Close()
sandboxesID, err := dir.Readdirnames(0)
if err != nil {
return []SandboxStatus{}, err
}
var sandboxStatusList []SandboxStatus
for _, sandboxID := range sandboxesID {
sandboxStatus, err := StatusSandbox(ctx, sandboxID)
if err != nil {
continue
}
sandboxStatusList = append(sandboxStatusList, sandboxStatus)
}
return sandboxStatusList, nil
}
// StatusSandbox is the virtcontainers sandbox status entry point.
func StatusSandbox(ctx context.Context, sandboxID string) (SandboxStatus, error) {
span, ctx := trace(ctx, "StatusSandbox")
defer span.Finish()
if sandboxID == "" {
return SandboxStatus{}, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return SandboxStatus{}, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return SandboxStatus{}, err
}
var contStatusList []ContainerStatus
for _, container := range s.containers {
contStatus, err := statusContainer(s, container.id)
if err != nil {
return SandboxStatus{}, err
}
contStatusList = append(contStatusList, contStatus)
}
sandboxStatus := SandboxStatus{
ID: s.id,
State: s.state,
Hypervisor: s.config.HypervisorType,
HypervisorConfig: s.config.HypervisorConfig,
ContainersStatus: contStatusList,
Annotations: s.config.Annotations,
}
return sandboxStatus, nil
}
// CreateContainer is the virtcontainers container creation entry point.
// CreateContainer creates a container on a given sandbox.
func CreateContainer(ctx context.Context, sandboxID string, containerConfig ContainerConfig) (VCSandbox, VCContainer, error) {
span, ctx := trace(ctx, "CreateContainer")
defer span.Finish()
if sandboxID == "" {
return nil, nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, nil, err
}
c, err := s.CreateContainer(containerConfig)
if err != nil {
return nil, nil, err
}
if err = s.storeSandbox(); err != nil {
return nil, nil, err
}
return s, c, nil
}
// DeleteContainer is the virtcontainers container deletion entry point.
// DeleteContainer deletes a Container from a Sandbox. If the container is running,
// it needs to be stopped first.
func DeleteContainer(ctx context.Context, sandboxID, containerID string) (VCContainer, error) {
span, ctx := trace(ctx, "DeleteContainer")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return nil, vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.DeleteContainer(containerID)
}
// StartContainer is the virtcontainers container starting entry point.
// StartContainer starts an already created container.
func StartContainer(ctx context.Context, sandboxID, containerID string) (VCContainer, error) {
span, ctx := trace(ctx, "StartContainer")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return nil, vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.StartContainer(containerID)
}
// StopContainer is the virtcontainers container stopping entry point.
// StopContainer stops an already running container.
func StopContainer(ctx context.Context, sandboxID, containerID string) (VCContainer, error) {
span, ctx := trace(ctx, "StopContainer")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return nil, vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.StopContainer(containerID, false)
}
// EnterContainer is the virtcontainers container command execution entry point.
// EnterContainer enters an already running container and runs a given command.
func EnterContainer(ctx context.Context, sandboxID, containerID string, cmd types.Cmd) (VCSandbox, VCContainer, *Process, error) {
span, ctx := trace(ctx, "EnterContainer")
defer span.Finish()
if sandboxID == "" {
return nil, nil, nil, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return nil, nil, nil, vcTypes.ErrNeedContainerID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return nil, nil, nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, nil, nil, err
}
c, process, err := s.EnterContainer(containerID, cmd)
if err != nil {
return nil, nil, nil, err
}
return s, c, process, nil
}
// StatusContainer is the virtcontainers container status entry point.
// StatusContainer returns a detailed container status.
func StatusContainer(ctx context.Context, sandboxID, containerID string) (ContainerStatus, error) {
span, ctx := trace(ctx, "StatusContainer")
defer span.Finish()
if sandboxID == "" {
return ContainerStatus{}, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return ContainerStatus{}, vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return ContainerStatus{}, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return ContainerStatus{}, err
}
return statusContainer(s, containerID)
}
func statusContainer(sandbox *Sandbox, containerID string) (ContainerStatus, error) {
if container, ok := sandbox.containers[containerID]; ok {
return ContainerStatus{
ID: container.id,
State: container.state,
PID: container.process.Pid,
StartTime: container.process.StartTime,
RootFs: container.config.RootFs.Target,
Spec: container.GetPatchedOCISpec(),
Annotations: container.config.Annotations,
}, nil
}
// No matching containers in the sandbox
return ContainerStatus{}, nil
}
// KillContainer is the virtcontainers entry point to send a signal
// to a container running inside a sandbox. If all is true, all processes in
// the container will be sent the signal.
func KillContainer(ctx context.Context, sandboxID, containerID string, signal syscall.Signal, all bool) error {
span, ctx := trace(ctx, "KillContainer")
defer span.Finish()
if sandboxID == "" {
return vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return err
}
return s.KillContainer(containerID, signal, all)
}
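For example, a hedged call that signals every process in a container (assuming the package is imported as vc):

```Go
if err := vc.KillContainer(ctx, sandboxID, containerID, syscall.SIGTERM, true); err != nil {
	return err
}
```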
// ProcessListContainer is the virtcontainers entry point to list
// processes running inside a container
func ProcessListContainer(ctx context.Context, sandboxID, containerID string, options ProcessListOptions) (ProcessList, error) {
span, ctx := trace(ctx, "ProcessListContainer")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return nil, vcTypes.ErrNeedContainerID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.ProcessListContainer(containerID, options)
}
// UpdateContainer is the virtcontainers entry point to update
// a container's resources.
func UpdateContainer(ctx context.Context, sandboxID, containerID string, resources specs.LinuxResources) error {
span, ctx := trace(ctx, "UpdateContainer")
defer span.Finish()
if sandboxID == "" {
return vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return err
}
return s.UpdateContainer(containerID, resources)
}
// StatsContainer is the virtcontainers container stats entry point.
// StatsContainer returns detailed stats for a container.
func StatsContainer(ctx context.Context, sandboxID, containerID string) (ContainerStats, error) {
span, ctx := trace(ctx, "StatsContainer")
defer span.Finish()
if sandboxID == "" {
return ContainerStats{}, vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return ContainerStats{}, vcTypes.ErrNeedContainerID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return ContainerStats{}, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return ContainerStats{}, err
}
return s.StatsContainer(containerID)
}
// StatsSandbox is the virtcontainers sandbox stats entry point.
// StatsSandbox returns a detailed sandbox stats.
func StatsSandbox(ctx context.Context, sandboxID string) (SandboxStats, []ContainerStats, error) {
span, ctx := trace(ctx, "StatsSandbox")
defer span.Finish()
if sandboxID == "" {
return SandboxStats{}, []ContainerStats{}, vcTypes.ErrNeedSandboxID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return SandboxStats{}, []ContainerStats{}, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return SandboxStats{}, []ContainerStats{}, err
}
sandboxStats, err := s.Stats()
if err != nil {
return SandboxStats{}, []ContainerStats{}, err
}
containerStats := []ContainerStats{}
for _, c := range s.containers {
cstats, err := s.StatsContainer(c.id)
if err != nil {
return SandboxStats{}, []ContainerStats{}, err
}
containerStats = append(containerStats, cstats)
}
return sandboxStats, containerStats, nil
}
func togglePauseContainer(ctx context.Context, sandboxID, containerID string, pause bool) error {
if sandboxID == "" {
return vcTypes.ErrNeedSandboxID
}
if containerID == "" {
return vcTypes.ErrNeedContainerID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return err
}
if pause {
return s.PauseContainer(containerID)
}
return s.ResumeContainer(containerID)
}
// PauseContainer is the virtcontainers container pause entry point.
func PauseContainer(ctx context.Context, sandboxID, containerID string) error {
span, ctx := trace(ctx, "PauseContainer")
defer span.Finish()
return togglePauseContainer(ctx, sandboxID, containerID, true)
}
// ResumeContainer is the virtcontainers container resume entry point.
func ResumeContainer(ctx context.Context, sandboxID, containerID string) error {
span, ctx := trace(ctx, "ResumeContainer")
defer span.Finish()
return togglePauseContainer(ctx, sandboxID, containerID, false)
}
// AddDevice will add a device to a sandbox
func AddDevice(ctx context.Context, sandboxID string, info deviceConfig.DeviceInfo) (deviceApi.Device, error) {
span, ctx := trace(ctx, "AddDevice")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.AddDevice(info)
}
func toggleInterface(ctx context.Context, sandboxID string, inf *pbTypes.Interface, add bool) (*pbTypes.Interface, error) {
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
if add {
return s.AddInterface(inf)
}
return s.RemoveInterface(inf)
}
// AddInterface is the virtcontainers add interface entry point.
func AddInterface(ctx context.Context, sandboxID string, inf *pbTypes.Interface) (*pbTypes.Interface, error) {
span, ctx := trace(ctx, "AddInterface")
defer span.Finish()
return toggleInterface(ctx, sandboxID, inf, true)
}
// RemoveInterface is the virtcontainers remove interface entry point.
func RemoveInterface(ctx context.Context, sandboxID string, inf *pbTypes.Interface) (*pbTypes.Interface, error) {
span, ctx := trace(ctx, "RemoveInterface")
defer span.Finish()
return toggleInterface(ctx, sandboxID, inf, false)
}
// ListInterfaces is the virtcontainers list interfaces entry point.
func ListInterfaces(ctx context.Context, sandboxID string) ([]*pbTypes.Interface, error) {
span, ctx := trace(ctx, "ListInterfaces")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.ListInterfaces()
}
// UpdateRoutes is the virtcontainers update routes entry point.
func UpdateRoutes(ctx context.Context, sandboxID string, routes []*pbTypes.Route) ([]*pbTypes.Route, error) {
span, ctx := trace(ctx, "UpdateRoutes")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rwLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.UpdateRoutes(routes)
}
// ListRoutes is the virtcontainers list routes entry point.
func ListRoutes(ctx context.Context, sandboxID string) ([]*pbTypes.Route, error) {
span, ctx := trace(ctx, "ListRoutes")
defer span.Finish()
if sandboxID == "" {
return nil, vcTypes.ErrNeedSandboxID
}
unlock, err := rLockSandbox(sandboxID)
if err != nil {
return nil, err
}
defer unlock()
s, err := fetchSandbox(ctx, sandboxID)
if err != nil {
return nil, err
}
return s.ListRoutes()
}
// CleanupContaienr is used by shimv2 to stop and delete a container exclusively, once there is no container
// CleanupContainer is used by shimv2 to stop and delete a container exclusively, once there is no container
// left in the sandbox, stop the sandbox and delete it. These serial operations are done exclusively by
// locking the sandbox.
func CleanupContainer(ctx context.Context, sandboxID, containerID string, force bool) error {

View File

@ -85,9 +85,11 @@ type clhClient interface {
// Add/remove CPUs to/from the VM
VmResizePut(ctx context.Context, vmResize chclient.VmResize) (*http.Response, error)
// Add VFIO PCI device to the VM
VmAddDevicePut(ctx context.Context, vmAddDevice chclient.VmAddDevice) (*http.Response, error)
VmAddDevicePut(ctx context.Context, vmAddDevice chclient.VmAddDevice) (chclient.PciDeviceInfo, *http.Response, error)
// Add a new disk device to the VM
VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (*http.Response, error)
VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error)
// Remove a device from the VM
VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error)
}
type CloudHypervisorVersion struct {
@ -221,7 +223,6 @@ func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networ
// Set the initial memory size of the virtual machine
// Convert to int64; the OpenAPI client only supports int64
clh.vmconfig.Memory.Size = int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())
clh.vmconfig.Memory.File = "/dev/shm"
// shared memory should be enabled if using vhost-user (kata uses virtiofsd)
clh.vmconfig.Memory.Shared = true
hostMemKb, err := getHostMemorySizeKb(procMemInfo)
@ -297,6 +298,12 @@ func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networ
Mode: cctOFF,
}
clh.vmconfig.Cpus.Topology = chclient.CpuTopology{
ThreadsPerCore: 1,
CoresPerDie: int32(clh.config.DefaultMaxVCPUs),
DiesPerPackage: 1,
Packages: 1,
}
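// The guest's maximum vCPU count is the product of the four fields:
// threads × cores × dies × packages. For example, DefaultMaxVCPUs = 4
// yields 1 × 4 × 1 × 1 = 4 vCPUs.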
// Overwrite the default value of HTTP API socket path for cloud hypervisor
apiSocketPath, err := clh.apiSocketPath(id)
if err != nil {
@ -403,7 +410,11 @@ func (clh *cloudHypervisor) getThreadIDs() (vcpuThreadIDs, error) {
return vcpuInfo, nil
}
func (clh *cloudHypervisor) hotplugBlockDevice(drive *config.BlockDrive) error {
func clhDriveIndexToID(i int) string {
return "clh_drive_" + strconv.Itoa(i)
}
func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) error {
if clh.config.BlockDeviceDriver != config.VirtioBlock {
return fmt.Errorf("incorrect hypervisor configuration on 'block_device_driver':"+
" using '%v' but only support '%v'", clh.config.BlockDeviceDriver, config.VirtioBlock)
@ -418,6 +429,8 @@ func (clh *cloudHypervisor) hotplugBlockDevice(drive *config.BlockDrive) error {
return openAPIClientError(err)
}
driveID := clhDriveIndexToID(drive.Index)
// Explicitly clear PCIAddr so that VirtPath is used instead
drive.PCIAddr = ""
@ -428,8 +441,9 @@ func (clh *cloudHypervisor) hotplugBlockDevice(drive *config.BlockDrive) error {
Path: drive.File,
Readonly: drive.ReadOnly,
VhostUser: false,
Id: driveID,
}
_, err = cl.VmAddDiskPut(ctx, blkDevice)
_, _, err = cl.VmAddDiskPut(ctx, blkDevice)
}
if err != nil {
@ -448,7 +462,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error {
return openAPIClientError(err)
}
_, err = cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev})
_, _, err = cl.VmAddDevicePut(ctx, chclient.VmAddDevice{Path: device.SysfsDev})
if err != nil {
err = fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
}
@ -462,7 +476,7 @@ func (clh *cloudHypervisor) hotplugAddDevice(devInfo interface{}, devType device
switch devType {
case blockDev:
drive := devInfo.(*config.BlockDrive)
return nil, clh.hotplugBlockDevice(drive)
return nil, clh.hotplugAddBlockDevice(drive)
case vfioDev:
device := devInfo.(*config.VFIODev)
return nil, clh.hotPlugVFIODevice(*device)
@ -472,9 +486,39 @@ func (clh *cloudHypervisor) hotplugAddDevice(devInfo interface{}, devType device
}
func (clh *cloudHypervisor) hotplugRemoveBlockDevice(drive *config.BlockDrive) error {
cl := clh.client()
ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second)
defer cancel()
driveID := clhDriveIndexToID(drive.Index)
if drive.Pmem {
return fmt.Errorf("pmem device hotplug remove not supported")
}
_, err := cl.VmRemoveDevicePut(ctx, chclient.VmRemoveDevice{Id: driveID})
if err != nil {
err = fmt.Errorf("failed to hotplug remove block device %+v %s", drive, openAPIClientError(err))
}
return err
}
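Because clhDriveIndexToID is deterministic, the remove path can rebuild the exact ID the add path registered, with no extra bookkeeping. A hedged sketch:

```Go
// The drive index alone identifies the device on both paths.
id := clhDriveIndexToID(3) // "clh_drive_3"
_, err := cl.VmRemoveDevicePut(ctx, chclient.VmRemoveDevice{Id: id})
```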
func (clh *cloudHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
clh.Logger().WithField("function", "hotplugRemoveDevice").Warn("hotplug remove device not supported")
return nil, nil
span, _ := clh.trace("hotplugRemoveDevice")
defer span.Finish()
switch devType {
case blockDev:
return nil, clh.hotplugRemoveBlockDevice(devInfo.(*config.BlockDrive))
default:
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
"deviceType": devType}).Error("hotplugRemoveDevice: unsupported device")
return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v",
devInfo, devType)
}
}
func (clh *cloudHypervisor) hypervisorConfig() HypervisorConfig {

View File

@ -95,12 +95,17 @@ func (c *clhClientMock) VmResizePut(ctx context.Context, vmResize chclient.VmRes
}
//nolint:golint
func (c *clhClientMock) VmAddDevicePut(ctx context.Context, vmAddDevice chclient.VmAddDevice) (*http.Response, error) {
return nil, nil
func (c *clhClientMock) VmAddDevicePut(ctx context.Context, vmAddDevice chclient.VmAddDevice) (chclient.PciDeviceInfo, *http.Response, error) {
return chclient.PciDeviceInfo{}, nil, nil
}
//nolint:golint
func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (*http.Response, error) {
func (c *clhClientMock) VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (chclient.PciDeviceInfo, *http.Response, error) {
return chclient.PciDeviceInfo{}, nil, nil
}
//nolint:golint
func (c *clhClientMock) VmRemoveDevicePut(ctx context.Context, vmRemoveDevice chclient.VmRemoveDevice) (*http.Response, error) {
return nil, nil
}
@ -363,7 +368,7 @@ func TestCheckVersion(t *testing.T) {
}
}
func TestCloudHypervisorHotplugBlockDevice(t *testing.T) {
func TestCloudHypervisorHotplugAddBlockDevice(t *testing.T) {
assert := assert.New(t)
clhConfig, err := newClhConfig()
@ -374,13 +379,31 @@ func TestCloudHypervisorHotplugBlockDevice(t *testing.T) {
clh.APIClient = &clhClientMock{}
clh.config.BlockDeviceDriver = config.VirtioBlock
err = clh.hotplugBlockDevice(&config.BlockDrive{Pmem: false})
err = clh.hotplugAddBlockDevice(&config.BlockDrive{Pmem: false})
assert.NoError(err, "Hotplug disk block device expected no error")
err = clh.hotplugBlockDevice(&config.BlockDrive{Pmem: true})
err = clh.hotplugAddBlockDevice(&config.BlockDrive{Pmem: true})
assert.Error(err, "Hotplug pmem block device expected error")
clh.config.BlockDeviceDriver = config.VirtioSCSI
err = clh.hotplugBlockDevice(&config.BlockDrive{Pmem: false})
err = clh.hotplugAddBlockDevice(&config.BlockDrive{Pmem: false})
assert.Error(err, "Hotplug block device not using 'virtio-blk' expected error")
}
func TestCloudHypervisorHotplugRemoveBlockDevice(t *testing.T) {
assert := assert.New(t)
clhConfig, err := newClhConfig()
assert.NoError(err)
clh := &cloudHypervisor{}
clh.config = clhConfig
clh.APIClient = &clhClientMock{}
clh.config.BlockDeviceDriver = config.VirtioBlock
err = clh.hotplugRemoveBlockDevice(&config.BlockDrive{Pmem: false})
assert.NoError(err, "Hotplug remove disk block device expected no error")
err = clh.hotplugRemoveBlockDevice(&config.BlockDrive{Pmem: true})
assert.Error(err, "Hotplug remove pmem block device expected error")
}

View File

@ -819,17 +819,28 @@ func ProcessListContainer(sandboxID, containerID string, options ProcessListOpti
```Go
import (
"context"
"fmt"
"strings"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
)
var containerRootfs = vc.RootFs{Target: "/var/lib/container/bundle/", Mounted: true}
// This example creates and starts a single container sandbox,
// using qemu as the hypervisor and kata as the VM agent.
func Example_createAndStartSandbox() {
envs := []vc.EnvVar{
envs := []types.EnvVar{
{
Var: "PATH",
Value: "/bin:/usr/bin:/sbin:/usr/sbin",
},
}
cmd := vc.Cmd{
cmd := types.Cmd{
Args: strings.Split("/bin/sh", " "),
Envs: envs,
WorkDir: "/",
@ -844,27 +855,20 @@ func Example_createAndStartSandbox() {
// Sets the hypervisor configuration.
hypervisorConfig := vc.HypervisorConfig{
KernelPath: "/usr/share/clear-containers/vmlinux.container",
ImagePath: "/usr/share/clear-containers/clear-containers.img",
KernelPath: "/usr/share/kata-containers/vmlinux.container",
ImagePath: "/usr/share/kata-containers/clear-containers.img",
HypervisorPath: "/usr/bin/qemu-system-x86_64",
MemorySize: 1024,
}
// Use kata default values for the agent.
agConfig := vc.KataAgentConfig{}
// VM resources
vmConfig := vc.Resources{
VCPUs: 4,
Memory: 1024,
}
// The sandbox configuration:
// - One container
// - Hypervisor is QEMU
// - Agent is kata
sandboxConfig := vc.SandboxConfig{
VMConfig: vmConfig,
HypervisorType: vc.QemuHypervisor,
HypervisorConfig: hypervisorConfig,

Some files were not shown because too many files have changed in this diff.