Compare commits

..

107 Commits

Author SHA1 Message Date
Fabiano Fidêncio
119edcc443 Merge pull request #2837 from fidencio/2.3.0-alpha2-branch-bump
# Kata Containers 2.3.0-alpha2
2021-10-14 09:52:37 +02:00
Fabiano Fidêncio
8873ddab9e release: Kata Containers 2.3.0-alpha2
- kata-monitor: add index page
- clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
- agent: exec should inherit container process capabilities
- GitHubActions: fix invalid format of require-pr-porting-labels.yaml
- agent: flush root span before process finish
- Extend PCI submodules to represent non-zero functions and addresses
- packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
- runtime: don't start shim management server in tests
- qemu: use GitLab repos instead of qemu.org
- runtime: optimize code for managing temp users for rootless mode
- Agent configuration file and API restriction
- Delete file virtcontainers-setup.sh
- vendor: Update containerd to v1.5.7
- runtime: Optimize func noNeedForOutput and add test cases
- runtime: Fix !x86 static checks
- #2676: fixing centos gpg key url for ppc64le
- Pass the host route IP family to the guest
- cmd: get return value for setCPUtype
- packaging: Configure QEMU with --enable-pie
- clh: Enable guest userland output
- cmd: Fix mismatched types in testModuleData
- runtime: update .gitignore to ignore monitor_address file
- runtime: fix the make check-go-static command error
- virtcontainers: clean up useless code
- Remove forced PCI rescans from agent
- kernel: Enable SGX in experimental kernel.
- runtime: fix nil reference in cleanup rootless user
- qemu: prepare to upgrade qemu version to 6.1.0 for arm
- kata-monitor (minor) improvements
- virtcontainers: Fix incorrect scripts path
- runtime: clear virtcontainers cgroup duplicated function
- Kata monitor: cache improvements
- virtiofs: fix error report in TestVirtiofsdStart when go test running

176dee6f agent: exec should inherit container process capabilities
7b2bfd4e virtcontainers: clh: Use 'quiet' as the default kernel parameter
3e24e46c virtcontainers: clh: Turn-off serial and virtio-console by default
2d7b65e8 agent: flush root span before process finish
5c77cc2c runtime: don't start shim management server in tests
72044180 agent/device: Return PCI address from wait_for_pci_device()
e50b05d9 agent/pci: Add type to represent PCI addresses
8528157b agent/pci:  Extend Slot type to represent PCI function as well
bf8f582c runtime: optimize code for managing temp users for rootless mode
a9c2a4ba GitHubActions: fix invalid format of require-pr-porting-labels.yaml
c4236cb2 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
08360c98 agent: Add an agent configutation file example
8a4e69d2 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
0ea2e3af agent: config: Allow for building the configuration from a file
63539dc9 agent: config: Add allowed endpoints
a953fea3 agent: config: Simplify configuration creation
b888edc2 agent: config: Implement Default
7eac2ec7 protection: add confidential compute frame for arm
8acfc154 check: fix typecheck failure in qemu_arm64_test.go
5b02d54e virtcontainers: fix lint failure on ppc64le
ff9728f0 virtcontainers: nolint guestProtection
5c138c8f runtime: Fix field alignment on s390x
191d0016 vendor: Update containerd to v1.5.7
f7f6bd01 kata-monitor: add index page
a44cde7e agent: netlink: Use the grpc IP family field when updating the route
71ce6cfe runtime: Pass the route IP family to the agent
99450bd1 agent: protos: Add a Family field to the Route payload
f85fe702 runtime: vendor: Bump the netlink package dependency
e439cec7 cmd: fix field alignment on ppc64le
e5159ea7 cmd: get return value for setCPUtype
2ce8d426 clh: Suppress hypervisor output to make guest output visible
cd1064b1 packaging: Configure QEMU with --enable-pie
762922a5 runtime: delete func ConstraintsToVCPUs
4f485430 runtime: delete virtcontainers-setup.sh
80f6b977 osbuilder: fixing centos gpg key url for ppc64le
bb99bfb4 runtime: fix the make check-go-static command error
870771d7 runtime: update .gitignore to ignore monitor_address file
18bff584 runtime: Optimize func noNeedForOutput and add test cases
e5fe53f0 runtime: fix nil reference in cleanup rootless user
2304a596 runtime: set the sandbox storage path static
315295e0 runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
13e65f2e cmd: Fix mismatched types in testModuleData
da42cbc0 actions: Build experimental kernel on kata-deploy push action
dffc5092 kernel: Enable SGX in experimental kernel.
ff6a677d kernel-build: Enable multiple config types.
90046964 experimental-kernel: bump 5.13.10
1fbb7304 build: kata-deploy kernel experimental
907459c1 agent/device: Don't force PCI rescans
75f426dd agent: Simplify do_add_swap()
aad1a873 runtime/device: Give the agent information about VFIO devices
ebd7b618 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
ad45c52f runtime/device: Record guest PCI path for VFIO devices
5c2af3e3 runtime/device: Refactor hotplugVFIODevice() to have common exit path
8bc71105 agent/device: Add device type for VFIO devices
f7a27075 agent: Move driver type constants into device.rs
5b1eb08b agent/uevent: Improve logging of wait_for_uevent()
cf36fd87 runtime: Fix some leftover go fmt errors
6d94957a kernel: reduce alignment size of memory hotplug to 128M
48090f62 qemu: disable plug on arm64 when pie is added
57e3712d virtiofs: fix error report in TestVirtiofsdStart when go test running
8b0bc1f4 kata-monitor: bump version to 0.2.0
bfb556d5 kata-monitor: refresh kata sandbox list on fs events
0e854f3b kata-monitor: improve detection of kata workloads
80463b44 qemu: use GitLab repos instead of qemu.org
3b0c4bf9 runtime: clear virtcontainers cgroup duplicated function
afad910d kata-monitor: add getSandboxFS()
e38686f7 runtime: add GetSandboxesStoragePath()
245a12bb kata-monitor: improve sandbox caching
fc067d61 kata-monitor: warn when unable to retrive the lower level runtime
53ec4df9 kata-monitor: minor fixes
47516988 virtcontainers: Fix incorrect scripts path
814cea96 virtcontainers: clean up useless code

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-14 06:56:30 +02:00
Bin Liu
8be85fda4f Merge pull request #2775 from fgiudici/kata-monitor_issue2292
kata-monitor: add index page
2021-10-14 09:12:57 +08:00
GabyCT
5c7e1b457c Merge pull request #2821 from likebreath/1011/clh_console
clh: Refine the usage of guest console and kernel parameters with Cloud Hypervisor
2021-10-13 13:36:32 -05:00
Eric Ernst
6cc4d6b54e Merge pull request #2829 from bergwolf/capability
agent: exec should inherit container process capabilities
2021-10-13 09:02:03 -07:00
Peng Tao
176dee6f37 agent: exec should inherit container process capabilities
Otherwise rustjail would not set its capabilities and it ends up getting
all capabilities.

Fixes: #2828
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
2021-10-13 17:24:52 +08:00
Bo Chen
7b2bfd4eca virtcontainers: clh: Use 'quiet' as the default kernel parameter
The 'quiet' kernel parameter can avoid guest kernel logs while booting,
which can reduce boot time.

Fix: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:27 -07:00
Bo Chen
3e24e46c70 virtcontainers: clh: Turn-off serial and virtio-console by default
We will need to have console output from the guest only for debugging
purposes. As a result, we can turn-off both the serial and
virtio-console devices by default for better boot time.

Fixes: #2820

Signed-off-by: Bo Chen <chen.bo@intel.com>
2021-10-11 22:06:23 -07:00
GabyCT
88affdb7b7 Merge pull request #2799 from liubin/fix/github-action-format
GitHubActions: fix invalid format of require-pr-porting-labels.yaml
2021-10-11 09:36:04 -05:00
Bin Liu
b7cd4ca2b8 Merge pull request #2813 from liubin/fix/2812-flush-root-span
agent: flush root span before process finish
2021-10-11 18:46:09 +08:00
bin
2d7b65e8eb agent: flush root span before process finish
Variables in rust will be dropped at the end of the function.

In function real_main the trace will be shut down by `tracer::end_tracing()`,
but at this time the root span is in an active state, so this root span
will not be sent to the trace collector.

This can be fixed by dropping the root span manually.

Fixes: #2812

Signed-off-by: bin <bin@hyper.sh>
2021-10-11 17:14:37 +08:00
Marcel Apfelbaum
06f4ab10b4 Merge pull request #2764 from dgibson/more-pci
Extend PCI submodules to represent non-zero functions and addresses
2021-10-10 15:57:54 +03:00
Marcel Apfelbaum
9796babd92 Merge pull request #2311 from dgibson/mmconfig
packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
2021-10-10 15:11:33 +03:00
Bin Liu
0300e91cd0 Merge pull request #2808 from liubin/fix/2805-fix-test-leak-of-monitor-socket
runtime: don't start shim management server in tests
2021-10-08 19:42:09 +08:00
bin
5c77cc2c49 runtime: don't start shim management server in tests
Shim management server is running in a go routine, in test mode
this will cause the directory where the listen socket
file(/run/vc/sbs/777-77-77777777/shim-monitor.sock) in leak
after the tests finished.

Fixes: #2805

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 18:41:53 +08:00
Jakob Naucke
4152c45e4c Merge pull request #2706 from yuanzhe-liu0/qemu_link
qemu: use GitLab repos instead of qemu.org
2021-10-08 12:03:55 +02:00
David Gibson
72044180e4 agent/device: Return PCI address from wait_for_pci_device()
wait_for_pci_device() waits for the PCI device at the given path to become
ready, but it doesn't currently give you any meaningful handle on that
device.

Change the signature, so that it returns the PCI address of the device.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
e50b05d93c agent/pci: Add type to represent PCI addresses
Add a new pci::Address type which represents a guest PCI address in
DDDD:BB:SS.F form.

fixes #2745

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
David Gibson
8528157b9b agent/pci: Extend Slot type to represent PCI function as well
pci::Slot represents a PCI slot.  However, in all cases where we use it, we
actually care about addressing a specific PCI function.  So, at the moment
we can only refer to function 0 in each slot.

Replace pci::Slot with pci::SlotFn to represent both the slot and function.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-08 16:52:49 +11:00
Fupan Li
988eb95621 Merge pull request #2760 from liubin/fix/2759-optimize-code-for-managing-temp-users
runtime: optimize code for managing temp users for rootless mode
2021-10-08 13:49:14 +08:00
bin
bf8f582c1d runtime: optimize code for managing temp users for rootless mode
This commit does two chagnes:

- move code for managing temp users to rootless.go.
- use common function in qemu.go when shutdown the VM.

Fixes: #2759

Signed-off-by: bin <bin@hyper.sh>
2021-10-08 11:04:21 +08:00
Eric Ernst
011c58d626 Merge pull request #2517 from sameo/topic/agent-config
Agent configuration file and API restriction
2021-10-07 08:54:51 -07:00
Bin Liu
10ec4b133c Merge pull request #2742 from liubin/fix/2741-delete-file-code
Delete file virtcontainers-setup.sh
2021-10-07 11:54:47 +08:00
bin
a9c2a4ba8e GitHubActions: fix invalid format of require-pr-porting-labels.yaml
The yaml file has an indent issue from line 15.

And the branches filter should be under pull_request_target but
not the pull_request trigger.

Also actions/checkout@v2 does not need the token parameter.

Fixes: #2798

Signed-off-by: bin <bin@hyper.sh>
2021-10-07 10:23:44 +08:00
David Gibson
c4236cb2d1 packaging/kernel: Add CONFIG_PCI_MMCONFIG to x86 guest kernel configuration
The guest kernel configuration suggested for Kata, and which is used by the
CI didn't include CONFIG_PCI_MMCONFIG.  That's kind of weird, MMCONFIG is
the modern normal way of handling configuration cycles.

In addition, due to a complex set of interactions through the ACPI code,
disabling MMCONFIG means that SHPC hotplug doesn't work: the driver is
included in the guest kernel, but will fail to probe on PCI to PCI bridges,
meaning it won't actually be activated.

Enable MMCONFIG so that we suggest and testa more typical guest kernel
configuration.

fixes #2288

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-10-07 13:21:48 +11:00
Samuel Ortiz
08360c981d agent: Add an agent configutation file example
With all endpoints allowed.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-07 04:04:52 +02:00
Samuel Ortiz
8a4e69d237 agent: rpc: Return UNIMPLEMENTED for not allowed endpoints
From the endpoints string described through the configuration file, we
build a hash set of allowed enpoints. If a configuration files does not
include an endpoints section, we assume all endpoints are not allowed.
If there is no configuration file, then all endpoints are allowed.

Then for every ttrpc request, we check if the name of the endpoint is
part of the hashset. If it is not, then we return ttrcp::UNIMPLEMENTED.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 04:04:32 +02:00
Samuel Ortiz
0ea2e3af07 agent: config: Allow for building the configuration from a file
When the kernel command line includes a agent.config_file=<path> entry,
then we will try to override the default confiuguration values with the
ones we parse from a TOML file at <path>.

As the configuration file overrides the default values, we need to go
through a simplified builder that convert a set of Option<> fields into
the actual AgentConfig structure.

Fixes: #1837

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
63539dc9fd agent: config: Add allowed endpoints
They will define the list of endpoints that an agent supports.
They're empty and non actionable for now.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
a953fea324 agent: config: Simplify configuration creation
We dont need a constructor and derive directly from the command line
parsing.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Samuel Ortiz
b888edc2fc agent: config: Implement Default
A single constructor setting default value is a typical pattern for a
Default implementation.

Signed-off-by: Samuel Ortiz <samuel.e.ortiz@protonmail.com>
2021-10-07 00:37:40 +02:00
Fabiano Fidêncio
4cde619c68 Merge pull request #2797 from fidencio/wip/upgrade-vendored-containerd
vendor: Update containerd to v1.5.7
2021-10-06 21:05:44 +02:00
Chelsea Mafrica
6e3fcce2a2 Merge pull request #2748 from liubin/fix/2747-add-test
runtime: Optimize func noNeedForOutput and add test cases
2021-10-06 11:24:57 -07:00
Fabiano Fidêncio
04cdf5b1f0 Merge pull request #2774 from Jakob-Naucke/fix-s390x-alignment
runtime: Fix !x86 static checks
2021-10-06 19:57:00 +02:00
Jianyong Wu
7eac2ec786 protection: add confidential compute frame for arm
Even CCA, which is the confidential compute archtecture, has not been
ready, add a empty implementation to avoid static check error.

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Suggested-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 15:53:36 +02:00
Jianyong Wu
8acfc154de check: fix typecheck failure in qemu_arm64_test.go
fix typecheck failure in qemu_arm64_test.go

Fixes: #2789
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-10-06 15:53:35 +02:00
Amulya Meka
5b02d54e23 virtcontainers: fix lint failure on ppc64le
Add nolint for arch specific code to exclude
from lint check.

Fixes: #2773

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
ff9728f032 virtcontainers: nolint guestProtection
Exclude from lint checking for it is ultimately only used in
architecture-specific code.

Fixes: #2273
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Jakob Naucke
5c138c8f12 runtime: Fix field alignment on s390x
Follow-up of #2237 for s390x -- field alignment isn't always minimal

Fixes: #2773
Signed-off-by: Jakob Naucke <jakob.naucke@ibm.com>
2021-10-06 15:53:35 +02:00
Fabiano Fidêncio
191d001610 vendor: Update containerd to v1.5.7
Bump containerd to v1.5.7 in order to bring in a fix for CVE-2021-41103,
"insufficiently restricted permissions ons plugins directories
(https://github.com/advisories/GHSA-c2h3-6mxw-7mvq)".

dependabot found a potential security vulnerability and raised a PR to
fix it.  However, dependabot does not properly follows nor understands
the needed of our CIs (mainly related to formatting the PR and whatnot),
thus I'm re-raising it.

Fixes: #2796
Supersedes: #2787

Signed-off-by: Fabiano Fidêncio <fidencio@redhat.com>
2021-10-06 10:40:43 +02:00
Chelsea Mafrica
1f6a551570 Merge pull request #2755 from paleozogt/centos-ppc64le-gpg
#2676: fixing centos gpg key url for ppc64le
2021-10-05 09:37:58 -07:00
Eric Ernst
2bc7561561 Merge pull request #2769 from sameo/topic/agent-route
Pass the host route IP family to the guest
2021-10-05 07:20:33 -07:00
Chelsea Mafrica
db7d3b91bd Merge pull request #2780 from Amulyam24/checks
cmd: get return value for setCPUtype
2021-10-04 22:19:59 -07:00
Bin Liu
f7f6bd0142 kata-monitor: add index page
Add an index page to the kata-monitor endpoint.

Porting of https://github.com/liubin/kata-containers/commit/a45aa0696d55

Fixes: #2292
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-10-04 18:13:56 +02:00
Archana Shinde
5fd963530d Merge pull request #2756 from gkurz/fix-enable-pie
packaging: Configure QEMU with --enable-pie
2021-10-04 03:50:00 -07:00
Samuel Ortiz
a44cde7e8d agent: netlink: Use the grpc IP family field when updating the route
Not all routes have either a gateway or a destination IP.
Interface routes, where the source, destination and gateway are undefined,
will default to IP v4 with the current is_ipv6() check even when they
are v6 routes.

We use the provided gRPC Route.Family field instead. This field is built
from the host netlink messages, and is a reliable way of finding out
a route's IP family.

Fixes: #2768

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:39:46 +02:00
Samuel Ortiz
71ce6cfe9e runtime: Pass the route IP family to the agent
When updating the guest routing table, we should forward the IP family
information up to the guest.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
99450bd1f7 agent: protos: Add a Family field to the Route payload
Our check for the IP family is working as long as we have either a
gateway or a destination IP. Some routes are missing both.
The RT netlink messages provide the IP family information for each
route, so we can carry that piece of information up to the guest. That
will allow for a more reliable route IP family determination.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:17 +02:00
Samuel Ortiz
f85fe70231 runtime: vendor: Bump the netlink package dependency
We need to be able to get the IP family from the netlink route meesages,
and the Route.Family field only got recently added to the netlink
package.

The update generates static check warnings about the call for
nethandler.Delete() being deprecated in favor of a Close() call instead.
So we include the s/Delete()/Close()/ change as part of this PR.

Signed-off-by: Samuel Ortiz <s.ortiz@apple.com>
2021-10-01 14:35:01 +02:00
Amulya Meka
e439cec7c5 cmd: fix field alignment on ppc64le
Optimising structure field alignment.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:45:27 +00:00
Amulya Meka
e5159ea755 cmd: get return value for setCPUtype
Accept and assert the return value in testSetCPUTypeGeneric.

Fixes: #2779

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-10-01 11:44:14 +00:00
Bo Chen
fd5c858390 Merge pull request #2751 from jodh-intel/clh-fix-guest-output
clh: Enable guest userland output
2021-09-30 09:05:30 -07:00
James O. D. Hunt
2ce8d4263c clh: Suppress hypervisor output to make guest output visible
Reduce the cloud-hypervisor log level from `Debug` to `Info` when hypervisor
debug is enabled. This is required since `Debug` level:

- Is overkill for debugging hypervisor failures.
- Effectively hides the output from the guest kernel and userland: CLH
  generates so much output that the output from the guest gets "lost in
  the noise" (experiments show that for each full CLH debug message, at most
  1 _byte_ of guest output is displayed).

Fixes: #2726.

Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
2021-09-30 14:22:09 +01:00
Greg Kurz
cd1064b16f packaging: Configure QEMU with --enable-pie
We explicitely set the Postion Independant Executlable (PIE) options
in the extra CFLAGS and LDFLAGS that are passed to the QEMU configure
script for all archs. This means that these options are used pretty
much everywhere, including when building the sample plugins under the
test directory. These cannot be linked with -pie and break the build,
as experienced recently on ARM (see PR #2732).

This only broke on ARM because other archs are configured with
--disable-tcg : this disables plugins which are built by default
otherwise.

The --enable-pie option is all that is needed. The QEMU build system
knows which binaries should be created as PIE, e.g. the important
bits like QEMU and virtiofsd, and which ones should not, e.g. the
sample plugins that aren't used in production.

Rely on --enable-pie only, for all archs. This allows to drop the
workaround that was put in place in PR #2732.

Fixes: #2757
Signed-off-by: Greg Kurz <groug@kaod.org>
2021-09-30 11:17:41 +02:00
Jakob Naucke
8739a73dd3 Merge pull request #2736 from Amulyam24/kata-check-test
cmd: Fix mismatched types in testModuleData
2021-09-30 10:20:19 +02:00
bin
762922a521 runtime: delete func ConstraintsToVCPUs
ConstraintsToVCPUs is not used any more.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:41 +08:00
bin
4f4854308a runtime: delete virtcontainers-setup.sh
This file is not used anymore.

Fixes: #2741

Signed-off-by: bin <bin@hyper.sh>
2021-09-30 14:44:30 +08:00
Chelsea Mafrica
96c033ba6c Merge pull request #2763 from liubin/fix/2762-update-gitignore
runtime: update .gitignore to ignore monitor_address file
2021-09-29 09:45:57 -07:00
Carlos Venegas
7183de47df Merge pull request #2766 from YchauWang/wyc-runtime-cmd
runtime: fix the make check-go-static command error
2021-09-29 10:53:02 -05:00
Aaron Simmons
80f6b97710 osbuilder: fixing centos gpg key url for ppc64le
The centos ppc64le gpg key at mirror.centos.org doesn't exist (link rot?).
Replacing it with url from CentOS/sig-core-AltArch on github.

Fixes: #2676

Signed-off-by: Aaron Simmons <paleozogt@gmail.com>
2021-09-29 09:20:51 -06:00
Bin Liu
4ac7199282 Merge pull request #2494 from rapiz1/clean-up-code
virtcontainers: clean up useless code
2021-09-29 22:56:13 +08:00
wangyongchao.bj
bb99bfb45d runtime: fix the make check-go-static command error
modify the make script of the check-go-static, changing the `./cli` path to `./cmd/kata-runtime`

Fixes: #2765

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-29 15:37:25 +08:00
David Gibson
b57613f53e Merge pull request #1682 from dgibson/rescan
Remove forced PCI rescans from agent
2021-09-29 13:03:55 +10:00
bin
870771d76d runtime: update .gitignore to ignore monitor_address file
Run tests sometimes generate pkg/containerd-shim-v2/monitor_address,
and `git status` will treat it as a new file.

Package containerd-shim-v2 has moved to pkg/containerd-shim-v2,
the monitor_address in .gitignore should be updated too.

Fixes: #2762

Signed-off-by: bin <bin@hyper.sh>
2021-09-29 09:24:14 +08:00
Chelsea Mafrica
20f4c252b8 Merge pull request #2519 from jcvenegas/kernel-experimental-5.13.10
kernel: Enable SGX in experimental kernel.
2021-09-28 11:00:46 -07:00
Fupan Li
823818cfbc Merge pull request #2744 from fengwang666/nil-bug
runtime: fix nil reference in cleanup rootless user
2021-09-28 22:43:24 +08:00
Fabiano Fidêncio
f9ecaaa6be Merge pull request #2732 from jongwu/plugin
qemu: prepare to upgrade qemu version to 6.1.0 for arm
2021-09-28 12:12:48 +02:00
bin
18bff58487 runtime: Optimize func noNeedForOutput and add test cases
Optimize func noNeedForOutput and add test cases for this func.

Fixes: #2747

Signed-off-by: bin <bin@hyper.sh>
2021-09-28 16:58:44 +08:00
Feng Wang
e5fe53f0a9 runtime: fix nil reference in cleanup rootless user
It seems the client (crio) can send multiple requests to stop the Kata VM,
resulting a nil reference if the uid has already been cleaned up by a different thread.

Fixes #2743

Signed-off-by: Feng Wang <feng.wang@databricks.com>
2021-09-27 21:28:47 -07:00
Chelsea Mafrica
0b087a873d Merge pull request #2739 from fgiudici/kata-monitor_improvements3
kata-monitor (minor) improvements
2021-09-27 15:45:21 -07:00
Francesco Giudici
2304a59601 runtime: set the sandbox storage path static
Since we now have "unix://" kind of socket returned by the
SocketAddress() function, there is no more need to build the sandbox
storage path dynamically to keep OS compatibility.

Fixes: #2738
Suggested-by: Christophe de Dinechin <dinechin@redhat.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:57:34 +02:00
Francesco Giudici
315295e0ef runtime: rename GetSanboxesStoragePath() --> GetSandboxesStoragePath()
Add the missing 'd'.

Fixes: #2738
Suggested-by: Jakob Naucke <jakob.naucke@ibm.com>
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-27 15:56:14 +02:00
Bin Liu
3217b03b17 Merge pull request #2522 from Bevisy/main-2515
virtcontainers: Fix incorrect scripts path
2021-09-27 21:14:40 +08:00
Bin Liu
39df808f6a Merge pull request #2695 from YchauWang/wyc-vc-cgroup
runtime: clear virtcontainers cgroup duplicated function
2021-09-27 21:12:39 +08:00
Amulya Meka
13e65f2ee8 cmd: Fix mismatched types in testModuleData
Rectify the values of testModuleData with the correct
types in TestCCCheckCLiFunction in kata-check_(!x86)_test.go

Fixes: #2735

Signed-off-by: Amulya Meka <amulmek1@in.ibm.com>
2021-09-27 07:17:07 +00:00
Peng Tao
05995632c3 Merge pull request #2566 from fgiudici/kata-monitor_improvements
Kata monitor: cache improvements
2021-09-27 12:29:13 +08:00
Carlos Venegas
da42cbc0a7 actions: Build experimental kernel on kata-deploy push action
Build experimental kernel on kata-deploy push action.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
dffc50928a kernel: Enable SGX in experimental kernel.
Enable Intel SGX support in experimental kernel.

Fixes: #2518

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
ff6a677d16 kernel-build: Enable multiple config types.
Optional build types are common for early adoption.
Lets add a flag to build and optional config.

e.g.
kernel-build.sh -b experimental

In the future instead of add more flags just add a new build type.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
90046964ef experimental-kernel: bump 5.13.10
Upgrade Linux kernel to latest stable release.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 03:39:50 +00:00
Carlos Venegas
1fbb73041b build: kata-deploy kernel experimental
Allow build experimental kernel from kata-deploy.

Signed-off-by: Carlos Venegas <jose.carlos.venegas.munoz@intel.com>
2021-09-27 02:56:59 +00:00
David Gibson
907459c1c1 agent/device: Don't force PCI rescans
The agent initiates a PCI rescan from two places.  One is triggered
for each virtio-blk PCI device, and one is triggered unconditionally
when we start a new container.

The PCI bus rescan code was added long time ago in Clear Containers due to
lack of ACPI support in QEMU 2.9 + q35.  Since Kata routinely plugs devices
under a PCIe-to-PCI bridge, that left SHPC as the only available hotplug
mechanism.

However, while Kata was using SHPC on the qemu side, it wasn't actually
using it on the guest side.  Due to a quirk of our guest kernel
configuration, the SHPC driver never bound to the bridge, and *no* hotplug
was working at all.  To work around that, Kata was forcing the rescan
manually, which would discover the new device.  That was very fragile (we
were arguably relying on a kernel bug).  Even if we were using SHPC
propertly, it includes a mandatory 5s delay during plug operations
(designed for physical cards and human operators), which makes it
unsuitable quick start up.

Worse, the forced PCI rescans could race with either SHPC or PCIe native
hotplug sequences, causing several problems.  In some cases this could put
the device into an entirely broken state where it wouldn't respond to
config space accesses at all.

Since pull request #2323 was merged, we have instead used ACPI hotplug
which is both fast, and more solid in terms of semantics and races.  So,
the forced PCI rescans are no longer necessary.  Remove them all.

fixes #683

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
75f426dd1e agent: Simplify do_add_swap()
do_add_swap() has some mildly complex code to translate the PCI path of
a virtio-blk device (where the swap will reside) into a /dev path. However,
the device module already has get_virtio_blk_pci_device_name() which does
exactly that.  The existing code has some further advantages: it uses
more precise matching of the sysfs paths, and if necessary it will wait for
the device to be added to the guest.

While we're there, remove an unnecessary 'as u8' from the PCI path
construction: pci::Path::new() already accepts anything which implements
TryInfo<u8>, which u32 certainly does.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
aad1a8734f runtime/device: Give the agent information about VFIO devices
We send information about several kinds of devices to the agent so
that it can apply specific handling.  We don't currently do this with
VFIO devices.  However we need to do that so that the agent can
properly wait for VFIO devices to be ready (previously it did that
using a PCI rescan which may not be reliable and has some very bad
side effects).

This patch collates and sends the relevant information.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ebd7b61884 runtime: Don't repeat GetDeviceByID between appendDevices() and append*()
Both appendBlockDevice and appendVhostUserBlkDevice start by using
GetDeviceByID to lookup the api.Device object corresponding to their
ContainerDevice object.  However their common caller, appendDevices() has
already done this.

This changes it so the looked up api.Device is passed to the individual
append*Device() functions.  This slightly reduces duplicated work, but more
importantly it makes it clearer that append*Device() don't need to check
for a nil result from GetDeviceByID, since the caller has already done
that.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
ad45c52fbe runtime/device: Record guest PCI path for VFIO devices
For several device types which correspond to a PCI device in the guest
we record the device's PCI path in the guest.  We don't currently do
that for VFIO devices, but we're going to need to for better handling
of SR-IOV devices.

To accomplish this, we have to determine the guest PCI path from the
information the VMM gives us:

For qemu, we query the slot of the device and its bridge from QMP.

For cloud-hypervisor, the device add interface gives us a guest PCI
address.  In fact this represents a design error in the clh API -
there's no way it can really know the guest PCI address in general.
It works in this case, because clh doesn't use PCI bridges, so the
device will always be on the root bus.  Based on that, the PCI path is
simply the device's slot number.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5c2af3e308 runtime/device: Refactor hotplugVFIODevice() to have common exit path
hotplugVFIODevice() has several different paths depending if we're
plugging into a root port or a PCIE<->PCI bridge and if we're using a
regular or mediated VFIO device.

We're going to want some common code on the successful exit path here,
so refactor the function to allow that without duplication.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
8bc71105f4 agent/device: Add device type for VFIO devices
Currently, VFIO devices attached to a Kata container aren't described to
the agent at all.  We essentially just hope they're ready by the time
we've entered the container proper, which is usually the case because of
the PCI rescan - but that causes other problems.

This adds a new device type to the agent representing VFIO devices.  The
agent will use its existing uevent watching mechanisms to wait for the
associated guest PCI device to appear before proceeding.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
f7a2707505 agent: Move driver type constants into device.rs
Currently the constants giving the names for each device/driver type in
the protocol are in mount.rs, and used in device.rs.  Since these constants
are inherently related to, well, devices, it makes more sense to put them
in device.rs and use them from mount.rs.

This will become even more so with planned extensions which will add some
device types that will not be used in mount.rs at all.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
5b1eb08bde agent/uevent: Improve logging of wait_for_uevent()
These messages will help when debugging matchers not matching properly.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
David Gibson
cf36fd87ad runtime: Fix some leftover go fmt errors
A few "go fmt" errors appear to have crept it.  Clean them up with
"go fmt ./..." in the src/runtime directory.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2021-09-27 12:46:33 +10:00
Jianyong Wu
6d94957a14 kernel: reduce alignment size of memory hotplug to 128M
After 5.11-rc4, memory hotplug alignment size is reduced to 128M for 4K
page.
It works better for memory hotplug and nvdimm plug in kata on arm.
without this patch, memory hotplug will fail for the current memory
hotplug alignment is 1G but the nvdimm size align with 128M in kata.
After port it here, we can avoid a fix in qemu side.

Note: if you change the page size to other size than 4K, memory hotplug
will has no effect.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Jianyong Wu
48090f624a qemu: disable plug on arm64 when pie is added
For qemu 6.1.0 build on arm64, compile error occurs when "-pie" is added
 to ldflag.
tests/plugins/empty.c won't be linked as a sysmbol is missing.
I consider there maybe a bug.
Before figure it out, we should disable plugins for qemu 6.1.0 on arm64.

Fixes: #2707
Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
2021-09-26 15:33:33 +08:00
Fabiano Fidêncio
c811dd7484 Merge pull request #2720 from Kvasscn/kata_dev_virtiofsd_ctx
virtiofs: fix error report in TestVirtiofsdStart when go test running
2021-09-25 12:17:00 +02:00
zhanghj
57e3712dbd virtiofs: fix error report in TestVirtiofsdStart when go test running
Initialize ctx with context.Background() instead of nil value.

Fixes: #2718

Signed-off-by: zhanghj <zhanghj.lc@inspur.com>
2021-09-24 16:06:06 +08:00
Francesco Giudici
8b0bc1f45e kata-monitor: bump version to 0.2.0
We now support any container engine CRI compliant. Let's bump the
kata-monitor version to 0.2.0.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
bfb556d56a kata-monitor: refresh kata sandbox list on fs events
This commit stops the container engine polling in favor of
the kata sandbox storage path monitoring.
The pod cache list is now refreshed based on fs events and synced with
the container engine only when needed.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Francesco Giudici
0e854f3b80 kata-monitor: improve detection of kata workloads
When the container engine is different than containerd or CRI-O we
lack proper detection of kata workloads and consider all the pods as
kata ones.
Instead of querying the container engine for the lower level runtime
used in each pod, check if a directory matching the pod exists in
the virtualcontainers sandboxes storage path.
This provides a container engine independent way to check for kata pods.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-23 14:32:09 +02:00
Yuanzhe Liu
80463b445a qemu: use GitLab repos instead of qemu.org
arm using qemu 5.1.0, thus is affected by the wired submodules
link.

Fixes: #2705
Signed-off-by: Yuanzhe Liu <yuanzheliu09@gmail.com>
2021-09-23 12:07:44 +00:00
wangyongchao.bj
3b0c4bf9a0 runtime: clear virtcontainers cgroup duplicated function
There are `DeviceToDeviceCgroup` and `deviceToDeviceCgroup` two functions,
 creating a `specs.LinuxDeviceCgroup` object. We clear the new function `deviceToDeviceCgroup`.

Fixes: #2694

Signed-off-by: wangyongchao.bj <wangyongchao.bj@inspur.com>
2021-09-22 15:13:34 +08:00
Francesco Giudici
afad910d0e kata-monitor: add getSandboxFS()
Retrieve the absolute sandbox storage path. We will soon need this to
monitor the creation/deletion of new kata sandboxes.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
e38686f74d runtime: add GetSandboxesStoragePath()
The storage path we use to collect the sandbox files is defined in the
virtcontainers/persist/fs package.
We create the runtime socket in that storage path, by hardcoding the
full path in the SocketAddress() function in the runtime package.
This commit splits the hardcoded path by the socket address path so that
the runtime package will be able to provide the storage path to all the
components that may need it.

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
245a12bbb7 kata-monitor: improve sandbox caching
In order to retrieve the list of sandboxes, we poll the container engine
every 15 seconds via the CRI. Once we have the list we have to inspect
each pod to find out the kata ones.
This commit extend the sandbox cache to keep track of all the pods,
marking the kata ones, so that during the next polling only the new
sandboxes should be inspected to figure out which ones are using the
kata runtime.

Fixes: #2563
Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:55 +02:00
Francesco Giudici
fc067d61d4 kata-monitor: warn when unable to retrive the lower level runtime
this is an unexpected event (likely a change in how containerd/cri-o
record the lower level runtime in the pod) and should be more visible:
raise the log level to "warning".

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Francesco Giudici
53ec4df953 kata-monitor: minor fixes
fix comment and use literals

Signed-off-by: Francesco Giudici <fgiudici@redhat.com>
2021-09-20 10:37:54 +02:00
Binbin Zhang
4751698829 virtcontainers: Fix incorrect scripts path
modify to the correct relative path

Fixes: #2515

Signed-off-by: Binbin Zhang <binbin36520@gmail.com>
2021-08-27 19:16:53 +00:00
Yujia Qiao
814cea9601 virtcontainers: clean up useless code
Fixes: #2275

Signed-off-by: Yujia Qiao <rapiz3142@gmail.com>
2021-08-24 16:04:34 +08:00
152 changed files with 6537 additions and 1574 deletions

View File

@@ -9,6 +9,7 @@ jobs:
matrix:
asset:
- kernel
- kernel-experimental
- shim-v2
- qemu
- cloud-hypervisor

View File

@@ -12,8 +12,7 @@ on:
- reopened
- labeled
- unlabeled
pull_request:
branches:
branches:
- main
jobs:
@@ -32,8 +31,6 @@ jobs:
- name: Checkout code to allow hub to communicate with the project
uses: actions/checkout@v2
with:
token: ${{ secrets.KATA_GITHUB_ACTIONS_TOKEN }}
- name: Install porting checker script
run: |

View File

@@ -1 +1 @@
2.3.0-alpha1
2.3.0-alpha2

19
src/agent/Cargo.lock generated
View File

@@ -544,6 +544,7 @@ dependencies = [
"rustjail",
"scan_fmt",
"scopeguard",
"serde",
"serde_json",
"slog",
"slog-scope",
@@ -552,6 +553,7 @@ dependencies = [
"thiserror",
"tokio",
"tokio-vsock",
"toml",
"tracing",
"tracing-opentelemetry",
"tracing-subscriber",
@@ -1323,18 +1325,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.126"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
checksum = "d1f72836d2aa753853178eda473a3b9d8e4eefdaf20523b919677e6de489f8f1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.126"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
checksum = "e57ae87ad533d9a56427558b516d0adac283614e347abf85b0dc0cbbf0a249f3"
dependencies = [
"proc-macro2 1.0.26",
"quote 1.0.9",
@@ -1618,6 +1620,15 @@ dependencies = [
"vsock",
]
[[package]]
name = "toml"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
dependencies = [
"serde",
]
[[package]]
name = "tracing"
version = "0.1.26"

View File

@@ -58,6 +58,10 @@ tracing-opentelemetry = "0.13.0"
opentelemetry = { version = "0.14.0", features = ["rt-tokio-current-thread"]}
vsock-exporter = { path = "vsock-exporter" }
# Configuration
serde = { version = "1.0.129", features = ["derive"] }
toml = "0.5.8"
[dev-dependencies]
tempfile = "3.1.0"

View File

@@ -46,6 +46,7 @@ message Route {
string device = 3;
string source = 4;
uint32 scope = 5;
IPFamily family = 6;
}
message ARPNeighbor {

View File

@@ -833,6 +833,20 @@ impl BaseContainer for LinuxContainer {
}
let linux = spec.linux.as_ref().unwrap();
if p.oci.capabilities.is_none() {
// No capabilities, inherit from container process
let process = spec
.process
.as_ref()
.ok_or_else(|| anyhow!("no process config"))?;
p.oci.capabilities = Some(
process
.capabilities
.clone()
.ok_or_else(|| anyhow!("missing process capabilities"))?,
);
}
let (pfd_log, cfd_log) = unistd::pipe().context("failed to create pipe")?;
let _ = fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC))

View File

@@ -0,0 +1,21 @@
# This is an agent configuration file example.
dev_mode = true
server_addr = 'vsock://8:2048'
[endpoints]
# All endpoints are allowed
allowed = [ "CreateContainer", "StartContainer", "RemoveContainer",
"ExecProcess", "SignalProcess", "WaitProcess",
"UpdateContainer", "StatsContainer", "PauseContainer", "ResumeContainer",
"WriteStdin", "ReadStdout", "ReadStderr", "CloseStdin", "TtyWinResize",
"UpdateInterface", "UpdateRoutes", "ListInterfaces", "ListRoutes", "AddARPNeighbors",
"StartTracing", "StopTracing", "GetMetrics",
"CreateSandbox", "DestroySandbox",
"OnlineCPUMem",
"ReseedRandomDev",
"GetGuestDetails",
"MemHotplugByProbe",
"SetGuestDateTime",
"CopyFile",
"GetOOMEvent",
"AddSwap"]

View File

@@ -4,8 +4,11 @@
//
use crate::tracer;
use anyhow::{bail, ensure, Context, Result};
use serde::Deserialize;
use std::collections::HashSet;
use std::env;
use std::fs;
use std::str::FromStr;
use std::time;
use tracing::instrument;
@@ -19,6 +22,7 @@ const DEBUG_CONSOLE_VPORT_OPTION: &str = "agent.debug_console_vport";
const LOG_VPORT_OPTION: &str = "agent.log_vport";
const CONTAINER_PIPE_SIZE_OPTION: &str = "agent.container_pipe_size";
const UNIFIED_CGROUP_HIERARCHY_OPTION: &str = "agent.unified_cgroup_hierarchy";
const CONFIG_FILE: &str = "agent.config_file";
const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info;
const DEFAULT_HOTPLUG_TIMEOUT: time::Duration = time::Duration::from_secs(3);
@@ -47,6 +51,17 @@ const ERR_INVALID_CONTAINER_PIPE_SIZE_PARAM: &str = "unable to parse container p
const ERR_INVALID_CONTAINER_PIPE_SIZE_KEY: &str = "invalid container pipe size key name";
const ERR_INVALID_CONTAINER_PIPE_NEGATIVE: &str = "container pipe size should not be negative";
#[derive(Debug, Default, Deserialize)]
pub struct EndpointsConfig {
pub allowed: Vec<String>,
}
#[derive(Debug, Default)]
pub struct AgentEndpoints {
pub allowed: HashSet<String>,
pub all_allowed: bool,
}
#[derive(Debug)]
pub struct AgentConfig {
pub debug_console: bool,
@@ -59,6 +74,36 @@ pub struct AgentConfig {
pub server_addr: String,
pub unified_cgroup_hierarchy: bool,
pub tracing: tracer::TraceType,
pub endpoints: AgentEndpoints,
}
#[derive(Debug, Deserialize)]
pub struct AgentConfigBuilder {
pub debug_console: Option<bool>,
pub dev_mode: Option<bool>,
pub log_level: Option<String>,
pub hotplug_timeout: Option<time::Duration>,
pub debug_console_vport: Option<i32>,
pub log_vport: Option<i32>,
pub container_pipe_size: Option<i32>,
pub server_addr: Option<String>,
pub unified_cgroup_hierarchy: Option<bool>,
pub tracing: Option<tracer::TraceType>,
pub endpoints: Option<EndpointsConfig>,
}
macro_rules! config_override {
($builder:ident, $config:ident, $field:ident) => {
if let Some(v) = $builder.$field {
$config.$field = v;
}
};
($builder:ident, $config:ident, $field:ident, $func: ident) => {
if let Some(v) = $builder.$field {
$config.$field = $func(&v)?;
}
};
}
// parse_cmdline_param parse commandline parameters.
@@ -91,8 +136,8 @@ macro_rules! parse_cmdline_param {
};
}
impl AgentConfig {
pub fn new() -> AgentConfig {
impl Default for AgentConfig {
fn default() -> Self {
AgentConfig {
debug_console: false,
dev_mode: false,
@@ -104,33 +149,82 @@ impl AgentConfig {
server_addr: format!("{}:{}", VSOCK_ADDR, VSOCK_PORT),
unified_cgroup_hierarchy: false,
tracing: tracer::TraceType::Disabled,
endpoints: Default::default(),
}
}
}
impl FromStr for AgentConfig {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let agent_config_builder: AgentConfigBuilder =
toml::from_str(s).map_err(anyhow::Error::new)?;
let mut agent_config: AgentConfig = Default::default();
// Overwrite default values with the configuration files ones.
config_override!(agent_config_builder, agent_config, debug_console);
config_override!(agent_config_builder, agent_config, dev_mode);
config_override!(
agent_config_builder,
agent_config,
log_level,
logrus_to_slog_level
);
config_override!(agent_config_builder, agent_config, hotplug_timeout);
config_override!(agent_config_builder, agent_config, debug_console_vport);
config_override!(agent_config_builder, agent_config, log_vport);
config_override!(agent_config_builder, agent_config, container_pipe_size);
config_override!(agent_config_builder, agent_config, server_addr);
config_override!(agent_config_builder, agent_config, unified_cgroup_hierarchy);
config_override!(agent_config_builder, agent_config, tracing);
// Populate the allowed endpoints hash set, if we got any from the config file.
if let Some(endpoints) = agent_config_builder.endpoints {
for ep in endpoints.allowed {
agent_config.endpoints.allowed.insert(ep);
}
}
Ok(agent_config)
}
}
impl AgentConfig {
#[instrument]
pub fn parse_cmdline(&mut self, file: &str) -> Result<()> {
pub fn from_cmdline(file: &str) -> Result<AgentConfig> {
let mut config: AgentConfig = Default::default();
let cmdline = fs::read_to_string(file)?;
let params: Vec<&str> = cmdline.split_ascii_whitespace().collect();
for param in params.iter() {
// If we get a configuration file path from the command line, we
// generate our config from it.
// The agent will fail to start if the configuration file is not present,
// or if it can't be parsed properly.
if param.starts_with(format!("{}=", CONFIG_FILE).as_str()) {
let config_file = get_string_value(param)?;
return AgentConfig::from_config_file(&config_file);
}
// parse cmdline flags
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, self.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, self.dev_mode);
parse_cmdline_param!(param, DEBUG_CONSOLE_FLAG, config.debug_console);
parse_cmdline_param!(param, DEV_MODE_FLAG, config.dev_mode);
// Support "bare" tracing option for backwards compatibility with
// Kata 1.x.
if param == &TRACE_MODE_OPTION {
self.tracing = tracer::TraceType::Isolated;
config.tracing = tracer::TraceType::Isolated;
continue;
}
parse_cmdline_param!(param, TRACE_MODE_OPTION, self.tracing, get_trace_type);
parse_cmdline_param!(param, TRACE_MODE_OPTION, config.tracing, get_trace_type);
// parse cmdline options
parse_cmdline_param!(param, LOG_LEVEL_OPTION, self.log_level, get_log_level);
parse_cmdline_param!(param, LOG_LEVEL_OPTION, config.log_level, get_log_level);
parse_cmdline_param!(
param,
SERVER_ADDR_OPTION,
self.server_addr,
config.server_addr,
get_string_value
);
@@ -138,7 +232,7 @@ impl AgentConfig {
parse_cmdline_param!(
param,
HOTPLUG_TIMOUT_OPTION,
self.hotplug_timeout,
config.hotplug_timeout,
get_hotplug_timeout,
|hotplug_timeout: time::Duration| hotplug_timeout.as_secs() > 0
);
@@ -147,14 +241,14 @@ impl AgentConfig {
parse_cmdline_param!(
param,
DEBUG_CONSOLE_VPORT_OPTION,
self.debug_console_vport,
config.debug_console_vport,
get_vsock_port,
|port| port > 0
);
parse_cmdline_param!(
param,
LOG_VPORT_OPTION,
self.log_vport,
config.log_vport,
get_vsock_port,
|port| port > 0
);
@@ -162,34 +256,47 @@ impl AgentConfig {
parse_cmdline_param!(
param,
CONTAINER_PIPE_SIZE_OPTION,
self.container_pipe_size,
config.container_pipe_size,
get_container_pipe_size
);
parse_cmdline_param!(
param,
UNIFIED_CGROUP_HIERARCHY_OPTION,
self.unified_cgroup_hierarchy,
config.unified_cgroup_hierarchy,
get_bool_value
);
}
if let Ok(addr) = env::var(SERVER_ADDR_ENV_VAR) {
self.server_addr = addr;
config.server_addr = addr;
}
if let Ok(addr) = env::var(LOG_LEVEL_ENV_VAR) {
if let Ok(level) = logrus_to_slog_level(&addr) {
self.log_level = level;
config.log_level = level;
}
}
if let Ok(value) = env::var(TRACE_TYPE_ENV_VAR) {
if let Ok(result) = value.parse::<tracer::TraceType>() {
self.tracing = result;
config.tracing = result;
}
}
Ok(())
// We did not get a configuration file: allow all endpoints.
config.endpoints.all_allowed = true;
Ok(config)
}
#[instrument]
pub fn from_config_file(file: &str) -> Result<AgentConfig> {
let config = fs::read_to_string(file)?;
AgentConfig::from_str(&config)
}
pub fn is_allowed_endpoint(&self, ep: &str) -> bool {
self.endpoints.all_allowed || self.endpoints.allowed.contains(ep)
}
}
@@ -371,7 +478,7 @@ mod tests {
#[test]
fn test_new() {
let config = AgentConfig::new();
let config: AgentConfig = Default::default();
assert!(!config.debug_console);
assert!(!config.dev_mode);
assert_eq!(config.log_level, DEFAULT_LOG_LEVEL);
@@ -379,7 +486,7 @@ mod tests {
}
#[test]
fn test_parse_cmdline() {
fn test_from_cmdline() {
const TEST_SERVER_ADDR: &str = "vsock://-1:1024";
#[derive(Debug)]
@@ -716,15 +823,6 @@ mod tests {
let dir = tempdir().expect("failed to create tmpdir");
// First, check a missing file is handled
let file_path = dir.path().join("enoent");
let filename = file_path.to_str().expect("failed to create filename");
let mut config = AgentConfig::new();
let result = config.parse_cmdline(&filename.to_owned());
assert!(result.is_err());
// Now, test various combinations of file contents and environment
// variables.
for (i, d) in tests.iter().enumerate() {
@@ -753,22 +851,7 @@ mod tests {
vars_to_unset.push(name);
}
let mut config = AgentConfig::new();
assert!(!config.debug_console, "{}", msg);
assert!(!config.dev_mode, "{}", msg);
assert!(!config.unified_cgroup_hierarchy, "{}", msg);
assert_eq!(
config.hotplug_timeout,
time::Duration::from_secs(3),
"{}",
msg
);
assert_eq!(config.container_pipe_size, 0, "{}", msg);
assert_eq!(config.server_addr, TEST_SERVER_ADDR, "{}", msg);
assert_eq!(config.tracing, tracer::TraceType::Disabled, "{}", msg);
let result = config.parse_cmdline(filename);
assert!(result.is_ok(), "{}", msg);
let config = AgentConfig::from_cmdline(filename).expect("Failed to parse command line");
assert_eq!(d.debug_console, config.debug_console, "{}", msg);
assert_eq!(d.dev_mode, config.dev_mode, "{}", msg);
@@ -1276,4 +1359,35 @@ Caused by:
assert_result!(d.result, result, msg);
}
}
#[test]
fn test_config_builder_from_string() {
let config = AgentConfig::from_str(
r#"
dev_mode = true
server_addr = 'vsock://8:2048'
[endpoints]
allowed = ["CreateContainer", "StartContainer"]
"#,
)
.unwrap();
// Verify that the all_allowed flag is false
assert!(!config.endpoints.all_allowed);
// Verify that the override worked
assert!(config.dev_mode);
assert_eq!(config.server_addr, "vsock://8:2048");
assert_eq!(
config.endpoints.allowed,
vec!["CreateContainer".to_string(), "StartContainer".to_string()]
.iter()
.cloned()
.collect()
);
// Verify that the default values are valid
assert_eq!(config.hotplug_timeout, DEFAULT_HOTPLUG_TIMEOUT);
}
}

View File

@@ -17,10 +17,6 @@ use tokio::sync::Mutex;
#[cfg(target_arch = "s390x")]
use crate::ccw;
use crate::linux_abi::*;
use crate::mount::{
DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE,
DRIVER_SCSI_TYPE,
};
use crate::pci;
use crate::sandbox::Sandbox;
use crate::uevent::{wait_for_uevent, Uevent, UeventMatcher};
@@ -38,6 +34,19 @@ macro_rules! sl {
const VM_ROOTFS: &str = "/";
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
// VFIO device to be bound to a guest kernel driver
pub const DRIVER_VFIO_GK_TYPE: &str = "vfio-gk";
#[derive(Debug)]
struct DevIndexEntry {
idx: usize,
@@ -47,11 +56,6 @@ struct DevIndexEntry {
#[derive(Debug)]
struct DevIndex(HashMap<String, DevIndexEntry>);
#[instrument]
pub fn rescan_pci_bus() -> Result<()> {
online_device(SYSFS_PCI_BUS_RESCAN_FILE)
}
#[instrument]
pub fn online_device(path: &str) -> Result<()> {
fs::write(path, "1")?;
@@ -67,7 +71,7 @@ pub fn pcipath_to_sysfs(root_bus_sysfs: &str, pcipath: &pci::Path) -> Result<Str
let mut relpath = String::new();
for i in 0..pcipath.len() {
let bdf = format!("{}:{}.0", bus, pcipath[i]);
let bdf = format!("{}:{}", bus, pcipath[i]);
relpath = format!("{}/{}", relpath, bdf);
@@ -162,8 +166,6 @@ pub async fn get_virtio_blk_pci_device_name(
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = VirtioBlkPciMatcher::new(&sysfs_rel_path);
rescan_pci_bus()?;
let uev = wait_for_uevent(sandbox, matcher).await?;
Ok(format!("{}/{}", SYSTEM_DEV_PATH, &uev.devname))
}
@@ -255,6 +257,45 @@ pub async fn wait_for_pmem_device(sandbox: &Arc<Mutex<Sandbox>>, devpath: &str)
Ok(())
}
#[derive(Debug)]
struct PciMatcher {
devpath: String,
}
impl PciMatcher {
fn new(relpath: &str) -> Result<PciMatcher> {
let root_bus = create_pci_root_bus_path();
Ok(PciMatcher {
devpath: format!("{}{}", root_bus, relpath),
})
}
}
impl UeventMatcher for PciMatcher {
fn is_match(&self, uev: &Uevent) -> bool {
uev.devpath == self.devpath
}
}
pub async fn wait_for_pci_device(
sandbox: &Arc<Mutex<Sandbox>>,
pcipath: &pci::Path,
) -> Result<pci::Address> {
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = pcipath_to_sysfs(&root_bus_sysfs, pcipath)?;
let matcher = PciMatcher::new(&sysfs_rel_path)?;
let uev = wait_for_uevent(sandbox, matcher).await?;
let addr = uev
.devpath
.rsplit('/')
.next()
.ok_or_else(|| anyhow!("Bad device path {:?} in uevent", &uev.devpath))?;
let addr = pci::Address::from_str(addr)?;
Ok(addr)
}
/// Scan SCSI bus for the given SCSI address(SCSI-Id and LUN)
#[instrument]
fn scan_scsi_bus(scsi_addr: &str) -> Result<()> {
@@ -451,6 +492,37 @@ async fn virtio_nvdimm_device_handler(
update_spec_device_list(device, spec, devidx)
}
fn split_vfio_option(opt: &str) -> Option<(&str, &str)> {
let mut tokens = opt.split('=');
let hostbdf = tokens.next()?;
let path = tokens.next()?;
if tokens.next().is_some() {
None
} else {
Some((hostbdf, path))
}
}
// device.options should have one entry for each PCI device in the VFIO group
// Each option should have the form "DDDD:BB:DD.F=<pcipath>"
// DDDD:BB:DD.F is the device's PCI address in the host
// <pcipath> is a PCI path to the device in the guest (see pci.rs)
async fn vfio_gk_device_handler(
device: &Device,
_: &mut Spec,
sandbox: &Arc<Mutex<Sandbox>>,
_: &DevIndex,
) -> Result<()> {
for opt in device.options.iter() {
let (_, pcipath) =
split_vfio_option(opt).ok_or_else(|| anyhow!("Malformed VFIO option {:?}", opt))?;
let pcipath = pci::Path::from_str(pcipath)?;
wait_for_pci_device(sandbox, &pcipath).await?;
}
Ok(())
}
impl DevIndex {
fn new(spec: &Spec) -> DevIndex {
let mut map = HashMap::new();
@@ -520,6 +592,7 @@ async fn add_device(
DRIVER_MMIO_BLK_TYPE => virtiommio_blk_device_handler(device, spec, sandbox, devidx).await,
DRIVER_NVDIMM_TYPE => virtio_nvdimm_device_handler(device, spec, sandbox, devidx).await,
DRIVER_SCSI_TYPE => virtio_scsi_device_handler(device, spec, sandbox, devidx).await,
DRIVER_VFIO_GK_TYPE => vfio_gk_device_handler(device, spec, sandbox, devidx).await,
_ => Err(anyhow!("Unknown device type {}", device.field_type)),
}
}
@@ -1068,4 +1141,14 @@ mod tests {
assert!(!matcher_b.is_match(&uev_a));
assert!(!matcher_a.is_match(&uev_b));
}
#[test]
fn test_split_vfio_option() {
assert_eq!(
split_vfio_option("0000:01:00.0=02/01"),
Some(("0000:01:00.0", "02/01"))
);
assert_eq!(split_vfio_option("0000:01:00.0=02/01=rubbish"), None);
assert_eq!(split_vfio_option("0000:01:00.0"), None);
}
}

View File

@@ -9,7 +9,6 @@
use std::fs;
pub const SYSFS_DIR: &str = "/sys";
pub const SYSFS_PCI_BUS_RESCAN_FILE: &str = "/sys/bus/pci/rescan";
#[cfg(any(
target_arch = "powerpc64",
target_arch = "s390x",

View File

@@ -77,11 +77,11 @@ mod rpc;
mod tracer;
const NAME: &str = "kata-agent";
const KERNEL_CMDLINE_FILE: &str = "/proc/cmdline";
lazy_static! {
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> =
Arc::new(RwLock::new(config::AgentConfig::new()));
static ref AGENT_CONFIG: Arc<RwLock<AgentConfig>> = Arc::new(RwLock::new(
AgentConfig::from_cmdline("/proc/cmdline").unwrap()
));
}
#[instrument]
@@ -134,15 +134,11 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
console::initialize();
lazy_static::initialize(&AGENT_CONFIG);
// support vsock log
let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?;
let (shutdown_tx, shutdown_rx) = channel(true);
let agent_config = AGENT_CONFIG.clone();
let init_mode = unistd::getpid() == Pid::from_raw(1);
if init_mode {
// dup a new file descriptor for this temporary logger writer,
@@ -163,20 +159,15 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
e
})?;
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
init_agent_as_init(&logger, config.unified_cgroup_hierarchy)?;
init_agent_as_init(&logger, AGENT_CONFIG.read().await.unified_cgroup_hierarchy)?;
drop(logger_async_guard);
} else {
// once parsed cmdline and set the config, release the write lock
// as soon as possible in case other thread would get read lock on
// it.
let mut config = agent_config.write().await;
config.parse_cmdline(KERNEL_CMDLINE_FILE)?;
lazy_static::initialize(&AGENT_CONFIG);
}
let config = agent_config.read().await;
let config = AGENT_CONFIG.read().await;
let log_vport = config.log_vport as u32;
let log_handle = tokio::spawn(create_logger_task(rfd, log_vport, shutdown_rx.clone()));
@@ -209,12 +200,12 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
let _ = tracer::setup_tracing(NAME, &logger, &config)?;
}
let root = span!(tracing::Level::TRACE, "root-span", work_units = 2);
let root_span = span!(tracing::Level::TRACE, "root-span");
// XXX: Start the root trace transaction.
//
// XXX: Note that *ALL* spans needs to start after this point!!
let _enter = root.enter();
let span_guard = root_span.enter();
// Start the sandbox and wait for its ttRPC server to end
start_sandbox(&logger, &config, init_mode, &mut tasks, shutdown_rx.clone()).await?;
@@ -244,6 +235,10 @@ async fn real_main() -> std::result::Result<(), Box<dyn std::error::Error>> {
}
}
// force flushing spans
drop(span_guard);
drop(root_span);
if config.tracing != tracer::TraceType::Disabled {
tracer::end_tracing();
}

View File

@@ -22,6 +22,9 @@ use regex::Regex;
use crate::device::{
get_scsi_device_name, get_virtio_blk_pci_device_name, online_device, wait_for_pmem_device,
DRIVER_9P_TYPE, DRIVER_BLK_CCW_TYPE, DRIVER_BLK_TYPE, DRIVER_EPHEMERAL_TYPE, DRIVER_LOCAL_TYPE,
DRIVER_MMIO_BLK_TYPE, DRIVER_NVDIMM_TYPE, DRIVER_SCSI_TYPE, DRIVER_VIRTIOFS_TYPE,
DRIVER_WATCHABLE_BIND_TYPE,
};
use crate::linux_abi::*;
use crate::pci;
@@ -33,17 +36,6 @@ use anyhow::{anyhow, Context, Result};
use slog::Logger;
use tracing::instrument;
pub const DRIVER_9P_TYPE: &str = "9p";
pub const DRIVER_VIRTIOFS_TYPE: &str = "virtio-fs";
pub const DRIVER_BLK_TYPE: &str = "blk";
pub const DRIVER_BLK_CCW_TYPE: &str = "blk-ccw";
pub const DRIVER_MMIO_BLK_TYPE: &str = "mmioblk";
pub const DRIVER_SCSI_TYPE: &str = "scsi";
pub const DRIVER_NVDIMM_TYPE: &str = "nvdimm";
pub const DRIVER_EPHEMERAL_TYPE: &str = "ephemeral";
pub const DRIVER_LOCAL_TYPE: &str = "local";
pub const DRIVER_WATCHABLE_BIND_TYPE: &str = "watchable-bind";
pub const TYPE_ROOTFS: &str = "rootfs";
pub const MOUNT_GUEST_TAG: &str = "kataShared";

View File

@@ -312,7 +312,6 @@ impl Handle {
for route in list {
let link = self.find_link(LinkFilter::Name(&route.device)).await?;
let is_v6 = is_ipv6(route.get_gateway()) || is_ipv6(route.get_dest());
const MAIN_TABLE: u8 = packet::constants::RT_TABLE_MAIN;
const UNICAST: u8 = packet::constants::RTN_UNICAST;
@@ -334,7 +333,7 @@ impl Handle {
// `rtnetlink` offers a separate request builders for different IP versions (IP v4 and v6).
// This if branch is a bit clumsy because it does almost the same.
if is_v6 {
if route.get_family() == IPFamily::v6 {
let dest_addr = if !route.dest.is_empty() {
Ipv6Network::from_str(&route.dest)?
} else {
@@ -594,10 +593,6 @@ fn format_address(data: &[u8]) -> Result<String> {
}
}
fn is_ipv6(str: &str) -> bool {
Ipv6Addr::from_str(str).is_ok()
}
fn parse_mac_address(addr: &str) -> Result<[u8; 6]> {
let mut split = addr.splitn(6, ':');
@@ -932,16 +927,6 @@ mod tests {
assert_eq!(bytes, [0xAB, 0x0C, 0xDE, 0x12, 0x34, 0x56]);
}
#[test]
fn check_ipv6() {
assert!(is_ipv6("::1"));
assert!(is_ipv6("2001:0:3238:DFE1:63::FEFB"));
assert!(!is_ipv6(""));
assert!(!is_ipv6("127.0.0.1"));
assert!(!is_ipv6("10.10.10.10"));
}
fn clean_env_for_test_add_one_arp_neighbor(dummy_name: &str, ip: &str) {
// ip link delete dummy
Command::new("ip")

View File

@@ -9,51 +9,143 @@ use std::str::FromStr;
use anyhow::anyhow;
// The PCI spec reserves 5 bits for slot number (a.k.a. device
// number), giving slots 0..31
// The PCI spec reserves 5 bits (0..31) for slot number (a.k.a. device
// number)
const SLOT_BITS: u8 = 5;
const SLOT_MAX: u8 = (1 << SLOT_BITS) - 1;
// Represents a PCI function's slot number (a.k.a. device number),
// giving its location on a single bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Slot(u8);
// The PCI spec reserves 3 bits (0..7) for function number
const FUNCTION_BITS: u8 = 3;
const FUNCTION_MAX: u8 = (1 << FUNCTION_BITS) - 1;
impl Slot {
pub fn new<T: TryInto<u8> + fmt::Display + Copy>(v: T) -> anyhow::Result<Self> {
if let Ok(v8) = v.try_into() {
if v8 <= SLOT_MAX {
return Ok(Slot(v8));
// Represents a PCI function's slot (a.k.a. device) and function
// numbers, giving its location on a single logical bus
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SlotFn(u8);
impl SlotFn {
pub fn new<T, U>(ss: T, f: U) -> anyhow::Result<Self>
where
T: TryInto<u8> + fmt::Display + Copy,
U: TryInto<u8> + fmt::Display + Copy,
{
let ss8 = match ss.try_into() {
Ok(ss8) if ss8 <= SLOT_MAX => ss8,
_ => {
return Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
ss,
SLOT_MAX
));
}
}
Err(anyhow!(
"PCI slot {} should be in range [0..{:#x}]",
v,
SLOT_MAX
))
};
let f8 = match f.try_into() {
Ok(f8) if f8 <= FUNCTION_MAX => f8,
_ => {
return Err(anyhow!(
"PCI function {} should be in range [0..{:#x}]",
f,
FUNCTION_MAX
));
}
};
Ok(SlotFn(ss8 << FUNCTION_BITS | f8))
}
pub fn slot(self) -> u8 {
self.0 >> FUNCTION_BITS
}
pub fn function(self) -> u8 {
self.0 & FUNCTION_MAX
}
}
impl FromStr for Slot {
impl FromStr for SlotFn {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let v = isize::from_str_radix(s, 16)?;
Slot::new(v)
let mut tokens = s.split('.').fuse();
let slot = tokens.next();
let func = tokens.next();
if slot.is_none() || tokens.next().is_some() {
return Err(anyhow!(
"PCI slot/function {} should have the format SS.F",
s
));
}
let slot = isize::from_str_radix(slot.unwrap(), 16)?;
let func = match func {
Some(func) => isize::from_str_radix(func, 16)?,
None => 0,
};
SlotFn::new(slot, func)
}
}
impl fmt::Display for Slot {
impl fmt::Display for SlotFn {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:02x}", self.0)
write!(f, "{:02x}.{:01x}", self.slot(), self.function())
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Address {
domain: u16,
bus: u8,
slotfn: SlotFn,
}
impl Address {
pub fn new(domain: u16, bus: u8, slotfn: SlotFn) -> Self {
Address {
domain,
bus,
slotfn,
}
}
}
impl FromStr for Address {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let mut tokens = s.split(':').fuse();
let domain = tokens.next();
let bus = tokens.next();
let slotfn = tokens.next();
if domain.is_none() || bus.is_none() || slotfn.is_none() || tokens.next().is_some() {
return Err(anyhow!(
"PCI address {} should have the format DDDD:BB:SS.F",
s
));
}
let domain = u16::from_str_radix(domain.unwrap(), 16)?;
let bus = u8::from_str_radix(bus.unwrap(), 16)?;
let slotfn = SlotFn::from_str(slotfn.unwrap())?;
Ok(Address::new(domain, bus, slotfn))
}
}
impl fmt::Display for Address {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(f, "{:04x}:{:02x}:{}", self.domain, self.bus, self.slotfn)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Path(Vec<Slot>);
pub struct Path(Vec<SlotFn>);
impl Path {
pub fn new(slots: Vec<Slot>) -> anyhow::Result<Self> {
pub fn new(slots: Vec<SlotFn>) -> anyhow::Result<Self> {
if slots.is_empty() {
return Err(anyhow!("PCI path must have at least one element"));
}
@@ -63,7 +155,7 @@ impl Path {
// Let Path be treated as a slice of Slots
impl Deref for Path {
type Target = [Slot];
type Target = [SlotFn];
fn deref(&self) -> &Self::Target {
&self.0
@@ -85,83 +177,170 @@ impl FromStr for Path {
type Err = anyhow::Error;
fn from_str(s: &str) -> anyhow::Result<Self> {
let rslots: anyhow::Result<Vec<Slot>> = s.split('/').map(Slot::from_str).collect();
let rslots: anyhow::Result<Vec<SlotFn>> = s.split('/').map(SlotFn::from_str).collect();
Path::new(rslots?)
}
}
#[cfg(test)]
mod tests {
use crate::pci::{Path, Slot};
use super::*;
use std::str::FromStr;
#[test]
fn test_slot() {
fn test_slotfn() {
// Valid slots
let slot = Slot::new(0x00).unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::new(0x00, 0x0).unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::from_str("00").unwrap();
assert_eq!(format!("{}", slot), "00");
let sf = SlotFn::from_str("00.0").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let slot = Slot::new(31).unwrap();
let slot2 = Slot::from_str("1f").unwrap();
assert_eq!(slot, slot2);
let sf = SlotFn::from_str("00").unwrap();
assert_eq!(format!("{}", sf), "00.0");
let sf = SlotFn::new(31, 7).unwrap();
let sf2 = SlotFn::from_str("1f.7").unwrap();
assert_eq!(sf, sf2);
// Bad slots
let slot = Slot::new(-1);
assert!(slot.is_err());
let sf = SlotFn::new(-1, 0);
assert!(sf.is_err());
let slot = Slot::new(32);
assert!(slot.is_err());
let sf = SlotFn::new(32, 0);
assert!(sf.is_err());
let slot = Slot::from_str("20");
assert!(slot.is_err());
let sf = SlotFn::from_str("20.0");
assert!(sf.is_err());
let slot = Slot::from_str("xy");
assert!(slot.is_err());
let sf = SlotFn::from_str("20");
assert!(sf.is_err());
let slot = Slot::from_str("00/");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy.0");
assert!(sf.is_err());
let slot = Slot::from_str("");
assert!(slot.is_err());
let sf = SlotFn::from_str("xy");
assert!(sf.is_err());
// Bad functions
let sf = SlotFn::new(0, -1);
assert!(sf.is_err());
let sf = SlotFn::new(0, 8);
assert!(sf.is_err());
let sf = SlotFn::from_str("00.8");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.x");
assert!(sf.is_err());
// Bad formats
let sf = SlotFn::from_str("");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0.0");
assert!(sf.is_err());
let sf = SlotFn::from_str("00.0/");
assert!(sf.is_err());
let sf = SlotFn::from_str("00/");
assert!(sf.is_err());
}
#[test]
fn test_address() {
// Valid addresses
let sf0_0 = SlotFn::new(0, 0).unwrap();
let sf1f_7 = SlotFn::new(0x1f, 7).unwrap();
let addr = Address::new(0, 0, sf0_0);
assert_eq!(format!("{}", addr), "0000:00:00.0");
let addr2 = Address::from_str("0000:00:00.0").unwrap();
assert_eq!(addr, addr2);
let addr = Address::new(0xffff, 0xff, sf1f_7);
assert_eq!(format!("{}", addr), "ffff:ff:1f.7");
let addr2 = Address::from_str("ffff:ff:1f.7").unwrap();
assert_eq!(addr, addr2);
// Bad addresses
let addr = Address::from_str("10000:00:00.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:100:00.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:20.0");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:00.8");
assert!(addr.is_err());
let addr = Address::from_str("xyz");
assert!(addr.is_err());
let addr = Address::from_str("xyxy:xy:xy.z");
assert!(addr.is_err());
let addr = Address::from_str("0000:00:00.0:00");
assert!(addr.is_err());
}
#[test]
fn test_path() {
let slot3 = Slot::new(0x03).unwrap();
let slot4 = Slot::new(0x04).unwrap();
let slot5 = Slot::new(0x05).unwrap();
let sf3_0 = SlotFn::new(0x03, 0).unwrap();
let sf4_0 = SlotFn::new(0x04, 0).unwrap();
let sf5_0 = SlotFn::new(0x05, 0).unwrap();
let sfa_5 = SlotFn::new(0x0a, 5).unwrap();
let sfb_6 = SlotFn::new(0x0b, 6).unwrap();
let sfc_7 = SlotFn::new(0x0c, 7).unwrap();
// Valid paths
let pcipath = Path::new(vec![slot3]).unwrap();
assert_eq!(format!("{}", pcipath), "03");
let pcipath = Path::new(vec![sf3_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0");
let pcipath2 = Path::from_str("03.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 1);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[0], sf3_0);
let pcipath = Path::new(vec![slot3, slot4]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04");
let pcipath = Path::new(vec![sf3_0, sf4_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0");
let pcipath2 = Path::from_str("03.0/04.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 2);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
let pcipath = Path::new(vec![slot3, slot4, slot5]).unwrap();
assert_eq!(format!("{}", pcipath), "03/04/05");
let pcipath = Path::new(vec![sf3_0, sf4_0, sf5_0]).unwrap();
assert_eq!(format!("{}", pcipath), "03.0/04.0/05.0");
let pcipath2 = Path::from_str("03.0/04.0/05.0").unwrap();
assert_eq!(pcipath, pcipath2);
let pcipath2 = Path::from_str("03/04/05").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], slot3);
assert_eq!(pcipath[1], slot4);
assert_eq!(pcipath[2], slot5);
assert_eq!(pcipath[0], sf3_0);
assert_eq!(pcipath[1], sf4_0);
assert_eq!(pcipath[2], sf5_0);
let pcipath = Path::new(vec![sfa_5, sfb_6, sfc_7]).unwrap();
assert_eq!(format!("{}", pcipath), "0a.5/0b.6/0c.7");
let pcipath2 = Path::from_str("0a.5/0b.6/0c.7").unwrap();
assert_eq!(pcipath, pcipath2);
assert_eq!(pcipath.len(), 3);
assert_eq!(pcipath[0], sfa_5);
assert_eq!(pcipath[1], sfb_6);
assert_eq!(pcipath[2], sfc_7);
// Bad paths
assert!(Path::new(vec!()).is_err());
assert!(Path::from_str("20").is_err());
assert!(Path::from_str("00.8").is_err());
assert!(Path::from_str("//").is_err());
assert!(Path::from_str("xyz").is_err());
}

View File

@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::pci;
use async_trait::async_trait;
use rustjail::{pipestream::PipeStream, process::StreamType};
use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf};
@@ -21,7 +20,7 @@ use ttrpc::{
use anyhow::{anyhow, Context, Result};
use oci::{LinuxNamespace, Root, Spec};
use protobuf::{RepeatedField, SingularPtrField};
use protobuf::{Message, RepeatedField, SingularPtrField};
use protocols::agent::{
AddSwapRequest, AgentDetails, CopyFileRequest, GuestDetailsResponse, Interfaces, Metrics,
OOMEvent, ReadStreamResponse, Routes, StatsContainerResponse, WaitProcessResponse,
@@ -44,12 +43,13 @@ use nix::sys::stat;
use nix::unistd::{self, Pid};
use rustjail::process::ProcessOperations;
use crate::device::{add_devices, pcipath_to_sysfs, rescan_pci_bus, update_device_cgroup};
use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup};
use crate::linux_abi::*;
use crate::metrics::get_metrics;
use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST};
use crate::namespace::{NSTYPEIPC, NSTYPEPID, NSTYPEUTS};
use crate::network::setup_guest_dns;
use crate::pci;
use crate::random;
use crate::sandbox::Sandbox;
use crate::version::{AGENT_VERSION, API_VERSION};
@@ -86,6 +86,21 @@ macro_rules! sl {
};
}
macro_rules! is_allowed {
($req:ident) => {
if !AGENT_CONFIG
.read()
.await
.is_allowed_endpoint($req.descriptor().name())
{
return Err(ttrpc_error(
ttrpc::Code::UNIMPLEMENTED,
format!("{} is blocked", $req.descriptor().name()),
));
}
};
}
#[derive(Clone, Debug)]
pub struct AgentService {
sandbox: Arc<Mutex<Sandbox>>,
@@ -134,10 +149,6 @@ impl AgentService {
info!(sl!(), "receive createcontainer, spec: {:?}", &oci);
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
// Some devices need some extra processing (the ones invoked with
// --device for instance), and that's what this call is doing. It
// updates the devices listed in the OCI spec, so that they actually
@@ -535,6 +546,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_container", req);
is_allowed!(req);
match self.do_create_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -547,6 +559,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "start_container", req);
is_allowed!(req);
match self.do_start_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -559,6 +572,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::RemoveContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "remove_container", req);
is_allowed!(req);
match self.do_remove_container(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -571,6 +585,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ExecProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "exec_process", req);
is_allowed!(req);
match self.do_exec_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -583,6 +598,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SignalProcessRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "signal_process", req);
is_allowed!(req);
match self.do_signal_process(req).await {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
Ok(_) => Ok(Empty::new()),
@@ -595,6 +611,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::WaitProcessRequest,
) -> ttrpc::Result<WaitProcessResponse> {
trace_rpc_call!(ctx, "wait_process", req);
is_allowed!(req);
self.do_wait_process(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -606,6 +623,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateContainerRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "update_container", req);
is_allowed!(req);
let cid = req.container_id.clone();
let res = req.resources;
@@ -641,6 +659,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StatsContainerRequest,
) -> ttrpc::Result<StatsContainerResponse> {
trace_rpc_call!(ctx, "stats_container", req);
is_allowed!(req);
let cid = req.container_id;
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -662,6 +681,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::PauseContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "pause_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -685,6 +705,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ResumeContainerRequest,
) -> ttrpc::Result<protocols::empty::Empty> {
trace_rpc_call!(ctx, "resume_container", req);
is_allowed!(req);
let cid = req.get_container_id();
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -707,6 +728,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::WriteStreamRequest,
) -> ttrpc::Result<WriteStreamResponse> {
is_allowed!(req);
self.do_write_stream(req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -717,6 +739,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, true)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -727,6 +750,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
_ctx: &TtrpcContext,
req: protocols::agent::ReadStreamRequest,
) -> ttrpc::Result<ReadStreamResponse> {
is_allowed!(req);
self.do_read_stream(req, false)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))
@@ -738,6 +762,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CloseStdinRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "close_stdin", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id;
@@ -774,6 +799,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::TtyWinResizeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "tty_win_resize", req);
is_allowed!(req);
let cid = req.container_id.clone();
let eid = req.exec_id.clone();
@@ -814,6 +840,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateInterfaceRequest,
) -> ttrpc::Result<Interface> {
trace_rpc_call!(ctx, "update_interface", req);
is_allowed!(req);
let interface = req.interface.into_option().ok_or_else(|| {
ttrpc_error(
@@ -841,6 +868,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::UpdateRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "update_routes", req);
is_allowed!(req);
let new_routes = req
.routes
@@ -881,6 +909,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListInterfacesRequest,
) -> ttrpc::Result<Interfaces> {
trace_rpc_call!(ctx, "list_interfaces", req);
is_allowed!(req);
let list = self
.sandbox
@@ -908,6 +937,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ListRoutesRequest,
) -> ttrpc::Result<Routes> {
trace_rpc_call!(ctx, "list_routes", req);
is_allowed!(req);
let list = self
.sandbox
@@ -930,14 +960,16 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::StartTracingRequest,
) -> ttrpc::Result<Empty> {
info!(sl!(), "start_tracing {:?}", req);
is_allowed!(req);
Ok(Empty::new())
}
async fn stop_tracing(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::StopTracingRequest,
req: protocols::agent::StopTracingRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
Ok(Empty::new())
}
@@ -947,6 +979,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CreateSandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "create_sandbox", req);
is_allowed!(req);
{
let sandbox = self.sandbox.clone();
@@ -1012,6 +1045,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::DestroySandboxRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "destroy_sandbox", req);
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let mut sandbox = s.lock().await;
@@ -1033,6 +1067,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddARPNeighborsRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_arp_neighbors", req);
is_allowed!(req);
let neighs = req
.neighbors
@@ -1066,6 +1101,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
ctx: &TtrpcContext,
req: protocols::agent::OnlineCPUMemRequest,
) -> ttrpc::Result<Empty> {
is_allowed!(req);
let s = Arc::clone(&self.sandbox);
let sandbox = s.lock().await;
trace_rpc_call!(ctx, "online_cpu_mem", req);
@@ -1083,6 +1119,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::ReseedRandomDevRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "reseed_random_dev", req);
is_allowed!(req);
random::reseed_rng(req.data.as_slice())
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1096,6 +1133,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GuestDetailsRequest,
) -> ttrpc::Result<GuestDetailsResponse> {
trace_rpc_call!(ctx, "get_guest_details", req);
is_allowed!(req);
info!(sl!(), "get guest details!");
let mut resp = GuestDetailsResponse::new();
@@ -1124,6 +1162,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::MemHotplugByProbeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "mem_hotplug_by_probe", req);
is_allowed!(req);
do_mem_hotplug_by_probe(&req.memHotplugProbeAddr)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1137,6 +1176,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::SetGuestDateTimeRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "set_guest_date_time", req);
is_allowed!(req);
do_set_guest_date_time(req.Sec, req.Usec)
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1150,6 +1190,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::CopyFileRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "copy_file", req);
is_allowed!(req);
do_copy_file(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
@@ -1162,6 +1203,7 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::GetMetricsRequest,
) -> ttrpc::Result<Metrics> {
trace_rpc_call!(ctx, "get_metrics", req);
is_allowed!(req);
match get_metrics(&req) {
Err(e) => Err(ttrpc_error(ttrpc::Code::INTERNAL, e.to_string())),
@@ -1176,8 +1218,9 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
async fn get_oom_event(
&self,
_ctx: &TtrpcContext,
_req: protocols::agent::GetOOMEventRequest,
req: protocols::agent::GetOOMEventRequest,
) -> ttrpc::Result<OOMEvent> {
is_allowed!(req);
let sandbox = self.sandbox.clone();
let s = sandbox.lock().await;
let event_rx = &s.event_rx.clone();
@@ -1203,8 +1246,11 @@ impl protocols::agent_ttrpc::AgentService for AgentService {
req: protocols::agent::AddSwapRequest,
) -> ttrpc::Result<Empty> {
trace_rpc_call!(ctx, "add_swap", req);
is_allowed!(req);
do_add_swap(&req).map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
do_add_swap(&self.sandbox, &req)
.await
.map_err(|e| ttrpc_error(ttrpc::Code::INTERNAL, e.to_string()))?;
Ok(Empty::new())
}
@@ -1557,43 +1603,13 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> {
Ok(())
}
pub fn path_name_lookup<P: std::clone::Clone + AsRef<Path> + std::fmt::Debug>(
path: P,
lookup: &str,
) -> Result<(PathBuf, String)> {
for entry in fs::read_dir(path.clone())? {
let entry = entry?;
if let Some(name) = entry.path().file_name() {
if let Some(name) = name.to_str() {
if Some(0) == name.find(lookup) {
return Ok((entry.path(), name.to_string()));
}
}
}
}
Err(anyhow!("cannot get {} dir in {:?}", lookup, path))
}
fn do_add_swap(req: &AddSwapRequest) -> Result<()> {
// re-scan PCI bus
// looking for hidden devices
rescan_pci_bus().context("Could not rescan PCI bus")?;
async fn do_add_swap(sandbox: &Arc<Mutex<Sandbox>>, req: &AddSwapRequest) -> Result<()> {
let mut slots = Vec::new();
for slot in &req.PCIPath {
slots.push(pci::Slot::new(*slot as u8)?);
slots.push(pci::SlotFn::new(*slot, 0)?);
}
let pcipath = pci::Path::new(slots)?;
let root_bus_sysfs = format!("{}{}", SYSFS_DIR, create_pci_root_bus_path());
let sysfs_rel_path = format!(
"{}{}",
root_bus_sysfs,
pcipath_to_sysfs(&root_bus_sysfs, &pcipath)?
);
let (mut virtio_path, _) = path_name_lookup(sysfs_rel_path, "virtio")?;
virtio_path.push("block");
let (_, dev_name) = path_name_lookup(virtio_path, "vd")?;
let dev_name = format!("/dev/{}", dev_name);
let dev_name = get_virtio_blk_pci_device_name(sandbox, &pcipath).await?;
let c_str = CString::new(dev_name)?;
let ret = unsafe { libc::swapon(c_str.as_ptr() as *const c_char, 0) };

View File

@@ -7,6 +7,7 @@ use crate::config::AgentConfig;
use anyhow::Result;
use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::{global, sdk::trace::Config, trace::TracerProvider};
use serde::Deserialize;
use slog::{info, o, Logger};
use std::collections::HashMap;
use std::error::Error;
@@ -17,7 +18,7 @@ use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Registry;
use ttrpc::r#async::TtrpcContext;
#[derive(Debug, PartialEq)]
#[derive(Debug, Deserialize, PartialEq)]
pub enum TraceType {
Disabled,
Isolated,

View File

@@ -111,10 +111,13 @@ pub async fn wait_for_uevent(
sandbox: &Arc<Mutex<Sandbox>>,
matcher: impl UeventMatcher,
) -> Result<Uevent> {
let logprefix = format!("Waiting for {:?}", &matcher);
info!(sl!(), "{}", logprefix);
let mut sb = sandbox.lock().await;
for uev in sb.uevent_map.values() {
if matcher.is_match(uev) {
info!(sl!(), "Device {:?} found in device map", uev);
info!(sl!(), "{}: found {:?} in uevent map", logprefix, &uev);
return Ok(uev.clone());
}
}
@@ -129,7 +132,8 @@ pub async fn wait_for_uevent(
sb.uevent_watchers.push(Some((Box::new(matcher), tx)));
drop(sb); // unlock
info!(sl!(), "Waiting on channel for uevent notification\n");
info!(sl!(), "{}: waiting on channel", logprefix);
let hotplug_timeout = AGENT_CONFIG.read().await.hotplug_timeout;
let uev = match tokio::time::timeout(hotplug_timeout, rx).await {
@@ -146,6 +150,7 @@ pub async fn wait_for_uevent(
}
};
info!(sl!(), "{}: found {:?} on channel", logprefix, &uev);
Ok(uev)
}

View File

@@ -8,7 +8,7 @@ coverage.html
/config/*.toml
config-generated.go
/containerd-shim-kata-v2
/cmd/containerd-shim-v2/monitor_address
/pkg/containerd-shim-v2/monitor_address
/data/kata-collect-data.sh
/kata-monitor
/kata-netmon

View File

@@ -611,7 +611,7 @@ go-test: $(GENERATED_FILES)
go test -v -mod=vendor ./...
check-go-static:
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cli
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./cmd/kata-runtime
$(QUIET_CHECK)../../ci/go-no-os-exit.sh ./virtcontainers
coverage:

View File

@@ -7,6 +7,7 @@ package main
import (
"flag"
"fmt"
"net/http"
"os"
goruntime "runtime"
@@ -25,7 +26,7 @@ var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug
var (
appName = "kata-monitor"
// version is the kata monitor version.
version = "0.1.0"
version = "0.2.0"
GitCommit = "unknown-commit"
)
@@ -54,6 +55,15 @@ func printVersion(ver versionInfo) {
}
}
type endpoint struct {
handler http.HandlerFunc
path string
desc string
}
// global variable endpoints contains all available endpoints
var endpoints []endpoint
func main() {
ver := versionInfo{
AppName: appName,
@@ -97,19 +107,62 @@ func main() {
panic(err)
}
// setup handlers, now only metrics is supported
// setup handlers, currently only metrics are supported
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(km.ProcessMetricsRequest))
m.Handle("/sandboxes", http.HandlerFunc(km.ListSandboxes))
m.Handle("/agent-url", http.HandlerFunc(km.GetAgentURL))
endpoints = []endpoint{
{
path: "/metrics",
desc: "Get metrics from sandboxes.",
handler: km.ProcessMetricsRequest,
},
{
path: "/sandboxes",
desc: "List all Kata Containers sandboxes.",
handler: km.ListSandboxes,
},
{
path: "/agent-url",
desc: "Get sandbox agent URL.",
handler: km.GetAgentURL,
},
{
path: "/debug/vars",
desc: "Golang pprof `/debug/vars` endpoint for kata runtime shim process.",
handler: km.ExpvarHandler,
},
{
path: "/debug/pprof/",
desc: "Golang pprof `/debug/pprof/` endpoint for kata runtime shim process.",
handler: km.PprofIndex,
},
{
path: "/debug/pprof/cmdline",
desc: "Golang pprof `/debug/pprof/cmdline` endpoint for kata runtime shim process.",
handler: km.PprofCmdline,
},
{
path: "/debug/pprof/profile",
desc: "Golang pprof `/debug/pprof/profile` endpoint for kata runtime shim process.",
handler: km.PprofProfile,
},
{
path: "/debug/pprof/symbol",
desc: "Golang pprof `/debug/pprof/symbol` endpoint for kata runtime shim process.",
handler: km.PprofSymbol,
},
{
path: "/debug/pprof/trace",
desc: "Golang pprof `/debug/pprof/trace` endpoint for kata runtime shim process.",
handler: km.PprofTrace,
},
}
// for debug shim process
m.Handle("/debug/vars", http.HandlerFunc(km.ExpvarHandler))
m.Handle("/debug/pprof/", http.HandlerFunc(km.PprofIndex))
m.Handle("/debug/pprof/cmdline", http.HandlerFunc(km.PprofCmdline))
m.Handle("/debug/pprof/profile", http.HandlerFunc(km.PprofProfile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(km.PprofSymbol))
m.Handle("/debug/pprof/trace", http.HandlerFunc(km.PprofTrace))
for _, endpoint := range endpoints {
m.Handle(endpoint.path, endpoint.handler)
}
// root index page to show all endpoints in kata-monitor
m.Handle("/", http.HandlerFunc(indexPage))
// listening on the server
svr := &http.Server{
@@ -119,6 +172,23 @@ func main() {
logrus.Fatal(svr.ListenAndServe())
}
func indexPage(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("Available HTTP endpoints:\n"))
spacing := 0
for _, endpoint := range endpoints {
if len(endpoint.path) > spacing {
spacing = len(endpoint.path)
}
}
spacing = spacing + 3
formattedString := fmt.Sprintf("%%-%ds: %%s\n", spacing)
for _, endpoint := range endpoints {
w.Write([]byte(fmt.Sprintf(formattedString, endpoint.path, endpoint.desc)))
}
}
// initLog setup logger
func initLog() {
kataMonitorLog := logrus.WithFields(logrus.Fields{

View File

@@ -28,9 +28,9 @@ func setupCheckHostIsVMContainerCapable(assert *assert.Assertions, cpuInfoFile s
func TestCCCheckCLIFunction(t *testing.T) {
var cpuData []testCPUData
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), true, ""},
{filepath.Join(sysModuleDir, "vhost"), true, ""},
{filepath.Join(sysModuleDir, "vhost_net"), true, ""},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "vhost"), "", true},
{filepath.Join(sysModuleDir, "vhost_net"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)

View File

@@ -39,7 +39,8 @@ func testSetCPUTypeGeneric(t *testing.T) {
_, config, err := makeRuntimeConfig(tmpdir)
assert.NoError(err)
setCPUtype(config.HypervisorType)
err = setCPUtype(config.HypervisorType)
assert.NoError(err)
assert.Equal(archRequiredCPUFlags, savedArchRequiredCPUFlags)
assert.Equal(archRequiredCPUAttribs, savedArchRequiredCPUAttribs)

View File

@@ -47,8 +47,8 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm_hv"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
{filepath.Join(sysModuleDir, "kvm_hv"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -58,51 +58,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -47,7 +47,7 @@ func TestCCCheckCLIFunction(t *testing.T) {
}
moduleData := []testModuleData{
{filepath.Join(sysModuleDir, "kvm"), false, "Y"},
{filepath.Join(sysModuleDir, "kvm"), "", true},
}
genericCheckCLIFunction(t, cpuData, moduleData)
@@ -57,51 +57,51 @@ func TestArchKernelParamHandler(t *testing.T) {
assert := assert.New(t)
type testData struct {
onVMM bool
expectIgnore bool
fields logrus.Fields
msg string
onVMM bool
expectIgnore bool
}
data := []testData{
{true, false, logrus.Fields{}, ""},
{false, false, logrus.Fields{}, ""},
{logrus.Fields{}, "", true, false},
{logrus.Fields{}, "", false, false},
{
false,
false,
logrus.Fields{
// wrong type
"parameter": 123,
},
"foo",
false,
false,
},
{
false,
false,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
false,
false,
},
{
true,
true,
logrus.Fields{
"parameter": "unrestricted_guest",
},
"",
true,
true,
},
{
false,
true,
logrus.Fields{
"parameter": "nested",
},
"",
false,
true,
},
}

View File

@@ -170,7 +170,7 @@ func newNetmon(params netmonParams) (*netmon, error) {
func (n *netmon) cleanup() {
os.RemoveAll(n.storagePath)
n.netHandler.Delete()
n.netHandler.Close()
close(n.linkDoneCh)
close(n.rtDoneCh)
}

View File

@@ -348,7 +348,7 @@ func TestScanNetwork(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
idx, expected := testCreateDummyNetwork(t, handler)
@@ -480,7 +480,7 @@ func TestActionsCLI(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler
@@ -569,7 +569,7 @@ func TestHandleRTMNewLink(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler
err = n.handleRTMNewLink(ev)
assert.NotNil(t, err)
@@ -690,7 +690,7 @@ func TestHandleRouteEvent(t *testing.T) {
handler, err := netlink.NewHandle(netlinkFamily)
assert.Nil(t, err)
assert.NotNil(t, handler)
defer handler.Delete()
defer handler.Close()
n.netHandler = handler

View File

@@ -8,13 +8,14 @@ require (
github.com/blang/semver/v4 v4.0.0
github.com/containerd/cgroups v1.0.1
github.com/containerd/console v1.0.2
github.com/containerd/containerd v1.5.4
github.com/containerd/containerd v1.5.7
github.com/containerd/cri-containerd v1.11.1-0.20190125013620-4dd6735020f5
github.com/containerd/fifo v1.0.0
github.com/containerd/ttrpc v1.0.2
github.com/containerd/typeurl v1.0.2
github.com/containernetworking/plugins v0.9.1
github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af
github.com/fsnotify/fsnotify v1.4.9
github.com/go-ini/ini v1.28.2
github.com/go-openapi/errors v0.18.0
github.com/go-openapi/runtime v0.18.0
@@ -26,7 +27,7 @@ require (
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9
github.com/kata-containers/govmm v0.0.0-20210909155007-1b60b536f3c7
github.com/mdlayher/vsock v0.0.0-20191108225356-d9c65923cb8f
github.com/opencontainers/runc v1.0.1
github.com/opencontainers/runc v1.0.2
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux v1.8.2
github.com/pkg/errors v0.9.1
@@ -39,9 +40,8 @@ require (
github.com/smartystreets/goconvey v1.6.4 // indirect
github.com/stretchr/testify v1.6.1
github.com/urfave/cli v1.22.2
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb
go.opentelemetry.io/otel v0.15.0
go.opentelemetry.io/otel/exporters/trace/jaeger v0.15.0
go.opentelemetry.io/otel/sdk v0.15.0
@@ -54,7 +54,7 @@ require (
)
replace (
github.com/containerd/containerd => github.com/containerd/containerd v1.5.4
github.com/containerd/containerd => github.com/containerd/containerd v1.5.7
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.1
github.com/uber-go/atomic => go.uber.org/atomic v1.5.1
google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8

View File

@@ -53,8 +53,8 @@ github.com/Microsoft/go-winio v0.4.17 h1:iT12IBVClFevaf8PuVyi3UmZOVh4OqnaLxDTW2O
github.com/Microsoft/go-winio v0.4.17/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
github.com/Microsoft/hcsshim v0.8.16/go.mod h1:o5/SZqmR7x9JNKsW3pu+nqHm0MF8vbA+VxGOoXdC600=
github.com/Microsoft/hcsshim v0.8.18 h1:cYnKADiM1869gvBpos3YCteeT6sZLB48lB5dmMMs8Tg=
github.com/Microsoft/hcsshim v0.8.18/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
github.com/Microsoft/hcsshim v0.8.21 h1:btRfUDThBE5IKcvI8O8jOiIkujUsAMBSRsYDYmEi6oM=
github.com/Microsoft/hcsshim v0.8.21/go.mod h1:+w2gRZ5ReXQhFOrvSQeNfhrYB/dg3oDwTOcER2fw4I4=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/PuerkitoBio/purell v1.1.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
@@ -109,8 +109,8 @@ github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f2
github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw=
github.com/containerd/console v1.0.2 h1:Pi6D+aZXM+oUw1czuKgH5IJ+y0jhYcwBJfx5/Ghn9dE=
github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ=
github.com/containerd/containerd v1.5.4 h1:uPF0og3ByFzDnaStfiQj3fVGTEtaSNyU+bW7GR/nqGA=
github.com/containerd/containerd v1.5.4/go.mod h1:sx18RgvW6ABJ4iYUw7Q5x7bgFOAB9B6G7+yO0XBc4zw=
github.com/containerd/containerd v1.5.7 h1:rQyoYtj4KddB3bxG6SAqd4+08gePNyJjRqvOIfV3rkM=
github.com/containerd/containerd v1.5.7/go.mod h1:gyvv6+ugqY25TiXxcZC3L5yOeYgEw0QMhscqVp1AR9c=
github.com/containerd/continuity v0.0.0-20210208174643-50096c924a4e/go.mod h1:EXlVlkqNba9rJe3j7w3Xa924itAMLgZH4UD/Q4PExuQ=
github.com/containerd/continuity v0.1.0 h1:UFRRY5JemiAhPZrr/uE0n8fMTLcZsUvySPr1+D7pgr8=
github.com/containerd/continuity v0.1.0/go.mod h1:ICJu0PwR54nI0yPEnJ6jcS+J7CZAUXrLh8lPo2knzsM=
@@ -343,7 +343,7 @@ github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/imdario/mergo v0.3.12/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9 h1:x9HFDMDCsaxTvC4X3o0ZN6mw99dT/wYnTItGwhBRmg0=
github.com/intel-go/cpuid v0.0.0-20210602155658-5747e5cec0d9/go.mod h1:RmeVYf9XrPRbRc3XIx0gLYA8qOFvNoPOfaEZduRlEp4=
@@ -434,7 +434,6 @@ github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/
github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc=
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g=
@@ -532,14 +531,12 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
github.com/urfave/cli v1.22.2 h1:gsqYFH8bb9ekPA12kRo0hfjngWQjkJPlN9R0N78BoUo=
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 h1:cPXZWzzG0NllBLdjWoD1nDfaqu98YMv+OneaKc8sPOA=
github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868 h1:FFT5/l13iFxg+2dzyoiXZPmMtoclsyBKnUqTEzYpDXw=
github.com/vishvananda/netlink v1.1.1-0.20210924202909-187053b97868/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3Cym0ZtKyq7L16eZUtYKs+BaHDN6mAns=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=

View File

@@ -10,9 +10,6 @@ package containerdshim
import (
"context"
"fmt"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"math/rand"
"os"
"os/user"
"path"
@@ -24,6 +21,8 @@ import (
"github.com/containerd/containerd/mount"
taskAPI "github.com/containerd/containerd/runtime/v2/task"
"github.com/containerd/typeurl"
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
@@ -40,6 +39,13 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci"
)
type startManagementServerFunc func(s *service, ctx context.Context, ociSpec *specs.Spec)
var defaultStartManagementServerFunc startManagementServerFunc = func(s *service, ctx context.Context, ociSpec *specs.Spec) {
go s.startManagementServer(ctx, ociSpec)
shimLog.Info("management server started")
}
func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*container, error) {
rootFs := vc.RootFs{}
if len(r.Rootfs) == 1 {
@@ -132,7 +138,9 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
}
s.hpid = uint32(pid)
go s.startManagementServer(ctx, ociSpec)
if defaultStartManagementServerFunc != nil {
defaultStartManagementServerFunc(s, ctx, ociSpec)
}
case vc.PodContainer:
span, ctx := katatrace.Trace(s.ctx, shimLog, "create", shimTracingTags)
@@ -274,13 +282,15 @@ func doMount(mounts []*containerd_types.Mount, rootfs string) error {
}
func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error {
userName, err := createVmmUser()
userName, err := utils.CreateVmmUser()
if err != nil {
return err
}
defer func() {
if err != nil {
removeVmmUser(userName)
if err2 := utils.RemoveVmmUser(userName); err2 != nil {
shimLog.WithField("userName", userName).WithError(err).Warn("failed to remove user")
}
}
}()
@@ -336,48 +346,3 @@ func configureNonRootHypervisor(runtimeConfig *oci.RuntimeConfig) error {
}
return fmt.Errorf("failed to get the gid of /dev/kvm")
}
func createVmmUser() (string, error) {
var (
err error
userName string
)
useraddPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/useradd", "/sbin/useradd", "/bin/useradd"})
if err != nil {
return "", err
}
nologinPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/nologin", "/sbin/nologin", "/bin/nologin"})
if err != nil {
return "", err
}
// Add retries to mitigate temporary errors and race conditions. For example, the user already exists
// or another instance of the runtime is also creating a user.
maxAttempt := 5
for i := 0; i < maxAttempt; i++ {
userName = fmt.Sprintf("kata-%v", rand.Intn(100000))
_, err = utils.RunCommand([]string{useraddPath, "-M", "-s", nologinPath, userName, "-c", "\"Kata Containers temporary hypervisor user\""})
if err == nil {
return userName, nil
}
shimLog.WithField("attempt", i+1).WithField("username", userName).
WithError(err).Warn("failed to add user, will try again")
}
return "", fmt.Errorf("could not create VMM user: %v", err)
}
func removeVmmUser(user string) {
userdelPath, err := utils.FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"})
if err != nil {
shimLog.WithField("username", user).WithError(err).Warn("failed to remove user")
}
// Add retries to mitigate temporary errors and race conditions.
for i := 0; i < 5; i++ {
_, err := utils.RunCommand([]string{userdelPath, "-f", user})
if err == nil {
return
}
shimLog.WithField("username", user).WithField("attempt", i+1).WithError(err).Warn("failed to remove user")
}
}

View File

@@ -183,8 +183,13 @@ func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
}
// SocketAddress returns the address of the abstract domain socket for communicating with the
// GetSandboxesStoragePath returns the storage path where sandboxes info are stored
func GetSandboxesStoragePath() string {
return "/run/vc/sbs"
}
// SocketAddress returns the address of the unix domain socket for communicating with the
// shim management endpoint
func SocketAddress(id string) string {
return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), "run", "vc", "sbs", id, "shim-monitor.sock"))
return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock"))
}

View File

@@ -112,13 +112,5 @@ func getAddress(ctx context.Context, bundlePath, address, id string) (string, er
}
func noNeedForOutput(detach bool, tty bool) bool {
if !detach {
return false
}
if !tty {
return false
}
return true
return detach && tty
}

View File

@@ -16,6 +16,9 @@ import (
"path"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/opencontainers/runtime-spec/specs-go"
@@ -122,6 +125,10 @@ func init() {
}
tc = ktu.NewTestConstraint(false)
// disable shim management server.
// all tests are not using this, so just set it to nil
defaultStartManagementServerFunc = nil
}
// createOCIConfig creates an OCI configuration (spec) file in
@@ -326,3 +333,39 @@ func writeOCIConfigFile(spec specs.Spec, configPath string) error {
return ioutil.WriteFile(configPath, bytes, testFileMode)
}
func TestNoNeedForOutput(t *testing.T) {
assert := assert.New(t)
testCases := []struct {
detach bool
tty bool
result bool
}{
{
detach: true,
tty: true,
result: true,
},
{
detach: false,
tty: true,
result: false,
},
{
detach: true,
tty: false,
result: false,
},
{
detach: false,
tty: false,
result: false,
},
}
for i := range testCases {
result := noNeedForOutput(testCases[i].detach, testCases[i].tty)
assert.Equal(testCases[i].result, result)
}
}

View File

@@ -8,15 +8,12 @@ package katamonitor
import (
"context"
"encoding/json"
"fmt"
"net"
"net/url"
"strings"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/xeipuuv/gojsonpointer"
"google.golang.org/grpc"
pb "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -117,14 +114,12 @@ func parseEndpoint(endpoint string) (string, string, error) {
}
}
// getSandboxes get kata sandbox from the container engine.
// this will be called only after monitor start.
func (km *KataMonitor) getSandboxes() (map[string]struct{}, error) {
sandboxMap := make(map[string]struct{})
// getSandboxes gets ready sandboxes from the container engine and returns an updated sandboxMap
func (km *KataMonitor) getSandboxes(sandboxMap map[string]bool) (map[string]bool, error) {
newMap := make(map[string]bool)
runtimeClient, runtimeConn, err := getRuntimeClient(km.runtimeEndpoint)
if err != nil {
return sandboxMap, err
return newMap, err
}
defer closeConnection(runtimeConn)
@@ -140,61 +135,26 @@ func (km *KataMonitor) getSandboxes() (map[string]struct{}, error) {
monitorLog.Debugf("ListPodSandboxRequest: %v", request)
r, err := runtimeClient.ListPodSandbox(context.Background(), request)
if err != nil {
return sandboxMap, err
return newMap, err
}
monitorLog.Debugf("ListPodSandboxResponse: %v", r)
for _, pod := range r.Items {
request := &pb.PodSandboxStatusRequest{
PodSandboxId: pod.Id,
Verbose: true,
}
r, err := runtimeClient.PodSandboxStatus(context.Background(), request)
if err != nil {
return sandboxMap, err
}
lowRuntime := ""
var res map[string]interface{}
if err := json.Unmarshal([]byte(r.Info["info"]), &res); err != nil {
monitorLog.WithError(err).WithField("pod", r).Error("failed to Unmarshal pod info")
continue
} else {
monitorLog.WithField("pod info", res).Debug("")
// get low level container runtime
// containerd stores the pod runtime in "/runtimeType" while CRI-O stores it the
// io.kubernetes.cri-o.RuntimeHandler annotation: check for both.
keys := []string{"/runtimeType", "/runtimeSpec/annotations/io.kubernetes.cri-o.RuntimeHandler"}
for _, key := range keys {
pointer, _ := gojsonpointer.NewJsonPointer(key)
rt, _, _ := pointer.Get(res)
if rt != nil {
if str, ok := rt.(string); ok {
lowRuntime = str
break
}
}
}
}
// If lowRuntime is empty something changed in containerd/CRI-O or we are dealing with an unknown container engine.
// Safest options is to add the POD in the list: we will be able to connect to the shim to retrieve the actual info
// only for kata PODs.
if lowRuntime == "" {
monitorLog.WithField("pod", r).Info("unable to retrieve the runtime type")
sandboxMap[pod.Id] = struct{}{}
// Use the cached data if available
if isKata, ok := sandboxMap[pod.Id]; ok {
newMap[pod.Id] = isKata
continue
}
// Check if a directory associated with the POD ID exist on the kata fs:
// if so we know that the POD is a kata one.
newMap[pod.Id] = checkSandboxFSExists(pod.Id)
monitorLog.WithFields(logrus.Fields{
"low runtime": lowRuntime,
"id": pod.Id,
"is kata": newMap[pod.Id],
"pod": pod,
}).Debug("")
if strings.Contains(lowRuntime, "kata") {
sandboxMap[pod.Id] = struct{}{}
}
}
return sandboxMap, nil
return newMap, nil
}

View File

@@ -140,8 +140,8 @@ func encodeMetricFamily(mfs []*dto.MetricFamily, encoder expfmt.Encoder) error {
// aggregateSandboxMetrics will get metrics from one sandbox and do some process
func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
// get all sandboxes from cache
sandboxes := km.sandboxCache.getAllSandboxes()
// get all kata sandboxes from cache
sandboxes := km.sandboxCache.getKataSandboxes()
// save running kata pods as a metrics.
runningShimCount.Set(float64(len(sandboxes)))
@@ -159,7 +159,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
monitorLog.WithField("sandbox_count", len(sandboxes)).Debugf("sandboxes count")
// get metrics from sandbox's shim
for sandboxID := range sandboxes {
for _, sandboxID := range sandboxes {
wg.Add(1)
go func(sandboxID string, results chan<- []*dto.MetricFamily) {
sandboxMetrics, err := getParsedMetrics(sandboxID)

View File

@@ -9,19 +9,22 @@ import (
"errors"
"fmt"
"net/http"
"os"
"strings"
"sync"
"time"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
)
var monitorLog = logrus.WithField("source", "kata-monitor")
const (
RuntimeContainerd = "containerd"
RuntimeCRIO = "cri-o"
podCacheRefreshTimeSeconds = 15
RuntimeContainerd = "containerd"
RuntimeCRIO = "cri-o"
fsMonitorRetryDelaySeconds = 60
podCacheRefreshDelaySeconds = 5
)
// SetLogger sets the logger for katamonitor package.
@@ -50,7 +53,7 @@ func NewKataMonitor(runtimeEndpoint string) (*KataMonitor, error) {
runtimeEndpoint: runtimeEndpoint,
sandboxCache: &sandboxCache{
Mutex: &sync.Mutex{},
sandboxes: make(map[string]struct{}),
sandboxes: make(map[string]bool),
},
}
@@ -64,15 +67,74 @@ func NewKataMonitor(runtimeEndpoint string) (*KataMonitor, error) {
// startPodCacheUpdater will boot a thread to manage sandbox cache
func (km *KataMonitor) startPodCacheUpdater() {
sbsWatcher, err := fsnotify.NewWatcher()
if err != nil {
monitorLog.WithError(err).Fatal("failed to setup sandbox events watcher")
os.Exit(1)
}
defer sbsWatcher.Close()
for {
time.Sleep(podCacheRefreshTimeSeconds * time.Second)
sandboxes, err := km.getSandboxes()
err = sbsWatcher.Add(getSandboxFS())
if err != nil {
monitorLog.WithError(err).Error("failed to get sandboxes")
// if there are no kata pods (yet), the kata /run/vc directory may not be there: retry later
monitorLog.WithError(err).Warnf("cannot monitor %s, retry in %d sec.", getSandboxFS(), fsMonitorRetryDelaySeconds)
time.Sleep(fsMonitorRetryDelaySeconds * time.Second)
continue
}
monitorLog.WithField("count", len(sandboxes)).Debug("update sandboxes list")
km.sandboxCache.set(sandboxes)
monitorLog.Debugf("started fs monitoring @%s", getSandboxFS())
break
}
// we refresh the pod cache once if we get multiple add/delete pod events in a short time (< podCacheRefreshDelaySeconds)
cacheUpdateTimer := time.NewTimer(podCacheRefreshDelaySeconds * time.Second)
cacheUpdateTimerWasSet := false
for {
select {
case event, ok := <-sbsWatcher.Events:
if !ok {
monitorLog.WithError(err).Fatal("cannot watch sandboxes fs")
os.Exit(1)
}
monitorLog.WithField("event", event).Debug("got sandbox event")
switch event.Op {
case fsnotify.Create:
splitPath := strings.Split(event.Name, string(os.PathSeparator))
id := splitPath[len(splitPath)-1]
if !km.sandboxCache.putIfNotExists(id, true) {
monitorLog.WithField("pod", id).Warn(
"CREATE event but pod already present in the sandbox cache")
}
monitorLog.WithField("pod", id).Info("sandbox cache: added pod")
case fsnotify.Remove:
splitPath := strings.Split(event.Name, string(os.PathSeparator))
id := splitPath[len(splitPath)-1]
if !km.sandboxCache.deleteIfExists(id) {
monitorLog.WithField("pod", id).Warn(
"REMOVE event but pod was missing from the sandbox cache")
}
monitorLog.WithField("pod", id).Info("sandbox cache: removed pod")
default:
monitorLog.WithField("event", event).Warn("got unexpected fs event")
}
// While we process fs events directly to update the sandbox cache we need to sync with the
// container engine to ensure we are on sync with it: we can get out of sync in environments
// where kata workloads can be started by other processes than the container engine.
cacheUpdateTimerWasSet = cacheUpdateTimer.Reset(podCacheRefreshDelaySeconds * time.Second)
monitorLog.WithField("was reset", cacheUpdateTimerWasSet).Debugf(
"cache update timer fires in %d secs", podCacheRefreshDelaySeconds)
case <-cacheUpdateTimer.C:
sandboxes, err := km.getSandboxes(km.sandboxCache.getAllSandboxes())
if err != nil {
monitorLog.WithError(err).Error("failed to get sandboxes")
continue
}
monitorLog.WithField("count", len(sandboxes)).Info("synced sandbox cache with the container engine")
monitorLog.WithField("sandboxes", sandboxes).Debug("dump sandbox cache")
km.sandboxCache.set(sandboxes)
}
}
}
@@ -95,20 +157,8 @@ func (km *KataMonitor) GetAgentURL(w http.ResponseWriter, r *http.Request) {
// ListSandboxes list all sandboxes running in Kata
func (km *KataMonitor) ListSandboxes(w http.ResponseWriter, r *http.Request) {
sandboxes := km.getSandboxList()
sandboxes := km.sandboxCache.getKataSandboxes()
for _, s := range sandboxes {
w.Write([]byte(fmt.Sprintf("%s\n", s)))
}
}
func (km *KataMonitor) getSandboxList() []string {
sn := km.sandboxCache.getAllSandboxes()
result := make([]string, len(sn))
i := 0
for k := range sn {
result[i] = k
i++
}
return result
}

View File

@@ -11,15 +11,28 @@ import (
type sandboxCache struct {
*sync.Mutex
sandboxes map[string]struct{}
// the bool value tracks if the pod is a kata one (true) or not (false)
sandboxes map[string]bool
}
func (sc *sandboxCache) getAllSandboxes() map[string]struct{} {
func (sc *sandboxCache) getAllSandboxes() map[string]bool {
sc.Lock()
defer sc.Unlock()
return sc.sandboxes
}
func (sc *sandboxCache) getKataSandboxes() []string {
sc.Lock()
defer sc.Unlock()
var katasandboxes []string
for id, isKata := range sc.sandboxes {
if isKata {
katasandboxes = append(katasandboxes, id)
}
}
return katasandboxes
}
func (sc *sandboxCache) deleteIfExists(id string) bool {
sc.Lock()
defer sc.Unlock()
@@ -33,12 +46,12 @@ func (sc *sandboxCache) deleteIfExists(id string) bool {
return false
}
func (sc *sandboxCache) putIfNotExists(id string) bool {
func (sc *sandboxCache) putIfNotExists(id string, value bool) bool {
sc.Lock()
defer sc.Unlock()
if _, found := sc.sandboxes[id]; !found {
sc.sandboxes[id] = struct{}{}
sc.sandboxes[id] = value
return true
}
@@ -46,7 +59,7 @@ func (sc *sandboxCache) putIfNotExists(id string) bool {
return false
}
func (sc *sandboxCache) set(sandboxes map[string]struct{}) {
func (sc *sandboxCache) set(sandboxes map[string]bool) {
sc.Lock()
defer sc.Unlock()
sc.sandboxes = sandboxes

View File

@@ -16,10 +16,10 @@ func TestSandboxCache(t *testing.T) {
assert := assert.New(t)
sc := &sandboxCache{
Mutex: &sync.Mutex{},
sandboxes: make(map[string]struct{}),
sandboxes: make(map[string]bool),
}
scMap := map[string]struct{}{"111": {}}
scMap := map[string]bool{"111": true}
sc.set(scMap)
@@ -28,12 +28,12 @@ func TestSandboxCache(t *testing.T) {
// put new item
id := "new-id"
b := sc.putIfNotExists(id)
b := sc.putIfNotExists(id, true)
assert.Equal(true, b)
assert.Equal(2, len(scMap))
// put key that alreay exists
b = sc.putIfNotExists(id)
b = sc.putIfNotExists(id, true)
assert.Equal(false, b)
b = sc.deleteIfExists(id)

View File

@@ -10,6 +10,8 @@ import (
"io/ioutil"
"net"
"net/http"
"os"
"path/filepath"
"time"
cdshim "github.com/containerd/containerd/runtime/v2/shim"
@@ -37,6 +39,17 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
return "", fmt.Errorf("sandbox not found in %+v", r.URL.Query())
}
func getSandboxFS() string {
return shim.GetSandboxesStoragePath()
}
func checkSandboxFSExists(sandboxID string) bool {
sbsPath := filepath.Join(string(filepath.Separator), getSandboxFS(), sandboxID)
_, err := os.Stat(sbsPath)
return !os.IsNotExist(err)
}
// BuildShimClient builds and returns an http client for communicating with the provided sandbox
func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)

View File

@@ -7,17 +7,22 @@ package utils
import (
"fmt"
"math/rand"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/sirupsen/logrus"
)
const (
acceptEncodingHeader = "Accept-Encoding"
)
var utilsLog = logrus.WithFields(logrus.Fields{"source": "pkg/utils"})
// GzipAccepted returns whether the client will accept gzip-encoded content.
func GzipAccepted(header http.Header) bool {
a := header.Get(acceptEncodingHeader)
@@ -99,3 +104,52 @@ func FirstValidExecutable(paths []string) (string, error) {
}
return "", fmt.Errorf("all the executables are invalid")
}
// CreateVmmUser create a temp user for running Kata Containers under rootless mode.
func CreateVmmUser() (string, error) {
var (
err error
userName string
)
useraddPath, err := FirstValidExecutable([]string{"/usr/sbin/useradd", "/sbin/useradd", "/bin/useradd"})
if err != nil {
return "", err
}
nologinPath, err := FirstValidExecutable([]string{"/usr/sbin/nologin", "/sbin/nologin", "/bin/nologin"})
if err != nil {
return "", err
}
// Add retries to mitigate temporary errors and race conditions. For example, the user already exists
// or another instance of the runtime is also creating a user.
maxAttempt := 5
for i := 0; i < maxAttempt; i++ {
userName = fmt.Sprintf("kata-%v", rand.Intn(100000))
_, err = RunCommand([]string{useraddPath, "-M", "-s", nologinPath, userName, "-c", "\"Kata Containers temporary hypervisor user\""})
if err == nil {
return userName, nil
}
utilsLog.WithField("attempt", i+1).WithField("username", userName).
WithError(err).Warn("failed to add user, will try again")
}
return "", fmt.Errorf("could not create VMM user: %v", err)
}
// RemoveVmmUser delete user created by CreateVmmUser.
func RemoveVmmUser(user string) error {
userdelPath, err := FirstValidExecutable([]string{"/usr/sbin/userdel", "/sbin/userdel", "/bin/userdel"})
if err != nil {
utilsLog.WithField("username", user).WithError(err).Warn("failed to remove user")
return err
}
// Add retries to mitigate temporary errors and race conditions.
for i := 0; i < 5; i++ {
if _, err = RunCommand([]string{userdelPath, "-f", user}); err == nil {
return nil
}
utilsLog.WithField("username", user).WithField("attempt", i+1).WithError(err).Warn("failed to remove user")
}
return err
}

View File

@@ -59,7 +59,7 @@ var (
// ErrVmcomputeOperationInvalidState is an error encountered when the compute system is not in a valid state for the requested operation
ErrVmcomputeOperationInvalidState = hcs.ErrVmcomputeOperationInvalidState
// ErrProcNotFound is an error encountered when the the process cannot be found
// ErrProcNotFound is an error encountered when a procedure look up fails.
ErrProcNotFound = hcs.ErrProcNotFound
// ErrVmcomputeOperationAccessIsDenied is an error which can be encountered when enumerating compute systems in RS1/RS2
@@ -159,7 +159,7 @@ func (e *ProcessError) Error() string {
// IsNotExist checks if an error is caused by the Container or Process not existing.
// Note: Currently, ErrElementNotFound can mean that a Process has either
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
// will currently return true when the error is ErrElementNotFound or ErrProcNotFound.
// will currently return true when the error is ErrElementNotFound.
func IsNotExist(err error) bool {
if _, ok := err.(EndpointNotFoundError); ok {
return true
@@ -192,7 +192,7 @@ func IsTimeout(err error) bool {
// a Container or Process being already stopped.
// Note: Currently, ErrElementNotFound can mean that a Process has either
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
// will currently return true when the error is ErrElementNotFound or ErrProcNotFound.
// will currently return true when the error is ErrElementNotFound.
func IsAlreadyStopped(err error) bool {
return hcs.IsAlreadyStopped(getInnerError(err))
}

View File

@@ -7,6 +7,9 @@ import (
// HNSEndpoint represents a network endpoint in HNS
type HNSEndpoint = hns.HNSEndpoint
// HNSEndpointStats represent the stats for an networkendpoint in HNS
type HNSEndpointStats = hns.EndpointStats
// Namespace represents a Compartment.
type Namespace = hns.Namespace
@@ -108,3 +111,8 @@ func GetHNSEndpointByID(endpointID string) (*HNSEndpoint, error) {
func GetHNSEndpointByName(endpointName string) (*HNSEndpoint, error) {
return hns.GetHNSEndpointByName(endpointName)
}
// GetHNSEndpointStats gets the endpoint stats by ID
func GetHNSEndpointStats(endpointName string) (*HNSEndpointStats, error) {
return hns.GetHNSEndpointStats(endpointName)
}

View File

@@ -60,7 +60,7 @@ var (
// ErrVmcomputeOperationInvalidState is an error encountered when the compute system is not in a valid state for the requested operation
ErrVmcomputeOperationInvalidState = syscall.Errno(0xc0370105)
// ErrProcNotFound is an error encountered when the the process cannot be found
// ErrProcNotFound is an error encountered when a procedure look up fails.
ErrProcNotFound = syscall.Errno(0x7f)
// ErrVmcomputeOperationAccessIsDenied is an error which can be encountered when enumerating compute systems in RS1/RS2
@@ -242,12 +242,11 @@ func makeProcessError(process *Process, op string, err error, events []ErrorEven
// IsNotExist checks if an error is caused by the Container or Process not existing.
// Note: Currently, ErrElementNotFound can mean that a Process has either
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
// will currently return true when the error is ErrElementNotFound or ErrProcNotFound.
// will currently return true when the error is ErrElementNotFound.
func IsNotExist(err error) bool {
err = getInnerError(err)
return err == ErrComputeSystemDoesNotExist ||
err == ErrElementNotFound ||
err == ErrProcNotFound
err == ErrElementNotFound
}
// IsAlreadyClosed checks if an error is caused by the Container or Process having been
@@ -278,12 +277,11 @@ func IsTimeout(err error) bool {
// a Container or Process being already stopped.
// Note: Currently, ErrElementNotFound can mean that a Process has either
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
// will currently return true when the error is ErrElementNotFound or ErrProcNotFound.
// will currently return true when the error is ErrElementNotFound.
func IsAlreadyStopped(err error) bool {
err = getInnerError(err)
return err == ErrVmcomputeAlreadyStopped ||
err == ErrElementNotFound ||
err == ErrProcNotFound
err == ErrElementNotFound
}
// IsNotSupported returns a boolean indicating whether the error is caused by

View File

@@ -30,6 +30,7 @@ type HNSEndpoint struct {
EnableLowMetric bool `json:",omitempty"`
Namespace *Namespace `json:",omitempty"`
EncapOverhead uint16 `json:",omitempty"`
SharedContainers []string `json:",omitempty"`
}
//SystemType represents the type of the system on which actions are done
@@ -57,6 +58,18 @@ type EndpointResquestResponse struct {
Error string
}
// EndpointStats is the object that has stats for a given endpoint
type EndpointStats struct {
BytesReceived uint64 `json:"BytesReceived"`
BytesSent uint64 `json:"BytesSent"`
DroppedPacketsIncoming uint64 `json:"DroppedPacketsIncoming"`
DroppedPacketsOutgoing uint64 `json:"DroppedPacketsOutgoing"`
EndpointID string `json:"EndpointId"`
InstanceID string `json:"InstanceId"`
PacketsReceived uint64 `json:"PacketsReceived"`
PacketsSent uint64 `json:"PacketsSent"`
}
// HNSEndpointRequest makes a HNS call to modify/query a network endpoint
func HNSEndpointRequest(method, path, request string) (*HNSEndpoint, error) {
endpoint := &HNSEndpoint{}
@@ -79,11 +92,27 @@ func HNSListEndpointRequest() ([]HNSEndpoint, error) {
return endpoint, nil
}
// hnsEndpointStatsRequest makes a HNS call to query the stats for a given endpoint ID
func hnsEndpointStatsRequest(id string) (*EndpointStats, error) {
var stats EndpointStats
err := hnsCall("GET", "/endpointstats/"+id, "", &stats)
if err != nil {
return nil, err
}
return &stats, nil
}
// GetHNSEndpointByID get the Endpoint by ID
func GetHNSEndpointByID(endpointID string) (*HNSEndpoint, error) {
return HNSEndpointRequest("GET", endpointID, "")
}
// GetHNSEndpointStats get the stats for a n Endpoint by ID
func GetHNSEndpointStats(endpointID string) (*EndpointStats, error) {
return hnsEndpointStatsRequest(endpointID)
}
// GetHNSEndpointByName gets the endpoint filtered by Name
func GetHNSEndpointByName(endpointName string) (*HNSEndpoint, error) {
hnsResponse, err := HNSListEndpointRequest()

View File

@@ -23,7 +23,7 @@ var (
Package = "github.com/containerd/containerd"
// Version holds the complete version number. Filled in at linking time.
Version = "1.5.4+unknown"
Version = "1.5.7+unknown"
// Revision is filled with the VCS (e.g. git) revision being used to build
// the program at linking time.

View File

@@ -0,0 +1,12 @@
root = true
[*.go]
indent_style = tab
indent_size = 4
insert_final_newline = true
[*.{yml,yaml}]
indent_style = space
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true

View File

@@ -0,0 +1 @@
go.sum linguist-generated

View File

@@ -0,0 +1,6 @@
# Setup a Global .gitignore for OS and editor generated files:
# https://help.github.com/articles/ignoring-files
# git config --global core.excludesfile ~/.gitignore_global
.vagrant
*.sublime-project

View File

@@ -0,0 +1,36 @@
sudo: false
language: go
go:
- "stable"
- "1.11.x"
- "1.10.x"
- "1.9.x"
matrix:
include:
- go: "stable"
env: GOLINT=true
allow_failures:
- go: tip
fast_finish: true
before_install:
- if [ ! -z "${GOLINT}" ]; then go get -u golang.org/x/lint/golint; fi
script:
- go test --race ./...
after_script:
- test -z "$(gofmt -s -l -w . | tee /dev/stderr)"
- if [ ! -z "${GOLINT}" ]; then echo running golint; golint --set_exit_status ./...; else echo skipping golint; fi
- go vet ./...
os:
- linux
- osx
- windows
notifications:
email: false

View File

@@ -0,0 +1,52 @@
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# You can update this list using the following command:
#
# $ git shortlog -se | awk '{print $2 " " $3 " " $4}'
# Please keep the list sorted.
Aaron L <aaron@bettercoder.net>
Adrien Bustany <adrien@bustany.org>
Amit Krishnan <amit.krishnan@oracle.com>
Anmol Sethi <me@anmol.io>
Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
Bruno Bigras <bigras.bruno@gmail.com>
Caleb Spare <cespare@gmail.com>
Case Nelson <case@teammating.com>
Chris Howey <chris@howey.me> <howeyc@gmail.com>
Christoffer Buchholz <christoffer.buchholz@gmail.com>
Daniel Wagner-Hall <dawagner@gmail.com>
Dave Cheney <dave@cheney.net>
Evan Phoenix <evan@fallingsnow.net>
Francisco Souza <f@souza.cc>
Hari haran <hariharan.uno@gmail.com>
John C Barstow
Kelvin Fo <vmirage@gmail.com>
Ken-ichirou MATSUZAWA <chamas@h4.dion.ne.jp>
Matt Layher <mdlayher@gmail.com>
Nathan Youngman <git@nathany.com>
Nickolai Zeldovich <nickolai@csail.mit.edu>
Patrick <patrick@dropbox.com>
Paul Hammond <paul@paulhammond.org>
Pawel Knap <pawelknap88@gmail.com>
Pieter Droogendijk <pieter@binky.org.uk>
Pursuit92 <JoshChase@techpursuit.net>
Riku Voipio <riku.voipio@linaro.org>
Rob Figueiredo <robfig@gmail.com>
Rodrigo Chiossi <rodrigochiossi@gmail.com>
Slawek Ligus <root@ooz.ie>
Soge Zhang <zhssoge@gmail.com>
Tiffany Jernigan <tiffany.jernigan@intel.com>
Tilak Sharma <tilaks@google.com>
Tom Payne <twpayne@gmail.com>
Travis Cline <travis.cline@gmail.com>
Tudor Golubenco <tudor.g@gmail.com>
Vahe Khachikyan <vahe@live.ca>
Yukang <moorekang@gmail.com>
bronze1man <bronze1man@gmail.com>
debrando <denis.brandolini@gmail.com>
henrikedwards <henrik.edwards@gmail.com>
铁哥 <guotie.9@gmail.com>

View File

@@ -0,0 +1,317 @@
# Changelog
## v1.4.7 / 2018-01-09
* BSD/macOS: Fix possible deadlock on closing the watcher on kqueue (thanks @nhooyr and @glycerine)
* Tests: Fix missing verb on format string (thanks @rchiossi)
* Linux: Fix deadlock in Remove (thanks @aarondl)
* Linux: Watch.Add improvements (avoid race, fix consistency, reduce garbage) (thanks @twpayne)
* Docs: Moved FAQ into the README (thanks @vahe)
* Linux: Properly handle inotify's IN_Q_OVERFLOW event (thanks @zeldovich)
* Docs: replace references to OS X with macOS
## v1.4.2 / 2016-10-10
* Linux: use InotifyInit1 with IN_CLOEXEC to stop leaking a file descriptor to a child process when using fork/exec [#178](https://github.com/fsnotify/fsnotify/pull/178) (thanks @pattyshack)
## v1.4.1 / 2016-10-04
* Fix flaky inotify stress test on Linux [#177](https://github.com/fsnotify/fsnotify/pull/177) (thanks @pattyshack)
## v1.4.0 / 2016-10-01
* add a String() method to Event.Op [#165](https://github.com/fsnotify/fsnotify/pull/165) (thanks @oozie)
## v1.3.1 / 2016-06-28
* Windows: fix for double backslash when watching the root of a drive [#151](https://github.com/fsnotify/fsnotify/issues/151) (thanks @brunoqc)
## v1.3.0 / 2016-04-19
* Support linux/arm64 by [patching](https://go-review.googlesource.com/#/c/21971/) x/sys/unix and switching to to it from syscall (thanks @suihkulokki) [#135](https://github.com/fsnotify/fsnotify/pull/135)
## v1.2.10 / 2016-03-02
* Fix golint errors in windows.go [#121](https://github.com/fsnotify/fsnotify/pull/121) (thanks @tiffanyfj)
## v1.2.9 / 2016-01-13
kqueue: Fix logic for CREATE after REMOVE [#111](https://github.com/fsnotify/fsnotify/pull/111) (thanks @bep)
## v1.2.8 / 2015-12-17
* kqueue: fix race condition in Close [#105](https://github.com/fsnotify/fsnotify/pull/105) (thanks @djui for reporting the issue and @ppknap for writing a failing test)
* inotify: fix race in test
* enable race detection for continuous integration (Linux, Mac, Windows)
## v1.2.5 / 2015-10-17
* inotify: use epoll_create1 for arm64 support (requires Linux 2.6.27 or later) [#100](https://github.com/fsnotify/fsnotify/pull/100) (thanks @suihkulokki)
* inotify: fix path leaks [#73](https://github.com/fsnotify/fsnotify/pull/73) (thanks @chamaken)
* kqueue: watch for rename events on subdirectories [#83](https://github.com/fsnotify/fsnotify/pull/83) (thanks @guotie)
* kqueue: avoid infinite loops from symlinks cycles [#101](https://github.com/fsnotify/fsnotify/pull/101) (thanks @illicitonion)
## v1.2.1 / 2015-10-14
* kqueue: don't watch named pipes [#98](https://github.com/fsnotify/fsnotify/pull/98) (thanks @evanphx)
## v1.2.0 / 2015-02-08
* inotify: use epoll to wake up readEvents [#66](https://github.com/fsnotify/fsnotify/pull/66) (thanks @PieterD)
* inotify: closing watcher should now always shut down goroutine [#63](https://github.com/fsnotify/fsnotify/pull/63) (thanks @PieterD)
* kqueue: close kqueue after removing watches, fixes [#59](https://github.com/fsnotify/fsnotify/issues/59)
## v1.1.1 / 2015-02-05
* inotify: Retry read on EINTR [#61](https://github.com/fsnotify/fsnotify/issues/61) (thanks @PieterD)
## v1.1.0 / 2014-12-12
* kqueue: rework internals [#43](https://github.com/fsnotify/fsnotify/pull/43)
* add low-level functions
* only need to store flags on directories
* less mutexes [#13](https://github.com/fsnotify/fsnotify/issues/13)
* done can be an unbuffered channel
* remove calls to os.NewSyscallError
* More efficient string concatenation for Event.String() [#52](https://github.com/fsnotify/fsnotify/pull/52) (thanks @mdlayher)
* kqueue: fix regression in rework causing subdirectories to be watched [#48](https://github.com/fsnotify/fsnotify/issues/48)
* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51)
## v1.0.4 / 2014-09-07
* kqueue: add dragonfly to the build tags.
* Rename source code files, rearrange code so exported APIs are at the top.
* Add done channel to example code. [#37](https://github.com/fsnotify/fsnotify/pull/37) (thanks @chenyukang)
## v1.0.3 / 2014-08-19
* [Fix] Windows MOVED_TO now translates to Create like on BSD and Linux. [#36](https://github.com/fsnotify/fsnotify/issues/36)
## v1.0.2 / 2014-08-17
* [Fix] Missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso)
* [Fix] Make ./path and path equivalent. (thanks @zhsso)
## v1.0.0 / 2014-08-15
* [API] Remove AddWatch on Windows, use Add.
* Improve documentation for exported identifiers. [#30](https://github.com/fsnotify/fsnotify/issues/30)
* Minor updates based on feedback from golint.
## dev / 2014-07-09
* Moved to [github.com/fsnotify/fsnotify](https://github.com/fsnotify/fsnotify).
* Use os.NewSyscallError instead of returning errno (thanks @hariharan-uno)
## dev / 2014-07-04
* kqueue: fix incorrect mutex used in Close()
* Update example to demonstrate usage of Op.
## dev / 2014-06-28
* [API] Don't set the Write Op for attribute notifications [#4](https://github.com/fsnotify/fsnotify/issues/4)
* Fix for String() method on Event (thanks Alex Brainman)
* Don't build on Plan 9 or Solaris (thanks @4ad)
## dev / 2014-06-21
* Events channel of type Event rather than *Event.
* [internal] use syscall constants directly for inotify and kqueue.
* [internal] kqueue: rename events to kevents and fileEvent to event.
## dev / 2014-06-19
* Go 1.3+ required on Windows (uses syscall.ERROR_MORE_DATA internally).
* [internal] remove cookie from Event struct (unused).
* [internal] Event struct has the same definition across every OS.
* [internal] remove internal watch and removeWatch methods.
## dev / 2014-06-12
* [API] Renamed Watch() to Add() and RemoveWatch() to Remove().
* [API] Pluralized channel names: Events and Errors.
* [API] Renamed FileEvent struct to Event.
* [API] Op constants replace methods like IsCreate().
## dev / 2014-06-12
* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98)
## dev / 2014-05-23
* [API] Remove current implementation of WatchFlags.
* current implementation doesn't take advantage of OS for efficiency
* provides little benefit over filtering events as they are received, but has extra bookkeeping and mutexes
* no tests for the current implementation
* not fully implemented on Windows [#93](https://github.com/howeyc/fsnotify/issues/93#issuecomment-39285195)
## v0.9.3 / 2014-12-31
* kqueue: cleanup internal watch before sending remove event [#51](https://github.com/fsnotify/fsnotify/issues/51)
## v0.9.2 / 2014-08-17
* [Backport] Fix missing create events on macOS. [#14](https://github.com/fsnotify/fsnotify/issues/14) (thanks @zhsso)
## v0.9.1 / 2014-06-12
* Fix data race on kevent buffer (thanks @tilaks) [#98](https://github.com/howeyc/fsnotify/pull/98)
## v0.9.0 / 2014-01-17
* IsAttrib() for events that only concern a file's metadata [#79][] (thanks @abustany)
* [Fix] kqueue: fix deadlock [#77][] (thanks @cespare)
* [NOTICE] Development has moved to `code.google.com/p/go.exp/fsnotify` in preparation for inclusion in the Go standard library.
## v0.8.12 / 2013-11-13
* [API] Remove FD_SET and friends from Linux adapter
## v0.8.11 / 2013-11-02
* [Doc] Add Changelog [#72][] (thanks @nathany)
* [Doc] Spotlight and double modify events on macOS [#62][] (reported by @paulhammond)
## v0.8.10 / 2013-10-19
* [Fix] kqueue: remove file watches when parent directory is removed [#71][] (reported by @mdwhatcott)
* [Fix] kqueue: race between Close and readEvents [#70][] (reported by @bernerdschaefer)
* [Doc] specify OS-specific limits in README (thanks @debrando)
## v0.8.9 / 2013-09-08
* [Doc] Contributing (thanks @nathany)
* [Doc] update package path in example code [#63][] (thanks @paulhammond)
* [Doc] GoCI badge in README (Linux only) [#60][]
* [Doc] Cross-platform testing with Vagrant [#59][] (thanks @nathany)
## v0.8.8 / 2013-06-17
* [Fix] Windows: handle `ERROR_MORE_DATA` on Windows [#49][] (thanks @jbowtie)
## v0.8.7 / 2013-06-03
* [API] Make syscall flags internal
* [Fix] inotify: ignore event changes
* [Fix] race in symlink test [#45][] (reported by @srid)
* [Fix] tests on Windows
* lower case error messages
## v0.8.6 / 2013-05-23
* kqueue: Use EVT_ONLY flag on Darwin
* [Doc] Update README with full example
## v0.8.5 / 2013-05-09
* [Fix] inotify: allow monitoring of "broken" symlinks (thanks @tsg)
## v0.8.4 / 2013-04-07
* [Fix] kqueue: watch all file events [#40][] (thanks @ChrisBuchholz)
## v0.8.3 / 2013-03-13
* [Fix] inoitfy/kqueue memory leak [#36][] (reported by @nbkolchin)
* [Fix] kqueue: use fsnFlags for watching a directory [#33][] (reported by @nbkolchin)
## v0.8.2 / 2013-02-07
* [Doc] add Authors
* [Fix] fix data races for map access [#29][] (thanks @fsouza)
## v0.8.1 / 2013-01-09
* [Fix] Windows path separators
* [Doc] BSD License
## v0.8.0 / 2012-11-09
* kqueue: directory watching improvements (thanks @vmirage)
* inotify: add `IN_MOVED_TO` [#25][] (requested by @cpisto)
* [Fix] kqueue: deleting watched directory [#24][] (reported by @jakerr)
## v0.7.4 / 2012-10-09
* [Fix] inotify: fixes from https://codereview.appspot.com/5418045/ (ugorji)
* [Fix] kqueue: preserve watch flags when watching for delete [#21][] (reported by @robfig)
* [Fix] kqueue: watch the directory even if it isn't a new watch (thanks @robfig)
* [Fix] kqueue: modify after recreation of file
## v0.7.3 / 2012-09-27
* [Fix] kqueue: watch with an existing folder inside the watched folder (thanks @vmirage)
* [Fix] kqueue: no longer get duplicate CREATE events
## v0.7.2 / 2012-09-01
* kqueue: events for created directories
## v0.7.1 / 2012-07-14
* [Fix] for renaming files
## v0.7.0 / 2012-07-02
* [Feature] FSNotify flags
* [Fix] inotify: Added file name back to event path
## v0.6.0 / 2012-06-06
* kqueue: watch files after directory created (thanks @tmc)
## v0.5.1 / 2012-05-22
* [Fix] inotify: remove all watches before Close()
## v0.5.0 / 2012-05-03
* [API] kqueue: return errors during watch instead of sending over channel
* kqueue: match symlink behavior on Linux
* inotify: add `DELETE_SELF` (requested by @taralx)
* [Fix] kqueue: handle EINTR (reported by @robfig)
* [Doc] Godoc example [#1][] (thanks @davecheney)
## v0.4.0 / 2012-03-30
* Go 1 released: build with go tool
* [Feature] Windows support using winfsnotify
* Windows does not have attribute change notifications
* Roll attribute notifications into IsModify
## v0.3.0 / 2012-02-19
* kqueue: add files when watch directory
## v0.2.0 / 2011-12-30
* update to latest Go weekly code
## v0.1.0 / 2011-10-19
* kqueue: add watch on file creation to match inotify
* kqueue: create file event
* inotify: ignore `IN_IGNORED` events
* event String()
* linux: common FileEvent functions
* initial commit
[#79]: https://github.com/howeyc/fsnotify/pull/79
[#77]: https://github.com/howeyc/fsnotify/pull/77
[#72]: https://github.com/howeyc/fsnotify/issues/72
[#71]: https://github.com/howeyc/fsnotify/issues/71
[#70]: https://github.com/howeyc/fsnotify/issues/70
[#63]: https://github.com/howeyc/fsnotify/issues/63
[#62]: https://github.com/howeyc/fsnotify/issues/62
[#60]: https://github.com/howeyc/fsnotify/issues/60
[#59]: https://github.com/howeyc/fsnotify/issues/59
[#49]: https://github.com/howeyc/fsnotify/issues/49
[#45]: https://github.com/howeyc/fsnotify/issues/45
[#40]: https://github.com/howeyc/fsnotify/issues/40
[#36]: https://github.com/howeyc/fsnotify/issues/36
[#33]: https://github.com/howeyc/fsnotify/issues/33
[#29]: https://github.com/howeyc/fsnotify/issues/29
[#25]: https://github.com/howeyc/fsnotify/issues/25
[#24]: https://github.com/howeyc/fsnotify/issues/24
[#21]: https://github.com/howeyc/fsnotify/issues/21

View File

@@ -0,0 +1,77 @@
# Contributing
## Issues
* Request features and report bugs using the [GitHub Issue Tracker](https://github.com/fsnotify/fsnotify/issues).
* Please indicate the platform you are using fsnotify on.
* A code example to reproduce the problem is appreciated.
## Pull Requests
### Contributor License Agreement
fsnotify is derived from code in the [golang.org/x/exp](https://godoc.org/golang.org/x/exp) package and it may be included [in the standard library](https://github.com/fsnotify/fsnotify/issues/1) in the future. Therefore fsnotify carries the same [LICENSE](https://github.com/fsnotify/fsnotify/blob/master/LICENSE) as Go. Contributors retain their copyright, so you need to fill out a short form before we can accept your contribution: [Google Individual Contributor License Agreement](https://developers.google.com/open-source/cla/individual).
Please indicate that you have signed the CLA in your pull request.
### How fsnotify is Developed
* Development is done on feature branches.
* Tests are run on BSD, Linux, macOS and Windows.
* Pull requests are reviewed and [applied to master][am] using [hub][].
* Maintainers may modify or squash commits rather than asking contributors to.
* To issue a new release, the maintainers will:
* Update the CHANGELOG
* Tag a version, which will become available through gopkg.in.
### How to Fork
For smooth sailing, always use the original import path. Installing with `go get` makes this easy.
1. Install from GitHub (`go get -u github.com/fsnotify/fsnotify`)
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Ensure everything works and the tests pass (see below)
4. Commit your changes (`git commit -am 'Add some feature'`)
Contribute upstream:
1. Fork fsnotify on GitHub
2. Add your remote (`git remote add fork git@github.com:mycompany/repo.git`)
3. Push to the branch (`git push fork my-new-feature`)
4. Create a new Pull Request on GitHub
This workflow is [thoroughly explained by Katrina Owen](https://splice.com/blog/contributing-open-source-git-repositories-go/).
### Testing
fsnotify uses build tags to compile different code on Linux, BSD, macOS, and Windows.
Before doing a pull request, please do your best to test your changes on multiple platforms, and list which platforms you were able/unable to test on.
To aid in cross-platform testing there is a Vagrantfile for Linux and BSD.
* Install [Vagrant](http://www.vagrantup.com/) and [VirtualBox](https://www.virtualbox.org/)
* Setup [Vagrant Gopher](https://github.com/nathany/vagrant-gopher) in your `src` folder.
* Run `vagrant up` from the project folder. You can also setup just one box with `vagrant up linux` or `vagrant up bsd` (note: the BSD box doesn't support Windows hosts at this time, and NFS may prompt for your host OS password)
* Once setup, you can run the test suite on a given OS with a single command `vagrant ssh linux -c 'cd fsnotify/fsnotify; go test'`.
* When you're done, you will want to halt or destroy the Vagrant boxes.
Notice: fsnotify file system events won't trigger in shared folders. The tests get around this limitation by using the /tmp directory.
Right now there is no equivalent solution for Windows and macOS, but there are Windows VMs [freely available from Microsoft](http://www.modern.ie/en-us/virtualization-tools#downloads).
### Maintainers
Help maintaining fsnotify is welcome. To be a maintainer:
* Submit a pull request and sign the CLA as above.
* You must be able to run the test suite on Mac, Windows, Linux and BSD.
To keep master clean, the fsnotify project uses the "apply mail" workflow outlined in Nathaniel Talbott's post ["Merge pull request" Considered Harmful][am]. This requires installing [hub][].
All code changes should be internal pull requests.
Releases are tagged using [Semantic Versioning](http://semver.org/).
[hub]: https://github.com/github/hub
[am]: http://blog.spreedly.com/2014/06/24/merge-pull-request-considered-harmful/#.VGa5yZPF_Zs

View File

@@ -0,0 +1,28 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Copyright (c) 2012-2019 fsnotify Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,130 @@
# File system notifications for Go
[![GoDoc](https://godoc.org/github.com/fsnotify/fsnotify?status.svg)](https://godoc.org/github.com/fsnotify/fsnotify) [![Go Report Card](https://goreportcard.com/badge/github.com/fsnotify/fsnotify)](https://goreportcard.com/report/github.com/fsnotify/fsnotify)
fsnotify utilizes [golang.org/x/sys](https://godoc.org/golang.org/x/sys) rather than `syscall` from the standard library. Ensure you have the latest version installed by running:
```console
go get -u golang.org/x/sys/...
```
Cross platform: Windows, Linux, BSD and macOS.
| Adapter | OS | Status |
| --------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| inotify | Linux 2.6.27 or later, Android\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| kqueue | BSD, macOS, iOS\* | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| ReadDirectoryChangesW | Windows | Supported [![Build Status](https://travis-ci.org/fsnotify/fsnotify.svg?branch=master)](https://travis-ci.org/fsnotify/fsnotify) |
| FSEvents | macOS | [Planned](https://github.com/fsnotify/fsnotify/issues/11) |
| FEN | Solaris 11 | [In Progress](https://github.com/fsnotify/fsnotify/issues/12) |
| fanotify | Linux 2.6.37+ | [Planned](https://github.com/fsnotify/fsnotify/issues/114) |
| USN Journals | Windows | [Maybe](https://github.com/fsnotify/fsnotify/issues/53) |
| Polling | *All* | [Maybe](https://github.com/fsnotify/fsnotify/issues/9) |
\* Android and iOS are untested.
Please see [the documentation](https://godoc.org/github.com/fsnotify/fsnotify) and consult the [FAQ](#faq) for usage information.
## API stability
fsnotify is a fork of [howeyc/fsnotify](https://godoc.org/github.com/howeyc/fsnotify) with a new API as of v1.0. The API is based on [this design document](http://goo.gl/MrYxyA).
All [releases](https://github.com/fsnotify/fsnotify/releases) are tagged based on [Semantic Versioning](http://semver.org/). Further API changes are [planned](https://github.com/fsnotify/fsnotify/milestones), and will be tagged with a new major revision number.
Go 1.6 supports dependencies located in the `vendor/` folder. Unless you are creating a library, it is recommended that you copy fsnotify into `vendor/github.com/fsnotify/fsnotify` within your project, and likewise for `golang.org/x/sys`.
## Usage
```go
package main
import (
"log"
"github.com/fsnotify/fsnotify"
)
func main() {
watcher, err := fsnotify.NewWatcher()
if err != nil {
log.Fatal(err)
}
defer watcher.Close()
done := make(chan bool)
go func() {
for {
select {
case event, ok := <-watcher.Events:
if !ok {
return
}
log.Println("event:", event)
if event.Op&fsnotify.Write == fsnotify.Write {
log.Println("modified file:", event.Name)
}
case err, ok := <-watcher.Errors:
if !ok {
return
}
log.Println("error:", err)
}
}
}()
err = watcher.Add("/tmp/foo")
if err != nil {
log.Fatal(err)
}
<-done
}
```
## Contributing
Please refer to [CONTRIBUTING][] before opening an issue or pull request.
## Example
See [example_test.go](https://github.com/fsnotify/fsnotify/blob/master/example_test.go).
## FAQ
**When a file is moved to another directory is it still being watched?**
No (it shouldn't be, unless you are watching where it was moved to).
**When I watch a directory, are all subdirectories watched as well?**
No, you must add watches for any directory you want to watch (a recursive watcher is on the roadmap [#18][]).
**Do I have to watch the Error and Event channels in a separate goroutine?**
As of now, yes. Looking into making this single-thread friendly (see [howeyc #7][#7])
**Why am I receiving multiple events for the same file on OS X?**
Spotlight indexing on OS X can result in multiple events (see [howeyc #62][#62]). A temporary workaround is to add your folder(s) to the *Spotlight Privacy settings* until we have a native FSEvents implementation (see [#11][]).
**How many files can be watched at once?**
There are OS-specific limits as to how many watches can be created:
* Linux: /proc/sys/fs/inotify/max_user_watches contains the limit, reaching this limit results in a "no space left on device" error.
* BSD / OSX: sysctl variables "kern.maxfiles" and "kern.maxfilesperproc", reaching these limits results in a "too many open files" error.
**Why don't notifications work with NFS filesystems or filesystem in userspace (FUSE)?**
fsnotify requires support from underlying OS to work. The current NFS protocol does not provide network level support for file notifications.
[#62]: https://github.com/howeyc/fsnotify/issues/62
[#18]: https://github.com/fsnotify/fsnotify/issues/18
[#11]: https://github.com/fsnotify/fsnotify/issues/11
[#7]: https://github.com/howeyc/fsnotify/issues/7
[contributing]: https://github.com/fsnotify/fsnotify/blob/master/CONTRIBUTING.md
## Related Projects
* [notify](https://github.com/rjeczalik/notify)
* [fsevents](https://github.com/fsnotify/fsevents)

37
src/runtime/vendor/github.com/fsnotify/fsnotify/fen.go generated vendored Normal file
View File

@@ -0,0 +1,37 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build solaris
package fsnotify
import (
"errors"
)
// Watcher watches a set of files, delivering events to a channel.
type Watcher struct {
Events chan Event
Errors chan error
}
// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events.
func NewWatcher() (*Watcher, error) {
return nil, errors.New("FEN based watcher not yet supported for fsnotify\n")
}
// Close removes all watches and closes the events channel.
func (w *Watcher) Close() error {
return nil
}
// Add starts watching the named file or directory (non-recursively).
func (w *Watcher) Add(name string) error {
return nil
}
// Remove stops watching the the named file or directory (non-recursively).
func (w *Watcher) Remove(name string) error {
return nil
}

View File

@@ -0,0 +1,68 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !plan9
// Package fsnotify provides a platform-independent interface for file system notifications.
package fsnotify
import (
"bytes"
"errors"
"fmt"
)
// Event represents a single file system notification.
type Event struct {
Name string // Relative path to the file or directory.
Op Op // File operation that triggered the event.
}
// Op describes a set of file operations.
type Op uint32
// These are the generalized file operations that can trigger a notification.
const (
Create Op = 1 << iota
Write
Remove
Rename
Chmod
)
func (op Op) String() string {
// Use a buffer for efficient string concatenation
var buffer bytes.Buffer
if op&Create == Create {
buffer.WriteString("|CREATE")
}
if op&Remove == Remove {
buffer.WriteString("|REMOVE")
}
if op&Write == Write {
buffer.WriteString("|WRITE")
}
if op&Rename == Rename {
buffer.WriteString("|RENAME")
}
if op&Chmod == Chmod {
buffer.WriteString("|CHMOD")
}
if buffer.Len() == 0 {
return ""
}
return buffer.String()[1:] // Strip leading pipe
}
// String returns a string representation of the event in the form
// "file: REMOVE|WRITE|..."
func (e Event) String() string {
return fmt.Sprintf("%q: %s", e.Name, e.Op.String())
}
// Common errors that can be reported by a watcher
var (
ErrEventOverflow = errors.New("fsnotify queue overflow")
)

View File

@@ -0,0 +1,5 @@
module github.com/fsnotify/fsnotify
go 1.13
require golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9

View File

@@ -0,0 +1,2 @@
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9 h1:L2auWcuQIvxz9xSEqzESnV/QN/gNRXNApHi3fYwl2w0=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View File

@@ -0,0 +1,337 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux
package fsnotify
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"unsafe"
"golang.org/x/sys/unix"
)
// Watcher watches a set of files, delivering events to a channel.
type Watcher struct {
Events chan Event
Errors chan error
mu sync.Mutex // Map access
fd int
poller *fdPoller
watches map[string]*watch // Map of inotify watches (key: path)
paths map[int]string // Map of watched paths (key: watch descriptor)
done chan struct{} // Channel for sending a "quit message" to the reader goroutine
doneResp chan struct{} // Channel to respond to Close
}
// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events.
func NewWatcher() (*Watcher, error) {
// Create inotify fd
fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC)
if fd == -1 {
return nil, errno
}
// Create epoll
poller, err := newFdPoller(fd)
if err != nil {
unix.Close(fd)
return nil, err
}
w := &Watcher{
fd: fd,
poller: poller,
watches: make(map[string]*watch),
paths: make(map[int]string),
Events: make(chan Event),
Errors: make(chan error),
done: make(chan struct{}),
doneResp: make(chan struct{}),
}
go w.readEvents()
return w, nil
}
func (w *Watcher) isClosed() bool {
select {
case <-w.done:
return true
default:
return false
}
}
// Close removes all watches and closes the events channel.
func (w *Watcher) Close() error {
if w.isClosed() {
return nil
}
// Send 'close' signal to goroutine, and set the Watcher to closed.
close(w.done)
// Wake up goroutine
w.poller.wake()
// Wait for goroutine to close
<-w.doneResp
return nil
}
// Add starts watching the named file or directory (non-recursively).
func (w *Watcher) Add(name string) error {
name = filepath.Clean(name)
if w.isClosed() {
return errors.New("inotify instance already closed")
}
const agnosticEvents = unix.IN_MOVED_TO | unix.IN_MOVED_FROM |
unix.IN_CREATE | unix.IN_ATTRIB | unix.IN_MODIFY |
unix.IN_MOVE_SELF | unix.IN_DELETE | unix.IN_DELETE_SELF
var flags uint32 = agnosticEvents
w.mu.Lock()
defer w.mu.Unlock()
watchEntry := w.watches[name]
if watchEntry != nil {
flags |= watchEntry.flags | unix.IN_MASK_ADD
}
wd, errno := unix.InotifyAddWatch(w.fd, name, flags)
if wd == -1 {
return errno
}
if watchEntry == nil {
w.watches[name] = &watch{wd: uint32(wd), flags: flags}
w.paths[wd] = name
} else {
watchEntry.wd = uint32(wd)
watchEntry.flags = flags
}
return nil
}
// Remove stops watching the named file or directory (non-recursively).
func (w *Watcher) Remove(name string) error {
name = filepath.Clean(name)
// Fetch the watch.
w.mu.Lock()
defer w.mu.Unlock()
watch, ok := w.watches[name]
// Remove it from inotify.
if !ok {
return fmt.Errorf("can't remove non-existent inotify watch for: %s", name)
}
// We successfully removed the watch if InotifyRmWatch doesn't return an
// error, we need to clean up our internal state to ensure it matches
// inotify's kernel state.
delete(w.paths, int(watch.wd))
delete(w.watches, name)
// inotify_rm_watch will return EINVAL if the file has been deleted;
// the inotify will already have been removed.
// watches and pathes are deleted in ignoreLinux() implicitly and asynchronously
// by calling inotify_rm_watch() below. e.g. readEvents() goroutine receives IN_IGNORE
// so that EINVAL means that the wd is being rm_watch()ed or its file removed
// by another thread and we have not received IN_IGNORE event.
success, errno := unix.InotifyRmWatch(w.fd, watch.wd)
if success == -1 {
// TODO: Perhaps it's not helpful to return an error here in every case.
// the only two possible errors are:
// EBADF, which happens when w.fd is not a valid file descriptor of any kind.
// EINVAL, which is when fd is not an inotify descriptor or wd is not a valid watch descriptor.
// Watch descriptors are invalidated when they are removed explicitly or implicitly;
// explicitly by inotify_rm_watch, implicitly when the file they are watching is deleted.
return errno
}
return nil
}
type watch struct {
wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall)
flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags)
}
// readEvents reads from the inotify file descriptor, converts the
// received events into Event objects and sends them via the Events channel
func (w *Watcher) readEvents() {
var (
buf [unix.SizeofInotifyEvent * 4096]byte // Buffer for a maximum of 4096 raw events
n int // Number of bytes read with read()
errno error // Syscall errno
ok bool // For poller.wait
)
defer close(w.doneResp)
defer close(w.Errors)
defer close(w.Events)
defer unix.Close(w.fd)
defer w.poller.close()
for {
// See if we have been closed.
if w.isClosed() {
return
}
ok, errno = w.poller.wait()
if errno != nil {
select {
case w.Errors <- errno:
case <-w.done:
return
}
continue
}
if !ok {
continue
}
n, errno = unix.Read(w.fd, buf[:])
// If a signal interrupted execution, see if we've been asked to close, and try again.
// http://man7.org/linux/man-pages/man7/signal.7.html :
// "Before Linux 3.8, reads from an inotify(7) file descriptor were not restartable"
if errno == unix.EINTR {
continue
}
// unix.Read might have been woken up by Close. If so, we're done.
if w.isClosed() {
return
}
if n < unix.SizeofInotifyEvent {
var err error
if n == 0 {
// If EOF is received. This should really never happen.
err = io.EOF
} else if n < 0 {
// If an error occurred while reading.
err = errno
} else {
// Read was too short.
err = errors.New("notify: short read in readEvents()")
}
select {
case w.Errors <- err:
case <-w.done:
return
}
continue
}
var offset uint32
// We don't know how many events we just read into the buffer
// While the offset points to at least one whole event...
for offset <= uint32(n-unix.SizeofInotifyEvent) {
// Point "raw" to the event in the buffer
raw := (*unix.InotifyEvent)(unsafe.Pointer(&buf[offset]))
mask := uint32(raw.Mask)
nameLen := uint32(raw.Len)
if mask&unix.IN_Q_OVERFLOW != 0 {
select {
case w.Errors <- ErrEventOverflow:
case <-w.done:
return
}
}
// If the event happened to the watched directory or the watched file, the kernel
// doesn't append the filename to the event, but we would like to always fill the
// the "Name" field with a valid filename. We retrieve the path of the watch from
// the "paths" map.
w.mu.Lock()
name, ok := w.paths[int(raw.Wd)]
// IN_DELETE_SELF occurs when the file/directory being watched is removed.
// This is a sign to clean up the maps, otherwise we are no longer in sync
// with the inotify kernel state which has already deleted the watch
// automatically.
if ok && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF {
delete(w.paths, int(raw.Wd))
delete(w.watches, name)
}
w.mu.Unlock()
if nameLen > 0 {
// Point "bytes" at the first byte of the filename
bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))
// The filename is padded with NULL bytes. TrimRight() gets rid of those.
name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000")
}
event := newEvent(name, mask)
// Send the events that are not ignored on the events channel
if !event.ignoreLinux(mask) {
select {
case w.Events <- event:
case <-w.done:
return
}
}
// Move to the next event in the buffer
offset += unix.SizeofInotifyEvent + nameLen
}
}
}
// Certain types of events can be "ignored" and not sent over the Events
// channel. Such as events marked ignore by the kernel, or MODIFY events
// against files that do not exist.
func (e *Event) ignoreLinux(mask uint32) bool {
// Ignore anything the inotify API says to ignore
if mask&unix.IN_IGNORED == unix.IN_IGNORED {
return true
}
// If the event is not a DELETE or RENAME, the file must exist.
// Otherwise the event is ignored.
// *Note*: this was put in place because it was seen that a MODIFY
// event was sent after the DELETE. This ignores that MODIFY and
// assumes a DELETE will come or has come if the file doesn't exist.
if !(e.Op&Remove == Remove || e.Op&Rename == Rename) {
_, statErr := os.Lstat(e.Name)
return os.IsNotExist(statErr)
}
return false
}
// newEvent returns an platform-independent Event based on an inotify mask.
func newEvent(name string, mask uint32) Event {
e := Event{Name: name}
if mask&unix.IN_CREATE == unix.IN_CREATE || mask&unix.IN_MOVED_TO == unix.IN_MOVED_TO {
e.Op |= Create
}
if mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF || mask&unix.IN_DELETE == unix.IN_DELETE {
e.Op |= Remove
}
if mask&unix.IN_MODIFY == unix.IN_MODIFY {
e.Op |= Write
}
if mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF || mask&unix.IN_MOVED_FROM == unix.IN_MOVED_FROM {
e.Op |= Rename
}
if mask&unix.IN_ATTRIB == unix.IN_ATTRIB {
e.Op |= Chmod
}
return e
}

View File

@@ -0,0 +1,187 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux
package fsnotify
import (
"errors"
"golang.org/x/sys/unix"
)
type fdPoller struct {
fd int // File descriptor (as returned by the inotify_init() syscall)
epfd int // Epoll file descriptor
pipe [2]int // Pipe for waking up
}
func emptyPoller(fd int) *fdPoller {
poller := new(fdPoller)
poller.fd = fd
poller.epfd = -1
poller.pipe[0] = -1
poller.pipe[1] = -1
return poller
}
// Create a new inotify poller.
// This creates an inotify handler, and an epoll handler.
func newFdPoller(fd int) (*fdPoller, error) {
var errno error
poller := emptyPoller(fd)
defer func() {
if errno != nil {
poller.close()
}
}()
poller.fd = fd
// Create epoll fd
poller.epfd, errno = unix.EpollCreate1(unix.EPOLL_CLOEXEC)
if poller.epfd == -1 {
return nil, errno
}
// Create pipe; pipe[0] is the read end, pipe[1] the write end.
errno = unix.Pipe2(poller.pipe[:], unix.O_NONBLOCK|unix.O_CLOEXEC)
if errno != nil {
return nil, errno
}
// Register inotify fd with epoll
event := unix.EpollEvent{
Fd: int32(poller.fd),
Events: unix.EPOLLIN,
}
errno = unix.EpollCtl(poller.epfd, unix.EPOLL_CTL_ADD, poller.fd, &event)
if errno != nil {
return nil, errno
}
// Register pipe fd with epoll
event = unix.EpollEvent{
Fd: int32(poller.pipe[0]),
Events: unix.EPOLLIN,
}
errno = unix.EpollCtl(poller.epfd, unix.EPOLL_CTL_ADD, poller.pipe[0], &event)
if errno != nil {
return nil, errno
}
return poller, nil
}
// Wait using epoll.
// Returns true if something is ready to be read,
// false if there is not.
func (poller *fdPoller) wait() (bool, error) {
// 3 possible events per fd, and 2 fds, makes a maximum of 6 events.
// I don't know whether epoll_wait returns the number of events returned,
// or the total number of events ready.
// I decided to catch both by making the buffer one larger than the maximum.
events := make([]unix.EpollEvent, 7)
for {
n, errno := unix.EpollWait(poller.epfd, events, -1)
if n == -1 {
if errno == unix.EINTR {
continue
}
return false, errno
}
if n == 0 {
// If there are no events, try again.
continue
}
if n > 6 {
// This should never happen. More events were returned than should be possible.
return false, errors.New("epoll_wait returned more events than I know what to do with")
}
ready := events[:n]
epollhup := false
epollerr := false
epollin := false
for _, event := range ready {
if event.Fd == int32(poller.fd) {
if event.Events&unix.EPOLLHUP != 0 {
// This should not happen, but if it does, treat it as a wakeup.
epollhup = true
}
if event.Events&unix.EPOLLERR != 0 {
// If an error is waiting on the file descriptor, we should pretend
// something is ready to read, and let unix.Read pick up the error.
epollerr = true
}
if event.Events&unix.EPOLLIN != 0 {
// There is data to read.
epollin = true
}
}
if event.Fd == int32(poller.pipe[0]) {
if event.Events&unix.EPOLLHUP != 0 {
// Write pipe descriptor was closed, by us. This means we're closing down the
// watcher, and we should wake up.
}
if event.Events&unix.EPOLLERR != 0 {
// If an error is waiting on the pipe file descriptor.
// This is an absolute mystery, and should never ever happen.
return false, errors.New("Error on the pipe descriptor.")
}
if event.Events&unix.EPOLLIN != 0 {
// This is a regular wakeup, so we have to clear the buffer.
err := poller.clearWake()
if err != nil {
return false, err
}
}
}
}
if epollhup || epollerr || epollin {
return true, nil
}
return false, nil
}
}
// Close the write end of the poller.
func (poller *fdPoller) wake() error {
buf := make([]byte, 1)
n, errno := unix.Write(poller.pipe[1], buf)
if n == -1 {
if errno == unix.EAGAIN {
// Buffer is full, poller will wake.
return nil
}
return errno
}
return nil
}
func (poller *fdPoller) clearWake() error {
// You have to be woken up a LOT in order to get to 100!
buf := make([]byte, 100)
n, errno := unix.Read(poller.pipe[0], buf)
if n == -1 {
if errno == unix.EAGAIN {
// Buffer is empty, someone else cleared our wake.
return nil
}
return errno
}
return nil
}
// Close all poller file descriptors, but not the one passed to it.
func (poller *fdPoller) close() {
if poller.pipe[1] != -1 {
unix.Close(poller.pipe[1])
}
if poller.pipe[0] != -1 {
unix.Close(poller.pipe[0])
}
if poller.epfd != -1 {
unix.Close(poller.epfd)
}
}

View File

@@ -0,0 +1,521 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build freebsd openbsd netbsd dragonfly darwin
package fsnotify
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sync"
"time"
"golang.org/x/sys/unix"
)
// Watcher watches a set of files, delivering events to a channel.
type Watcher struct {
Events chan Event
Errors chan error
done chan struct{} // Channel for sending a "quit message" to the reader goroutine
kq int // File descriptor (as returned by the kqueue() syscall).
mu sync.Mutex // Protects access to watcher data
watches map[string]int // Map of watched file descriptors (key: path).
externalWatches map[string]bool // Map of watches added by user of the library.
dirFlags map[string]uint32 // Map of watched directories to fflags used in kqueue.
paths map[int]pathInfo // Map file descriptors to path names for processing kqueue events.
fileExists map[string]bool // Keep track of if we know this file exists (to stop duplicate create events).
isClosed bool // Set to true when Close() is first called
}
type pathInfo struct {
name string
isDir bool
}
// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events.
func NewWatcher() (*Watcher, error) {
kq, err := kqueue()
if err != nil {
return nil, err
}
w := &Watcher{
kq: kq,
watches: make(map[string]int),
dirFlags: make(map[string]uint32),
paths: make(map[int]pathInfo),
fileExists: make(map[string]bool),
externalWatches: make(map[string]bool),
Events: make(chan Event),
Errors: make(chan error),
done: make(chan struct{}),
}
go w.readEvents()
return w, nil
}
// Close removes all watches and closes the events channel.
func (w *Watcher) Close() error {
w.mu.Lock()
if w.isClosed {
w.mu.Unlock()
return nil
}
w.isClosed = true
// copy paths to remove while locked
var pathsToRemove = make([]string, 0, len(w.watches))
for name := range w.watches {
pathsToRemove = append(pathsToRemove, name)
}
w.mu.Unlock()
// unlock before calling Remove, which also locks
for _, name := range pathsToRemove {
w.Remove(name)
}
// send a "quit" message to the reader goroutine
close(w.done)
return nil
}
// Add starts watching the named file or directory (non-recursively).
func (w *Watcher) Add(name string) error {
w.mu.Lock()
w.externalWatches[name] = true
w.mu.Unlock()
_, err := w.addWatch(name, noteAllEvents)
return err
}
// Remove stops watching the the named file or directory (non-recursively).
func (w *Watcher) Remove(name string) error {
name = filepath.Clean(name)
w.mu.Lock()
watchfd, ok := w.watches[name]
w.mu.Unlock()
if !ok {
return fmt.Errorf("can't remove non-existent kevent watch for: %s", name)
}
const registerRemove = unix.EV_DELETE
if err := register(w.kq, []int{watchfd}, registerRemove, 0); err != nil {
return err
}
unix.Close(watchfd)
w.mu.Lock()
isDir := w.paths[watchfd].isDir
delete(w.watches, name)
delete(w.paths, watchfd)
delete(w.dirFlags, name)
w.mu.Unlock()
// Find all watched paths that are in this directory that are not external.
if isDir {
var pathsToRemove []string
w.mu.Lock()
for _, path := range w.paths {
wdir, _ := filepath.Split(path.name)
if filepath.Clean(wdir) == name {
if !w.externalWatches[path.name] {
pathsToRemove = append(pathsToRemove, path.name)
}
}
}
w.mu.Unlock()
for _, name := range pathsToRemove {
// Since these are internal, not much sense in propagating error
// to the user, as that will just confuse them with an error about
// a path they did not explicitly watch themselves.
w.Remove(name)
}
}
return nil
}
// Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE)
const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | unix.NOTE_RENAME
// keventWaitTime to block on each read from kevent
var keventWaitTime = durationToTimespec(100 * time.Millisecond)
// addWatch adds name to the watched file set.
// The flags are interpreted as described in kevent(2).
// Returns the real path to the file which was added, if any, which may be different from the one passed in the case of symlinks.
func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
var isDir bool
// Make ./name and name equivalent
name = filepath.Clean(name)
w.mu.Lock()
if w.isClosed {
w.mu.Unlock()
return "", errors.New("kevent instance already closed")
}
watchfd, alreadyWatching := w.watches[name]
// We already have a watch, but we can still override flags.
if alreadyWatching {
isDir = w.paths[watchfd].isDir
}
w.mu.Unlock()
if !alreadyWatching {
fi, err := os.Lstat(name)
if err != nil {
return "", err
}
// Don't watch sockets.
if fi.Mode()&os.ModeSocket == os.ModeSocket {
return "", nil
}
// Don't watch named pipes.
if fi.Mode()&os.ModeNamedPipe == os.ModeNamedPipe {
return "", nil
}
// Follow Symlinks
// Unfortunately, Linux can add bogus symlinks to watch list without
// issue, and Windows can't do symlinks period (AFAIK). To maintain
// consistency, we will act like everything is fine. There will simply
// be no file events for broken symlinks.
// Hence the returns of nil on errors.
if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
name, err = filepath.EvalSymlinks(name)
if err != nil {
return "", nil
}
w.mu.Lock()
_, alreadyWatching = w.watches[name]
w.mu.Unlock()
if alreadyWatching {
return name, nil
}
fi, err = os.Lstat(name)
if err != nil {
return "", nil
}
}
watchfd, err = unix.Open(name, openMode, 0700)
if watchfd == -1 {
return "", err
}
isDir = fi.IsDir()
}
const registerAdd = unix.EV_ADD | unix.EV_CLEAR | unix.EV_ENABLE
if err := register(w.kq, []int{watchfd}, registerAdd, flags); err != nil {
unix.Close(watchfd)
return "", err
}
if !alreadyWatching {
w.mu.Lock()
w.watches[name] = watchfd
w.paths[watchfd] = pathInfo{name: name, isDir: isDir}
w.mu.Unlock()
}
if isDir {
// Watch the directory if it has not been watched before,
// or if it was watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles)
w.mu.Lock()
watchDir := (flags&unix.NOTE_WRITE) == unix.NOTE_WRITE &&
(!alreadyWatching || (w.dirFlags[name]&unix.NOTE_WRITE) != unix.NOTE_WRITE)
// Store flags so this watch can be updated later
w.dirFlags[name] = flags
w.mu.Unlock()
if watchDir {
if err := w.watchDirectoryFiles(name); err != nil {
return "", err
}
}
}
return name, nil
}
// readEvents reads from kqueue and converts the received kevents into
// Event values that it sends down the Events channel.
func (w *Watcher) readEvents() {
eventBuffer := make([]unix.Kevent_t, 10)
loop:
for {
// See if there is a message on the "done" channel
select {
case <-w.done:
break loop
default:
}
// Get new events
kevents, err := read(w.kq, eventBuffer, &keventWaitTime)
// EINTR is okay, the syscall was interrupted before timeout expired.
if err != nil && err != unix.EINTR {
select {
case w.Errors <- err:
case <-w.done:
break loop
}
continue
}
// Flush the events we received to the Events channel
for len(kevents) > 0 {
kevent := &kevents[0]
watchfd := int(kevent.Ident)
mask := uint32(kevent.Fflags)
w.mu.Lock()
path := w.paths[watchfd]
w.mu.Unlock()
event := newEvent(path.name, mask)
if path.isDir && !(event.Op&Remove == Remove) {
// Double check to make sure the directory exists. This can happen when
// we do a rm -fr on a recursively watched folders and we receive a
// modification event first but the folder has been deleted and later
// receive the delete event
if _, err := os.Lstat(event.Name); os.IsNotExist(err) {
// mark is as delete event
event.Op |= Remove
}
}
if event.Op&Rename == Rename || event.Op&Remove == Remove {
w.Remove(event.Name)
w.mu.Lock()
delete(w.fileExists, event.Name)
w.mu.Unlock()
}
if path.isDir && event.Op&Write == Write && !(event.Op&Remove == Remove) {
w.sendDirectoryChangeEvents(event.Name)
} else {
// Send the event on the Events channel.
select {
case w.Events <- event:
case <-w.done:
break loop
}
}
if event.Op&Remove == Remove {
// Look for a file that may have overwritten this.
// For example, mv f1 f2 will delete f2, then create f2.
if path.isDir {
fileDir := filepath.Clean(event.Name)
w.mu.Lock()
_, found := w.watches[fileDir]
w.mu.Unlock()
if found {
// make sure the directory exists before we watch for changes. When we
// do a recursive watch and perform rm -fr, the parent directory might
// have gone missing, ignore the missing directory and let the
// upcoming delete event remove the watch from the parent directory.
if _, err := os.Lstat(fileDir); err == nil {
w.sendDirectoryChangeEvents(fileDir)
}
}
} else {
filePath := filepath.Clean(event.Name)
if fileInfo, err := os.Lstat(filePath); err == nil {
w.sendFileCreatedEventIfNew(filePath, fileInfo)
}
}
}
// Move to next event
kevents = kevents[1:]
}
}
// cleanup
err := unix.Close(w.kq)
if err != nil {
// only way the previous loop breaks is if w.done was closed so we need to async send to w.Errors.
select {
case w.Errors <- err:
default:
}
}
close(w.Events)
close(w.Errors)
}
// newEvent returns an platform-independent Event based on kqueue Fflags.
func newEvent(name string, mask uint32) Event {
e := Event{Name: name}
if mask&unix.NOTE_DELETE == unix.NOTE_DELETE {
e.Op |= Remove
}
if mask&unix.NOTE_WRITE == unix.NOTE_WRITE {
e.Op |= Write
}
if mask&unix.NOTE_RENAME == unix.NOTE_RENAME {
e.Op |= Rename
}
if mask&unix.NOTE_ATTRIB == unix.NOTE_ATTRIB {
e.Op |= Chmod
}
return e
}
func newCreateEvent(name string) Event {
return Event{Name: name, Op: Create}
}
// watchDirectoryFiles to mimic inotify when adding a watch on a directory
func (w *Watcher) watchDirectoryFiles(dirPath string) error {
// Get all files
files, err := ioutil.ReadDir(dirPath)
if err != nil {
return err
}
for _, fileInfo := range files {
filePath := filepath.Join(dirPath, fileInfo.Name())
filePath, err = w.internalWatch(filePath, fileInfo)
if err != nil {
return err
}
w.mu.Lock()
w.fileExists[filePath] = true
w.mu.Unlock()
}
return nil
}
// sendDirectoryEvents searches the directory for newly created files
// and sends them over the event channel. This functionality is to have
// the BSD version of fsnotify match Linux inotify which provides a
// create event for files created in a watched directory.
func (w *Watcher) sendDirectoryChangeEvents(dirPath string) {
// Get all files
files, err := ioutil.ReadDir(dirPath)
if err != nil {
select {
case w.Errors <- err:
case <-w.done:
return
}
}
// Search for new files
for _, fileInfo := range files {
filePath := filepath.Join(dirPath, fileInfo.Name())
err := w.sendFileCreatedEventIfNew(filePath, fileInfo)
if err != nil {
return
}
}
}
// sendFileCreatedEvent sends a create event if the file isn't already being tracked.
func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fileInfo os.FileInfo) (err error) {
w.mu.Lock()
_, doesExist := w.fileExists[filePath]
w.mu.Unlock()
if !doesExist {
// Send create event
select {
case w.Events <- newCreateEvent(filePath):
case <-w.done:
return
}
}
// like watchDirectoryFiles (but without doing another ReadDir)
filePath, err = w.internalWatch(filePath, fileInfo)
if err != nil {
return err
}
w.mu.Lock()
w.fileExists[filePath] = true
w.mu.Unlock()
return nil
}
func (w *Watcher) internalWatch(name string, fileInfo os.FileInfo) (string, error) {
if fileInfo.IsDir() {
// mimic Linux providing delete events for subdirectories
// but preserve the flags used if currently watching subdirectory
w.mu.Lock()
flags := w.dirFlags[name]
w.mu.Unlock()
flags |= unix.NOTE_DELETE | unix.NOTE_RENAME
return w.addWatch(name, flags)
}
// watch file to mimic Linux inotify
return w.addWatch(name, noteAllEvents)
}
// kqueue creates a new kernel event queue and returns a descriptor.
func kqueue() (kq int, err error) {
kq, err = unix.Kqueue()
if kq == -1 {
return kq, err
}
return kq, nil
}
// register events with the queue
func register(kq int, fds []int, flags int, fflags uint32) error {
changes := make([]unix.Kevent_t, len(fds))
for i, fd := range fds {
// SetKevent converts int to the platform-specific types:
unix.SetKevent(&changes[i], fd, unix.EVFILT_VNODE, flags)
changes[i].Fflags = fflags
}
// register the events
success, err := unix.Kevent(kq, changes, nil, nil)
if success == -1 {
return err
}
return nil
}
// read retrieves pending events, or waits until an event occurs.
// A timeout of nil blocks indefinitely, while 0 polls the queue.
func read(kq int, events []unix.Kevent_t, timeout *unix.Timespec) ([]unix.Kevent_t, error) {
n, err := unix.Kevent(kq, nil, events, timeout)
if err != nil {
return nil, err
}
return events[0:n], nil
}
// durationToTimespec prepares a timeout value
func durationToTimespec(d time.Duration) unix.Timespec {
return unix.NsecToTimespec(d.Nanoseconds())
}

View File

@@ -0,0 +1,11 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build freebsd openbsd netbsd dragonfly
package fsnotify
import "golang.org/x/sys/unix"
const openMode = unix.O_NONBLOCK | unix.O_RDONLY | unix.O_CLOEXEC

View File

@@ -0,0 +1,12 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin
package fsnotify
import "golang.org/x/sys/unix"
// note: this constant is not defined on BSD
const openMode = unix.O_EVTONLY | unix.O_CLOEXEC

View File

@@ -0,0 +1,561 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build windows
package fsnotify
import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
"syscall"
"unsafe"
)
// Watcher watches a set of files, delivering events to a channel.
type Watcher struct {
Events chan Event
Errors chan error
isClosed bool // Set to true when Close() is first called
mu sync.Mutex // Map access
port syscall.Handle // Handle to completion port
watches watchMap // Map of watches (key: i-number)
input chan *input // Inputs to the reader are sent on this channel
quit chan chan<- error
}
// NewWatcher establishes a new watcher with the underlying OS and begins waiting for events.
func NewWatcher() (*Watcher, error) {
port, e := syscall.CreateIoCompletionPort(syscall.InvalidHandle, 0, 0, 0)
if e != nil {
return nil, os.NewSyscallError("CreateIoCompletionPort", e)
}
w := &Watcher{
port: port,
watches: make(watchMap),
input: make(chan *input, 1),
Events: make(chan Event, 50),
Errors: make(chan error),
quit: make(chan chan<- error, 1),
}
go w.readEvents()
return w, nil
}
// Close removes all watches and closes the events channel.
func (w *Watcher) Close() error {
if w.isClosed {
return nil
}
w.isClosed = true
// Send "quit" message to the reader goroutine
ch := make(chan error)
w.quit <- ch
if err := w.wakeupReader(); err != nil {
return err
}
return <-ch
}
// Add starts watching the named file or directory (non-recursively).
func (w *Watcher) Add(name string) error {
if w.isClosed {
return errors.New("watcher already closed")
}
in := &input{
op: opAddWatch,
path: filepath.Clean(name),
flags: sysFSALLEVENTS,
reply: make(chan error),
}
w.input <- in
if err := w.wakeupReader(); err != nil {
return err
}
return <-in.reply
}
// Remove stops watching the the named file or directory (non-recursively).
func (w *Watcher) Remove(name string) error {
in := &input{
op: opRemoveWatch,
path: filepath.Clean(name),
reply: make(chan error),
}
w.input <- in
if err := w.wakeupReader(); err != nil {
return err
}
return <-in.reply
}
const (
// Options for AddWatch
sysFSONESHOT = 0x80000000
sysFSONLYDIR = 0x1000000
// Events
sysFSACCESS = 0x1
sysFSALLEVENTS = 0xfff
sysFSATTRIB = 0x4
sysFSCLOSE = 0x18
sysFSCREATE = 0x100
sysFSDELETE = 0x200
sysFSDELETESELF = 0x400
sysFSMODIFY = 0x2
sysFSMOVE = 0xc0
sysFSMOVEDFROM = 0x40
sysFSMOVEDTO = 0x80
sysFSMOVESELF = 0x800
// Special events
sysFSIGNORED = 0x8000
sysFSQOVERFLOW = 0x4000
)
func newEvent(name string, mask uint32) Event {
e := Event{Name: name}
if mask&sysFSCREATE == sysFSCREATE || mask&sysFSMOVEDTO == sysFSMOVEDTO {
e.Op |= Create
}
if mask&sysFSDELETE == sysFSDELETE || mask&sysFSDELETESELF == sysFSDELETESELF {
e.Op |= Remove
}
if mask&sysFSMODIFY == sysFSMODIFY {
e.Op |= Write
}
if mask&sysFSMOVE == sysFSMOVE || mask&sysFSMOVESELF == sysFSMOVESELF || mask&sysFSMOVEDFROM == sysFSMOVEDFROM {
e.Op |= Rename
}
if mask&sysFSATTRIB == sysFSATTRIB {
e.Op |= Chmod
}
return e
}
const (
opAddWatch = iota
opRemoveWatch
)
const (
provisional uint64 = 1 << (32 + iota)
)
type input struct {
op int
path string
flags uint32
reply chan error
}
type inode struct {
handle syscall.Handle
volume uint32
index uint64
}
type watch struct {
ov syscall.Overlapped
ino *inode // i-number
path string // Directory path
mask uint64 // Directory itself is being watched with these notify flags
names map[string]uint64 // Map of names being watched and their notify flags
rename string // Remembers the old name while renaming a file
buf [4096]byte
}
type indexMap map[uint64]*watch
type watchMap map[uint32]indexMap
func (w *Watcher) wakeupReader() error {
e := syscall.PostQueuedCompletionStatus(w.port, 0, 0, nil)
if e != nil {
return os.NewSyscallError("PostQueuedCompletionStatus", e)
}
return nil
}
func getDir(pathname string) (dir string, err error) {
attr, e := syscall.GetFileAttributes(syscall.StringToUTF16Ptr(pathname))
if e != nil {
return "", os.NewSyscallError("GetFileAttributes", e)
}
if attr&syscall.FILE_ATTRIBUTE_DIRECTORY != 0 {
dir = pathname
} else {
dir, _ = filepath.Split(pathname)
dir = filepath.Clean(dir)
}
return
}
func getIno(path string) (ino *inode, err error) {
h, e := syscall.CreateFile(syscall.StringToUTF16Ptr(path),
syscall.FILE_LIST_DIRECTORY,
syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE|syscall.FILE_SHARE_DELETE,
nil, syscall.OPEN_EXISTING,
syscall.FILE_FLAG_BACKUP_SEMANTICS|syscall.FILE_FLAG_OVERLAPPED, 0)
if e != nil {
return nil, os.NewSyscallError("CreateFile", e)
}
var fi syscall.ByHandleFileInformation
if e = syscall.GetFileInformationByHandle(h, &fi); e != nil {
syscall.CloseHandle(h)
return nil, os.NewSyscallError("GetFileInformationByHandle", e)
}
ino = &inode{
handle: h,
volume: fi.VolumeSerialNumber,
index: uint64(fi.FileIndexHigh)<<32 | uint64(fi.FileIndexLow),
}
return ino, nil
}
// Must run within the I/O thread.
func (m watchMap) get(ino *inode) *watch {
if i := m[ino.volume]; i != nil {
return i[ino.index]
}
return nil
}
// Must run within the I/O thread.
func (m watchMap) set(ino *inode, watch *watch) {
i := m[ino.volume]
if i == nil {
i = make(indexMap)
m[ino.volume] = i
}
i[ino.index] = watch
}
// Must run within the I/O thread.
func (w *Watcher) addWatch(pathname string, flags uint64) error {
dir, err := getDir(pathname)
if err != nil {
return err
}
if flags&sysFSONLYDIR != 0 && pathname != dir {
return nil
}
ino, err := getIno(dir)
if err != nil {
return err
}
w.mu.Lock()
watchEntry := w.watches.get(ino)
w.mu.Unlock()
if watchEntry == nil {
if _, e := syscall.CreateIoCompletionPort(ino.handle, w.port, 0, 0); e != nil {
syscall.CloseHandle(ino.handle)
return os.NewSyscallError("CreateIoCompletionPort", e)
}
watchEntry = &watch{
ino: ino,
path: dir,
names: make(map[string]uint64),
}
w.mu.Lock()
w.watches.set(ino, watchEntry)
w.mu.Unlock()
flags |= provisional
} else {
syscall.CloseHandle(ino.handle)
}
if pathname == dir {
watchEntry.mask |= flags
} else {
watchEntry.names[filepath.Base(pathname)] |= flags
}
if err = w.startRead(watchEntry); err != nil {
return err
}
if pathname == dir {
watchEntry.mask &= ^provisional
} else {
watchEntry.names[filepath.Base(pathname)] &= ^provisional
}
return nil
}
// Must run within the I/O thread.
func (w *Watcher) remWatch(pathname string) error {
dir, err := getDir(pathname)
if err != nil {
return err
}
ino, err := getIno(dir)
if err != nil {
return err
}
w.mu.Lock()
watch := w.watches.get(ino)
w.mu.Unlock()
if watch == nil {
return fmt.Errorf("can't remove non-existent watch for: %s", pathname)
}
if pathname == dir {
w.sendEvent(watch.path, watch.mask&sysFSIGNORED)
watch.mask = 0
} else {
name := filepath.Base(pathname)
w.sendEvent(filepath.Join(watch.path, name), watch.names[name]&sysFSIGNORED)
delete(watch.names, name)
}
return w.startRead(watch)
}
// Must run within the I/O thread.
func (w *Watcher) deleteWatch(watch *watch) {
for name, mask := range watch.names {
if mask&provisional == 0 {
w.sendEvent(filepath.Join(watch.path, name), mask&sysFSIGNORED)
}
delete(watch.names, name)
}
if watch.mask != 0 {
if watch.mask&provisional == 0 {
w.sendEvent(watch.path, watch.mask&sysFSIGNORED)
}
watch.mask = 0
}
}
// Must run within the I/O thread.
func (w *Watcher) startRead(watch *watch) error {
if e := syscall.CancelIo(watch.ino.handle); e != nil {
w.Errors <- os.NewSyscallError("CancelIo", e)
w.deleteWatch(watch)
}
mask := toWindowsFlags(watch.mask)
for _, m := range watch.names {
mask |= toWindowsFlags(m)
}
if mask == 0 {
if e := syscall.CloseHandle(watch.ino.handle); e != nil {
w.Errors <- os.NewSyscallError("CloseHandle", e)
}
w.mu.Lock()
delete(w.watches[watch.ino.volume], watch.ino.index)
w.mu.Unlock()
return nil
}
e := syscall.ReadDirectoryChanges(watch.ino.handle, &watch.buf[0],
uint32(unsafe.Sizeof(watch.buf)), false, mask, nil, &watch.ov, 0)
if e != nil {
err := os.NewSyscallError("ReadDirectoryChanges", e)
if e == syscall.ERROR_ACCESS_DENIED && watch.mask&provisional == 0 {
// Watched directory was probably removed
if w.sendEvent(watch.path, watch.mask&sysFSDELETESELF) {
if watch.mask&sysFSONESHOT != 0 {
watch.mask = 0
}
}
err = nil
}
w.deleteWatch(watch)
w.startRead(watch)
return err
}
return nil
}
// readEvents reads from the I/O completion port, converts the
// received events into Event objects and sends them via the Events channel.
// Entry point to the I/O thread.
func (w *Watcher) readEvents() {
var (
n, key uint32
ov *syscall.Overlapped
)
runtime.LockOSThread()
for {
e := syscall.GetQueuedCompletionStatus(w.port, &n, &key, &ov, syscall.INFINITE)
watch := (*watch)(unsafe.Pointer(ov))
if watch == nil {
select {
case ch := <-w.quit:
w.mu.Lock()
var indexes []indexMap
for _, index := range w.watches {
indexes = append(indexes, index)
}
w.mu.Unlock()
for _, index := range indexes {
for _, watch := range index {
w.deleteWatch(watch)
w.startRead(watch)
}
}
var err error
if e := syscall.CloseHandle(w.port); e != nil {
err = os.NewSyscallError("CloseHandle", e)
}
close(w.Events)
close(w.Errors)
ch <- err
return
case in := <-w.input:
switch in.op {
case opAddWatch:
in.reply <- w.addWatch(in.path, uint64(in.flags))
case opRemoveWatch:
in.reply <- w.remWatch(in.path)
}
default:
}
continue
}
switch e {
case syscall.ERROR_MORE_DATA:
if watch == nil {
w.Errors <- errors.New("ERROR_MORE_DATA has unexpectedly null lpOverlapped buffer")
} else {
// The i/o succeeded but the buffer is full.
// In theory we should be building up a full packet.
// In practice we can get away with just carrying on.
n = uint32(unsafe.Sizeof(watch.buf))
}
case syscall.ERROR_ACCESS_DENIED:
// Watched directory was probably removed
w.sendEvent(watch.path, watch.mask&sysFSDELETESELF)
w.deleteWatch(watch)
w.startRead(watch)
continue
case syscall.ERROR_OPERATION_ABORTED:
// CancelIo was called on this handle
continue
default:
w.Errors <- os.NewSyscallError("GetQueuedCompletionPort", e)
continue
case nil:
}
var offset uint32
for {
if n == 0 {
w.Events <- newEvent("", sysFSQOVERFLOW)
w.Errors <- errors.New("short read in readEvents()")
break
}
// Point "raw" to the event in the buffer
raw := (*syscall.FileNotifyInformation)(unsafe.Pointer(&watch.buf[offset]))
buf := (*[syscall.MAX_PATH]uint16)(unsafe.Pointer(&raw.FileName))
name := syscall.UTF16ToString(buf[:raw.FileNameLength/2])
fullname := filepath.Join(watch.path, name)
var mask uint64
switch raw.Action {
case syscall.FILE_ACTION_REMOVED:
mask = sysFSDELETESELF
case syscall.FILE_ACTION_MODIFIED:
mask = sysFSMODIFY
case syscall.FILE_ACTION_RENAMED_OLD_NAME:
watch.rename = name
case syscall.FILE_ACTION_RENAMED_NEW_NAME:
if watch.names[watch.rename] != 0 {
watch.names[name] |= watch.names[watch.rename]
delete(watch.names, watch.rename)
mask = sysFSMOVESELF
}
}
sendNameEvent := func() {
if w.sendEvent(fullname, watch.names[name]&mask) {
if watch.names[name]&sysFSONESHOT != 0 {
delete(watch.names, name)
}
}
}
if raw.Action != syscall.FILE_ACTION_RENAMED_NEW_NAME {
sendNameEvent()
}
if raw.Action == syscall.FILE_ACTION_REMOVED {
w.sendEvent(fullname, watch.names[name]&sysFSIGNORED)
delete(watch.names, name)
}
if w.sendEvent(fullname, watch.mask&toFSnotifyFlags(raw.Action)) {
if watch.mask&sysFSONESHOT != 0 {
watch.mask = 0
}
}
if raw.Action == syscall.FILE_ACTION_RENAMED_NEW_NAME {
fullname = filepath.Join(watch.path, watch.rename)
sendNameEvent()
}
// Move to the next event in the buffer
if raw.NextEntryOffset == 0 {
break
}
offset += raw.NextEntryOffset
// Error!
if offset >= n {
w.Errors <- errors.New("Windows system assumed buffer larger than it is, events have likely been missed.")
break
}
}
if err := w.startRead(watch); err != nil {
w.Errors <- err
}
}
}
func (w *Watcher) sendEvent(name string, mask uint64) bool {
if mask == 0 {
return false
}
event := newEvent(name, uint32(mask))
select {
case ch := <-w.quit:
w.quit <- ch
case w.Events <- event:
}
return true
}
func toWindowsFlags(mask uint64) uint32 {
var m uint32
if mask&sysFSACCESS != 0 {
m |= syscall.FILE_NOTIFY_CHANGE_LAST_ACCESS
}
if mask&sysFSMODIFY != 0 {
m |= syscall.FILE_NOTIFY_CHANGE_LAST_WRITE
}
if mask&sysFSATTRIB != 0 {
m |= syscall.FILE_NOTIFY_CHANGE_ATTRIBUTES
}
if mask&(sysFSMOVE|sysFSCREATE|sysFSDELETE) != 0 {
m |= syscall.FILE_NOTIFY_CHANGE_FILE_NAME | syscall.FILE_NOTIFY_CHANGE_DIR_NAME
}
return m
}
func toFSnotifyFlags(action uint32) uint64 {
switch action {
case syscall.FILE_ACTION_ADDED:
return sysFSCREATE
case syscall.FILE_ACTION_REMOVED:
return sysFSDELETE
case syscall.FILE_ACTION_MODIFIED:
return sysFSMODIFY
case syscall.FILE_ACTION_RENAMED_OLD_NAME:
return sysFSMOVEDFROM
case syscall.FILE_ACTION_RENAMED_NEW_NAME:
return sysFSMOVEDTO
}
return 0
}

View File

@@ -1,6 +1,6 @@
# netlink - netlink library for go #
[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
[![Build Status](https://app.travis-ci.com/vishvananda/netlink.svg?branch=master)](https://app.travis-ci.com/github/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink)
The netlink package provides a simple netlink library for go. Netlink
is the interface a user-space program in linux uses to communicate with

View File

@@ -268,7 +268,7 @@ func parseAddr(m []byte) (addr Addr, family int, err error) {
// But obviously, as there are IPv6 PtP addresses, too,
// IFA_LOCAL should also be handled for IPv6.
if local != nil {
if family == FAMILY_V4 && local.IP.Equal(dst.IP) {
if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) {
addr.IPNet = dst
} else {
addr.IPNet = local
@@ -357,7 +357,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
msgs, from, err := s.Receive()
if err != nil {
if cberr != nil {
cberr(err)
cberr(fmt.Errorf("Receive failed: %v",
err))
}
return
}
@@ -372,7 +373,6 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c
continue
}
if m.Header.Type == unix.NLMSG_ERROR {
native := nl.NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
continue

View File

@@ -16,6 +16,30 @@ const (
BPF_PROG_TYPE_SCHED_ACT
BPF_PROG_TYPE_TRACEPOINT
BPF_PROG_TYPE_XDP
BPF_PROG_TYPE_PERF_EVENT
BPF_PROG_TYPE_CGROUP_SKB
BPF_PROG_TYPE_CGROUP_SOCK
BPF_PROG_TYPE_LWT_IN
BPF_PROG_TYPE_LWT_OUT
BPF_PROG_TYPE_LWT_XMIT
BPF_PROG_TYPE_SOCK_OPS
BPF_PROG_TYPE_SK_SKB
BPF_PROG_TYPE_CGROUP_DEVICE
BPF_PROG_TYPE_SK_MSG
BPF_PROG_TYPE_RAW_TRACEPOINT
BPF_PROG_TYPE_CGROUP_SOCK_ADDR
BPF_PROG_TYPE_LWT_SEG6LOCAL
BPF_PROG_TYPE_LIRC_MODE2
BPF_PROG_TYPE_SK_REUSEPORT
BPF_PROG_TYPE_FLOW_DISSECTOR
BPF_PROG_TYPE_CGROUP_SYSCTL
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
BPF_PROG_TYPE_CGROUP_SOCKOPT
BPF_PROG_TYPE_TRACING
BPF_PROG_TYPE_STRUCT_OPS
BPF_PROG_TYPE_EXT
BPF_PROG_TYPE_LSM
BPF_PROG_TYPE_SK_LOOKUP
)
type BPFAttr struct {

View File

@@ -176,6 +176,12 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize())
options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab))
options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab))
if htb.Rate >= uint64(1<<32) {
options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate))
}
if htb.Ceil >= uint64(1<<32) {
options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil))
}
case "hfsc":
hfsc := class.(*HfscClass)
opt := nl.HfscCopt{}
@@ -185,9 +191,9 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
opt.Fsc.Set(fm1/8, fd, fm2/8)
um1, ud, um2 := hfsc.Usc.Attrs()
opt.Usc.Set(um1/8, ud, um2/8)
nl.NewRtAttrChild(options, nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
nl.NewRtAttrChild(options, nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
nl.NewRtAttrChild(options, nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc))
options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc))
options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc))
}
req.AddData(options)
return nil
@@ -306,6 +312,10 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro
htb.Quantum = opt.Quantum
htb.Level = opt.Level
htb.Prio = opt.Prio
case nl.TCA_HTB_RATE64:
htb.Rate = native.Uint64(datum.Value[0:8])
case nl.TCA_HTB_CEIL64:
htb.Ceil = native.Uint64(datum.Value[0:8])
}
}
return detailed, nil
@@ -331,7 +341,6 @@ func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, err
func parseTcStats(data []byte) (*ClassStatistics, error) {
buf := &bytes.Buffer{}
buf.Write(data)
native := nl.NativeEndian()
tcStats := &tcStats{}
if err := binary.Read(buf, native, tcStats); err != nil {
return nil, err
@@ -353,7 +362,6 @@ func parseTcStats(data []byte) (*ClassStatistics, error) {
func parseGnetStats(data []byte, gnetStats interface{}) error {
buf := &bytes.Buffer{}
buf.Write(data)
native := nl.NativeEndian()
return binary.Read(buf, native, gnetStats)
}

View File

@@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"net"
"time"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
@@ -145,16 +146,23 @@ type ConntrackFlow struct {
Forward ipTuple
Reverse ipTuple
Mark uint32
TimeStart uint64
TimeStop uint64
TimeOut uint32
}
func (s *ConntrackFlow) String() string {
// conntrack cmd output:
// udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0
return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d",
// start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec)
start := time.Unix(0, int64(s.TimeStart))
stop := time.Unix(0, int64(s.TimeStop))
timeout := int32(s.TimeOut)
return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)",
nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol,
s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes,
s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes,
s.Mark)
s.Mark, start, stop, timeout)
}
// This method parse the ip tuple structure
@@ -174,25 +182,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 {
tpl.DstIP = v
}
}
// Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO
reader.Seek(4, seekCurrent)
_, t, _, v := parseNfAttrTLV(reader)
// Get total length of nested protocol-specific info.
_, _, protoInfoTotalLen := parseNfAttrTL(reader)
_, t, l, v := parseNfAttrTLV(reader)
// Track the number of bytes read.
protoInfoBytesRead := uint16(nl.SizeofNfattr) + l
if t == nl.CTA_PROTO_NUM {
tpl.Protocol = uint8(v[0])
}
// Skip some padding 3 bytes
// We only parse TCP & UDP headers. Skip the others.
if tpl.Protocol != 6 && tpl.Protocol != 17 {
// skip the rest
bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
reader.Seek(int64(bytesRemaining), seekCurrent)
return tpl.Protocol
}
// Skip 3 bytes of padding
reader.Seek(3, seekCurrent)
protoInfoBytesRead += 3
for i := 0; i < 2; i++ {
_, t, _ := parseNfAttrTL(reader)
protoInfoBytesRead += uint16(nl.SizeofNfattr)
switch t {
case nl.CTA_PROTO_SRC_PORT:
parseBERaw16(reader, &tpl.SrcPort)
protoInfoBytesRead += 2
case nl.CTA_PROTO_DST_PORT:
parseBERaw16(reader, &tpl.DstPort)
protoInfoBytesRead += 2
}
// Skip some padding 2 byte
// Skip 2 bytes of padding
reader.Seek(2, seekCurrent)
protoInfoBytesRead += 2
}
// Skip any remaining/unknown parts of the message
bytesRemaining := protoInfoTotalLen - protoInfoBytesRead
reader.Seek(int64(bytesRemaining), seekCurrent)
return tpl.Protocol
}
@@ -211,10 +237,14 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) {
binary.Read(r, nl.NativeEndian(), &attrType)
isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED
attrType = attrType & (nl.NLA_F_NESTED - 1)
return isNested, attrType, len
}
func skipNfAttrValue(r *bytes.Reader, len uint16) {
len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1)
r.Seek(int64(len), seekCurrent)
}
func parseBERaw16(r *bytes.Reader, v *uint16) {
binary.Read(r, binary.BigEndian, v)
}
@@ -241,6 +271,36 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) {
return
}
// when the flow is alive, only the timestamp_start is returned in structure
func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) {
var numTimeStamps int
oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp
if readSize == uint16(oneItem) {
numTimeStamps = 1
} else if readSize == 2*uint16(oneItem) {
numTimeStamps = 2
} else {
return
}
for i := 0; i < numTimeStamps; i++ {
switch _, t, _ := parseNfAttrTL(r); t {
case nl.CTA_TIMESTAMP_START:
parseBERaw64(r, &tstart)
case nl.CTA_TIMESTAMP_STOP:
parseBERaw64(r, &tstop)
default:
return
}
}
return
}
func parseTimeOut(r *bytes.Reader) (ttimeout uint32) {
parseBERaw32(r, &ttimeout)
return
}
func parseConnectionMark(r *bytes.Reader) (mark uint32) {
parseBERaw32(r, &mark)
return
@@ -266,25 +326,37 @@ func parseRawData(data []byte) *ConntrackFlow {
if nested, t, l := parseNfAttrTL(reader); nested {
switch t {
case nl.CTA_TUPLE_ORIG:
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
parseIpTuple(reader, &s.Forward)
}
case nl.CTA_TUPLE_REPLY:
if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP {
parseIpTuple(reader, &s.Reverse)
} else {
// Header not recognized skip it
reader.Seek(int64(l), seekCurrent)
skipNfAttrValue(reader, l)
}
case nl.CTA_COUNTERS_ORIG:
s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader)
case nl.CTA_COUNTERS_REPLY:
s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader)
case nl.CTA_TIMESTAMP:
s.TimeStart, s.TimeStop = parseTimeStamp(reader, l)
case nl.CTA_PROTOINFO:
skipNfAttrValue(reader, l)
default:
skipNfAttrValue(reader, l)
}
} else {
switch t {
case nl.CTA_MARK:
s.Mark = parseConnectionMark(reader)
case nl.CTA_TIMEOUT:
s.TimeOut = parseTimeOut(reader)
case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID:
skipNfAttrValue(reader, l)
default:
skipNfAttrValue(reader, l)
}
}
}
@@ -346,23 +418,34 @@ type CustomConntrackFilter interface {
}
type ConntrackFilter struct {
ipFilter map[ConntrackFilterType]net.IP
ipNetFilter map[ConntrackFilterType]*net.IPNet
portFilter map[ConntrackFilterType]uint16
protoFilter uint8
}
// AddIP adds an IP to the conntrack filter
func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
if f.ipFilter == nil {
f.ipFilter = make(map[ConntrackFilterType]net.IP)
// AddIPNet adds a IP subnet to the conntrack filter
func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error {
if ipNet == nil {
return fmt.Errorf("Filter attribute empty")
}
if _, ok := f.ipFilter[tp]; ok {
if f.ipNetFilter == nil {
f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet)
}
if _, ok := f.ipNetFilter[tp]; ok {
return errors.New("Filter attribute already present")
}
f.ipFilter[tp] = ip
f.ipNetFilter[tp] = ipNet
return nil
}
// AddIP adds an IP to the conntrack filter
func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error {
if ip == nil {
return fmt.Errorf("Filter attribute empty")
}
return f.AddIPNet(tp, NewIPNet(ip))
}
// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it
func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error {
switch f.protoFilter {
@@ -394,7 +477,7 @@ func (f *ConntrackFilter) AddProtocol(proto uint8) error {
// MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter
// false otherwise
func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
if len(f.ipFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 {
if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 {
// empty filter always not match
return false
}
@@ -408,30 +491,30 @@ func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool {
match := true
// IP conntrack filter
if len(f.ipFilter) > 0 {
if len(f.ipNetFilter) > 0 {
// -orig-src ip Source address from original direction
if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found {
match = match && elem.Equal(flow.Forward.SrcIP)
if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found {
match = match && elem.Contains(flow.Forward.SrcIP)
}
// -orig-dst ip Destination address from original direction
if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found {
match = match && elem.Equal(flow.Forward.DstIP)
if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found {
match = match && elem.Contains(flow.Forward.DstIP)
}
// -src-nat ip Source NAT ip
if elem, found := f.ipFilter[ConntrackReplySrcIP]; match && found {
match = match && elem.Equal(flow.Reverse.SrcIP)
if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found {
match = match && elem.Contains(flow.Reverse.SrcIP)
}
// -dst-nat ip Destination NAT ip
if elem, found := f.ipFilter[ConntrackReplyDstIP]; match && found {
match = match && elem.Equal(flow.Reverse.DstIP)
if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found {
match = match && elem.Contains(flow.Reverse.DstIP)
}
// Match source or destination reply IP
if elem, found := f.ipFilter[ConntrackReplyAnyIP]; match && found {
match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP))
if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found {
match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP))
}
}

View File

@@ -1,9 +1,10 @@
package netlink
import (
"fmt"
"net"
"syscall"
"fmt"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
@@ -27,6 +28,43 @@ type DevlinkDevice struct {
Attrs DevlinkDevAttrs
}
// DevlinkPortFn represents port function and its attributes
type DevlinkPortFn struct {
HwAddr net.HardwareAddr
State uint8
OpState uint8
}
// DevlinkPortFnSetAttrs represents attributes to set
type DevlinkPortFnSetAttrs struct {
FnAttrs DevlinkPortFn
HwAddrValid bool
StateValid bool
}
// DevlinkPort represents port and its attributes
type DevlinkPort struct {
BusName string
DeviceName string
PortIndex uint32
PortType uint16
NetdeviceName string
NetdevIfIndex uint32
RdmaDeviceName string
PortFlavour uint16
Fn *DevlinkPortFn
}
type DevLinkPortAddAttrs struct {
Controller uint32
SfNumber uint32
PortIndex uint32
PfNumber uint16
SfNumberValid bool
PortIndexValid bool
ControllerValid bool
}
func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) {
devices := make([]*DevlinkDevice, 0, len(msgs))
for _, m := range msgs {
@@ -95,9 +133,9 @@ func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error
for _, a := range attrs {
switch a.Attr.Type {
case nl.DEVLINK_ATTR_BUS_NAME:
d.BusName = string(a.Value)
d.BusName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_DEV_NAME:
d.DeviceName = string(a.Value)
d.DeviceName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_ESWITCH_MODE:
d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value))
case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE:
@@ -126,12 +164,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) {
req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK)
req.AddData(msg)
b := make([]byte, len(dev.BusName))
b := make([]byte, len(dev.BusName)+1)
copy(b, dev.BusName)
data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b)
req.AddData(data)
b = make([]byte, len(dev.DeviceName))
b = make([]byte, len(dev.DeviceName)+1)
copy(b, dev.DeviceName)
data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b)
req.AddData(data)
@@ -270,3 +308,206 @@ func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error
func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error {
return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode)
}
func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error {
for _, a := range attrs {
switch a.Attr.Type {
case nl.DEVLINK_ATTR_BUS_NAME:
port.BusName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_DEV_NAME:
port.DeviceName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_PORT_INDEX:
port.PortIndex = native.Uint32(a.Value)
case nl.DEVLINK_ATTR_PORT_TYPE:
port.PortType = native.Uint16(a.Value)
case nl.DEVLINK_ATTR_PORT_NETDEV_NAME:
port.NetdeviceName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX:
port.NetdevIfIndex = native.Uint32(a.Value)
case nl.DEVLINK_ATTR_PORT_IBDEV_NAME:
port.RdmaDeviceName = string(a.Value[:len(a.Value)-1])
case nl.DEVLINK_ATTR_PORT_FLAVOUR:
port.PortFlavour = native.Uint16(a.Value)
case nl.DEVLINK_ATTR_PORT_FUNCTION:
port.Fn = &DevlinkPortFn{}
for nested := range nl.ParseAttributes(a.Value) {
switch nested.Type {
case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR:
port.Fn.HwAddr = nested.Value[:]
case nl.DEVLINK_PORT_FN_ATTR_STATE:
port.Fn.State = uint8(nested.Value[0])
case nl.DEVLINK_PORT_FN_ATTR_OPSTATE:
port.Fn.OpState = uint8(nested.Value[0])
}
}
}
}
return nil
}
func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) {
ports := make([]*DevlinkPort, 0, len(msgs))
for _, m := range msgs {
attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
if err != nil {
return nil, err
}
port := &DevlinkPort{}
if err = port.parseAttributes(attrs); err != nil {
return nil, err
}
ports = append(ports, port)
}
return ports, nil
}
// DevLinkGetPortList provides a pointer to devlink ports and nil error,
// otherwise returns an error code.
func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) {
f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME)
if err != nil {
return nil, err
}
msg := &nl.Genlmsg{
Command: nl.DEVLINK_CMD_PORT_GET,
Version: nl.GENL_DEVLINK_VERSION,
}
req := h.newNetlinkRequest(int(f.ID),
unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP)
req.AddData(msg)
msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
ports, err := parseDevLinkAllPortList(msgs)
if err != nil {
return nil, err
}
return ports, nil
}
// DevLinkGetPortList provides a pointer to devlink ports and nil error,
// otherwise returns an error code.
func DevLinkGetAllPortList() ([]*DevlinkPort, error) {
return pkgHandle.DevLinkGetAllPortList()
}
func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) {
m := msgs[0]
attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:])
if err != nil {
return nil, err
}
port := &DevlinkPort{}
if err = port.parseAttributes(attrs); err != nil {
return nil, err
}
return port, nil
}
// DevLinkGetPortByIndexprovides a pointer to devlink device and nil error,
// otherwise returns an error code.
func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) {
_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device)
if err != nil {
return nil, err
}
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
port, err := parseDevlinkPortMsg(respmsg)
return port, err
}
// DevLinkGetPortByIndex provides a pointer to devlink portand nil error,
// otherwise returns an error code.
func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) {
return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex)
}
// DevLinkPortAdd adds a devlink port and returns a port on success
// otherwise returns nil port and an error code.
func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device)
if err != nil {
return nil, err
}
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour)))
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber)))
if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid {
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber)))
}
if Attrs.PortIndexValid {
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex)))
}
if Attrs.ControllerValid {
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller)))
}
respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
port, err := parseDevlinkPortMsg(respmsg)
return port, err
}
// DevLinkPortAdd adds a devlink port and returns a port on success
// otherwise returns nil port and an error code.
func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) {
return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs)
}
// DevLinkPortDel deletes a devlink port and returns success or error code.
func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device)
if err != nil {
return err
}
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
_, err = req.Execute(unix.NETLINK_GENERIC, 0)
return err
}
// DevLinkPortDel deletes a devlink port and returns success or error code.
func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error {
return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex)
}
// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask.
// It returns 0 on success or error code.
func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
_, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device)
if err != nil {
return err
}
req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex)))
fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil)
if FnAttrs.HwAddrValid {
fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr))
}
if FnAttrs.StateValid {
fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State))
}
req.AddData(fnAttr)
_, err = req.Execute(unix.NETLINK_GENERIC, 0)
return err
}
// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask.
// It returns 0 on success or error code.
func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error {
return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs)
}

View File

@@ -6,6 +6,7 @@ import (
"encoding/hex"
"errors"
"fmt"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
@@ -36,6 +37,7 @@ type U32 struct {
ClassId uint32
Divisor uint32 // Divisor MUST be power of 2.
Hash uint32
Link uint32
RedirIndex int
Sel *TcU32Sel
Actions []Action
@@ -119,6 +121,131 @@ func (filter *Fw) Type() string {
return "fw"
}
type Flower struct {
FilterAttrs
DestIP net.IP
DestIPMask net.IPMask
SrcIP net.IP
SrcIPMask net.IPMask
EthType uint16
EncDestIP net.IP
EncDestIPMask net.IPMask
EncSrcIP net.IP
EncSrcIPMask net.IPMask
EncDestPort uint16
EncKeyId uint32
Actions []Action
}
func (filter *Flower) Attrs() *FilterAttrs {
return &filter.FilterAttrs
}
func (filter *Flower) Type() string {
return "flower"
}
func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) {
ipType := v4Type
maskType := v4MaskType
encodeMask := mask
if mask == nil {
encodeMask = net.CIDRMask(32, 32)
}
v4IP := ip.To4()
if v4IP == nil {
ipType = v6Type
maskType = v6MaskType
if mask == nil {
encodeMask = net.CIDRMask(128, 128)
}
} else {
ip = v4IP
}
parent.AddRtAttr(ipType, ip)
parent.AddRtAttr(maskType, encodeMask)
}
func (filter *Flower) encode(parent *nl.RtAttr) error {
if filter.EthType != 0 {
parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType))
}
if filter.SrcIP != nil {
filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask,
nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC,
nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK)
}
if filter.DestIP != nil {
filter.encodeIP(parent, filter.DestIP, filter.DestIPMask,
nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST,
nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK)
}
if filter.EncSrcIP != nil {
filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask,
nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC,
nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK)
}
if filter.EncDestIP != nil {
filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask,
nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST,
nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK)
}
if filter.EncDestPort != 0 {
parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort))
}
if filter.EncKeyId != 0 {
parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId))
}
actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil)
if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
return err
}
return nil
}
func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error {
for _, datum := range data {
switch datum.Attr.Type {
case nl.TCA_FLOWER_KEY_ETH_TYPE:
filter.EthType = ntohs(datum.Value)
case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC:
filter.SrcIP = datum.Value
case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK:
filter.SrcIPMask = datum.Value
case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST:
filter.DestIP = datum.Value
case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK:
filter.DestIPMask = datum.Value
case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC:
filter.EncSrcIP = datum.Value
case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK:
filter.EncSrcIPMask = datum.Value
case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST:
filter.EncDestIP = datum.Value
case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK:
filter.EncDestIPMask = datum.Value
case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT:
filter.EncDestPort = ntohs(datum.Value)
case nl.TCA_FLOWER_KEY_ENC_KEY_ID:
filter.EncKeyId = ntohl(datum.Value)
case nl.TCA_FLOWER_ACT:
tables, err := nl.ParseRouteAttr(datum.Value)
if err != nil {
return err
}
filter.Actions, err = parseActions(tables)
if err != nil {
return err
}
}
}
return nil
}
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func FilterDel(filter Filter) error {
@@ -168,7 +295,6 @@ func (h *Handle) FilterReplace(filter Filter) error {
}
func (h *Handle) filterModify(filter Filter, flags int) error {
native = nl.NativeEndian()
req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
@@ -225,6 +351,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
if filter.Hash != 0 {
options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash))
}
if filter.Link != 0 {
options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link))
}
actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil)
// backwards compatibility
if filter.RedirIndex != 0 {
@@ -283,6 +412,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error {
if filter.ClassId != 0 {
options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId))
}
case *Flower:
if err := filter.encode(options); err != nil {
return err
}
}
req.AddData(options)
@@ -351,6 +484,8 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
filter = &BpfFilter{}
case "matchall":
filter = &MatchAll{}
case "flower":
filter = &Flower{}
default:
filter = &GenericFilter{FilterType: filterType}
}
@@ -380,6 +515,11 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
if err != nil {
return nil, err
}
case "flower":
detailed, err = parseFlowerData(filter, data)
if err != nil {
return nil, err
}
default:
detailed = true
}
@@ -628,7 +768,6 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) {
}
func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
u32 := filter.(*U32)
detailed := false
for _, datum := range data {
@@ -666,13 +805,14 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
u32.Divisor = native.Uint32(datum.Value)
case nl.TCA_U32_HASH:
u32.Hash = native.Uint32(datum.Value)
case nl.TCA_U32_LINK:
u32.Link = native.Uint32(datum.Value)
}
}
return detailed, nil
}
func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
fw := filter.(*Fw)
detailed := true
for _, datum := range data {
@@ -701,7 +841,6 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
}
func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
bpf := filter.(*BpfFilter)
detailed := true
for _, datum := range data {
@@ -727,7 +866,6 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error)
}
func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
native = nl.NativeEndian()
matchall := filter.(*MatchAll)
detailed := true
for _, datum := range data {
@@ -748,6 +886,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er
return detailed, nil
}
func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) {
return true, filter.(*Flower).decode(data)
}
func AlignToAtm(size uint) uint {
var linksize, cells int
cells = int(size / nl.ATM_CELL_PAYLOAD)
@@ -795,14 +937,12 @@ func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, lin
func DeserializeRtab(b []byte) [256]uint32 {
var rtab [256]uint32
native := nl.NativeEndian()
r := bytes.NewReader(b)
_ = binary.Read(r, native, &rtab)
return rtab
}
func SerializeRtab(rtab [256]uint32) []byte {
native := nl.NativeEndian()
var w bytes.Buffer
_ = binary.Write(&w, native, rtab)
return w.Bytes()

View File

@@ -15,12 +15,28 @@ var pkgHandle = &Handle{}
// Handle is an handle for the netlink requests on a
// specific network namespace. All the requests on the
// same netlink family share the same netlink socket,
// which gets released when the handle is deleted.
// which gets released when the handle is Close'd.
type Handle struct {
sockets map[int]*nl.SocketHandle
lookupByDump bool
}
// SetSocketTimeout configures timeout for default netlink sockets
func SetSocketTimeout(to time.Duration) error {
if to < time.Microsecond {
return fmt.Errorf("invalid timeout, minimul value is %s", time.Microsecond)
}
nl.SocketTimeoutTv = unix.NsecToTimeval(to.Nanoseconds())
return nil
}
// GetSocketTimeout returns the timeout value used by default netlink sockets
func GetSocketTimeout() time.Duration {
nsec := unix.TimevalToNsec(nl.SocketTimeoutTv)
return time.Duration(nsec) * time.Nanosecond
}
// SupportsNetlinkFamily reports whether the passed netlink family is supported by this Handle
func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
_, ok := h.sockets[nlFamily]
@@ -120,14 +136,22 @@ func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error)
return h, nil
}
// Delete releases the resources allocated to this handle
func (h *Handle) Delete() {
// Close releases the resources allocated to this handle
func (h *Handle) Close() {
for _, sh := range h.sockets {
sh.Close()
}
h.sockets = nil
}
// Delete releases the resources allocated to this handle
//
// Deprecated: use Close instead which is in line with typical resource release
// patterns for files and other resources.
func (h *Handle) Delete() {
h.Close()
}
func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
// Do this so that package API still use nl package variable nextSeqNr
if h.sockets == nil {

View File

@@ -23,6 +23,8 @@ func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) {
return nil, ErrNotImplemented
}
func (h *Handle) Close() {}
func (h *Handle) Delete() {}
func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool {
@@ -237,6 +239,10 @@ func (h *Handle) RouteAdd(route *Route) error {
return ErrNotImplemented
}
func (h *Handle) RouteAppend(route *Route) error {
return ErrNotImplemented
}
func (h *Handle) RouteDel(route *Route) error {
return ErrNotImplemented
}

View File

@@ -27,4 +27,5 @@ const (
type InetDiagTCPInfoResp struct {
InetDiagMsg *Socket
TCPInfo *TCPInfo
TCPBBRInfo *TCPBBRInfo
}

View File

@@ -1,6 +1,7 @@
package netlink
import (
"encoding/binary"
"log"
"net"
"syscall"
@@ -11,25 +12,40 @@ import (
// IPSetEntry is used for adding, updating, retreiving and deleting entries
type IPSetEntry struct {
Comment string
MAC net.HardwareAddr
IP net.IP
Timeout *uint32
Packets *uint64
Bytes *uint64
Comment string
MAC net.HardwareAddr
IP net.IP
CIDR uint8
Timeout *uint32
Packets *uint64
Bytes *uint64
Protocol *uint8
Port *uint16
IP2 net.IP
CIDR2 uint8
IFace string
Mark *uint32
Replace bool // replace existing entry
}
// IPSetResult is the result of a dump request for a set
type IPSetResult struct {
Nfgenmsg *nl.Nfgenmsg
Protocol uint8
Revision uint8
Family uint8
Flags uint8
SetName string
TypeName string
Nfgenmsg *nl.Nfgenmsg
Protocol uint8
ProtocolMinVersion uint8
Revision uint8
Family uint8
Flags uint8
SetName string
TypeName string
Comment string
MarkMask uint32
IPFrom net.IP
IPTo net.IP
PortFrom uint16
PortTo uint16
HashSize uint32
NumEntries uint32
@@ -38,6 +54,7 @@ type IPSetResult struct {
SizeInMemory uint32
CadtFlags uint32
Timeout *uint32
LineNo uint32
Entries []IPSetEntry
}
@@ -49,10 +66,16 @@ type IpsetCreateOptions struct {
Counters bool
Comments bool
Skbinfo bool
Revision uint8
IPFrom net.IP
IPTo net.IP
PortFrom uint16
PortTo uint16
}
// IpsetProtocol returns the ipset protocol version from the kernel
func IpsetProtocol() (uint8, error) {
func IpsetProtocol() (uint8, uint8, error) {
return pkgHandle.IpsetProtocol()
}
@@ -83,23 +106,23 @@ func IpsetListAll() ([]IPSetResult, error) {
// IpsetAdd adds an entry to an existing ipset.
func IpsetAdd(setname string, entry *IPSetEntry) error {
return pkgHandle.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry)
return pkgHandle.IpsetAdd(setname, entry)
}
// IpsetDele deletes an entry from an existing ipset.
// IpsetDel deletes an entry from an existing ipset.
func IpsetDel(setname string, entry *IPSetEntry) error {
return pkgHandle.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry)
return pkgHandle.IpsetDel(setname, entry)
}
func (h *Handle) IpsetProtocol() (uint8, error) {
func (h *Handle) IpsetProtocol() (protocol uint8, minVersion uint8, err error) {
req := h.newIpsetRequest(nl.IPSET_CMD_PROTOCOL)
msgs, err := req.Execute(unix.NETLINK_NETFILTER, 0)
if err != nil {
return 0, err
return 0, 0, err
}
return ipsetUnserialize(msgs).Protocol, nil
response := ipsetUnserialize(msgs)
return response.Protocol, response.ProtocolMinVersion, nil
}
func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOptions) error {
@@ -111,11 +134,30 @@ func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOption
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(typename)))
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(0)))
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(0)))
revision := options.Revision
if revision == 0 {
revision = getIpsetDefaultWithTypeName(typename)
}
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(revision)))
data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
var family uint8
switch typename {
case "hash:mac":
case "bitmap:port":
buf := make([]byte, 4)
binary.BigEndian.PutUint16(buf, options.PortFrom)
binary.BigEndian.PutUint16(buf[2:], options.PortTo)
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_FROM|int(nl.NLA_F_NET_BYTEORDER), buf[:2]))
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_TO|int(nl.NLA_F_NET_BYTEORDER), buf[2:]))
default:
family = unix.AF_INET
}
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(family)))
if timeout := options.Timeout; timeout != nil {
data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout})
}
@@ -184,9 +226,24 @@ func (h *Handle) IpsetListAll() ([]IPSetResult, error) {
return result, nil
}
// IpsetAdd adds an entry to an existing ipset.
func (h *Handle) IpsetAdd(setname string, entry *IPSetEntry) error {
return h.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry)
}
// IpsetDel deletes an entry from an existing ipset.
func (h *Handle) IpsetDel(setname string, entry *IPSetEntry) error {
return h.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry)
}
func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error {
req := h.newIpsetRequest(nlCmd)
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname)))
if entry.Comment != "" {
req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment)))
}
data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil)
if !entry.Replace {
@@ -196,10 +253,49 @@ func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error
if entry.Timeout != nil {
data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *entry.Timeout})
}
if entry.IP != nil {
nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP)
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NESTED), nestedData.Serialize()))
}
if entry.MAC != nil {
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER, entry.MAC))
}
if entry.CIDR != 0 {
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR, nl.Uint8Attr(entry.CIDR)))
}
if entry.IP2 != nil {
nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP2)
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP2|int(nl.NLA_F_NESTED), nestedData.Serialize()))
}
if entry.CIDR2 != 0 {
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR2, nl.Uint8Attr(entry.CIDR2)))
}
if entry.Port != nil {
if entry.Protocol == nil {
// use tcp protocol as default
val := uint8(unix.IPPROTO_TCP)
entry.Protocol = &val
}
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PROTO, nl.Uint8Attr(*entry.Protocol)))
buf := make([]byte, 2)
binary.BigEndian.PutUint16(buf, *entry.Port)
data.AddChild(nl.NewRtAttr(int(nl.IPSET_ATTR_PORT|nl.NLA_F_NET_BYTEORDER), buf))
}
if entry.IFace != "" {
data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IFACE, nl.ZeroTerminated(entry.IFace)))
}
if entry.Mark != nil {
data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER, Value: *entry.Mark})
}
data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_LINENO | nl.NLA_F_NET_BYTEORDER, Value: 0})
req.AddData(data)
@@ -222,6 +318,17 @@ func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest {
return req
}
func getIpsetDefaultWithTypeName(typename string) uint8 {
switch typename {
case "hash:ip,port",
"hash:ip,port,ip",
"hash:ip,port,net",
"hash:net,port":
return 1
}
return 0
}
func ipsetExecute(req *nl.NetlinkRequest) (msgs [][]byte, err error) {
msgs, err = req.Execute(unix.NETLINK_NETFILTER, 0)
@@ -249,6 +356,8 @@ func (result *IPSetResult) unserialize(msg []byte) {
result.Protocol = attr.Value[0]
case nl.IPSET_ATTR_SETNAME:
result.SetName = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_COMMENT:
result.Comment = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_TYPENAME:
result.TypeName = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_REVISION:
@@ -261,6 +370,10 @@ func (result *IPSetResult) unserialize(msg []byte) {
result.parseAttrData(attr.Value)
case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED:
result.parseAttrADT(attr.Value)
case nl.IPSET_ATTR_PROTOCOL_MIN:
result.ProtocolMinVersion = attr.Value[0]
case nl.IPSET_ATTR_MARKMASK:
result.MarkMask = attr.Uint32()
default:
log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
}
@@ -285,6 +398,36 @@ func (result *IPSetResult) parseAttrData(data []byte) {
result.SizeInMemory = attr.Uint32()
case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER:
result.CadtFlags = attr.Uint32()
case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED:
for nested := range nl.ParseAttributes(attr.Value) {
switch nested.Type {
case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER:
result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value})
case nl.IPSET_ATTR_IP:
result.IPFrom = nested.Value
default:
log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
}
}
case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED:
for nested := range nl.ParseAttributes(attr.Value) {
switch nested.Type {
case nl.IPSET_ATTR_IP:
result.IPTo = nested.Value
default:
log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK)
}
}
case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER:
result.PortFrom = networkOrder.Uint16(attr.Value)
case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER:
result.PortTo = networkOrder.Uint16(attr.Value)
case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER:
result.LineNo = attr.Uint32()
case nl.IPSET_ATTR_COMMENT:
result.Comment = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_MARKMASK:
result.MarkMask = attr.Uint32()
default:
log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK)
}
@@ -316,6 +459,8 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) {
entry.Packets = &val
case nl.IPSET_ATTR_ETHER:
entry.MAC = net.HardwareAddr(attr.Value)
case nl.IPSET_ATTR_IP:
entry.IP = net.IP(attr.Value)
case nl.IPSET_ATTR_COMMENT:
entry.Comment = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED:
@@ -327,6 +472,30 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) {
log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
}
}
case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED:
for attr := range nl.ParseAttributes(attr.Value) {
switch attr.Type {
case nl.IPSET_ATTR_IP:
entry.IP2 = net.IP(attr.Value)
default:
log.Printf("unknown nested ADT attribute from kernel: %+v", attr)
}
}
case nl.IPSET_ATTR_CIDR:
entry.CIDR = attr.Value[0]
case nl.IPSET_ATTR_CIDR2:
entry.CIDR2 = attr.Value[0]
case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER:
val := networkOrder.Uint16(attr.Value)
entry.Port = &val
case nl.IPSET_ATTR_PROTO:
val := attr.Value[0]
entry.Protocol = &val
case nl.IPSET_ATTR_IFACE:
entry.IFace = nl.BytesToString(attr.Value)
case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER:
val := attr.Uint32()
entry.Mark = &val
default:
log.Printf("unknown ADT attribute from kernel: %+v", attr)
}

View File

@@ -35,10 +35,13 @@ type LinkAttrs struct {
Alias string
Statistics *LinkStatistics
Promisc int
Allmulti int
Multi int
Xdp *LinkXdp
EncapType string
Protinfo *Protinfo
OperState LinkOperState
PhysSwitchID int
NetNsID int
NumTxQueues int
NumRxQueues int
@@ -456,6 +459,19 @@ func (ipvlan *IPVlan) Type() string {
return "ipvlan"
}
// IPVtap - IPVtap is a virtual interfaces based on ipvlan
type IPVtap struct {
IPVlan
}
func (ipvtap *IPVtap) Attrs() *LinkAttrs {
return &ipvtap.LinkAttrs
}
func (ipvtap IPVtap) Type() string {
return "ipvtap"
}
// VlanProtocol type
type VlanProtocol int
@@ -555,6 +571,27 @@ const (
BOND_ARP_VALIDATE_ALL
)
var bondArpValidateToString = map[BondArpValidate]string{
BOND_ARP_VALIDATE_NONE: "none",
BOND_ARP_VALIDATE_ACTIVE: "active",
BOND_ARP_VALIDATE_BACKUP: "backup",
BOND_ARP_VALIDATE_ALL: "none",
}
var StringToBondArpValidateMap = map[string]BondArpValidate{
"none": BOND_ARP_VALIDATE_NONE,
"active": BOND_ARP_VALIDATE_ACTIVE,
"backup": BOND_ARP_VALIDATE_BACKUP,
"all": BOND_ARP_VALIDATE_ALL,
}
func (b BondArpValidate) String() string {
s, ok := bondArpValidateToString[b]
if !ok {
return fmt.Sprintf("BondArpValidate(%d)", b)
}
return s
}
// BondPrimaryReselect type
type BondPrimaryReselect int
@@ -565,6 +602,25 @@ const (
BOND_PRIMARY_RESELECT_FAILURE
)
var bondPrimaryReselectToString = map[BondPrimaryReselect]string{
BOND_PRIMARY_RESELECT_ALWAYS: "always",
BOND_PRIMARY_RESELECT_BETTER: "better",
BOND_PRIMARY_RESELECT_FAILURE: "failure",
}
var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{
"always": BOND_PRIMARY_RESELECT_ALWAYS,
"better": BOND_PRIMARY_RESELECT_BETTER,
"failure": BOND_PRIMARY_RESELECT_FAILURE,
}
func (b BondPrimaryReselect) String() string {
s, ok := bondPrimaryReselectToString[b]
if !ok {
return fmt.Sprintf("BondPrimaryReselect(%d)", b)
}
return s
}
// BondArpAllTargets type
type BondArpAllTargets int
@@ -574,6 +630,23 @@ const (
BOND_ARP_ALL_TARGETS_ALL
)
var bondArpAllTargetsToString = map[BondArpAllTargets]string{
BOND_ARP_ALL_TARGETS_ANY: "any",
BOND_ARP_ALL_TARGETS_ALL: "all",
}
var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{
"any": BOND_ARP_ALL_TARGETS_ANY,
"all": BOND_ARP_ALL_TARGETS_ALL,
}
func (b BondArpAllTargets) String() string {
s, ok := bondArpAllTargetsToString[b]
if !ok {
return fmt.Sprintf("BondArpAllTargets(%d)", b)
}
return s
}
// BondFailOverMac type
type BondFailOverMac int
@@ -584,6 +657,25 @@ const (
BOND_FAIL_OVER_MAC_FOLLOW
)
var bondFailOverMacToString = map[BondFailOverMac]string{
BOND_FAIL_OVER_MAC_NONE: "none",
BOND_FAIL_OVER_MAC_ACTIVE: "active",
BOND_FAIL_OVER_MAC_FOLLOW: "follow",
}
var StringToBondFailOverMacMap = map[string]BondFailOverMac{
"none": BOND_FAIL_OVER_MAC_NONE,
"active": BOND_FAIL_OVER_MAC_ACTIVE,
"follow": BOND_FAIL_OVER_MAC_FOLLOW,
}
func (b BondFailOverMac) String() string {
s, ok := bondFailOverMacToString[b]
if !ok {
return fmt.Sprintf("BondFailOverMac(%d)", b)
}
return s
}
// BondXmitHashPolicy type
type BondXmitHashPolicy int
@@ -675,6 +767,25 @@ const (
BOND_AD_SELECT_COUNT
)
var bondAdSelectToString = map[BondAdSelect]string{
BOND_AD_SELECT_STABLE: "stable",
BOND_AD_SELECT_BANDWIDTH: "bandwidth",
BOND_AD_SELECT_COUNT: "count",
}
var StringToBondAdSelectMap = map[string]BondAdSelect{
"stable": BOND_AD_SELECT_STABLE,
"bandwidth": BOND_AD_SELECT_BANDWIDTH,
"count": BOND_AD_SELECT_COUNT,
}
func (b BondAdSelect) String() string {
s, ok := bondAdSelectToString[b]
if !ok {
return fmt.Sprintf("BondAdSelect(%d)", b)
}
return s
}
// BondAdInfo represents ad info for bond
type BondAdInfo struct {
AggregatorId int
@@ -706,7 +817,7 @@ type Bond struct {
AllSlavesActive int
MinLinks int
LpInterval int
PackersPerSlave int
PacketsPerSlave int
LacpRate BondLacpRate
AdSelect BondAdSelect
// looking at iproute tool AdInfo can only be retrived. It can't be set.
@@ -739,7 +850,7 @@ func NewLinkBond(atr LinkAttrs) *Bond {
AllSlavesActive: -1,
MinLinks: -1,
LpInterval: -1,
PackersPerSlave: -1,
PacketsPerSlave: -1,
LacpRate: -1,
AdSelect: -1,
AdActorSysPrio: -1,
@@ -789,8 +900,10 @@ func (bond *Bond) Type() string {
type BondSlaveState uint8
const (
BondStateActive = iota // Link is active.
BondStateBackup // Link is backup.
//BondStateActive Link is active.
BondStateActive BondSlaveState = iota
//BondStateBackup Link is backup.
BondStateBackup
)
func (s BondSlaveState) String() string {
@@ -804,15 +917,19 @@ func (s BondSlaveState) String() string {
}
}
// BondSlaveState represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave
// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave
// attribute, which contains the status of MII link monitoring
type BondSlaveMiiStatus uint8
const (
BondLinkUp = iota // link is up and running.
BondLinkFail // link has just gone down.
BondLinkDown // link has been down for too long time.
BondLinkBack // link is going back.
//BondLinkUp link is up and running.
BondLinkUp BondSlaveMiiStatus = iota
//BondLinkFail link has just gone down.
BondLinkFail
//BondLinkDown link has been down for too long time.
BondLinkDown
//BondLinkBack link is going back.
BondLinkBack
)
func (s BondSlaveMiiStatus) String() string {
@@ -845,6 +962,38 @@ func (b *BondSlave) SlaveType() string {
return "bond"
}
type VrfSlave struct {
Table uint32
}
func (v *VrfSlave) SlaveType() string {
return "vrf"
}
// Geneve devices must specify RemoteIP and ID (VNI) on create
// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223
type Geneve struct {
LinkAttrs
ID uint32 // vni
Remote net.IP
Ttl uint8
Tos uint8
Dport uint16
UdpCsum uint8
UdpZeroCsum6Tx uint8
UdpZeroCsum6Rx uint8
Link uint32
FlowBased bool
}
func (geneve *Geneve) Attrs() *LinkAttrs {
return &geneve.LinkAttrs
}
func (geneve *Geneve) Type() string {
return "geneve"
}
// Gretap devices must specify LocalIP and RemoteIP on create
type Gretap struct {
LinkAttrs
@@ -1068,6 +1217,58 @@ var StringToIPoIBMode = map[string]IPoIBMode{
"connected": IPOIB_MODE_CONNECTED,
}
const (
CAN_STATE_ERROR_ACTIVE = iota
CAN_STATE_ERROR_WARNING
CAN_STATE_ERROR_PASSIVE
CAN_STATE_BUS_OFF
CAN_STATE_STOPPED
CAN_STATE_SLEEPING
)
type Can struct {
LinkAttrs
BitRate uint32
SamplePoint uint32
TimeQuanta uint32
PropagationSegment uint32
PhaseSegment1 uint32
PhaseSegment2 uint32
SyncJumpWidth uint32
BitRatePreScaler uint32
Name string
TimeSegment1Min uint32
TimeSegment1Max uint32
TimeSegment2Min uint32
TimeSegment2Max uint32
SyncJumpWidthMax uint32
BitRatePreScalerMin uint32
BitRatePreScalerMax uint32
BitRatePreScalerInc uint32
ClockFrequency uint32
State uint32
Mask uint32
Flags uint32
TxError uint16
RxError uint16
RestartMs uint32
}
func (can *Can) Attrs() *LinkAttrs {
return &can.LinkAttrs
}
func (can *Can) Type() string {
return "can"
}
type IPoIB struct {
LinkAttrs
Pkey uint16
@@ -1083,11 +1284,27 @@ func (ipoib *IPoIB) Type() string {
return "ipoib"
}
type BareUDP struct {
LinkAttrs
Port uint16
EtherType uint16
SrcPortMin uint16
MultiProto bool
}
func (bareudp *BareUDP) Attrs() *LinkAttrs {
return &bareudp.LinkAttrs
}
func (bareudp *BareUDP) Type() string {
return "bareudp"
}
// iproute2 supported devices;
// vlan | veth | vcan | dummy | ifb | macvlan | macvtap |
// bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan |
// gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon |
// bond_slave | ipvlan | xfrm
// bond_slave | ipvlan | xfrm | bareudp
// LinkNotFoundError wraps the various not found errors when
// getting/reading links. This is intended for better error

View File

@@ -34,14 +34,27 @@ const (
TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI
)
var StringToTuntapModeMap = map[string]TuntapMode{
"tun": TUNTAP_MODE_TUN,
"tap": TUNTAP_MODE_TAP,
}
func (ttm TuntapMode) String() string {
switch ttm {
case TUNTAP_MODE_TUN:
return "tun"
case TUNTAP_MODE_TAP:
return "tap"
}
return "unknown"
}
const (
VF_LINK_STATE_AUTO uint32 = 0
VF_LINK_STATE_ENABLE uint32 = 1
VF_LINK_STATE_DISABLE uint32 = 2
)
var lookupByDump = false
var macvlanModes = [...]uint32{
0,
nl.MACVLAN_MODE_PRIVATE,
@@ -138,7 +151,6 @@ func (h *Handle) LinkSetAllmulticastOn(link Link) error {
msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
msg.Change = unix.IFF_ALLMULTI
msg.Flags = unix.IFF_ALLMULTI
msg.Index = int32(base.Index)
req.AddData(msg)
@@ -168,6 +180,51 @@ func (h *Handle) LinkSetAllmulticastOff(link Link) error {
return err
}
// LinkSetMulticastOn enables the reception of multicast packets for the link device.
// Equivalent to: `ip link set $link multicast on`
func LinkSetMulticastOn(link Link) error {
return pkgHandle.LinkSetMulticastOn(link)
}
// LinkSetMulticastOn enables the reception of multicast packets for the link device.
// Equivalent to: `ip link set $link multicast on`
func (h *Handle) LinkSetMulticastOn(link Link) error {
base := link.Attrs()
h.ensureIndex(base)
req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
msg.Change = unix.IFF_MULTICAST
msg.Flags = unix.IFF_MULTICAST
msg.Index = int32(base.Index)
req.AddData(msg)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
// LinkSetAllmulticastOff disables the reception of multicast packets for the link device.
// Equivalent to: `ip link set $link multicast off`
func LinkSetMulticastOff(link Link) error {
return pkgHandle.LinkSetMulticastOff(link)
}
// LinkSetAllmulticastOff disables the reception of multicast packets for the link device.
// Equivalent to: `ip link set $link multicast off`
func (h *Handle) LinkSetMulticastOff(link Link) error {
base := link.Attrs()
h.ensureIndex(base)
req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK)
msg := nl.NewIfInfomsg(unix.AF_UNSPEC)
msg.Change = unix.IFF_MULTICAST
msg.Index = int32(base.Index)
req.AddData(msg)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error {
return pkgHandle.MacvlanMACAddrAdd(link, addr)
}
@@ -532,13 +589,13 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error {
req.AddData(msg)
data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil)
info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
info := data.AddRtAttr(nl.IFLA_VF_INFO, nil)
vfmsg := nl.VfVlan{
Vf: uint32(vf),
Vlan: uint32(vlan),
Qos: uint32(qos),
}
nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize())
info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize())
req.AddData(data)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
@@ -1046,8 +1103,8 @@ func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) {
if bond.LpInterval >= 0 {
data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval)))
}
if bond.PackersPerSlave >= 0 {
data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave)))
if bond.PacketsPerSlave >= 0 {
data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave)))
}
if bond.LacpRate >= 0 {
data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate)))
@@ -1203,9 +1260,26 @@ func (h *Handle) linkModify(link Link, flags int) error {
}
control := func(file *os.File, f func(fd uintptr)) error {
name := file.Name()
conn, err := file.SyscallConn()
if err != nil {
return fmt.Errorf("SyscallConn() failed on %s: %v", name, err)
}
if err := conn.Control(f); err != nil {
return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err)
}
return nil
}
// only persist interface if NonPersist is NOT set
if !tuntap.NonPersist {
_, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1)
var errno syscall.Errno
if err := control(fds[0], func(fd uintptr) {
_, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1)
}); err != nil {
return err
}
if errno != 0 {
cleanupFds(fds)
return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno)
@@ -1222,7 +1296,10 @@ func (h *Handle) linkModify(link Link, flags int) error {
// un-persist (e.g. allow the interface to be removed) the tuntap
// should not hurt if not set prior, condition might be not needed
if !tuntap.NonPersist {
_, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0)
// ignore error
_ = control(fds[0], func(fd uintptr) {
_, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0)
})
}
cleanupFds(fds)
return err
@@ -1394,6 +1471,10 @@ func (h *Handle) linkModify(link Link, flags int) error {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag)))
case *IPVtap:
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode)))
data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag)))
case *Macvlan:
if link.Mode != MACVLAN_MODE_DEFAULT {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
@@ -1404,6 +1485,8 @@ func (h *Handle) linkModify(link Link, flags int) error {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode]))
}
case *Geneve:
addGeneveAttrs(link, linkInfo)
case *Gretap:
addGretapAttrs(link, linkInfo)
case *Iptun:
@@ -1426,6 +1509,8 @@ func (h *Handle) linkModify(link Link, flags int) error {
addXfrmiAttrs(link, linkInfo)
case *IPoIB:
addIPoIBAttrs(link, linkInfo)
case *BareUDP:
addBareUDPAttrs(link, linkInfo)
}
req.AddData(linkInfo)
@@ -1607,7 +1692,7 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) {
}
}
// linkDeserialize deserializes a raw message received from netlink into
// LinkDeserialize deserializes a raw message received from netlink into
// a link object.
func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
msg := nl.DeserializeIfInfomsg(m)
@@ -1625,6 +1710,13 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
if msg.Flags&unix.IFF_PROMISC != 0 {
base.Promisc = 1
}
if msg.Flags&unix.IFF_ALLMULTI != 0 {
base.Allmulti = 1
}
if msg.Flags&unix.IFF_MULTICAST != 0 {
base.Multi = 1
}
var (
link Link
stats32 *LinkStatistics32
@@ -1663,10 +1755,14 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
link = &Bond{}
case "ipvlan":
link = &IPVlan{}
case "ipvtap":
link = &IPVtap{}
case "macvlan":
link = &Macvlan{}
case "macvtap":
link = &Macvtap{}
case "geneve":
link = &Geneve{}
case "gretap":
link = &Gretap{}
case "ip6gretap":
@@ -1693,6 +1789,10 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
link = &Tuntap{}
case "ipoib":
link = &IPoIB{}
case "can":
link = &Can{}
case "bareudp":
link = &BareUDP{}
default:
link = &GenericLink{LinkType: linkType}
}
@@ -1710,10 +1810,14 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
parseBondData(link, data)
case "ipvlan":
parseIPVlanData(link, data)
case "ipvtap":
parseIPVtapData(link, data)
case "macvlan":
parseMacvlanData(link, data)
case "macvtap":
parseMacvtapData(link, data)
case "geneve":
parseGeneveData(link, data)
case "gretap":
parseGretapData(link, data)
case "ip6gretap":
@@ -1742,13 +1846,21 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
parseTuntapData(link, data)
case "ipoib":
parseIPoIBData(link, data)
case "can":
parseCanData(link, data)
case "bareudp":
parseBareUDPData(link, data)
}
case nl.IFLA_INFO_SLAVE_KIND:
slaveType = string(info.Value[:len(info.Value)-1])
switch slaveType {
case "bond":
linkSlave = &BondSlave{}
case "vrf":
linkSlave = &VrfSlave{}
}
case nl.IFLA_INFO_SLAVE_DATA:
switch slaveType {
case "bond":
@@ -1757,6 +1869,12 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
return nil, err
}
parseBondSlaveData(linkSlave, data)
case "vrf":
data, err := nl.ParseRouteAttr(info.Value)
if err != nil {
return nil, err
}
parseVrfSlaveData(linkSlave, data)
}
}
}
@@ -1810,6 +1928,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) {
}
case unix.IFLA_OPERSTATE:
base.OperState = LinkOperState(uint8(attr.Value[0]))
case unix.IFLA_PHYS_SWITCH_ID:
base.PhysSwitchID = int(native.Uint32(attr.Value[0:4]))
case unix.IFLA_LINK_NETNSID:
base.NetNsID = int(native.Uint32(attr.Value[0:4]))
case unix.IFLA_GSO_MAX_SIZE:
@@ -1998,7 +2118,8 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
msgs, from, err := s.Receive()
if err != nil {
if cberr != nil {
cberr(err)
cberr(fmt.Errorf("Receive failed: %v",
err))
}
return
}
@@ -2013,15 +2134,15 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
continue
}
if m.Header.Type == unix.NLMSG_ERROR {
native := nl.NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
continue
}
if cberr != nil {
cberr(syscall.Errno(-error))
cberr(fmt.Errorf("error message: %v",
syscall.Errno(-error)))
}
return
continue
}
ifmsg := nl.DeserializeIfInfomsg(m.Data)
header := unix.NlMsghdr(m.Header)
@@ -2030,7 +2151,7 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c
if cberr != nil {
cberr(err)
}
return
continue
}
ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link}
}
@@ -2299,7 +2420,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) {
case nl.IFLA_BOND_LP_INTERVAL:
bond.LpInterval = int(native.Uint32(data[i].Value[0:4]))
case nl.IFLA_BOND_PACKETS_PER_SLAVE:
bond.PackersPerSlave = int(native.Uint32(data[i].Value[0:4]))
bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4]))
case nl.IFLA_BOND_AD_LACP_RATE:
bond.LacpRate = BondLacpRate(data[i].Value[0])
case nl.IFLA_BOND_AD_SELECT:
@@ -2379,6 +2500,16 @@ func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) {
}
}
func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) {
vrfSlave := slave.(*VrfSlave)
for i := range data {
switch data[i].Attr.Type {
case nl.IFLA_BOND_SLAVE_STATE:
vrfSlave.Table = native.Uint32(data[i].Value[0:4])
}
}
}
func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
ipv := link.(*IPVlan)
for _, datum := range data {
@@ -2391,6 +2522,18 @@ func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) {
}
}
func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) {
ipv := link.(*IPVtap)
for _, datum := range data {
switch datum.Attr.Type {
case nl.IFLA_IPVLAN_MODE:
ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4]))
case nl.IFLA_IPVLAN_FLAG:
ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4]))
}
}
}
func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) {
macv := link.(*Macvtap)
parseMacvlanData(&macv.Macvlan, data)
@@ -2448,6 +2591,58 @@ func linkFlags(rawFlags uint32) net.Flags {
return f
}
func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
if geneve.FlowBased {
// In flow based mode, no other attributes need to be configured
linkInfo.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, boolAttr(geneve.FlowBased))
return
}
if ip := geneve.Remote; ip != nil {
if ip4 := ip.To4(); ip4 != nil {
data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip.To4())
} else {
data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip))
}
}
if geneve.ID != 0 {
data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID))
}
if geneve.Dport != 0 {
data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport))
}
if geneve.Ttl != 0 {
data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl))
}
if geneve.Tos != 0 {
data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos))
}
}
func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) {
geneve := link.(*Geneve)
for _, datum := range data {
switch datum.Attr.Type {
case nl.IFLA_GENEVE_ID:
geneve.ID = native.Uint32(datum.Value[0:4])
case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6:
geneve.Remote = datum.Value
case nl.IFLA_GENEVE_PORT:
geneve.Dport = ntohs(datum.Value[0:2])
case nl.IFLA_GENEVE_TTL:
geneve.Ttl = uint8(datum.Value[0])
case nl.IFLA_GENEVE_TOS:
geneve.Tos = uint8(datum.Value[0])
}
}
}
func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
@@ -2679,6 +2874,10 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) {
func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) {
iptun := link.(*Iptun)
for _, datum := range data {
// NOTE: same with vxlan, ip tunnel may also has null datum.Value
if len(datum.Value) == 0 {
continue
}
switch datum.Attr.Type {
case nl.IFLA_IPTUN_LOCAL:
iptun.Local = net.IP(datum.Value[0:4])
@@ -3172,9 +3371,86 @@ func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) {
}
}
func parseCanData(link Link, data []syscall.NetlinkRouteAttr) {
can := link.(*Can)
for _, datum := range data {
switch datum.Attr.Type {
case nl.IFLA_CAN_BITTIMING:
can.BitRate = native.Uint32(datum.Value)
can.SamplePoint = native.Uint32(datum.Value[4:])
can.TimeQuanta = native.Uint32(datum.Value[8:])
can.PropagationSegment = native.Uint32(datum.Value[12:])
can.PhaseSegment1 = native.Uint32(datum.Value[16:])
can.PhaseSegment2 = native.Uint32(datum.Value[20:])
can.SyncJumpWidth = native.Uint32(datum.Value[24:])
can.BitRatePreScaler = native.Uint32(datum.Value[28:])
case nl.IFLA_CAN_BITTIMING_CONST:
can.Name = string(datum.Value[:16])
can.TimeSegment1Min = native.Uint32(datum.Value[16:])
can.TimeSegment1Max = native.Uint32(datum.Value[20:])
can.TimeSegment2Min = native.Uint32(datum.Value[24:])
can.TimeSegment2Max = native.Uint32(datum.Value[28:])
can.SyncJumpWidthMax = native.Uint32(datum.Value[32:])
can.BitRatePreScalerMin = native.Uint32(datum.Value[36:])
can.BitRatePreScalerMax = native.Uint32(datum.Value[40:])
can.BitRatePreScalerInc = native.Uint32(datum.Value[44:])
case nl.IFLA_CAN_CLOCK:
can.ClockFrequency = native.Uint32(datum.Value)
case nl.IFLA_CAN_STATE:
can.State = native.Uint32(datum.Value)
case nl.IFLA_CAN_CTRLMODE:
can.Mask = native.Uint32(datum.Value)
can.Flags = native.Uint32(datum.Value[4:])
case nl.IFLA_CAN_BERR_COUNTER:
can.TxError = native.Uint16(datum.Value)
can.RxError = native.Uint16(datum.Value[2:])
case nl.IFLA_CAN_RESTART_MS:
can.RestartMs = native.Uint32(datum.Value)
case nl.IFLA_CAN_DATA_BITTIMING_CONST:
case nl.IFLA_CAN_RESTART:
case nl.IFLA_CAN_DATA_BITTIMING:
case nl.IFLA_CAN_TERMINATION:
case nl.IFLA_CAN_TERMINATION_CONST:
case nl.IFLA_CAN_BITRATE_CONST:
case nl.IFLA_CAN_DATA_BITRATE_CONST:
case nl.IFLA_CAN_BITRATE_MAX:
}
}
}
func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey)))
data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode)))
data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast)))
}
func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) {
data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil)
data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port)))
data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType)))
if bareudp.SrcPortMin != 0 {
data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin))
}
if bareudp.MultiProto {
data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{})
}
}
func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) {
bareudp := link.(*BareUDP)
for _, attr := range data {
switch attr.Attr.Type {
case nl.IFLA_BAREUDP_PORT:
bareudp.Port = binary.BigEndian.Uint16(attr.Value)
case nl.IFLA_BAREUDP_ETHERTYPE:
bareudp.EtherType = binary.BigEndian.Uint16(attr.Value)
case nl.IFLA_BAREUDP_SRCPORT_MIN:
bareudp.SrcPortMin = native.Uint16(attr.Value)
case nl.IFLA_BAREUDP_MULTIPROTO_MODE:
bareudp.MultiProto = true
}
}
}

View File

@@ -42,11 +42,12 @@ const (
// Neighbor Flags
const (
NTF_USE = 0x01
NTF_SELF = 0x02
NTF_MASTER = 0x04
NTF_PROXY = 0x08
NTF_ROUTER = 0x80
NTF_USE = 0x01
NTF_SELF = 0x02
NTF_MASTER = 0x04
NTF_PROXY = 0x08
NTF_EXT_LEARNED = 0x10
NTF_ROUTER = 0x80
)
// Ndmsg is for adding, removing or receiving information about a neighbor table entry
@@ -408,7 +409,6 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <
continue
}
if m.Header.Type == unix.NLMSG_ERROR {
native := nl.NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
continue

View File

@@ -180,14 +180,30 @@ func RouteAdd(route *Route) error {
return ErrNotImplemented
}
func RouteAppend(route *Route) error {
return ErrNotImplemented
}
func RouteDel(route *Route) error {
return ErrNotImplemented
}
func RouteGet(destination net.IP) ([]Route, error) {
return nil, ErrNotImplemented
}
func RouteList(link Link, family int) ([]Route, error) {
return nil, ErrNotImplemented
}
func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
return nil, ErrNotImplemented
}
func RouteReplace(route *Route) error {
return ErrNotImplemented
}
func XfrmPolicyAdd(policy *XfrmPolicy) error {
return ErrNotImplemented
}

View File

@@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) {
rtgen := nl.NewRtGenMsg()
req.AddData(rtgen)
b := make([]byte, 4, 4)
b := make([]byte, 4)
native.PutUint32(b, val)
attr := nl.NewRtAttr(attrType, b)
req.AddData(attr)
@@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error {
rtgen := nl.NewRtGenMsg()
req.AddData(rtgen)
b := make([]byte, 4, 4)
b := make([]byte, 4)
native.PutUint32(b, val)
attr := nl.NewRtAttr(attrType, b)
req.AddData(attr)
b1 := make([]byte, 4, 4)
b1 := make([]byte, 4)
native.PutUint32(b1, newnsid)
attr1 := nl.NewRtAttr(NETNSA_NSID, b1)
req.AddData(attr1)

View File

@@ -44,6 +44,7 @@ const (
NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15)
NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14)
NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4
)
// enum ctattr_type {

View File

@@ -10,16 +10,30 @@ const (
const (
DEVLINK_CMD_GET = 1
DEVLINK_CMD_PORT_GET = 5
DEVLINK_CMD_PORT_SET = 6
DEVLINK_CMD_PORT_NEW = 7
DEVLINK_CMD_PORT_DEL = 8
DEVLINK_CMD_ESWITCH_GET = 29
DEVLINK_CMD_ESWITCH_SET = 30
)
const (
DEVLINK_ATTR_BUS_NAME = 1
DEVLINK_ATTR_DEV_NAME = 2
DEVLINK_ATTR_ESWITCH_MODE = 25
DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26
DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62
DEVLINK_ATTR_BUS_NAME = 1
DEVLINK_ATTR_DEV_NAME = 2
DEVLINK_ATTR_PORT_INDEX = 3
DEVLINK_ATTR_PORT_TYPE = 4
DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6
DEVLINK_ATTR_PORT_NETDEV_NAME = 7
DEVLINK_ATTR_PORT_IBDEV_NAME = 8
DEVLINK_ATTR_ESWITCH_MODE = 25
DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26
DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62
DEVLINK_ATTR_PORT_FLAVOUR = 77
DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127
DEVLINK_ATTR_PORT_FUNCTION = 145
DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150
DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164
)
const (
@@ -38,3 +52,37 @@ const (
DEVLINK_ESWITCH_ENCAP_MODE_NONE = 0
DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1
)
const (
DEVLINK_PORT_FLAVOUR_PHYSICAL = 0
DEVLINK_PORT_FLAVOUR_CPU = 1
DEVLINK_PORT_FLAVOUR_DSA = 2
DEVLINK_PORT_FLAVOUR_PCI_PF = 3
DEVLINK_PORT_FLAVOUR_PCI_VF = 4
DEVLINK_PORT_FLAVOUR_VIRTUAL = 5
DEVLINK_PORT_FLAVOUR_UNUSED = 6
DEVLINK_PORT_FLAVOUR_PCI_SF = 7
)
const (
DEVLINK_PORT_TYPE_NOTSET = 0
DEVLINK_PORT_TYPE_AUTO = 1
DEVLINK_PORT_TYPE_ETH = 2
DEVLINK_PORT_TYPE_IB = 3
)
const (
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1
DEVLINK_PORT_FN_ATTR_STATE = 2
DEVLINK_PORT_FN_ATTR_OPSTATE = 3
)
const (
DEVLINK_PORT_FN_STATE_INACTIVE = 0
DEVLINK_PORT_FN_STATE_ACTIVE = 1
)
const (
DEVLINK_PORT_FN_OPSTATE_DETACHED = 0
DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1
)

View File

@@ -173,6 +173,22 @@ const (
IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE
)
const (
IFLA_GENEVE_UNSPEC = iota
IFLA_GENEVE_ID // vni
IFLA_GENEVE_REMOTE
IFLA_GENEVE_TTL
IFLA_GENEVE_TOS
IFLA_GENEVE_PORT // destination port
IFLA_GENEVE_COLLECT_METADATA
IFLA_GENEVE_REMOTE6
IFLA_GENEVE_UDP_CSUM
IFLA_GENEVE_UDP_ZERO_CSUM6_TX
IFLA_GENEVE_UDP_ZERO_CSUM6_RX
IFLA_GENEVE_LABEL
IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL
)
const (
IFLA_GRE_UNSPEC = iota
IFLA_GRE_LINK
@@ -673,3 +689,32 @@ const (
IFLA_IPOIB_UMCAST
IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST
)
const (
IFLA_CAN_UNSPEC = iota
IFLA_CAN_BITTIMING
IFLA_CAN_BITTIMING_CONST
IFLA_CAN_CLOCK
IFLA_CAN_STATE
IFLA_CAN_CTRLMODE
IFLA_CAN_RESTART_MS
IFLA_CAN_RESTART
IFLA_CAN_BERR_COUNTER
IFLA_CAN_DATA_BITTIMING
IFLA_CAN_DATA_BITTIMING_CONST
IFLA_CAN_TERMINATION
IFLA_CAN_TERMINATION_CONST
IFLA_CAN_BITRATE_CONST
IFLA_CAN_DATA_BITRATE_CONST
IFLA_CAN_BITRATE_MAX
IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX
)
const (
IFLA_BAREUDP_UNSPEC = iota
IFLA_BAREUDP_PORT
IFLA_BAREUDP_ETHERTYPE
IFLA_BAREUDP_SRCPORT_MIN
IFLA_BAREUDP_MULTIPROTO_MODE
IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE
)

View File

@@ -0,0 +1,29 @@
package nl
const (
LWT_BPF_PROG_UNSPEC = iota
LWT_BPF_PROG_FD
LWT_BPF_PROG_NAME
__LWT_BPF_PROG_MAX
)
const (
LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1
)
const (
LWT_BPF_UNSPEC = iota
LWT_BPF_IN
LWT_BPF_OUT
LWT_BPF_XMIT
LWT_BPF_XMIT_HEADROOM
__LWT_BPF_MAX
)
const (
LWT_BPF_MAX = __LWT_BPF_MAX - 1
)
const (
LWT_BPF_MAX_HEADROOM = 256
)

View File

@@ -35,6 +35,9 @@ var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETL
var nextSeqNr uint32
// Default netlink socket timeout, 60s
var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0}
// GetIPFamily returns the family type of a net.IP.
func GetIPFamily(ip net.IP) int {
if len(ip) <= net.IPv4len {
@@ -426,6 +429,14 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro
if err != nil {
return nil, err
}
if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil {
return nil, err
}
if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil {
return nil, err
}
defer s.Close()
} else {
s.Lock()

View File

@@ -3,6 +3,7 @@ package nl
import (
"encoding/binary"
"fmt"
"log"
)
type Attribute struct {
@@ -18,9 +19,20 @@ func ParseAttributes(data []byte) <-chan Attribute {
i := 0
for i+4 < len(data) {
length := int(native.Uint16(data[i : i+2]))
attrType := native.Uint16(data[i+2 : i+4])
if length < 4 {
log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length)
break
}
if len(data) < i+length {
log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i)
break
}
result <- Attribute{
Type: native.Uint16(data[i+2 : i+4]),
Type: attrType,
Value: data[i+4 : i+length],
}
i += rtaAlignOf(length)

View File

@@ -11,6 +11,8 @@ const (
const (
RDMA_NLDEV_CMD_GET = 1
RDMA_NLDEV_CMD_SET = 2
RDMA_NLDEV_CMD_NEWLINK = 3
RDMA_NLDEV_CMD_DELLINK = 4
RDMA_NLDEV_CMD_SYS_GET = 6
RDMA_NLDEV_CMD_SYS_SET = 7
)
@@ -30,6 +32,8 @@ const (
RDMA_NLDEV_ATTR_PORT_STATE = 12
RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13
RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14
RDMA_NLDEV_ATTR_NDEV_NAME = 51
RDMA_NLDEV_ATTR_LINK_TYPE = 65
RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66
RDMA_NLDEV_NET_NS_FD = 68
)

View File

@@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool {
return false
}
for i := range s1.Segments {
if s1.Segments[i].Equal(s2.Segments[i]) != true {
if !s1.Segments[i].Equal(s2.Segments[i]) {
return false
}
}
@@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) {
}
buf = buf[12:]
if len(buf)%16 != 0 {
err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf))
err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf))
return mode, nil, err
}
for len(buf) > 0 {

View File

@@ -1,6 +1,6 @@
package nl
// syscall package lack of rule atributes type.
// syscall package lack of rule attributes type.
// Thus there are defined below
const (
FRA_UNSPEC = iota

View File

@@ -882,6 +882,111 @@ const (
TCA_HFSC_USC
)
const (
TCA_FLOWER_UNSPEC = iota
TCA_FLOWER_CLASSID
TCA_FLOWER_INDEV
TCA_FLOWER_ACT
TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_TYPE /* be16 */
TCA_FLOWER_KEY_IP_PROTO /* u8 */
TCA_FLOWER_KEY_IPV4_SRC /* be32 */
TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */
TCA_FLOWER_KEY_IPV4_DST /* be32 */
TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */
TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC /* be16 */
TCA_FLOWER_KEY_TCP_DST /* be16 */
TCA_FLOWER_KEY_UDP_SRC /* be16 */
TCA_FLOWER_KEY_UDP_DST /* be16 */
TCA_FLOWER_FLAGS
TCA_FLOWER_KEY_VLAN_ID /* be16 */
TCA_FLOWER_KEY_VLAN_PRIO /* u8 */
TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */
TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */
TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */
TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */
TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */
TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */
TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */
TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */
TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */
TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */
TCA_FLOWER_KEY_SCTP_SRC /* be16 */
TCA_FLOWER_KEY_SCTP_DST /* be16 */
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */
TCA_FLOWER_KEY_FLAGS /* be32 */
TCA_FLOWER_KEY_FLAGS_MASK /* be32 */
TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */
TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */
TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */
TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */
TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */
TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */
TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */
TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */
TCA_FLOWER_KEY_ARP_SIP /* be32 */
TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */
TCA_FLOWER_KEY_ARP_TIP /* be32 */
TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */
TCA_FLOWER_KEY_ARP_OP /* u8 */
TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */
TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */
TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */
TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */
TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */
TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */
TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */
TCA_FLOWER_KEY_TCP_FLAGS /* be16 */
TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */
TCA_FLOWER_KEY_IP_TOS /* u8 */
TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */
TCA_FLOWER_KEY_IP_TTL /* u8 */
TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */
TCA_FLOWER_KEY_CVLAN_ID /* be16 */
TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */
TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */
TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */
TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */
TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */
TCA_FLOWER_KEY_ENC_OPTS
TCA_FLOWER_KEY_ENC_OPTS_MASK
__TCA_FLOWER_MAX
)
// struct tc_sfq_qopt {
// unsigned quantum; /* Bytes per round allocated to flow */
// int perturb_period; /* Period of hash perturbation */

View File

@@ -308,13 +308,15 @@ func (qdisc *Fq) Type() string {
// FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme.
type FqCodel struct {
QdiscAttrs
Target uint32
Limit uint32
Interval uint32
ECN uint32
Flows uint32
Quantum uint32
// There are some more attributes here, but support for them seems not ubiquitous
Target uint32
Limit uint32
Interval uint32
ECN uint32
Flows uint32
Quantum uint32
CEThreshold uint32
DropBatchSize uint32
MemoryLimit uint32
}
func (fqcodel *FqCodel) String() string {

View File

@@ -250,7 +250,15 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
if qdisc.Quantum > 0 {
options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum))))
}
if qdisc.CEThreshold > 0 {
options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold))
}
if qdisc.DropBatchSize > 0 {
options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize))
}
if qdisc.MemoryLimit > 0 {
options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit))
}
case *Fq:
options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing))))
@@ -460,7 +468,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error {
}
func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
htb := qdisc.(*Htb)
for _, datum := range data {
switch datum.Attr.Type {
@@ -480,7 +487,6 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
}
func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
fqCodel := qdisc.(*FqCodel)
for _, datum := range data {
@@ -497,6 +503,12 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
fqCodel.Flows = native.Uint32(datum.Value)
case nl.TCA_FQ_CODEL_QUANTUM:
fqCodel.Quantum = native.Uint32(datum.Value)
case nl.TCA_FQ_CODEL_CE_THRESHOLD:
fqCodel.CEThreshold = native.Uint32(datum.Value)
case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE:
fqCodel.DropBatchSize = native.Uint32(datum.Value)
case nl.TCA_FQ_CODEL_MEMORY_LIMIT:
fqCodel.MemoryLimit = native.Uint32(datum.Value)
}
}
return nil
@@ -504,13 +516,11 @@ func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
func parseHfscData(qdisc Qdisc, data []byte) error {
Hfsc := qdisc.(*Hfsc)
native = nl.NativeEndian()
Hfsc.Defcls = native.Uint16(data)
return nil
}
func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
fq := qdisc.(*Fq)
for _, datum := range data {
switch datum.Attr.Type {
@@ -575,7 +585,6 @@ func parseNetemData(qdisc Qdisc, value []byte) error {
}
func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
native = nl.NativeEndian()
tbf := qdisc.(*Tbf)
for _, datum := range data {
switch datum.Attr.Type {

View File

@@ -278,3 +278,54 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error {
return execRdmaSetLink(req)
}
// RdmaLinkDel deletes an rdma link
//
// Similar to: rdma link delete NAME
// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
func RdmaLinkDel(name string) error {
return pkgHandle.RdmaLinkDel(name)
}
// RdmaLinkDel deletes an rdma link.
func (h *Handle) RdmaLinkDel(name string) error {
link, err := h.RdmaLinkByName(name)
if err != nil {
return err
}
proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK)
req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
b := make([]byte, 4)
native.PutUint32(b, link.Attrs.Index)
req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b))
_, err = req.Execute(unix.NETLINK_RDMA, 0)
return err
}
// RdmaLinkAdd adds an rdma link for the specified type to the network device.
// Similar to: rdma link add NAME type TYPE netdev NETDEV
// NAME - specifies the new name of the rdma link to add
// TYPE - specifies which rdma type to use. Link types:
// rxe - Soft RoCE driver
// siw - Soft iWARP driver
// NETDEV - specifies the network device to which the link is bound
//
// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html
func RdmaLinkAdd(linkName, linkType, netdev string) error {
return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev)
}
// RdmaLinkAdd adds an rdma link for the specified type to the network device.
func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error {
proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK)
req := h.newNetlinkRequest(proto, unix.NLM_F_ACK)
req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName)))
req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType)))
req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev)))
_, err := req.Execute(unix.NETLINK_RDMA, 0)
return err
}

View File

@@ -27,6 +27,9 @@ type Encap interface {
Equal(Encap) bool
}
//Protocol describe what was the originator of the route
type RouteProtocol int
// Route represents a netlink route.
type Route struct {
LinkIndex int
@@ -36,8 +39,9 @@ type Route struct {
Src net.IP
Gw net.IP
MultiPath []*NexthopInfo
Protocol int
Protocol RouteProtocol
Priority int
Family int
Table int
Type int
Tos int
@@ -45,6 +49,8 @@ type Route struct {
MPLSDst *int
NewDst Destination
Encap Encap
Via Destination
Realm int
MTU int
Window int
Rtt int
@@ -79,6 +85,9 @@ func (r Route) String() string {
if r.Encap != nil {
elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap))
}
if r.Via != nil {
elems = append(elems, fmt.Sprintf("Via: %s", r.Via))
}
elems = append(elems, fmt.Sprintf("Src: %s", r.Src))
if len(r.MultiPath) > 0 {
elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath))
@@ -87,6 +96,7 @@ func (r Route) String() string {
}
elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags()))
elems = append(elems, fmt.Sprintf("Table: %d", r.Table))
elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm))
return fmt.Sprintf("{%s}", strings.Join(elems, " "))
}
@@ -100,6 +110,7 @@ func (r Route) Equal(x Route) bool {
nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) &&
r.Protocol == x.Protocol &&
r.Priority == x.Priority &&
r.Realm == x.Realm &&
r.Table == x.Table &&
r.Type == x.Type &&
r.Tos == x.Tos &&
@@ -107,6 +118,7 @@ func (r Route) Equal(x Route) bool {
r.Flags == x.Flags &&
(r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) &&
(r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) &&
(r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) &&
(r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap)))
}
@@ -136,6 +148,7 @@ type NexthopInfo struct {
Flags int
NewDst Destination
Encap Encap
Via Destination
}
func (n *NexthopInfo) String() string {
@@ -147,6 +160,9 @@ func (n *NexthopInfo) String() string {
if n.Encap != nil {
elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap))
}
if n.Via != nil {
elems = append(elems, fmt.Sprintf("Via: %s", n.Via))
}
elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1))
elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw))
elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags()))

View File

@@ -1,8 +1,11 @@
package netlink
import (
"bytes"
"encoding/binary"
"fmt"
"net"
"strconv"
"strings"
"syscall"
@@ -21,6 +24,23 @@ const (
SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE
)
func (s Scope) String() string {
switch s {
case SCOPE_UNIVERSE:
return "universe"
case SCOPE_SITE:
return "site"
case SCOPE_LINK:
return "link"
case SCOPE_HOST:
return "host"
case SCOPE_NOWHERE:
return "nowhere"
default:
return "unknown"
}
}
const (
RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota)
RT_FILTER_SCOPE
@@ -36,6 +56,7 @@ const (
RT_FILTER_PRIORITY
RT_FILTER_MARK
RT_FILTER_MASK
RT_FILTER_REALM
)
const (
@@ -131,7 +152,6 @@ func (e *MPLSEncap) Decode(buf []byte) error {
if len(buf) < 4 {
return fmt.Errorf("lack of bytes")
}
native := nl.NativeEndian()
l := native.Uint16(buf)
if len(buf) < int(l) {
return fmt.Errorf("lack of bytes")
@@ -147,7 +167,6 @@ func (e *MPLSEncap) Decode(buf []byte) error {
func (e *MPLSEncap) Encode() ([]byte, error) {
s := nl.EncodeMPLSStack(e.Labels...)
native := nl.NativeEndian()
hdr := make([]byte, 4)
native.PutUint16(hdr, uint16(len(s)+4))
native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST)
@@ -203,7 +222,6 @@ func (e *SEG6Encap) Decode(buf []byte) error {
if len(buf) < 4 {
return fmt.Errorf("lack of bytes")
}
native := nl.NativeEndian()
// Get Length(l) & Type(typ) : 2 + 2 bytes
l := native.Uint16(buf)
if len(buf) < int(l) {
@@ -223,7 +241,6 @@ func (e *SEG6Encap) Decode(buf []byte) error {
}
func (e *SEG6Encap) Encode() ([]byte, error) {
s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments)
native := nl.NativeEndian()
hdr := make([]byte, 4)
native.PutUint16(hdr, uint16(len(s)+4))
native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH)
@@ -233,7 +250,7 @@ func (e *SEG6Encap) String() string {
segs := make([]string, 0, len(e.Segments))
// append segment backwards (from n to 0) since seg#0 is the last segment.
for i := len(e.Segments); i > 0; i-- {
segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
segs = append(segs, e.Segments[i-1].String())
}
str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode),
len(e.Segments), strings.Join(segs, " "))
@@ -284,7 +301,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error {
if err != nil {
return err
}
native := nl.NativeEndian()
for _, attr := range attrs {
switch attr.Attr.Type {
case nl.SEG6_LOCAL_ACTION:
@@ -314,7 +330,6 @@ func (e *SEG6LocalEncap) Decode(buf []byte) error {
}
func (e *SEG6LocalEncap) Encode() ([]byte, error) {
var err error
native := nl.NativeEndian()
res := make([]byte, 8)
native.PutUint16(res, 8) // length
native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION)
@@ -405,7 +420,7 @@ func (e *SEG6LocalEncap) String() string {
segs := make([]string, 0, len(e.Segments))
//append segment backwards (from n to 0) since seg#0 is the last segment.
for i := len(e.Segments); i > 0; i-- {
segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1]))
segs = append(segs, e.Segments[i-1].String())
}
strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " ")))
}
@@ -446,6 +461,207 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool {
return true
}
// Encap BPF definitions
type bpfObj struct {
progFd int
progName string
}
type BpfEncap struct {
progs [nl.LWT_BPF_MAX]bpfObj
headroom int
}
// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf
// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should
// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT).
func (e *BpfEncap) SetProg(mode, progFd int, progName string) error {
if progFd <= 0 {
return fmt.Errorf("lwt bpf SetProg: invalid fd")
}
if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM {
return fmt.Errorf("lwt bpf SetProg:invalid mode")
}
e.progs[mode].progFd = progFd
e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd)
return nil
}
// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP.
// maximum headroom is LWT_BPF_MAX_HEADROOM
func (e *BpfEncap) SetXmitHeadroom(headroom int) error {
if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 {
return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM)
}
e.headroom = headroom
return nil
}
func (e *BpfEncap) Type() int {
return nl.LWTUNNEL_ENCAP_BPF
}
func (e *BpfEncap) Decode(buf []byte) error {
if len(buf) < 4 {
return fmt.Errorf("lwt bpf decode: lack of bytes")
}
native := nl.NativeEndian()
attrs, err := nl.ParseRouteAttr(buf)
if err != nil {
return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err)
}
for _, attr := range attrs {
if int(attr.Attr.Type) < 1 {
// nl.LWT_BPF_UNSPEC
continue
}
if int(attr.Attr.Type) > nl.LWT_BPF_MAX {
return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type)
}
switch int(attr.Attr.Type) {
case nl.LWT_BPF_MAX_HEADROOM:
e.headroom = int(native.Uint32(attr.Value))
default:
bpfO := bpfObj{}
parsedAttrs, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return fmt.Errorf("lwt bpf decode: failed parsing route attribute")
}
for _, parsedAttr := range parsedAttrs {
switch int(parsedAttr.Attr.Type) {
case nl.LWT_BPF_PROG_FD:
bpfO.progFd = int(native.Uint32(parsedAttr.Value))
case nl.LWT_BPF_PROG_NAME:
bpfO.progName = string(parsedAttr.Value)
default:
return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len)
}
}
e.progs[attr.Attr.Type] = bpfO
}
}
return nil
}
func (e *BpfEncap) Encode() ([]byte, error) {
buf := make([]byte, 0)
native = nl.NativeEndian()
for index, attr := range e.progs {
nlMsg := nl.NewRtAttr(index, []byte{})
if attr.progFd != 0 {
nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd)))
}
if attr.progName != "" {
nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName))
}
if nlMsg.Len() > 4 {
buf = append(buf, nlMsg.Serialize()...)
}
}
if len(buf) <= 4 {
return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer")
}
if e.headroom > 0 {
hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom)))
buf = append(buf, hRoom.Serialize()...)
}
return buf, nil
}
func (e *BpfEncap) String() string {
progs := make([]string, 0)
for index, obj := range e.progs {
empty := bpfObj{}
switch index {
case nl.LWT_BPF_IN:
if obj != empty {
progs = append(progs, fmt.Sprintf("in: %s", obj.progName))
}
case nl.LWT_BPF_OUT:
if obj != empty {
progs = append(progs, fmt.Sprintf("out: %s", obj.progName))
}
case nl.LWT_BPF_XMIT:
if obj != empty {
progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName))
}
}
}
if e.headroom > 0 {
progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom))
}
return strings.Join(progs, " ")
}
func (e *BpfEncap) Equal(x Encap) bool {
o, ok := x.(*BpfEncap)
if !ok {
return false
}
if e.headroom != o.headroom {
return false
}
for i := range o.progs {
if o.progs[i] != e.progs[i] {
return false
}
}
return true
}
type Via struct {
AddrFamily int
Addr net.IP
}
func (v *Via) Equal(x Destination) bool {
o, ok := x.(*Via)
if !ok {
return false
}
if v.AddrFamily == x.Family() && v.Addr.Equal(o.Addr) {
return true
}
return false
}
func (v *Via) String() string {
return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String())
}
func (v *Via) Family() int {
return v.AddrFamily
}
func (v *Via) Encode() ([]byte, error) {
buf := &bytes.Buffer{}
err := binary.Write(buf, native, uint16(v.AddrFamily))
if err != nil {
return nil, err
}
err = binary.Write(buf, native, v.Addr)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
func (v *Via) Decode(b []byte) error {
if len(b) < 6 {
return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b))
}
v.AddrFamily = int(native.Uint16(b[0:2]))
if v.AddrFamily == nl.FAMILY_V4 {
v.Addr = net.IP(b[2:6])
return nil
} else if v.AddrFamily == nl.FAMILY_V6 {
if len(b) < 18 {
return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b))
}
v.Addr = net.IP(b[2:])
return nil
}
return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily)
}
// RouteAdd will add a route to the system.
// Equivalent to: `ip route add $route`
func RouteAdd(route *Route) error {
@@ -460,6 +676,32 @@ func (h *Handle) RouteAdd(route *Route) error {
return h.routeHandle(route, req, nl.NewRtMsg())
}
// RouteAppend will append a route to the system.
// Equivalent to: `ip route append $route`
func RouteAppend(route *Route) error {
return pkgHandle.RouteAppend(route)
}
// RouteAppend will append a route to the system.
// Equivalent to: `ip route append $route`
func (h *Handle) RouteAppend(route *Route) error {
flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK
req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
return h.routeHandle(route, req, nl.NewRtMsg())
}
// RouteAddEcmp will add a route to the system.
func RouteAddEcmp(route *Route) error {
return pkgHandle.RouteAddEcmp(route)
}
// RouteAddEcmp will add a route to the system.
func (h *Handle) RouteAddEcmp(route *Route) error {
flags := unix.NLM_F_CREATE | unix.NLM_F_ACK
req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
return h.routeHandle(route, req, nl.NewRtMsg())
}
// RouteReplace will add a route to the system.
// Equivalent to: `ip route replace $route`
func RouteReplace(route *Route) error {
@@ -533,7 +775,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
if err != nil {
return err
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf))
switch route.Encap.Type() {
case nl.LWTUNNEL_ENCAP_BPF:
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf))
default:
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf))
}
}
if route.Src != nil {
@@ -567,6 +815,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData))
}
if route.Via != nil {
buf, err := route.Via.Encode()
if err != nil {
return fmt.Errorf("failed to encode RTA_VIA: %v", err)
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf))
}
if len(route.MultiPath) > 0 {
buf := []byte{}
for _, nh := range route.MultiPath {
@@ -609,6 +865,13 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
}
children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf))
}
if nh.Via != nil {
buf, err := nh.Via.Encode()
if err != nil {
return err
}
children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf))
}
rtnh.Children = children
buf = append(buf, rtnh.Serialize()...)
}
@@ -631,6 +894,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
native.PutUint32(b, uint32(route.Priority))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b))
}
if route.Realm > 0 {
b := make([]byte, 4)
native.PutUint32(b, uint32(route.Realm))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b))
}
if route.Tos > 0 {
msg.Tos = uint8(route.Tos)
}
@@ -723,10 +991,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
req.AddData(attr)
}
var (
b = make([]byte, 4)
native = nl.NativeEndian()
)
b := make([]byte, 4)
native.PutUint32(b, uint32(route.LinkIndex))
req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
@@ -802,6 +1067,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
continue
case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos:
continue
case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm:
continue
case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex:
continue
case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex:
@@ -834,14 +1101,14 @@ func deserializeRoute(m []byte) (Route, error) {
}
route := Route{
Scope: Scope(msg.Scope),
Protocol: int(msg.Protocol),
Protocol: RouteProtocol(int(msg.Protocol)),
Table: int(msg.Table),
Type: int(msg.Type),
Tos: int(msg.Tos),
Flags: int(msg.Flags),
Family: int(msg.Family),
}
native := nl.NativeEndian()
var encap, encapType syscall.NetlinkRouteAttr
for _, attr := range attrs {
switch attr.Attr.Type {
@@ -868,6 +1135,8 @@ func deserializeRoute(m []byte) (Route, error) {
route.ILinkIndex = int(native.Uint32(attr.Value[0:4]))
case unix.RTA_PRIORITY:
route.Priority = int(native.Uint32(attr.Value[0:4]))
case unix.RTA_FLOW:
route.Realm = int(native.Uint32(attr.Value[0:4]))
case unix.RTA_TABLE:
route.Table = int(native.Uint32(attr.Value[0:4]))
case unix.RTA_MULTIPATH:
@@ -907,6 +1176,12 @@ func deserializeRoute(m []byte) (Route, error) {
encapType = attr
case unix.RTA_ENCAP:
encap = attr
case unix.RTA_VIA:
d := &Via{}
if err := d.Decode(attr.Value); err != nil {
return nil, nil, err
}
info.Via = d
}
}
@@ -944,6 +1219,12 @@ func deserializeRoute(m []byte) (Route, error) {
return route, err
}
route.NewDst = d
case unix.RTA_VIA:
v := &Via{}
if err := v.Decode(attr.Value); err != nil {
return route, err
}
route.Via = v
case unix.RTA_ENCAP_TYPE:
encapType = attr
case unix.RTA_ENCAP:
@@ -1011,6 +1292,11 @@ func deserializeRoute(m []byte) (Route, error) {
if err := e.Decode(encap.Value); err != nil {
return route, err
}
case nl.LWTUNNEL_ENCAP_BPF:
e = &BpfEncap{}
if err := e.Decode(encap.Value); err != nil {
return route, err
}
}
route.Encap = e
}
@@ -1021,7 +1307,10 @@ func deserializeRoute(m []byte) (Route, error) {
// RouteGetOptions contains a set of options to use with
// RouteGetWithOptions
type RouteGetOptions struct {
Iif string
Oif string
VrfName string
SrcAddr net.IP
}
// RouteGetWithOptions gets a route to a specific destination from the host system.
@@ -1053,23 +1342,61 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption
msg := &nl.RtMsg{}
msg.Family = uint8(family)
msg.Dst_len = bitlen
if options != nil && options.SrcAddr != nil {
msg.Src_len = bitlen
}
msg.Flags = unix.RTM_F_LOOKUP_TABLE
req.AddData(msg)
rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData)
req.AddData(rtaDst)
if options != nil {
link, err := LinkByName(options.VrfName)
if err != nil {
return nil, err
}
var (
b = make([]byte, 4)
native = nl.NativeEndian()
)
native.PutUint32(b, uint32(link.Attrs().Index))
if options.VrfName != "" {
link, err := LinkByName(options.VrfName)
if err != nil {
return nil, err
}
b := make([]byte, 4)
native.PutUint32(b, uint32(link.Attrs().Index))
req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
}
if len(options.Iif) > 0 {
link, err := LinkByName(options.Iif)
if err != nil {
return nil, err
}
b := make([]byte, 4)
native.PutUint32(b, uint32(link.Attrs().Index))
req.AddData(nl.NewRtAttr(unix.RTA_IIF, b))
}
if len(options.Oif) > 0 {
link, err := LinkByName(options.Oif)
if err != nil {
return nil, err
}
b := make([]byte, 4)
native.PutUint32(b, uint32(link.Attrs().Index))
req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
}
if options.SrcAddr != nil {
var srcAddr []byte
if family == FAMILY_V4 {
srcAddr = options.SrcAddr.To4()
} else {
srcAddr = options.SrcAddr.To16()
}
req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr))
}
}
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
@@ -1151,7 +1478,8 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
msgs, from, err := s.Receive()
if err != nil {
if cberr != nil {
cberr(err)
cberr(fmt.Errorf("Receive failed: %v",
err))
}
return
}
@@ -1166,22 +1494,22 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
continue
}
if m.Header.Type == unix.NLMSG_ERROR {
native := nl.NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
continue
}
if cberr != nil {
cberr(syscall.Errno(-error))
cberr(fmt.Errorf("error message: %v",
syscall.Errno(-error)))
}
return
continue
}
route, err := deserializeRoute(m.Data)
if err != nil {
if cberr != nil {
cberr(err)
}
return
continue
}
ch <- RouteUpdate{Type: m.Header.Type, Route: route}
}
@@ -1190,3 +1518,54 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
return nil
}
func (p RouteProtocol) String() string {
switch int(p) {
case unix.RTPROT_BABEL:
return "babel"
case unix.RTPROT_BGP:
return "bgp"
case unix.RTPROT_BIRD:
return "bird"
case unix.RTPROT_BOOT:
return "boot"
case unix.RTPROT_DHCP:
return "dhcp"
case unix.RTPROT_DNROUTED:
return "dnrouted"
case unix.RTPROT_EIGRP:
return "eigrp"
case unix.RTPROT_GATED:
return "gated"
case unix.RTPROT_ISIS:
return "isis"
//case unix.RTPROT_KEEPALIVED:
// return "keepalived"
case unix.RTPROT_KERNEL:
return "kernel"
case unix.RTPROT_MROUTED:
return "mrouted"
case unix.RTPROT_MRT:
return "mrt"
case unix.RTPROT_NTK:
return "ntk"
case unix.RTPROT_OSPF:
return "ospf"
case unix.RTPROT_RA:
return "ra"
case unix.RTPROT_REDIRECT:
return "redirect"
case unix.RTPROT_RIP:
return "rip"
case unix.RTPROT_STATIC:
return "static"
case unix.RTPROT_UNSPEC:
return "unspec"
case unix.RTPROT_XORP:
return "xorp"
case unix.RTPROT_ZEBRA:
return "zebra"
default:
return strconv.Itoa(int(p))
}
}

View File

@@ -2,6 +2,8 @@
package netlink
import "strconv"
func (r *Route) ListFlags() []string {
return []string{}
}
@@ -9,3 +11,11 @@ func (r *Route) ListFlags() []string {
func (n *NexthopInfo) ListFlags() []string {
return []string{}
}
func (s Scope) String() string {
return "unknown"
}
func (p RouteProtocol) String() string {
return strconv.Itoa(int(p))
}

View File

@@ -28,7 +28,18 @@ type Rule struct {
}
func (r Rule) String() string {
return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table)
from := "all"
if r.Src != nil && r.Src.String() != "<nil>" {
from = r.Src.String()
}
to := "all"
if r.Dst != nil && r.Dst.String() != "<nil>" {
to = r.Dst.String()
}
return fmt.Sprintf("ip rule %d: from %s to %s table %d",
r.Priority, from, to, r.Table)
}
// NewRule return empty rules.

View File

@@ -97,8 +97,6 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
req.AddData(rtAttrs[i])
}
native := nl.NativeEndian()
if rule.Priority >= 0 {
b := make([]byte, 4)
native.PutUint32(b, uint32(rule.Priority))
@@ -199,7 +197,6 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
return nil, err
}
native := nl.NativeEndian()
var res = make([]Rule, 0)
for i := range msgs {
msg := nl.DeserializeRtMsg(msgs[i])
@@ -232,7 +229,7 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) (
case nl.FRA_FWMASK:
rule.Mask = int(native.Uint32(attrs[j].Value[0:4]))
case nl.FRA_TUN_ID:
rule.TunID = uint(native.Uint64(attrs[j].Value[0:4]))
rule.TunID = uint(native.Uint64(attrs[j].Value[0:8]))
case nl.FRA_IIFNAME:
rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1])
case nl.FRA_OIFNAME:

View File

@@ -172,35 +172,78 @@ func SocketGet(local, remote net.Addr) (*Socket, error) {
return sock, nil
}
// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type.
// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info.
func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) {
s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
var result []*InetDiagTCPInfoResp
err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error {
sockInfo := &Socket{}
if err := sockInfo.deserialize(m.Data); err != nil {
return err
}
attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:])
if err != nil {
return err
}
res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo)
if err != nil {
return err
}
result = append(result, res)
return nil
})
if err != nil {
return nil, err
}
return result, nil
}
// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket.
func SocketDiagTCP(family uint8) ([]*Socket, error) {
var result []*Socket
err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error {
sockInfo := &Socket{}
if err := sockInfo.deserialize(m.Data); err != nil {
return err
}
result = append(result, sockInfo)
return nil
})
if err != nil {
return nil, err
}
return result, nil
}
// socketDiagTCPExecutor requests INET_DIAG_INFO for TCP protocol for specified family type.
func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error {
s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
if err != nil {
return err
}
defer s.Close()
req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
req.AddData(&socketRequest{
Family: family,
Protocol: unix.IPPROTO_TCP,
Ext: INET_DIAG_INFO,
Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)),
States: uint32(0xfff), // All TCP states
})
s.Send(req)
var result []*InetDiagTCPInfoResp
loop:
for {
msgs, from, err := s.Receive()
if err != nil {
return nil, err
return err
}
if from.Pid != nl.PidKernel {
return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
}
if len(msgs) == 0 {
return nil, errors.New("no message nor error from netlink")
return errors.New("no message nor error from netlink")
}
for _, m := range msgs {
@@ -208,31 +251,41 @@ loop:
case unix.NLMSG_DONE:
break loop
case unix.NLMSG_ERROR:
native := nl.NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
return nil, syscall.Errno(-error)
return syscall.Errno(-error)
}
sockInfo := &Socket{}
if err := sockInfo.deserialize(m.Data); err != nil {
return nil, err
if err := receiver(m); err != nil {
return err
}
attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:])
if err != nil {
return nil, err
}
var tcpInfo *TCPInfo
for _, a := range attrs {
if a.Attr.Type == INET_DIAG_INFO {
tcpInfo = &TCPInfo{}
if err := tcpInfo.deserialize(a.Value); err != nil {
return nil, err
}
break
}
}
r := &InetDiagTCPInfoResp{InetDiagMsg: sockInfo, TCPInfo: tcpInfo}
result = append(result, r)
}
}
return result, nil
return nil
}
func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) {
var tcpInfo *TCPInfo
var tcpBBRInfo *TCPBBRInfo
for _, a := range attrs {
if a.Attr.Type == INET_DIAG_INFO {
tcpInfo = &TCPInfo{}
if err := tcpInfo.deserialize(a.Value); err != nil {
return nil, err
}
continue
}
if a.Attr.Type == INET_DIAG_BBRINFO {
tcpBBRInfo = &TCPBBRInfo{}
if err := tcpBBRInfo.deserialize(a.Value); err != nil {
return nil, err
}
continue
}
}
return &InetDiagTCPInfoResp{
InetDiagMsg: sockInfo,
TCPInfo: tcpInfo,
TCPBBRInfo: tcpBBRInfo,
}, nil
}

View File

@@ -16,3 +16,69 @@ const (
TCP_NEW_SYN_REC
TCP_MAX_STATES
)
type TCPInfo struct {
State uint8
Ca_state uint8
Retransmits uint8
Probes uint8
Backoff uint8
Options uint8
Snd_wscale uint8 // no uint4
Rcv_wscale uint8
Delivery_rate_app_limited uint8
Fastopen_client_fail uint8
Rto uint32
Ato uint32
Snd_mss uint32
Rcv_mss uint32
Unacked uint32
Sacked uint32
Lost uint32
Retrans uint32
Fackets uint32
Last_data_sent uint32
Last_ack_sent uint32
Last_data_recv uint32
Last_ack_recv uint32
Pmtu uint32
Rcv_ssthresh uint32
Rtt uint32
Rttvar uint32
Snd_ssthresh uint32
Snd_cwnd uint32
Advmss uint32
Reordering uint32
Rcv_rtt uint32
Rcv_space uint32
Total_retrans uint32
Pacing_rate uint64
Max_pacing_rate uint64
Bytes_acked uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
Bytes_received uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
Segs_out uint32 /* RFC4898 tcpEStatsPerfSegsOut */
Segs_in uint32 /* RFC4898 tcpEStatsPerfSegsIn */
Notsent_bytes uint32
Min_rtt uint32
Data_segs_in uint32 /* RFC4898 tcpEStatsDataSegsIn */
Data_segs_out uint32 /* RFC4898 tcpEStatsDataSegsOut */
Delivery_rate uint64
Busy_time uint64 /* Time (usec) busy sending data */
Rwnd_limited uint64 /* Time (usec) limited by receive window */
Sndbuf_limited uint64 /* Time (usec) limited by send buffer */
Delivered uint32
Delivered_ce uint32
Bytes_sent uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
Bytes_retrans uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */
Dsack_dups uint32 /* RFC4898 tcpEStatsStackDSACKDups */
Reord_seen uint32 /* reordering events seen */
Rcv_ooopack uint32 /* Out-of-order packets received */
Snd_wnd uint32 /* peer's advertised receive window after * scaling (bytes) */
}
type TCPBBRInfo struct {
BBRBW uint64
BBRMinRTT uint32
BBRPacingGain uint32
BBRCwndGain uint32
}

Some files were not shown because too many files have changed in this diff Show More