Merge pull request #103483 from odinuge/revert-102508-runc-1.0

Revert "Update runc to 1.0.0"
Commit eae87bfe7e, authored by Kubernetes Prow Robot on 2021-07-06 10:42:56 -07:00, committed by GitHub.
146 changed files with 1196 additions and 2702 deletions


@ -1,4 +1,4 @@
= vendor/github.com/bits-and-blooms/bitset licensed under: =
= vendor/github.com/willf/bitset licensed under: =
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
@ -28,4 +28,4 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
= vendor/github.com/bits-and-blooms/bitset/LICENSE 0d2b5d66dabaff0df8e1ffe191a21bd1
= vendor/github.com/willf/bitset/LICENSE 0d2b5d66dabaff0df8e1ffe191a21bd1

go.mod

@ -67,8 +67,8 @@ require (
github.com/onsi/ginkgo v1.14.0
github.com/onsi/gomega v1.10.1
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/runc v1.0.0
github.com/opencontainers/selinux v1.8.2
github.com/opencontainers/runc v1.0.0-rc95
github.com/opencontainers/selinux v1.8.0
github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0
github.com/prometheus/client_golang v1.11.0
@ -179,7 +179,6 @@ replace (
github.com/benbjohnson/clock => github.com/benbjohnson/clock v1.0.3
github.com/beorn7/perks => github.com/beorn7/perks v1.0.1
github.com/bgentry/speakeasy => github.com/bgentry/speakeasy v0.1.0
github.com/bits-and-blooms/bitset => github.com/bits-and-blooms/bitset v1.2.0
github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c
github.com/blang/semver => github.com/blang/semver v3.5.1+incompatible
github.com/boltdb/bolt => github.com/boltdb/bolt v1.3.1
@ -191,7 +190,7 @@ replace (
github.com/chzyer/logex => github.com/chzyer/logex v1.1.10
github.com/chzyer/readline => github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
github.com/chzyer/test => github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1
github.com/cilium/ebpf => github.com/cilium/ebpf v0.6.1
github.com/cilium/ebpf => github.com/cilium/ebpf v0.5.0
github.com/clusterhq/flocker-go => github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313
github.com/cncf/udpa/go => github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403
github.com/cockroachdb/datadriven => github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5
@ -367,9 +366,9 @@ replace (
github.com/onsi/gomega => github.com/onsi/gomega v1.10.1
github.com/opencontainers/go-digest => github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc95
github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.2
github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.0
github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0
github.com/pascaldekloe/goe => github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c
github.com/pelletier/go-toml => github.com/pelletier/go-toml v1.2.0
@ -395,7 +394,7 @@ replace (
github.com/seccomp/libseccomp-golang => github.com/seccomp/libseccomp-golang v0.9.1
github.com/sergi/go-diff => github.com/sergi/go-diff v1.1.0
github.com/shurcooL/sanitized_anchor_name => github.com/shurcooL/sanitized_anchor_name v1.0.0
github.com/sirupsen/logrus => github.com/sirupsen/logrus v1.8.1
github.com/sirupsen/logrus => github.com/sirupsen/logrus v1.7.0
github.com/smartystreets/assertions => github.com/smartystreets/assertions v1.1.0
github.com/smartystreets/goconvey => github.com/smartystreets/goconvey v1.6.4
github.com/soheilhy/cmux => github.com/soheilhy/cmux v0.1.5
@ -417,6 +416,7 @@ replace (
github.com/vishvananda/netlink => github.com/vishvananda/netlink v1.1.0
github.com/vishvananda/netns => github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae
github.com/vmware/govmomi => github.com/vmware/govmomi v0.20.3
github.com/willf/bitset => github.com/willf/bitset v1.1.11
github.com/xiang90/probing => github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2
github.com/xlab/treeprint => github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca
github.com/yuin/goldmark => github.com/yuin/goldmark v1.3.5

go.sum

@ -67,8 +67,6 @@ github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiU
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
@ -86,8 +84,8 @@ github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d8
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.6.1 h1:n6ZUOkSFi6OwcMeTCFaDQx2Onx2rEikQo69315MNbdc=
github.com/cilium/ebpf v0.6.1/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.5.0 h1:E1KshmrMEtkMP2UjlWzfmUV1owWY+BnbL5FxxuatnrU=
github.com/cilium/ebpf v0.5.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313 h1:eIHD9GNM3Hp7kcRW5mvcz7WTR3ETeoYYKwpgA04kaXE=
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313/go.mod h1:P1wt9Z3DP8O6W3rvwCt0REIlshg1InHImaLW0t3ObY0=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
@ -374,12 +372,12 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI=
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/runc v1.0.0 h1:QOhAQAYUlKeofuyeKdR6ITvOnXLPbEAjPMjz9wCUXcU=
github.com/opencontainers/runc v1.0.0/go.mod h1:MU2S3KEB2ZExnhnAQYbwjdYV6HwKtDlNbA2Z2OeNDeA=
github.com/opencontainers/runc v1.0.0-rc95 h1:RMuWVfY3E1ILlVsC3RhIq38n4sJtlOFwU9gfFZSqrd0=
github.com/opencontainers/runc v1.0.0-rc95/go.mod h1:z+bZxa/+Tz/FmYVWkhUajJdzFeOqjc5vrqskhVyHGUM=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc=
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8=
github.com/opencontainers/selinux v1.8.0 h1:+77ba4ar4jsCbL1GLbFL8fFM57w6suPfSS9PDLDY7KM=
github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
@ -421,8 +419,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
@ -457,6 +455,8 @@ github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3C
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vmware/govmomi v0.20.3 h1:gpw/0Ku+6RgF3jsi7fnCLmlcikBHfKBCUcu1qgc16OU=
github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU=
github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca h1:1CFlNzQhALwjS9mBAUkycX616GzgsuYUOCHA5+HSlXI=


@ -33,6 +33,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
"k8s.io/klog/v2"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@ -375,6 +376,15 @@ func getSupportedUnifiedControllers() sets.String {
func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
resources := &libcontainerconfigs.Resources{
Devices: []*libcontainerdevices.Rule{
{
Type: 'a',
Permissions: "rwm",
Allow: true,
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
},
},
SkipDevices: true,
}
if resourceConfig == nil {


@ -38,6 +38,7 @@ import (
utilio "k8s.io/utils/io"
utilpath "k8s.io/utils/path"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
@ -392,6 +393,15 @@ func createManager(containerName string) (cgroups.Manager, error) {
Parent: "/",
Name: containerName,
Resources: &configs.Resources{
Devices: []*libcontainerdevices.Rule{
{
Type: 'a',
Permissions: "rwm",
Allow: true,
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
},
},
SkipDevices: true,
},
}


@ -28,6 +28,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
@ -126,6 +127,15 @@ func createCgroupManager(name string) (cgroups.Manager, error) {
Memory: int64(memoryLimit),
MemorySwap: -1,
SkipDevices: true,
Devices: []*libcontainerdevices.Rule{
{
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
Type: 'a',
Permissions: "rwm",
Allow: true,
},
},
},
}
return cgroupfs.NewManager(cg, nil, false), nil
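The three kubelet cgroup hunks above all reinstate the same explicit allow-all device rule alongside SkipDevices. As a reference only, here is a self-contained sketch of that configuration, built against the vendored runc libcontainer packages named in the imports above; the surrounding main function exists purely for illustration:

```go
package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/devices"
)

func main() {
	// Allow-all rule: type 'a' (all device types), wildcard major/minor,
	// permissions "rwm" (read, write, mknod), mirroring the hunks above.
	res := &configs.Resources{
		SkipDevices: true,
		Devices: []*devices.Rule{
			{
				Type:        'a',
				Major:       devices.Wildcard,
				Minor:       devices.Wildcard,
				Permissions: "rwm",
				Allow:       true,
			},
		},
	}
	fmt.Printf("%d device rule(s), SkipDevices=%v\n", len(res.Devices), res.SkipDevices)
}
```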


@ -383,9 +383,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -23,7 +23,6 @@ require (
github.com/hashicorp/golang-lru v0.5.1
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
go.etcd.io/etcd/api/v3 v3.5.0


@ -377,9 +377,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -372,9 +372,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -480,9 +480,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -373,9 +373,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -324,7 +324,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -380,7 +380,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -336,7 +336,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -370,9 +370,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=


@ -1,3 +0,0 @@
module github.com/bits-and-blooms/bitset
go 1.14


@ -18,23 +18,6 @@ reason about the proposed changes.
## Running the tests
Many of the tests require privileges to set resource limits and load eBPF code.
The easiest way to obtain these is to run the tests with `sudo`.
To test the current package with your local kernel you can simply run:
```
go test -exec sudo ./...
```
To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script.
It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed.
Examples:
```bash
# Run all tests on a 5.4 kernel
./run-tests.sh 5.4
# Run a subset of tests:
./run-tests.sh 5.4 go test ./link
```
The easiest way to obtain these is to run the tests with `sudo`:
sudo go test ./...


@ -1,7 +1,7 @@
# The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached.
# Pin the default clang to a stable version.
CLANG ?= clang-12
CLANG ?= clang-11
CFLAGS := -target bpf -O2 -g -Wall -Werror $(CFLAGS)
# Obtain an absolute path to the directory of the Makefile.
@ -17,7 +17,7 @@ VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)
TARGETS := \
testdata/loader-clang-7 \
testdata/loader-clang-9 \
testdata/loader-$(CLANG) \
testdata/loader-clang-11 \
testdata/invalid_map \
testdata/raw_tracepoint \
testdata/invalid_map_static \
@ -33,7 +33,6 @@ TARGETS := \
docker-all:
docker run --rm --user "${UIDGID}" \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
"${IMAGE}:${VERSION}" \
make all
@ -48,8 +47,6 @@ clean:
-$(RM) internal/btf/testdata/*.elf
all: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS))
ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
testdata/loader-%-el.elf: testdata/loader.c
$* $(CFLAGS) -mlittle-endian -c $< -o $@


@ -57,7 +57,7 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err
return 0, fmt.Errorf("can't unmarshal registers: %s", err)
}
if !bi.OpCode.IsDWordLoad() {
if !bi.OpCode.isDWordLoad() {
return InstructionSize, nil
}
@ -80,7 +80,7 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
return 0, errors.New("invalid opcode")
}
isDWordLoad := ins.OpCode.IsDWordLoad()
isDWordLoad := ins.OpCode.isDWordLoad()
cons := int32(ins.Constant)
if isDWordLoad {
@ -123,7 +123,7 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
//
// Returns an error if the instruction doesn't load a map.
func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.OpCode.IsDWordLoad() {
if !ins.OpCode.isDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
@ -138,19 +138,15 @@ func (ins *Instruction) RewriteMapPtr(fd int) error {
return nil
}
// MapPtr returns the map fd for this instruction.
//
// The result is undefined if the instruction is not a load from a map,
// see IsLoadFromMap.
func (ins *Instruction) MapPtr() int {
return int(int32(uint64(ins.Constant) & math.MaxUint32))
func (ins *Instruction) mapPtr() uint32 {
return uint32(uint64(ins.Constant) & math.MaxUint32)
}
// RewriteMapOffset changes the offset of a direct load from a map.
//
// Returns an error if the instruction is not a direct load.
func (ins *Instruction) RewriteMapOffset(offset uint32) error {
if !ins.OpCode.IsDWordLoad() {
if !ins.OpCode.isDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
}
@ -167,10 +163,10 @@ func (ins *Instruction) mapOffset() uint32 {
return uint32(uint64(ins.Constant) >> 32)
}
// IsLoadFromMap returns true if the instruction loads from a map.
// isLoadFromMap returns true if the instruction loads from a map.
//
// This covers both loading the map pointer and direct map value loads.
func (ins *Instruction) IsLoadFromMap() bool {
func (ins *Instruction) isLoadFromMap() bool {
return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue)
}
@ -181,12 +177,6 @@ func (ins *Instruction) IsFunctionCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall
}
// IsConstantLoad returns true if the instruction loads a constant of the
// given size.
func (ins *Instruction) IsConstantLoad(size Size) bool {
return ins.OpCode == LoadImmOp(size) && ins.Src == R0 && ins.Offset == 0
}
// Format implements fmt.Formatter.
func (ins Instruction) Format(f fmt.State, c rune) {
if c != 'v' {
@ -207,8 +197,8 @@ func (ins Instruction) Format(f fmt.State, c rune) {
return
}
if ins.IsLoadFromMap() {
fd := ins.MapPtr()
if ins.isLoadFromMap() {
fd := int32(ins.mapPtr())
switch ins.Src {
case PseudoMapFD:
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
@ -413,7 +403,7 @@ func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) {
h := sha1.New()
for i, ins := range insns {
if ins.IsLoadFromMap() {
if ins.isLoadFromMap() {
ins.Constant = 0
}
_, err := ins.Marshal(h, bo)


@ -111,7 +111,7 @@ func LoadMapPtr(dst Register, fd int) Instruction {
OpCode: LoadImmOp(DWord),
Dst: dst,
Src: PseudoMapFD,
Constant: int64(uint32(fd)),
Constant: int64(fd),
}
}
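The only change in this hunk is how the map file descriptor is widened to an int64: int64(uint32(fd)) zero-extends the low 32 bits, while int64(fd) sign-extends the value. A standalone illustration of the difference (not part of the vendored code):

```go
package main

import "fmt"

func main() {
	fd := -1 // e.g. a sentinel or invalid file descriptor

	fmt.Println(int64(uint32(fd))) // 4294967295: the 32-bit pattern is zero-extended
	fmt.Println(int64(fd))         // -1: the value is sign-extended
}
```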


@ -69,13 +69,13 @@ const InvalidOpCode OpCode = 0xff
// rawInstructions returns the number of BPF instructions required
// to encode this opcode.
func (op OpCode) rawInstructions() int {
if op.IsDWordLoad() {
if op.isDWordLoad() {
return 2
}
return 1
}
func (op OpCode) IsDWordLoad() bool {
func (op OpCode) isDWordLoad() bool {
return op == LoadImmOp(DWord)
}


@ -3,7 +3,6 @@ package ebpf
import (
"errors"
"fmt"
"io"
"math"
"reflect"
"strings"
@ -90,8 +89,8 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
//
// The constant must be defined like so in the C program:
//
// volatile const type foobar;
// volatile const type foobar = default;
// static volatile const type foobar;
// static volatile const type foobar = default;
//
// Replacement values must be of the same length as the C sizeof(type).
// If necessary, they are marshalled according to the same rules as
@ -270,21 +269,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
}, nil
}
type handleCache struct {
btfHandles map[*btf.Spec]*btf.Handle
btfSpecs map[io.ReaderAt]*btf.Spec
}
type btfHandleCache map[*btf.Spec]*btf.Handle
func newHandleCache() *handleCache {
return &handleCache{
btfHandles: make(map[*btf.Spec]*btf.Handle),
btfSpecs: make(map[io.ReaderAt]*btf.Spec),
}
}
func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
if hc.btfHandles[spec] != nil {
return hc.btfHandles[spec], nil
func (btfs btfHandleCache) load(spec *btf.Spec) (*btf.Handle, error) {
if btfs[spec] != nil {
return btfs[spec], nil
}
handle, err := btf.NewHandle(spec)
@ -292,30 +281,14 @@ func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
return nil, err
}
hc.btfHandles[spec] = handle
btfs[spec] = handle
return handle, nil
}
func (hc handleCache) btfSpec(rd io.ReaderAt) (*btf.Spec, error) {
if hc.btfSpecs[rd] != nil {
return hc.btfSpecs[rd], nil
}
spec, err := btf.LoadSpecFromReader(rd)
if err != nil {
return nil, err
}
hc.btfSpecs[rd] = spec
return spec, nil
}
func (hc handleCache) close() {
for _, handle := range hc.btfHandles {
func (btfs btfHandleCache) close() {
for _, handle := range btfs {
handle.Close()
}
hc.btfHandles = nil
hc.btfSpecs = nil
}
func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
@ -327,12 +300,12 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
var (
maps = make(map[string]*Map)
progs = make(map[string]*Program)
handles = newHandleCache()
btfs = make(btfHandleCache)
skipMapsAndProgs = false
)
cleanup = func() {
handles.close()
btfs.close()
if skipMapsAndProgs {
return
@ -362,7 +335,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
return nil, fmt.Errorf("missing map %s", mapName)
}
m, err := newMapWithOptions(mapSpec, opts.Maps, handles)
m, err := newMapWithOptions(mapSpec, opts.Maps, btfs)
if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err)
}
@ -387,7 +360,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
for i := range progSpec.Instructions {
ins := &progSpec.Instructions[i]
if !ins.IsLoadFromMap() || ins.Reference == "" {
if ins.OpCode != asm.LoadImmOp(asm.DWord) || ins.Reference == "" {
continue
}
@ -399,7 +372,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
m, err := loadMap(ins.Reference)
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
return nil, fmt.Errorf("program %s: %s", progName, err)
}
fd := m.FD()
@ -411,7 +384,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
}
}
prog, err := newProgramWithOptions(progSpec, opts.Programs, handles)
prog, err := newProgramWithOptions(progSpec, opts.Programs, btfs)
if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err)
}
@ -561,7 +534,7 @@ func assignValues(to interface{}, valueOf func(reflect.Type, string) (reflect.Va
}
if err != nil {
return fmt.Errorf("field %s: %w", field.Name, err)
return fmt.Errorf("field %s: %s", field.Name, err)
}
}
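For orientation, the helpers edited in this file (lazyLoadCollection, the handle/BTF caches, assignValues) sit behind cilium/ebpf's public collection-loading API. A rough caller-side sketch, assuming that public API and using an illustrative object-file name:

```go
package main

import (
	"log"

	"github.com/cilium/ebpf"
)

func main() {
	// Parse the ELF into a CollectionSpec (maps and programs, not yet loaded).
	spec, err := ebpf.LoadCollectionSpec("bpf_prog.o") // illustrative path
	if err != nil {
		log.Fatal(err)
	}

	// Instantiating the spec exercises the load path changed in the hunks above.
	coll, err := ebpf.NewCollectionWithOptions(spec, ebpf.CollectionOptions{})
	if err != nil {
		log.Fatal(err)
	}
	defer coll.Close()

	log.Printf("loaded %d maps and %d programs", len(coll.Maps), len(coll.Programs))
}
```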


@ -96,7 +96,7 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
}
btfSpec, err := btf.LoadSpecFromReader(rd)
if err != nil && !errors.Is(err, btf.ErrNotFound) {
if err != nil {
return nil, fmt.Errorf("load BTF: %w", err)
}
@ -159,7 +159,7 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
}
if target.Flags&elf.SHF_STRINGS > 0 {
return nil, fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported)
return nil, fmt.Errorf("section %q: string %q is not stack allocated: %w", section.Name, rel.Name, ErrNotSupported)
}
target.references++
@ -374,25 +374,17 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
}
case dataSection:
var offset uint32
switch typ {
case elf.STT_SECTION:
if bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
}
// This is really a reference to a static symbol, which clang doesn't
// emit a symbol table entry for. Instead it encodes the offset in
// the instruction itself.
offset = uint32(uint64(ins.Constant))
case elf.STT_OBJECT:
if bind != elf.STB_GLOBAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
}
offset = uint32(rel.Value)
default:
return fmt.Errorf("incorrect relocation type %v for direct map load", typ)
}
@ -402,8 +394,10 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
// it's not clear how to encode that into Instruction.
name = target.Name
// The kernel expects the offset in the second basic BPF instruction.
ins.Constant = int64(uint64(offset) << 32)
// For some reason, clang encodes the offset of the symbol its
// section in the first basic BPF instruction, while the kernel
// expects it in the second one.
ins.Constant <<= 32
ins.Src = asm.PseudoMapValue
// Mark the instruction as needing an update when creating the
@ -497,38 +491,33 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
}
mapName := mapSym.Name
if maps[mapName] != nil {
if maps[mapSym.Name] != nil {
return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym)
}
lr := io.LimitReader(r, int64(size))
spec := MapSpec{
Name: SanitizeName(mapName, -1),
Name: SanitizeName(mapSym.Name, -1),
}
switch {
case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
return fmt.Errorf("map %s: missing type", mapName)
return fmt.Errorf("map %v: missing type", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
return fmt.Errorf("map %s: missing key size", mapName)
return fmt.Errorf("map %v: missing key size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
return fmt.Errorf("map %s: missing value size", mapName)
return fmt.Errorf("map %v: missing value size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
return fmt.Errorf("map %s: missing max entries", mapName)
return fmt.Errorf("map %v: missing max entries", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
return fmt.Errorf("map %s: missing flags", mapName)
return fmt.Errorf("map %v: missing flags", mapSym)
}
if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil {
return fmt.Errorf("map %s: unknown and non-zero fields in definition", mapName)
return fmt.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
}
if err := spec.clampPerfEventArraySize(); err != nil {
return fmt.Errorf("map %s: %w", mapName, err)
}
maps[mapName] = &spec
maps[mapSym.Name] = &spec
}
}
@ -576,10 +565,6 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("map %v: %w", name, err)
}
if err := mapSpec.clampPerfEventArraySize(); err != nil {
return fmt.Errorf("map %v: %w", name, err)
}
maps[name] = mapSpec
}
}
@ -862,8 +847,6 @@ func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
"uretprobe/": {Kprobe, AttachNone, 0},
"tracepoint/": {TracePoint, AttachNone, 0},
"raw_tracepoint/": {RawTracepoint, AttachNone, 0},
"raw_tp/": {RawTracepoint, AttachNone, 0},
"tp_btf/": {Tracing, AttachTraceRawTp, 0},
"xdp": {XDP, AttachNone, 0},
"perf_event": {PerfEvent, AttachNone, 0},
"lwt_in": {LWTIn, AttachNone, 0},


@ -35,7 +35,7 @@ type Spec struct {
namedTypes map[string][]namedType
funcInfos map[string]extInfo
lineInfos map[string]extInfo
coreRelos map[string]coreRelos
coreRelos map[string]bpfCoreRelos
byteOrder binary.ByteOrder
}
@ -53,7 +53,7 @@ type btfHeader struct {
// LoadSpecFromReader reads BTF sections from an ELF.
//
// Returns ErrNotFound if the reader contains no BTF.
// Returns a nil Spec and no error if no BTF was present.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd)
if err != nil {
@ -67,7 +67,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
}
if btfSection == nil {
return nil, fmt.Errorf("btf: %w", ErrNotFound)
return nil, nil
}
symbols, err := file.Symbols()
@ -438,13 +438,13 @@ func (s *Spec) Program(name string, length uint64) (*Program, error) {
funcInfos, funcOK := s.funcInfos[name]
lineInfos, lineOK := s.lineInfos[name]
relos, coreOK := s.coreRelos[name]
coreRelos, coreOK := s.coreRelos[name]
if !funcOK && !lineOK && !coreOK {
return nil, fmt.Errorf("no extended BTF info for section %s", name)
}
return &Program{s, length, funcInfos, lineInfos, relos}, nil
return &Program{s, length, funcInfos, lineInfos, coreRelos}, nil
}
// Datasec returns the BTF required to create maps which represent data sections.
@ -491,8 +491,7 @@ func (s *Spec) FindType(name string, typ Type) error {
return fmt.Errorf("type %s: %w", name, ErrNotFound)
}
cpy, _ := copyType(candidate, nil)
value := reflect.Indirect(reflect.ValueOf(cpy))
value := reflect.Indirect(reflect.ValueOf(copyType(candidate)))
reflect.Indirect(reflect.ValueOf(typ)).Set(value)
return nil
}
@ -607,7 +606,7 @@ type Program struct {
spec *Spec
length uint64
funcInfos, lineInfos extInfo
coreRelos coreRelos
coreRelos bpfCoreRelos
}
// ProgramSpec returns the Spec needed for loading function and line infos into the kernel.
@ -666,23 +665,16 @@ func ProgramLineInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
return s.lineInfos.recordSize, bytes, nil
}
// ProgramFixups returns the changes required to adjust the program to the target.
// ProgramRelocations returns the CO-RE relocations required to adjust the
// program to the target.
//
// This is a free function instead of a method to hide it from users
// of package ebpf.
func ProgramFixups(s *Program, target *Spec) (COREFixups, error) {
func ProgramRelocations(s *Program, target *Spec) (map[uint64]Relocation, error) {
if len(s.coreRelos) == 0 {
return nil, nil
}
if target == nil {
var err error
target, err = LoadKernelSpec()
if err != nil {
return nil, err
}
}
return coreRelocate(s.spec, target, s.coreRelos)
}


@ -3,160 +3,43 @@ package btf
import (
"errors"
"fmt"
"math"
"reflect"
"sort"
"strconv"
"strings"
"github.com/cilium/ebpf/asm"
)
// Code in this file is derived from libbpf, which is available under a BSD
// 2-Clause license.
// COREFixup is the result of computing a CO-RE relocation for a target.
type COREFixup struct {
Kind COREKind
Local uint32
Target uint32
Poison bool
// Relocation describes a CO-RE relocation.
type Relocation struct {
Current uint32
New uint32
}
func (f COREFixup) equal(other COREFixup) bool {
return f.Local == other.Local && f.Target == other.Target
func (r Relocation) equal(other Relocation) bool {
return r.Current == other.Current && r.New == other.New
}
func (f COREFixup) String() string {
if f.Poison {
return fmt.Sprintf("%s=poison", f.Kind)
}
return fmt.Sprintf("%s=%d->%d", f.Kind, f.Local, f.Target)
}
func (f COREFixup) apply(ins *asm.Instruction) error {
if f.Poison {
return errors.New("can't poison individual instruction")
}
switch class := ins.OpCode.Class(); class {
case asm.LdXClass, asm.StClass, asm.StXClass:
if want := int16(f.Local); want != ins.Offset {
return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, want)
}
if f.Target > math.MaxInt16 {
return fmt.Errorf("offset %d exceeds MaxInt16", f.Target)
}
ins.Offset = int16(f.Target)
case asm.LdClass:
if !ins.IsConstantLoad(asm.DWord) {
return fmt.Errorf("not a dword-sized immediate load")
}
if want := int64(f.Local); want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want)
}
ins.Constant = int64(f.Target)
case asm.ALUClass:
if ins.OpCode.ALUOp() == asm.Swap {
return fmt.Errorf("relocation against swap")
}
fallthrough
case asm.ALU64Class:
if src := ins.OpCode.Source(); src != asm.ImmSource {
return fmt.Errorf("invalid source %s", src)
}
if want := int64(f.Local); want != ins.Constant {
return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want)
}
if f.Target > math.MaxInt32 {
return fmt.Errorf("immediate %d exceeds MaxInt32", f.Target)
}
ins.Constant = int64(f.Target)
default:
return fmt.Errorf("invalid class %s", class)
}
return nil
}
func (f COREFixup) isNonExistant() bool {
return f.Kind.checksForExistence() && f.Target == 0
}
type COREFixups map[uint64]COREFixup
// Apply a set of CO-RE relocations to a BPF program.
func (fs COREFixups) Apply(insns asm.Instructions) (asm.Instructions, error) {
if len(fs) == 0 {
cpy := make(asm.Instructions, len(insns))
copy(cpy, insns)
return insns, nil
}
cpy := make(asm.Instructions, 0, len(insns))
iter := insns.Iterate()
for iter.Next() {
fixup, ok := fs[iter.Offset.Bytes()]
if !ok {
cpy = append(cpy, *iter.Ins)
continue
}
ins := *iter.Ins
if fixup.Poison {
const badRelo = asm.BuiltinFunc(0xbad2310)
cpy = append(cpy, badRelo.Call())
if ins.OpCode.IsDWordLoad() {
// 64 bit constant loads occupy two raw bpf instructions, so
// we need to add another instruction as padding.
cpy = append(cpy, badRelo.Call())
}
continue
}
if err := fixup.apply(&ins); err != nil {
return nil, fmt.Errorf("instruction %d, offset %d: %s: %w", iter.Index, iter.Offset.Bytes(), fixup.Kind, err)
}
cpy = append(cpy, ins)
}
return cpy, nil
}
// COREKind is the type of CO-RE relocation
type COREKind uint32
// coreReloKind is the type of CO-RE relocation
type coreReloKind uint32
const (
reloFieldByteOffset COREKind = iota /* field byte offset */
reloFieldByteSize /* field size in bytes */
reloFieldExists /* field existence in target kernel */
reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */
reloFieldLShiftU64 /* bitfield-specific left bitshift */
reloFieldRShiftU64 /* bitfield-specific right bitshift */
reloTypeIDLocal /* type ID in local BPF object */
reloTypeIDTarget /* type ID in target kernel */
reloTypeExists /* type existence in target kernel */
reloTypeSize /* type size in bytes */
reloEnumvalExists /* enum value existence in target kernel */
reloEnumvalValue /* enum value integer value */
reloFieldByteOffset coreReloKind = iota /* field byte offset */
reloFieldByteSize /* field size in bytes */
reloFieldExists /* field existence in target kernel */
reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */
reloFieldLShiftU64 /* bitfield-specific left bitshift */
reloFieldRShiftU64 /* bitfield-specific right bitshift */
reloTypeIDLocal /* type ID in local BPF object */
reloTypeIDTarget /* type ID in target kernel */
reloTypeExists /* type existence in target kernel */
reloTypeSize /* type size in bytes */
reloEnumvalExists /* enum value existence in target kernel */
reloEnumvalValue /* enum value integer value */
)
func (k COREKind) String() string {
func (k coreReloKind) String() string {
switch k {
case reloFieldByteOffset:
return "byte_off"
@ -187,249 +70,103 @@ func (k COREKind) String() string {
}
}
func (k COREKind) checksForExistence() bool {
return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists
}
func coreRelocate(local, target *Spec, coreRelos bpfCoreRelos) (map[uint64]Relocation, error) {
if target == nil {
var err error
target, err = loadKernelSpec()
if err != nil {
return nil, err
}
}
func coreRelocate(local, target *Spec, relos coreRelos) (COREFixups, error) {
if local.byteOrder != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder)
}
var ids []TypeID
relosByID := make(map[TypeID]coreRelos)
result := make(COREFixups, len(relos))
for _, relo := range relos {
if relo.kind == reloTypeIDLocal {
// Filtering out reloTypeIDLocal here makes our lives a lot easier
// down the line, since it doesn't have a target at all.
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
result[uint64(relo.insnOff)] = COREFixup{
relo.kind,
uint32(relo.typeID),
uint32(relo.typeID),
false,
}
continue
}
relos, ok := relosByID[relo.typeID]
if !ok {
ids = append(ids, relo.typeID)
}
relosByID[relo.typeID] = append(relos, relo)
}
// Ensure we work on relocations in a deterministic order.
sort.Slice(ids, func(i, j int) bool {
return ids[i] < ids[j]
})
for _, id := range ids {
if int(id) >= len(local.types) {
return nil, fmt.Errorf("invalid type id %d", id)
}
localType := local.types[id]
named, ok := localType.(namedType)
if !ok || named.name() == "" {
return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported)
}
relos := relosByID[id]
targets := target.namedTypes[named.essentialName()]
fixups, err := coreCalculateFixups(localType, targets, relos)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", localType, err)
}
for i, relo := range relos {
result[uint64(relo.insnOff)] = fixups[i]
}
}
return result, nil
}
var errAmbiguousRelocation = errors.New("ambiguous relocation")
var errImpossibleRelocation = errors.New("impossible relocation")
// coreCalculateFixups calculates the fixups for the given relocations using
// the "best" target.
//
// The best target is determined by scoring: the less poisoning we have to do
// the better the target is.
func coreCalculateFixups(local Type, targets []namedType, relos coreRelos) ([]COREFixup, error) {
localID := local.ID()
local, err := copyType(local, skipQualifierAndTypedef)
if err != nil {
return nil, err
}
bestScore := len(relos)
var bestFixups []COREFixup
for i := range targets {
targetID := targets[i].ID()
target, err := copyType(targets[i], skipQualifierAndTypedef)
relocations := make(map[uint64]Relocation, len(coreRelos))
for _, relo := range coreRelos {
accessorStr, err := local.strings.Lookup(relo.AccessStrOff)
if err != nil {
return nil, err
}
score := 0 // lower is better
fixups := make([]COREFixup, 0, len(relos))
for _, relo := range relos {
fixup, err := coreCalculateFixup(local, localID, target, targetID, relo)
if err != nil {
return nil, fmt.Errorf("target %s: %w", target, err)
}
if fixup.Poison || fixup.isNonExistant() {
score++
}
fixups = append(fixups, fixup)
accessor, err := parseCoreAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
if score > bestScore {
// We have a better target already, ignore this one.
if int(relo.TypeID) >= len(local.types) {
return nil, fmt.Errorf("invalid type id %d", relo.TypeID)
}
typ := local.types[relo.TypeID]
if relo.ReloKind == reloTypeIDLocal {
relocations[uint64(relo.InsnOff)] = Relocation{
uint32(typ.ID()),
uint32(typ.ID()),
}
continue
}
if score < bestScore {
// This is the best target yet, use it.
bestScore = score
bestFixups = fixups
continue
named, ok := typ.(namedType)
if !ok || named.name() == "" {
return nil, fmt.Errorf("relocate anonymous type %s: %w", typ.String(), ErrNotSupported)
}
// Some other target has the same score as the current one. Make sure
// the fixups agree with each other.
for i, fixup := range bestFixups {
if !fixup.equal(fixups[i]) {
return nil, fmt.Errorf("%s: multiple types match: %w", fixup.Kind, errAmbiguousRelocation)
}
name := essentialName(named.name())
res, err := coreCalculateRelocation(typ, target.namedTypes[name], relo.ReloKind, accessor)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", name, err)
}
relocations[uint64(relo.InsnOff)] = res
}
if bestFixups == nil {
// Nothing at all matched, probably because there are no suitable
// targets at all. Poison everything!
bestFixups = make([]COREFixup, len(relos))
for i, relo := range relos {
bestFixups[i] = COREFixup{Kind: relo.kind, Poison: true}
}
}
return bestFixups, nil
return relocations, nil
}
// coreCalculateFixup calculates the fixup for a single local type, target type
// and relocation.
func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID, relo coreRelo) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
return COREFixup{relo.kind, local, target, false}, nil
}
poison := func() (COREFixup, error) {
if relo.kind.checksForExistence() {
return fixup(1, 0)
}
return COREFixup{relo.kind, 0, 0, true}, nil
}
zero := COREFixup{}
switch relo.kind {
case reloTypeIDTarget, reloTypeSize, reloTypeExists:
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
err := coreAreTypesCompatible(local, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
switch relo.kind {
case reloTypeExists:
return fixup(1, 1)
var errAmbiguousRelocation = errors.New("ambiguous relocation")
func coreCalculateRelocation(local Type, targets []namedType, kind coreReloKind, localAccessor coreAccessor) (Relocation, error) {
var relos []Relocation
var matches []Type
for _, target := range targets {
switch kind {
case reloTypeIDTarget:
return fixup(uint32(localID), uint32(targetID))
case reloTypeSize:
localSize, err := Sizeof(local)
if err != nil {
return zero, err
if localAccessor[0] != 0 {
return Relocation{}, fmt.Errorf("%s: unexpected non-zero accessor", kind)
}
targetSize, err := Sizeof(target)
if err != nil {
return zero, err
if compat, err := coreAreTypesCompatible(local, target); err != nil {
return Relocation{}, fmt.Errorf("%s: %s", kind, err)
} else if !compat {
continue
}
return fixup(uint32(localSize), uint32(targetSize))
relos = append(relos, Relocation{uint32(target.ID()), uint32(target.ID())})
default:
return Relocation{}, fmt.Errorf("relocation %s: %w", kind, ErrNotSupported)
}
matches = append(matches, target)
}
case reloEnumvalValue, reloEnumvalExists:
localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
if len(relos) == 0 {
// TODO: Add switch for existence checks like reloEnumvalExists here.
switch relo.kind {
case reloEnumvalExists:
return fixup(1, 1)
case reloEnumvalValue:
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists:
if _, ok := target.(*Fwd); ok {
// We can't relocate fields using a forward declaration, so
// skip it. If a non-forward declaration is present in the BTF
// we'll find it in one of the other iterations.
return poison()
}
localField, targetField, err := coreFindField(local, relo.accessor, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("target %s: %w", target, err)
}
switch relo.kind {
case reloFieldExists:
return fixup(1, 1)
case reloFieldByteOffset:
return fixup(localField.offset/8, targetField.offset/8)
case reloFieldByteSize:
localSize, err := Sizeof(localField.Type)
if err != nil {
return zero, err
}
targetSize, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
return fixup(uint32(localSize), uint32(targetSize))
// TODO: This might have to be poisoned.
return Relocation{}, fmt.Errorf("no relocation found, tried %v", targets)
}
relo := relos[0]
for _, altRelo := range relos[1:] {
if !altRelo.equal(relo) {
return Relocation{}, fmt.Errorf("multiple types %v match: %w", matches, errAmbiguousRelocation)
}
}
return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported)
return relo, nil
}
/* coreAccessor contains a path through a struct. It contains at least one index.
@ -482,240 +219,6 @@ func parseCoreAccessor(accessor string) (coreAccessor, error) {
return result, nil
}
func (ca coreAccessor) String() string {
strs := make([]string, 0, len(ca))
for _, i := range ca {
strs = append(strs, strconv.Itoa(i))
}
return strings.Join(strs, ":")
}
func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
e, ok := t.(*Enum)
if !ok {
return nil, fmt.Errorf("not an enum: %s", t)
}
if len(ca) > 1 {
return nil, fmt.Errorf("invalid accessor %s for enum", ca)
}
i := ca[0]
if i >= len(e.Values) {
return nil, fmt.Errorf("invalid index %d for %s", i, e)
}
return &e.Values[i], nil
}
type coreField struct {
Type Type
offset uint32
}
func adjustOffset(base uint32, t Type, n int) (uint32, error) {
size, err := Sizeof(t)
if err != nil {
return 0, err
}
return base + (uint32(n) * uint32(size) * 8), nil
}
// coreFindField descends into the local type using the accessor and tries to
// find an equivalent field in target at each step.
//
// Returns the field and the offset of the field from the start of
// target in bits.
func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreField, _ error) {
// The first index is used to offset a pointer of the base type like
// when accessing an array.
localOffset, err := adjustOffset(0, local, localAcc[0])
if err != nil {
return coreField{}, coreField{}, err
}
targetOffset, err := adjustOffset(0, target, localAcc[0])
if err != nil {
return coreField{}, coreField{}, err
}
if err := coreAreMembersCompatible(local, target); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
var localMaybeFlex, targetMaybeFlex bool
for _, acc := range localAcc[1:] {
switch localType := local.(type) {
case composite:
// For composite types acc is used to find the field in the local type,
// and then we try to find a field in target with the same name.
localMembers := localType.members()
if acc >= len(localMembers) {
return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, local)
}
localMember := localMembers[acc]
if localMember.Name == "" {
_, ok := localMember.Type.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported)
}
// This is an anonymous struct or union, ignore it.
local = localMember.Type
localOffset += localMember.Offset
localMaybeFlex = false
continue
}
targetType, ok := target.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation)
}
targetMember, last, err := coreFindMember(targetType, localMember.Name)
if err != nil {
return coreField{}, coreField{}, err
}
if targetMember.BitfieldSize > 0 {
return coreField{}, coreField{}, fmt.Errorf("field %q is a bitfield: %w", targetMember.Name, ErrNotSupported)
}
local = localMember.Type
localMaybeFlex = acc == len(localMembers)-1
localOffset += localMember.Offset
target = targetMember.Type
targetMaybeFlex = last
targetOffset += targetMember.Offset
case *Array:
// For arrays, acc is the index in the target.
targetType, ok := target.(*Array)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation)
}
if localType.Nelems == 0 && !localMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array")
}
if targetType.Nelems == 0 && !targetMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array")
}
if localType.Nelems > 0 && acc >= int(localType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc)
}
if targetType.Nelems > 0 && acc >= int(targetType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation)
}
local = localType.Type
localMaybeFlex = false
localOffset, err = adjustOffset(localOffset, local, acc)
if err != nil {
return coreField{}, coreField{}, err
}
target = targetType.Type
targetMaybeFlex = false
targetOffset, err = adjustOffset(targetOffset, target, acc)
if err != nil {
return coreField{}, coreField{}, err
}
default:
return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported)
}
if err := coreAreMembersCompatible(local, target); err != nil {
return coreField{}, coreField{}, err
}
}
return coreField{local, localOffset}, coreField{target, targetOffset}, nil
}
// coreFindMember finds a member in a composite type while handling anonymous
// structs and unions.
func coreFindMember(typ composite, name Name) (Member, bool, error) {
if name == "" {
return Member{}, false, errors.New("can't search for anonymous member")
}
type offsetTarget struct {
composite
offset uint32
}
targets := []offsetTarget{{typ, 0}}
visited := make(map[composite]bool)
for i := 0; i < len(targets); i++ {
target := targets[i]
// Only visit targets once to prevent infinite recursion.
if visited[target] {
continue
}
if len(visited) >= maxTypeDepth {
// This check is different than libbpf, which restricts the entire
// path to BPF_CORE_SPEC_MAX_LEN items.
return Member{}, false, fmt.Errorf("type is nested too deep")
}
visited[target] = true
members := target.members()
for j, member := range members {
if member.Name == name {
// NB: This is safe because member is a copy.
member.Offset += target.offset
return member, j == len(members)-1, nil
}
// The names don't match, but this member could be an anonymous struct
// or union.
if member.Name != "" {
continue
}
comp, ok := member.Type.(composite)
if !ok {
return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type)
}
targets = append(targets, offsetTarget{comp, target.offset + member.Offset})
}
}
return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation)
}
// coreFindEnumValue follows localAcc to find the equivalent enum value in target.
func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) {
localValue, err := localAcc.enumValue(local)
if err != nil {
return nil, nil, err
}
targetEnum, ok := target.(*Enum)
if !ok {
return nil, nil, errImpossibleRelocation
}
localName := localValue.Name.essentialName()
for i, targetValue := range targetEnum.Values {
if targetValue.Name.essentialName() != localName {
continue
}
return localValue, &targetEnum.Values[i], nil
}
return nil, nil, errImpossibleRelocation
}
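The name matching above relies on libbpf's "essential name" convention: a triple-underscore suffix marks a local type flavor and is ignored when comparing against the target. A minimal sketch of that convention, with hypothetical names not taken from this diff:
// essentialNameSketch drops the "___flavor" suffix, so a local enum value
// named PID_MAX___v1 still matches PID_MAX in the target BTF.
// (uses the standard library "strings" package)
func essentialNameSketch(name string) string {
	if i := strings.LastIndex(name, "___"); i > 0 {
		return name[:i]
	}
	return name
}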
/* The comment below is from bpf_core_types_are_compat in libbpf.c:
*
* Check local and target types for compatibility. This check is used for
@ -736,10 +239,8 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
* number of input args and compatible return and argument types.
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if types are not compatible.
*/
func coreAreTypesCompatible(localType Type, targetType Type) error {
func coreAreTypesCompatible(localType Type, targetType Type) (bool, error) {
var (
localTs, targetTs typeDeque
l, t = &localType, &targetType
@ -748,14 +249,14 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() {
if depth >= maxTypeDepth {
return errors.New("types are nested too deep")
return false, errors.New("types are nested too deep")
}
localType = *l
targetType = *t
localType = skipQualifierAndTypedef(*l)
targetType = skipQualifierAndTypedef(*t)
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
return false, nil
}
switch lv := (localType).(type) {
@ -765,7 +266,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
case *Int:
tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation)
return false, nil
}
case *Pointer, *Array:
@ -776,7 +277,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
case *FuncProto:
tv := targetType.(*FuncProto)
if len(lv.Params) != len(tv.Params) {
return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation)
return false, nil
}
depth++
@ -784,24 +285,22 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
targetType.walk(&targetTs)
default:
return fmt.Errorf("unsupported type %T", localType)
return false, fmt.Errorf("unsupported type %T", localType)
}
}
if l != nil {
return fmt.Errorf("dangling local type %T", *l)
return false, fmt.Errorf("dangling local type %T", *l)
}
if t != nil {
return fmt.Errorf("dangling target type %T", *t)
return false, fmt.Errorf("dangling target type %T", *t)
}
return nil
return true, nil
}
/* coreAreMembersCompatible checks two types for field-based relocation compatibility.
*
* The comment below is from bpf_core_fields_are_compat in libbpf.c:
/* The comment below is from bpf_core_fields_are_compat in libbpf.c:
*
* Check two types for compatibility for the purpose of field access
* relocation. const/volatile/restrict and typedefs are skipped to ensure we
@ -815,63 +314,65 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
* - for INT, size and signedness are ignored;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
* [ NB: coreAreMembersCompatible doesn't recurse, this check is done
* by coreFindField. ]
* - everything else shouldn't be ever a target of relocation.
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if the members are not compatible.
*/
func coreAreMembersCompatible(localType Type, targetType Type) error {
doNamesMatch := func(a, b string) error {
func coreAreMembersCompatible(localType Type, targetType Type) (bool, error) {
doNamesMatch := func(a, b string) bool {
if a == "" || b == "" {
// allow anonymous and named type to match
return nil
return true
}
if essentialName(a) == essentialName(b) {
return nil
return essentialName(a) == essentialName(b)
}
for depth := 0; depth <= maxTypeDepth; depth++ {
localType = skipQualifierAndTypedef(localType)
targetType = skipQualifierAndTypedef(targetType)
_, lok := localType.(composite)
_, tok := targetType.(composite)
if lok && tok {
return true, nil
}
return fmt.Errorf("names don't match: %w", errImpossibleRelocation)
}
_, lok := localType.(composite)
_, tok := targetType.(composite)
if lok && tok {
return nil
}
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
}
switch lv := localType.(type) {
case *Array, *Pointer:
return nil
case *Enum:
tv := targetType.(*Enum)
return doNamesMatch(lv.name(), tv.name())
case *Fwd:
tv := targetType.(*Fwd)
return doNamesMatch(lv.name(), tv.name())
case *Int:
tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation)
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return false, nil
}
return nil
default:
return fmt.Errorf("type %s: %w", localType, ErrNotSupported)
switch lv := localType.(type) {
case *Pointer:
return true, nil
case *Enum:
tv := targetType.(*Enum)
return doNamesMatch(lv.name(), tv.name()), nil
case *Fwd:
tv := targetType.(*Fwd)
return doNamesMatch(lv.name(), tv.name()), nil
case *Int:
tv := targetType.(*Int)
return !lv.isBitfield() && !tv.isBitfield(), nil
case *Array:
tv := targetType.(*Array)
localType = lv.Type
targetType = tv.Type
default:
return false, fmt.Errorf("unsupported type %T", localType)
}
}
return false, errors.New("types are nested too deep")
}
func skipQualifierAndTypedef(typ Type) (Type, error) {
func skipQualifierAndTypedef(typ Type) Type {
result := typ
for depth := 0; depth <= maxTypeDepth; depth++ {
switch v := (result).(type) {
@ -880,8 +381,8 @@ func skipQualifierAndTypedef(typ Type) (Type, error) {
case *Typedef:
result = v.Type
default:
return result, nil
return result
}
}
return nil, errors.New("exceeded type depth")
return typ
}
 
@ -30,7 +30,7 @@ type btfExtCoreHeader struct {
CoreReloLen uint32
}
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, relos map[string]coreRelos, err error) {
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, coreRelos map[string]bpfCoreRelos, err error) {
var header btfExtHeader
var coreHeader btfExtCoreHeader
if err := binary.Read(r, bo, &header); err != nil {
@ -94,13 +94,13 @@ func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (f
return nil, nil, nil, fmt.Errorf("can't seek to CO-RE relocation section: %v", err)
}
relos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings)
coreRelos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings)
if err != nil {
return nil, nil, nil, fmt.Errorf("CO-RE relocation info: %w", err)
}
}
return funcInfo, lineInfo, relos, nil
return funcInfo, lineInfo, coreRelos, nil
}
type btfExtInfoSec struct {
@ -208,25 +208,18 @@ type bpfCoreRelo struct {
InsnOff uint32
TypeID TypeID
AccessStrOff uint32
Kind COREKind
ReloKind coreReloKind
}
type coreRelo struct {
insnOff uint32
typeID TypeID
accessor coreAccessor
kind COREKind
}
type coreRelos []coreRelo
type bpfCoreRelos []bpfCoreRelo
// append two slices of relocations to each other. The instruction offsets of
// other are adjusted by offset.
func (r coreRelos) append(other coreRelos, offset uint64) coreRelos {
result := make([]coreRelo, 0, len(r)+len(other))
func (r bpfCoreRelos) append(other bpfCoreRelos, offset uint64) bpfCoreRelos {
result := make([]bpfCoreRelo, 0, len(r)+len(other))
result = append(result, r...)
for _, relo := range other {
relo.insnOff += uint32(offset)
relo.InsnOff += uint32(offset)
result = append(result, relo)
}
return result
@ -234,7 +227,7 @@ func (r coreRelos) append(other coreRelos, offset uint64) coreRelos {
var extInfoReloSize = binary.Size(bpfCoreRelo{})
func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]coreRelos, error) {
func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]bpfCoreRelos, error) {
var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil {
return nil, fmt.Errorf("read record size: %v", err)
@ -244,14 +237,14 @@ func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (m
return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize)
}
result := make(map[string]coreRelos)
result := make(map[string]bpfCoreRelos)
for {
secName, infoHeader, err := parseExtInfoHeader(r, bo, strings)
if errors.Is(err, io.EOF) {
return result, nil
}
var relos coreRelos
var relos []bpfCoreRelo
for i := uint32(0); i < infoHeader.NumInfo; i++ {
var relo bpfCoreRelo
if err := binary.Read(r, bo, &relo); err != nil {
@ -262,22 +255,7 @@ func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (m
return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, relo.InsnOff)
}
accessorStr, err := strings.Lookup(relo.AccessStrOff)
if err != nil {
return nil, err
}
accessor, err := parseCoreAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
relos = append(relos, coreRelo{
relo.InsnOff,
relo.TypeID,
accessor,
relo.Kind,
})
relos = append(relos, relo)
}
result[secName] = relos
 
@ -1,6 +1,7 @@
package btf
import (
"errors"
"fmt"
"math"
"strings"
@ -36,7 +37,6 @@ type Type interface {
type namedType interface {
Type
name() string
essentialName() string
}
// Name identifies a type.
@ -48,10 +48,6 @@ func (n Name) name() string {
return string(n)
}
func (n Name) essentialName() string {
return essentialName(string(n))
}
// Void is the unit type of BTF.
type Void struct{}
@ -178,7 +174,8 @@ func (s *Struct) walk(tdq *typeDeque) {
func (s *Struct) copy() Type {
cpy := *s
cpy.Members = copyMembers(s.Members)
cpy.Members = make([]Member, len(s.Members))
copy(cpy.Members, s.Members)
return &cpy
}
@ -209,7 +206,8 @@ func (u *Union) walk(tdq *typeDeque) {
func (u *Union) copy() Type {
cpy := *u
cpy.Members = copyMembers(u.Members)
cpy.Members = make([]Member, len(u.Members))
copy(cpy.Members, u.Members)
return &cpy
}
@ -217,12 +215,6 @@ func (u *Union) members() []Member {
return u.Members
}
func copyMembers(orig []Member) []Member {
cpy := make([]Member, len(orig))
copy(cpy, orig)
return cpy
}
type composite interface {
members() []Member
}
@ -519,7 +511,7 @@ func Sizeof(typ Type) (int, error) {
switch v := typ.(type) {
case *Array:
if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ)
return 0, errors.New("overflow")
}
// Arrays may be of zero length, which allows
@ -540,30 +532,28 @@ func Sizeof(typ Type) (int, error) {
continue
default:
return 0, fmt.Errorf("unsized type %T", typ)
return 0, fmt.Errorf("unrecognized type %T", typ)
}
if n > 0 && elem > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ)
return 0, errors.New("overflow")
}
size := n * elem
if int64(int(size)) != size {
return 0, fmt.Errorf("type %s: overflow", typ)
return 0, errors.New("overflow")
}
return int(size), nil
}
return 0, fmt.Errorf("type %s: exceeded type depth", typ)
return 0, errors.New("exceeded type depth")
}
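Both error branches above guard the same pattern: validate a multiplication against math.MaxInt64 before performing it. A standalone sketch of that guard, not part of this diff:
// mulNoOverflow multiplies two non-negative sizes and fails instead of
// silently wrapping, mirroring the overflow checks inside Sizeof.
// (uses the standard library "errors" and "math" packages)
func mulNoOverflow(n, elem int64) (int64, error) {
	if n > 0 && elem > math.MaxInt64/n {
		return 0, errors.New("overflow")
	}
	return n * elem, nil
}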
// copy a Type recursively.
//
// typ may form a cycle.
//
// Returns any errors from transform verbatim.
func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
func copyType(typ Type) Type {
var (
copies = make(map[Type]Type)
work typeDeque
@ -576,17 +566,7 @@ func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
continue
}
var cpy Type
if transform != nil {
tf, err := transform(*t)
if err != nil {
return nil, fmt.Errorf("copy %s: %w", typ, err)
}
cpy = tf.copy()
} else {
cpy = (*t).copy()
}
cpy := (*t).copy()
copies[*t] = cpy
*t = cpy
@ -594,7 +574,7 @@ func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
cpy.walk(&work)
}
return typ, nil
return typ
}
// typeDeque keeps track of pointers to types which still
 
@ -50,19 +50,3 @@ func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
syms, err = se.File.Symbols()
return
}
// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
defer func() {
r := recover()
if r == nil {
return
}
syms = nil
err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r)
}()
syms, err = se.File.DynamicSymbols()
return
}
 
@ -9,16 +9,11 @@ import (
// depending on the host's endianness.
var NativeEndian binary.ByteOrder
// Clang is set to either "el" or "eb" depending on the host's endianness.
var ClangEndian string
func init() {
if isBigEndian() {
NativeEndian = binary.BigEndian
ClangEndian = "eb"
} else {
NativeEndian = binary.LittleEndian
ClangEndian = "el"
}
}
 
@ -29,10 +29,6 @@ type VerifierError struct {
log string
}
func (le *VerifierError) Unwrap() error {
return le.cause
}
func (le *VerifierError) Error() string {
if le.log == "" {
return le.cause.Error()
 
@ -22,6 +22,10 @@ func NewSlicePointer(buf []byte) Pointer {
// NewStringPointer creates a 64-bit pointer from a string.
func NewStringPointer(str string) Pointer {
if str == "" {
return Pointer{}
}
p, err := unix.BytePtrFromString(str)
if err != nil {
return Pointer{}
 
@ -42,7 +42,6 @@ const (
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_SHARED = linux.MAP_SHARED
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
 
@ -43,7 +43,6 @@ const (
PROT_READ = 0x1
PROT_WRITE = 0x2
MAP_SHARED = 0x1
PERF_ATTR_SIZE_VER1 = 0
PERF_TYPE_SOFTWARE = 0x1
PERF_TYPE_TRACEPOINT = 0
PERF_COUNT_SW_BPF_OUTPUT = 0xa
 
@ -1,16 +1,12 @@
package link
import (
"bytes"
"crypto/rand"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
@ -19,60 +15,13 @@ import (
var (
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
kprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
type probeType uint8
const (
kprobeType probeType = iota
uprobeType
)
func (pt probeType) String() string {
if pt == kprobeType {
return "kprobe"
}
return "uprobe"
}
func (pt probeType) EventsPath() string {
if pt == kprobeType {
return kprobeEventsPath
}
return uprobeEventsPath
}
func (pt probeType) PerfEventType(ret bool) perfEventType {
if pt == kprobeType {
if ret {
return kretprobeEvent
}
return kprobeEvent
}
if ret {
return uretprobeEvent
}
return uprobeEvent
}
func (pt probeType) RetprobeBit() (uint64, error) {
if pt == kprobeType {
return kretprobeBit()
}
return uretprobeBit()
}
// Kprobe attaches the given eBPF program to a perf event that fires when the
// given kernel symbol starts executing. See /proc/kallsyms for available
// symbols. For example, printk():
//
// Kprobe("printk", prog)
// Kprobe("printk")
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources.
@ -95,7 +44,7 @@ func Kprobe(symbol string, prog *ebpf.Program) (Link, error) {
// before the given kernel symbol exits, with the function stack left intact.
// See /proc/kallsyms for available symbols. For example, printk():
//
// Kretprobe("printk", prog)
// Kretprobe("printk")
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources.
@ -131,10 +80,7 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
}
// Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(platformPrefix(symbol), ret)
if errors.Is(err, os.ErrNotExist) {
tp, err = pmuKprobe(symbol, ret)
}
tp, err := pmuKprobe(symbol, ret)
if err == nil {
return tp, nil
}
@ -143,10 +89,7 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
}
// Use tracefs if kprobe PMU is missing.
tp, err = tracefsKprobe(platformPrefix(symbol), ret)
if errors.Is(err, os.ErrNotExist) {
tp, err = tracefsKprobe(symbol, ret)
}
tp, err = tracefsKprobe(symbol, ret)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
}
@ -154,70 +97,36 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
return tp, nil
}
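For reference, a hedged sketch of calling the public API above; prog is assumed to be an already-loaded *ebpf.Program of type Kprobe, and the symbol is illustrative:
// Attach prog to a kernel symbol and release the perf event on shutdown.
kp, err := link.Kprobe("sys_execve", prog)
if err != nil {
	return fmt.Errorf("attaching kprobe: %w", err)
}
defer kp.Close()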
// pmuKprobe opens a perf event based on the kprobe PMU.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
// pmuKprobe opens a perf event based on a Performance Monitoring Unit.
// Requires at least 4.17 (e12f03d7031a "perf/core: Implement the
// 'perf_kprobe' PMU").
// Returns ErrNotSupported if the kernel doesn't support perf_kprobe PMU,
// or os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
return pmuProbe(kprobeType, symbol, "", 0, ret)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU.
et, err := getPMUEventType(typ)
// the perf_kprobe PMU.
et, err := getPMUEventType("kprobe")
if err != nil {
return nil, err
}
var config uint64
if ret {
bit, err := typ.RetprobeBit()
if err != nil {
return nil, err
}
config |= 1 << bit
// Create a pointer to a NUL-terminated string for the kernel.
sp, err := unsafeStringPtr(symbol)
if err != nil {
return nil, err
}
var (
attr unix.PerfEventAttr
sp unsafe.Pointer
)
switch typ {
case kprobeType:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err := unsafeStringPtr(symbol)
if err != nil {
return nil, err
}
// TODO: Parse the position of the bit from /sys/bus/event_source/devices/%s/format/retprobe.
config := 0
if ret {
config = 1
}
attr = unix.PerfEventAttr{
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Config: config, // Retprobe flag
}
case uprobeType:
sp, err := unsafeStringPtr(path)
if err != nil {
return nil, err
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
// size of the internal buffer the kernel allocates for reading the
// perf_event_attr argument from userspace.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: offset, // Uprobe offset
Config: config, // Retprobe flag
}
attr := unix.PerfEventAttr{
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Config: uint64(config), // perf_kprobe PMU treats config as flags
}
fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
@ -235,27 +144,22 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*per
// Ensure the string pointer is not collected before PerfEventOpen returns.
runtime.KeepAlive(sp)
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
// Kernel has perf_kprobe PMU available, initialize perf event.
return &perfEvent{
fd: internal.NewFD(uint32(fd)),
pmuID: et,
name: symbol,
typ: typ.PerfEventType(ret),
fd: internal.NewFD(uint32(fd)),
pmuID: et,
name: symbol,
ret: ret,
progType: ebpf.Kprobe,
}, nil
}
// tracefsKprobe creates a Kprobe tracefs entry.
func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
return tracefsProbe(kprobeType, symbol, "", 0, ret)
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
// tracefsKprobe creates a trace event by writing an entry to <tracefs>/kprobe_events.
// A new trace event group name is generated on every call to support creating
// multiple trace events for the same kernel or userspace symbol.
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
// multiple trace events for the same kernel symbol. A perf event is then opened
// on the newly-created trace event and returned to the caller.
func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
// Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name.
@ -272,13 +176,14 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (
if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
// The read is expected to fail with ErrNotSupported due to a non-existing event.
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err)
}
// Create the [k,u]probe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, group, symbol, path, offset, ret); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
// Create the kprobe trace event using tracefs.
if err := createTraceFSKprobeEvent(group, symbol, ret); err != nil {
return nil, fmt.Errorf("creating kprobe event on tracefs: %w", err)
}
// Get the newly-created trace event's id.
@ -297,83 +202,65 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (
fd: fd,
group: group,
name: symbol,
ret: ret,
tracefsID: tid,
typ: typ.PerfEventType(ret),
progType: ebpf.Kprobe, // kernel only allows attaching kprobe programs to kprobe events
}, nil
}
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists.
func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset uint64, ret bool) error {
// createTraceFSKprobeEvent creates a new ephemeral trace event by writing to
// <tracefs>/kprobe_events. Returns ErrNotSupported if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes.
func createTraceFSKprobeEvent(group, symbol string, ret bool) error {
// Open the kprobe_events file in tracefs.
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
return fmt.Errorf("error opening kprobe_events: %w", err)
}
defer f.Close()
var pe string
switch typ {
case kprobeType:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, symbol)
case uprobeType:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345
//
// See Documentation/trace/uprobetracer.txt for more details.
pathOffset := uprobePathOffset(path, offset)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, pathOffset)
}
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself. See Documentation/kprobes.txt for more details.
pe := fmt.Sprintf("%s:%s/%s %s", kprobePrefix(ret), group, symbol, symbol)
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return fmt.Errorf("symbol %s not found: %w", symbol, os.ErrNotExist)
return fmt.Errorf("kernel symbol %s not found: %w", symbol, os.ErrNotExist)
}
if err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
}
return nil
}
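To make the syntax described in the comments concrete, the strings written to the events files look roughly like this; the group, symbol and offset are made up for illustration:
// A uretprobe entry and its later removal, built the same way as pe above.
add := fmt.Sprintf("%s:%s/%s %s:%#x", "r", "ebpf_123abc", "readline", "/bin/bash", uint64(0x8a060))
// add == "r:ebpf_123abc/readline /bin/bash:0x8a060"
del := fmt.Sprintf("-:%s/%s", "ebpf_123abc", "readline")
// del == "-:ebpf_123abc/readline"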
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
// from <tracefs>/[k,u]probe_events.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
// closeTraceFSKprobeEvent removes the kprobe with the given group, symbol and kind
// from <tracefs>/kprobe_events.
func closeTraceFSKprobeEvent(group, symbol string) error {
f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
return fmt.Errorf("error opening kprobe_events: %w", err)
}
defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// See kprobe_events syntax above. Kprobe type does not need to be specified
// for removals.
pe := fmt.Sprintf("-:%s/%s", group, symbol)
if _, err = f.WriteString(pe); err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
}
return nil
@ -401,38 +288,9 @@ func randomGroup(prefix string) (string, error) {
return group, nil
}
func probePrefix(ret bool) string {
func kprobePrefix(ret bool) string {
if ret {
return "r"
}
return "p"
}
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
func determineRetprobeBit(typ probeType) (uint64, error) {
p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")
data, err := ioutil.ReadFile(p)
if err != nil {
return 0, err
}
var rp uint64
n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp)
if err != nil {
return 0, fmt.Errorf("parse retprobe bit: %w", err)
}
if n != 1 {
return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
}
return rp, nil
}
func kretprobeBit() (uint64, error) {
kprobeRetprobeBit.once.Do(func() {
kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
})
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}
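The format file read above typically contains a single line such as "config:0"; assuming that value, the parsed bit is applied to the perf_event_attr config the same way pmuProbe does:
bit := uint64(0) // parsed from "config:0"
var config uint64
config |= 1 << bit // request the return-probe variant
// config == 0x1; it stays zero for a plain kprobe/uprobe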
 
@ -31,10 +31,6 @@ import (
// exported kernel symbols. kprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/kprobe_events file, or
// they can be scoped to the current process by creating PMU perf events.
// - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries
// and offsets. uprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/uprobe_events file, or
// they can be scoped to the current process by creating PMU perf events.
// - perf event: An object instantiated based on an existing trace event or
// kernel symbol. Referred to by fd in userspace.
// Exactly one eBPF program can be attached to a perf event. Multiple perf
@ -56,16 +52,6 @@ const (
perfAllThreads = -1
)
type perfEventType uint8
const (
tracepointEvent perfEventType = iota
kprobeEvent
kretprobeEvent
uprobeEvent
uretprobeEvent
)
// A perfEvent represents a perf event kernel object. Exactly one eBPF program
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
@ -80,10 +66,11 @@ type perfEvent struct {
// ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64
// The event type determines the types of programs that can be attached.
typ perfEventType
// True for kretprobes/uretprobes.
ret bool
fd *internal.FD
fd *internal.FD
progType ebpf.ProgramType
}
func (pe *perfEvent) isLink() {}
@ -130,18 +117,13 @@ func (pe *perfEvent) Close() error {
return fmt.Errorf("closing perf event fd: %w", err)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent:
// Clean up kprobe tracefs entry.
switch t := pe.progType; t {
case ebpf.Kprobe:
// For kprobes created using tracefs, clean up the <tracefs>/kprobe_events entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
return closeTraceFSKprobeEvent(pe.group, pe.name)
}
case uprobeEvent, uretprobeEvent:
// Clean up uprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
}
case tracepointEvent:
case ebpf.TracePoint:
// Tracepoint trace events don't hold any extra resources.
return nil
}
@ -159,21 +141,12 @@ func (pe *perfEvent) attach(prog *ebpf.Program) error {
if pe.fd == nil {
return errors.New("cannot attach to nil perf event")
}
if t := prog.Type(); t != pe.progType {
return fmt.Errorf("invalid program type (expected %s): %s", pe.progType, t)
}
if prog.FD() < 0 {
return fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return fmt.Errorf("unknown perf event type: %d", pe.typ)
}
// The ioctl below will fail when the fd is invalid.
kfd, _ := pe.fd.Value()
@ -207,8 +180,8 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) {
// group and name must be alphanumeric or underscore, as required by the kernel.
func getTraceEventID(group, name string) (uint64, error) {
tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
if errors.Is(err, ErrNotSupported) {
return 0, fmt.Errorf("trace event %s/%s: %w", group, name, ErrNotSupported)
}
if err != nil {
return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
@ -219,22 +192,20 @@ func getTraceEventID(group, name string) (uint64, error) {
// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
// from /sys/bus/event_source/devices/<pmu>/type.
//
// Returns ErrNotSupported if the pmu type is not supported.
func getPMUEventType(typ probeType) (uint64, error) {
et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
func getPMUEventType(pmu string) (uint64, error) {
et, err := uint64FromFile("/sys/bus/event_source/devices", pmu, "type")
if errors.Is(err, ErrNotSupported) {
return 0, fmt.Errorf("pmu type %s: %w", pmu, ErrNotSupported)
}
if err != nil {
return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
return 0, fmt.Errorf("reading pmu type %s: %w", pmu, err)
}
return et, nil
}
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
// kprobes created by writing to <tracefs>/kprobe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
func openTracepointPerfEvent(tid uint64) (*internal.FD, error) {
attr := unix.PerfEventAttr{
@ -257,13 +228,22 @@ func openTracepointPerfEvent(tid uint64) (*internal.FD, error) {
// and joined onto base. Returns error if base no longer prefixes the path after
// joining all components.
func uint64FromFile(base string, path ...string) (uint64, error) {
// Resolve leaf path separately for error feedback. Makes the join onto
// base more readable (can't mix with variadic args).
l := filepath.Join(path...)
p := filepath.Join(base, l)
if !strings.HasPrefix(p, base) {
return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
}
data, err := ioutil.ReadFile(p)
if os.IsNotExist(err) {
// Only echo the leaf path; the base path can be prepended at the call site
// if more verbosity is required.
return 0, fmt.Errorf("symbol %s: %w", l, ErrNotSupported)
}
if err != nil {
return 0, fmt.Errorf("reading file %s: %w", p, err)
}
 
@ -1,25 +0,0 @@
package link
import (
"fmt"
"runtime"
)
func platformPrefix(symbol string) string {
prefix := runtime.GOARCH
// per https://github.com/golang/go/blob/master/src/go/build/syslist.go
switch prefix {
case "386":
prefix = "ia32"
case "amd64", "amd64p32":
prefix = "x64"
case "arm64", "arm64be":
prefix = "arm64"
default:
return symbol
}
return fmt.Sprintf("__%s_%s", prefix, symbol)
}
 
@ -43,7 +43,7 @@ func RawAttachProgram(opts RawAttachProgramOptions) error {
}
if err := internal.BPFProgAttach(&attr); err != nil {
return fmt.Errorf("can't attach program: %w", err)
return fmt.Errorf("can't attach program: %s", err)
}
return nil
}
@ -69,7 +69,7 @@ func RawDetachProgram(opts RawDetachProgramOptions) error {
AttachType: uint32(opts.Attach),
}
if err := internal.BPFProgDetach(&attr); err != nil {
return fmt.Errorf("can't detach program: %w", err)
return fmt.Errorf("can't detach program: %s", err)
}
return nil
 
@ -11,7 +11,7 @@ import (
// tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example:
//
// Tracepoint("syscalls", "sys_enter_fork", prog)
// Tracepoint("syscalls", "sys_enter_fork")
//
// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is
// only possible as of kernel 4.14 (commit cf5f5ce).
@ -44,7 +44,7 @@ func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) {
tracefsID: tid,
group: group,
name: name,
typ: tracepointEvent,
progType: ebpf.TracePoint,
}
if err := pe.attach(prog); err != nil {
 
@ -1,207 +0,0 @@
package link
import (
"debug/elf"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"sync"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
)
var (
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
// rgxUprobeSymbol is used to strip invalid characters from the uprobe symbol
// as they are not allowed to be used as the EVENT token in tracefs.
rgxUprobeSymbol = regexp.MustCompile("[^a-zA-Z0-9]+")
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
// Executable defines an executable program on the filesystem.
type Executable struct {
// Path of the executable on the filesystem.
path string
// Parsed ELF symbols and dynamic symbols.
symbols map[string]elf.Symbol
}
// To open a new Executable, use:
//
// OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
func OpenExecutable(path string) (*Executable, error) {
if path == "" {
return nil, fmt.Errorf("path cannot be empty")
}
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open file '%s': %w", path, err)
}
defer f.Close()
se, err := internal.NewSafeELFFile(f)
if err != nil {
return nil, fmt.Errorf("parse ELF file: %w", err)
}
var ex = Executable{
path: path,
symbols: make(map[string]elf.Symbol),
}
if err := ex.addSymbols(se.Symbols); err != nil {
return nil, err
}
if err := ex.addSymbols(se.DynamicSymbols); err != nil {
return nil, err
}
return &ex, nil
}
func (ex *Executable) addSymbols(f func() ([]elf.Symbol, error)) error {
// elf.Symbols and elf.DynamicSymbols return ErrNoSymbols if the section is not found.
syms, err := f()
if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
return err
}
for _, s := range syms {
ex.symbols[s.Name] = s
}
return nil
}
func (ex *Executable) symbol(symbol string) (*elf.Symbol, error) {
if s, ok := ex.symbols[symbol]; ok {
return &s, nil
}
return nil, fmt.Errorf("symbol %s not found", symbol)
}
// Uprobe attaches the given eBPF program to a perf event that fires when the
// given symbol starts executing in the given Executable.
// For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog)
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. Functions provided by shared libraries cannot currently
// be traced and will result in an ErrNotSupported.
func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program) (Link, error) {
u, err := ex.uprobe(symbol, prog, false)
if err != nil {
return nil, err
}
err = u.attach(prog)
if err != nil {
u.Close()
return nil, err
}
return u, nil
}
// Uretprobe attaches the given eBPF program to a perf event that fires right
// before the given symbol exits. For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog)
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. Functions provided by shared libraries cannot currently
// be traced and will result in an ErrNotSupported.
func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program) (Link, error) {
u, err := ex.uprobe(symbol, prog, true)
if err != nil {
return nil, err
}
err = u.attach(prog)
if err != nil {
u.Close()
return nil, err
}
return u, nil
}
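A hedged end-to-end sketch of this API; the binary path, symbol and prog are illustrative, and prog must be a loaded *ebpf.Program of type Kprobe:
ex, err := link.OpenExecutable("/bin/bash")
if err != nil {
	return err
}
up, err := ex.Uretprobe("readline", prog)
if err != nil {
	return err
}
defer up.Close()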
// uprobe opens a perf event for the given binary/symbol and attaches prog to it.
// If ret is true, create a uretprobe.
func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if prog.Type() != ebpf.Kprobe {
return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput)
}
sym, err := ex.symbol(symbol)
if err != nil {
return nil, fmt.Errorf("symbol '%s' not found in '%s': %w", symbol, ex.path, err)
}
// Symbols with location 0 from section undef are shared library calls and
// are relocated before the binary is executed. Dynamic linking is not
// implemented by the library, so mark this as unsupported for now.
if sym.Section == elf.SHN_UNDEF && sym.Value == 0 {
return nil, fmt.Errorf("cannot resolve %s library call '%s': %w", ex.path, symbol, ErrNotSupported)
}
// Use uprobe PMU if the kernel has it available.
tp, err := pmuUprobe(sym.Name, ex.path, sym.Value, ret)
if err == nil {
return tp, nil
}
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err)
}
// Use tracefs if uprobe PMU is missing.
tp, err = tracefsUprobe(uprobeSanitizedSymbol(sym.Name), ex.path, sym.Value, ret)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
}
return tp, nil
}
// pmuUprobe opens a perf event based on the uprobe PMU.
func pmuUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
return pmuProbe(uprobeType, symbol, path, offset, ret)
}
// tracefsUprobe creates a Uprobe tracefs entry.
func tracefsUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
return tracefsProbe(uprobeType, symbol, path, offset, ret)
}
// uprobeSanitizedSymbol replaces every invalid character for the tracefs api with an underscore.
func uprobeSanitizedSymbol(symbol string) string {
return rgxUprobeSymbol.ReplaceAllString(symbol, "_")
}
// uprobePathOffset creates the PATH:OFFSET token for the tracefs api.
func uprobePathOffset(path string, offset uint64) string {
return fmt.Sprintf("%s:%#x", path, offset)
}
func uretprobeBit() (uint64, error) {
uprobeRetprobeBit.once.Do(func() {
uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType)
})
return uprobeRetprobeBit.value, uprobeRetprobeBit.err
}
 
@ -108,16 +108,12 @@ func fixupJumpsAndCalls(insns asm.Instructions) error {
offset := iter.Offset
ins := iter.Ins
if ins.Reference == "" {
continue
}
switch {
case ins.IsFunctionCall() && ins.Constant == -1:
// Rewrite bpf to bpf call
callOffset, ok := symbolOffsets[ins.Reference]
if !ok {
return fmt.Errorf("call at %d: reference to missing symbol %q", i, ins.Reference)
return fmt.Errorf("instruction %d: reference to missing symbol %q", i, ins.Reference)
}
ins.Constant = int64(callOffset - offset - 1)
@ -126,13 +122,10 @@ func fixupJumpsAndCalls(insns asm.Instructions) error {
// Rewrite jump to label
jumpOffset, ok := symbolOffsets[ins.Reference]
if !ok {
return fmt.Errorf("jump at %d: reference to missing symbol %q", i, ins.Reference)
return fmt.Errorf("instruction %d: reference to missing symbol %q", i, ins.Reference)
}
ins.Offset = int16(jumpOffset - offset - 1)
case ins.IsLoadFromMap() && ins.MapPtr() == -1:
return fmt.Errorf("map %s: %w", ins.Reference, errUnsatisfiedReference)
}
}

vendor/github.com/cilium/ebpf/map.go generated vendored

@ -18,7 +18,6 @@ var (
ErrKeyNotExist = errors.New("key does not exist")
ErrKeyExist = errors.New("key already exists")
ErrIterationAborted = errors.New("iteration aborted")
ErrMapIncompatible = errors.New("map's spec is incompatible with pinned map")
)
// MapOptions control loading a map into the kernel.
@ -88,23 +87,6 @@ func (ms *MapSpec) Copy() *MapSpec {
return &cpy
}
func (ms *MapSpec) clampPerfEventArraySize() error {
if ms.Type != PerfEventArray {
return nil
}
n, err := internal.PossibleCPUs()
if err != nil {
return fmt.Errorf("perf event array: %w", err)
}
if n := uint32(n); ms.MaxEntries > n {
ms.MaxEntries = n
}
return nil
}
// MapKV is used to initialize the contents of a Map.
type MapKV struct {
Key interface{}
@ -114,19 +96,19 @@ type MapKV struct {
func (ms *MapSpec) checkCompatibility(m *Map) error {
switch {
case m.typ != ms.Type:
return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible)
return fmt.Errorf("expected type %v, got %v", ms.Type, m.typ)
case m.keySize != ms.KeySize:
return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible)
return fmt.Errorf("expected key size %v, got %v", ms.KeySize, m.keySize)
case m.valueSize != ms.ValueSize:
return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible)
return fmt.Errorf("expected value size %v, got %v", ms.ValueSize, m.valueSize)
case m.maxEntries != ms.MaxEntries:
return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible)
return fmt.Errorf("expected max entries %v, got %v", ms.MaxEntries, m.maxEntries)
case m.flags != ms.Flags:
return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible)
return fmt.Errorf("expected flags %v, got %v", ms.Flags, m.flags)
}
return nil
}
@ -189,16 +171,14 @@ func NewMap(spec *MapSpec) (*Map, error) {
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
handles := newHandleCache()
defer handles.close()
btfs := make(btfHandleCache)
defer btfs.close()
return newMapWithOptions(spec, opts, handles)
return newMapWithOptions(spec, opts, btfs)
}
func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) {
func newMapWithOptions(spec *MapSpec, opts MapOptions, btfs btfHandleCache) (_ *Map, err error) {
closeOnError := func(c io.Closer) {
if err != nil {
c.Close()
@ -222,7 +202,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
defer closeOnError(m)
if err := spec.checkCompatibility(m); err != nil {
return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
return nil, fmt.Errorf("use pinned map %s: %s", spec.Name, err)
}
return m, nil
@ -231,7 +211,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
// Nothing to do here
default:
return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
return nil, fmt.Errorf("unsupported pin type %d", int(spec.Pinning))
}
var innerFd *internal.FD
@ -244,7 +224,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return nil, errors.New("inner maps cannot be pinned")
}
template, err := createMap(spec.InnerMap, nil, opts, handles)
template, err := createMap(spec.InnerMap, nil, opts, btfs)
if err != nil {
return nil, err
}
@ -253,7 +233,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
innerFd = template.fd
}
m, err := createMap(spec, innerFd, opts, handles)
m, err := createMap(spec, innerFd, opts, btfs)
if err != nil {
return nil, err
}
@ -269,7 +249,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return m, nil
}
func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) {
func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandleCache) (_ *Map, err error) {
closeOnError := func(closer io.Closer) {
if err != nil {
closer.Close()
@ -340,7 +320,7 @@ func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, handles *hand
var btfDisabled bool
if spec.BTF != nil {
handle, err := handles.btfHandle(btf.MapSpec(spec.BTF))
handle, err := btfs.load(btf.MapSpec(spec.BTF))
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err)

vendor/github.com/cilium/ebpf/prog.go generated vendored

@ -5,7 +5,6 @@ import (
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"path/filepath"
"strings"
@ -20,8 +19,6 @@ import (
// ErrNotSupported is returned whenever the kernel doesn't support a feature.
var ErrNotSupported = internal.ErrNotSupported
var errUnsatisfiedReference = errors.New("unsatisfied reference")
// ProgramID represents the unique ID of an eBPF program.
type ProgramID uint32
@ -44,12 +41,6 @@ type ProgramOptions struct {
// Controls the output buffer size for the verifier. Defaults to
// DefaultVerifierLogSize.
LogSize int
// An ELF containing the target BTF for this program. It is used both to
// find the correct function to trace and to apply CO-RE relocations.
// This is useful in environments where the kernel BTF is not available
// (containers) or where it is in a non-standard location. Defaults to
// using the kernel BTF from a well-known location.
TargetBTF io.ReaderAt
}
// ProgramSpec defines a Program.
@ -134,21 +125,21 @@ func NewProgram(spec *ProgramSpec) (*Program, error) {
// Loading a program for the first time will perform
// feature detection by loading small, temporary programs.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
handles := newHandleCache()
defer handles.close()
btfs := make(btfHandleCache)
defer btfs.close()
prog, err := newProgramWithOptions(spec, opts, handles)
if errors.Is(err, errUnsatisfiedReference) {
return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}
return prog, err
return newProgramWithOptions(spec, opts, btfs)
}
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) {
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, btfs btfHandleCache) (*Program, error) {
if len(spec.Instructions) == 0 {
return nil, errors.New("Instructions cannot be empty")
}
if len(spec.License) == 0 {
return nil, errors.New("License cannot be empty")
}
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
}
@ -166,10 +157,27 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
kv = v.Kernel()
}
insns := make(asm.Instructions, len(spec.Instructions))
copy(insns, spec.Instructions)
if err := fixupJumpsAndCalls(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err := insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
insCount := uint32(len(bytecode) / asm.InstructionSize)
attr := &bpfProgLoadAttr{
progType: spec.Type,
progFlags: spec.Flags,
expectedAttachType: spec.AttachType,
insCount: insCount,
instructions: internal.NewSlicePointer(bytecode),
license: internal.NewStringPointer(spec.License),
kernelVersion: kv,
}
@ -178,24 +186,15 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
attr.progName = newBPFObjName(spec.Name)
}
var err error
var targetBTF *btf.Spec
if opts.TargetBTF != nil {
targetBTF, err = handles.btfSpec(opts.TargetBTF)
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
}
var btfDisabled bool
var core btf.COREFixups
if spec.BTF != nil {
core, err = btf.ProgramFixups(spec.BTF, targetBTF)
if err != nil {
return nil, fmt.Errorf("CO-RE relocations: %w", err)
if relos, err := btf.ProgramRelocations(spec.BTF, nil); err != nil {
return nil, fmt.Errorf("CO-RE relocations: %s", err)
} else if len(relos) > 0 {
return nil, fmt.Errorf("applying CO-RE relocations: %w", ErrNotSupported)
}
handle, err := handles.btfHandle(btf.ProgramSpec(spec.BTF))
handle, err := btfs.load(btf.ProgramSpec(spec.BTF))
btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err)
@ -222,27 +221,8 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
}
}
insns, err := core.Apply(spec.Instructions)
if err != nil {
return nil, fmt.Errorf("CO-RE fixup: %w", err)
}
if err := fixupJumpsAndCalls(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err = insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
attr.instructions = internal.NewSlicePointer(bytecode)
attr.insCount = uint32(len(bytecode) / asm.InstructionSize)
if spec.AttachTo != "" {
target, err := resolveBTFType(targetBTF, spec.AttachTo, spec.Type, spec.AttachType)
target, err := resolveBTFType(spec.AttachTo, spec.Type, spec.AttachType)
if err != nil {
return nil, err
}
@ -270,7 +250,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
}
logErr := err
if opts.LogLevel == 0 && opts.LogSize >= 0 {
if opts.LogLevel == 0 {
// Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize)
attr.logLevel = 1
@ -684,45 +664,52 @@ func (p *Program) ID() (ProgramID, error) {
return ProgramID(info.id), nil
}
func resolveBTFType(kernel *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
func findKernelType(name string, typ btf.Type) error {
kernel, err := btf.LoadKernelSpec()
if err != nil {
return fmt.Errorf("can't load kernel spec: %w", err)
}
return kernel.FindType(name, typ)
}
func resolveBTFType(name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
type match struct {
p ProgramType
a AttachType
}
var target btf.Type
var typeName, featureName string
switch (match{progType, attachType}) {
target := match{progType, attachType}
switch target {
case match{LSM, AttachLSMMac}:
target = new(btf.Func)
typeName = "bpf_lsm_" + name
featureName = name + " LSM hook"
var target btf.Func
err := findKernelType("bpf_lsm_"+name, &target)
if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: name + " LSM hook",
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for LSM hook %s: %w", name, err)
}
return &target, nil
case match{Tracing, AttachTraceIter}:
target = new(btf.Func)
typeName = "bpf_iter_" + name
featureName = name + " iterator"
var target btf.Func
err := findKernelType("bpf_iter_"+name, &target)
if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: name + " iterator",
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for iterator %s: %w", name, err)
}
return &target, nil
default:
return nil, nil
}
if kernel == nil {
var err error
kernel, err = btf.LoadKernelSpec()
if err != nil {
return nil, fmt.Errorf("load kernel spec: %w", err)
}
}
err := kernel.FindType(typeName, target)
if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: featureName,
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for %s: %w", featureName, err)
}
return target, nil
}
 
@ -1,95 +1,56 @@
#!/bin/bash
# Test the current package under a different kernel.
# Requires virtme and qemu to be installed.
# Examples:
# Run all tests on a 5.4 kernel
# $ ./run-tests.sh 5.4
# Run a subset of tests:
# $ ./run-tests.sh 5.4 go test ./link
set -euo pipefail
set -eu
set -o pipefail
script="$(realpath "$0")"
readonly script
# This script is a bit like a Matryoshka doll since it keeps re-executing itself
# in various different contexts:
#
# 1. invoked by the user like run-tests.sh 5.4
# 2. invoked by go test like run-tests.sh --exec-vm
# 3. invoked by init in the vm like run-tests.sh --exec-test
#
# This allows us to use all available CPUs on the host machine to compile our
# code, and then only use the VM to execute the test. This is because the VM
# is usually slower at compiling than the host.
if [[ "${1:-}" = "--exec-vm" ]]; then
shift
input="$1"
shift
# Use sudo if /dev/kvm isn't accessible by the current user.
sudo=""
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
sudo="sudo"
fi
readonly sudo
testdir="$(dirname "$1")"
output="$(mktemp -d)"
printf -v cmd "%q " "$@"
if [[ "$(stat -c '%t:%T' -L /proc/$$/fd/0)" == "1:3" ]]; then
# stdin is /dev/null, which doesn't play well with qemu. Use a fifo as a
# blocking substitute.
mkfifo "${output}/fake-stdin"
# Open for reading and writing to avoid blocking.
exec 0<> "${output}/fake-stdin"
rm "${output}/fake-stdin"
fi
$sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" \"$script\" --exec-test $cmd" \
--qemu-opts -smp 2 # need at least two CPUs for some tests
if [[ ! -e "${output}/success" ]]; then
exit 1
fi
$sudo rm -r "$output"
exit 0
elif [[ "${1:-}" = "--exec-test" ]]; then
if [[ "${1:-}" = "--in-vm" ]]; then
shift
mount -t bpf bpf /sys/fs/bpf
mount -t tracefs tracefs /sys/kernel/debug/tracing
export CGO_ENABLED=0
export GOFLAGS=-mod=readonly
export GOPATH=/run/go-path
export GOPROXY=file:///run/go-path/pkg/mod/cache/download
export GOSUMDB=off
export GOCACHE=/run/go-cache
if [[ -d "/run/input/bpf" ]]; then
export KERNEL_SELFTESTS="/run/input/bpf"
fi
dmesg -C
if ! "$@"; then
dmesg
exit 1
fi
touch "/run/output/success"
readonly output="${1}"
shift
echo Running tests...
go test -v -coverpkg=./... -coverprofile="$output/coverage.txt" -count 1 ./...
touch "$output/success"
exit 0
fi
# Pull all dependencies, so that we can run tests without the
# vm having network access.
go mod download
# Use sudo if /dev/kvm isn't accessible by the current user.
sudo=""
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
sudo="sudo"
fi
readonly sudo
readonly kernel_version="${1:-}"
if [[ -z "${kernel_version}" ]]; then
echo "Expecting kernel version as first argument"
exit 1
fi
shift
readonly kernel="linux-${kernel_version}.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.bz"
readonly input="$(mktemp -d)"
readonly output="$(mktemp -d)"
readonly tmp_dir="${TMPDIR:-/tmp}"
readonly branch="${BRANCH:-master}"
@ -99,7 +60,6 @@ fetch() {
}
fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then
mkdir "${input}/bpf"
@ -108,16 +68,25 @@ else
echo "No selftests found, disabling"
fi
args=(-v -short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...)
if (( $# > 0 )); then
args=("$@")
echo Testing on "${kernel_version}"
$sudo virtme-run --kimg "${tmp_dir}/${kernel}" --memory 512M --pwd \
--rw \
--rwdir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--rodir=/run/go-path="$(go env GOPATH)" \
--rwdir=/run/go-cache="$(go env GOCACHE)" \
--script-sh "PATH=\"$PATH\" $(realpath "$0") --in-vm /run/output" \
--qemu-opts -smp 2 # need at least two CPUs for some tests
if [[ ! -e "${output}/success" ]]; then
echo "Test failed on ${kernel_version}"
exit 1
else
echo "Test successful on ${kernel_version}"
if [[ -v COVERALLS_TOKEN ]]; then
goveralls -coverprofile="${output}/coverage.txt" -service=semaphore -repotoken "$COVERALLS_TOKEN"
fi
fi
export GOFLAGS=-mod=readonly
export CGO_ENABLED=0
echo Testing on "${kernel_version}"
go test -exec "$script --exec-vm $input" "${args[@]}"
echo "Test successful on ${kernel_version}"
rm -r "${input}"
$sudo rm -r "${input}"
$sudo rm -r "${output}"

View File

@ -1,16 +0,0 @@
package apparmor
import "errors"
var (
// IsEnabled returns true if apparmor is enabled for the host.
IsEnabled = isEnabled
// ApplyProfile will apply the profile with the specified name to the process after
// the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled
// on other platforms.
ApplyProfile = applyProfile
// ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported.
ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
)
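
For orientation, a minimal sketch of how a caller uses this package's exported surface on either side of the revert (IsEnabled and ApplyProfile are the names shown above; the profile name is illustrative):

package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/apparmor"
)

func main() {
	if !apparmor.IsEnabled() {
		fmt.Println("AppArmor is not enabled on this host")
		return
	}
	// The profile takes effect on the next exec; "example-profile" is made up.
	if err := apparmor.ApplyProfile("example-profile"); err != nil {
		fmt.Println("apply profile:", err)
	}
}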

View File

@ -15,8 +15,8 @@ var (
checkAppArmor sync.Once
)
// isEnabled returns true if apparmor is enabled for the host.
func isEnabled() bool {
// IsEnabled returns true if apparmor is enabled for the host.
func IsEnabled() bool {
checkAppArmor.Do(func() {
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
@ -57,10 +57,9 @@ func changeOnExec(name string) error {
return nil
}
// applyProfile will apply the profile with the specified name to the process after
// the next exec. It is only supported on Linux and produces an error on other
// platforms.
func applyProfile(name string) error {
// ApplyProfile will apply the profile with the specified name to the process after
// the next exec.
func ApplyProfile(name string) error {
if name == "" {
return nil
}

View File

@ -2,11 +2,17 @@
package apparmor
func isEnabled() bool {
import (
"errors"
)
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
func IsEnabled() bool {
return false
}
func applyProfile(name string) error {
func ApplyProfile(name string) error {
if name != "" {
return ErrApparmorNotEnabled
}

View File

@ -258,9 +258,9 @@ func (e *Emulator) Apply(rule devices.Rule) error {
if rule.Allow {
return e.allow(innerRule)
} else {
return e.deny(innerRule)
}
return e.deny(innerRule)
}
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
@ -371,12 +371,3 @@ func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
}
return transitionRules, nil
}
// Rules returns the minimum set of rules necessary to convert a *deny-all*
// cgroup to the emulated filter state (note that this is not the same as a
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
// wrapper around Transition() with the source emulator being an empty cgroup.
func (e *Emulator) Rules() ([]*devices.Rule, error) {
defaultCgroup := &Emulator{defaultAllow: false}
return defaultCgroup.Transition(e)
}
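
A rough sketch of the emulator flow, using only the names visible in this hunk (Apply, Rules, and the deny-all zero value); the concrete device numbers are illustrative:

package emudemo

import (
	devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
	"github.com/opencontainers/runc/libcontainer/devices"
)

// minimalRules starts from the emulator's zero value (deny-all), allows a
// single character device, and asks for the minimal rule set reproducing
// that state. Rules() only exists on the 1.0.0 side of this diff.
func minimalRules() ([]*devices.Rule, error) {
	emu := new(devicesemulator.Emulator)
	err := emu.Apply(devices.Rule{
		Type:        devices.CharDevice,
		Major:       1,
		Minor:       3, // e.g. /dev/null; values are illustrative
		Permissions: "rwm",
		Allow:       true,
	})
	if err != nil {
		return nil, err
	}
	return emu.Rules()
}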

View File

@ -11,7 +11,6 @@ import (
"strconv"
"github.com/cilium/ebpf/asm"
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
@ -23,44 +22,11 @@ const (
)
// DeviceFilter returns eBPF device filter program and its license string
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
// Generate the minimum ruleset for the device rules we are given. While we
// don't care about minimum transitions in cgroupv2, using the emulator
// gives us a guarantee that the behaviour of devices filtering is the same
// as cgroupv1, including security hardenings to avoid misconfiguration
// (such as punching holes in wildcard rules).
emu := new(devicesemulator.Emulator)
for _, rule := range rules {
if err := emu.Apply(*rule); err != nil {
return nil, "", err
}
}
cleanRules, err := emu.Rules()
if err != nil {
return nil, "", err
}
p := &program{
defaultAllow: emu.IsBlacklist(),
}
func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
p := &program{}
p.init()
for idx, rule := range cleanRules {
if rule.Type == devices.WildcardDevice {
// We can safely skip over wildcard entries because there should
// only be one (at most) at the very start to instruct cgroupv1 to
// go into allow-list mode. However we do double-check this here.
if idx != 0 || rule.Allow != emu.IsBlacklist() {
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
}
continue
}
if rule.Allow == p.defaultAllow {
// There should be no rules which have an action equal to the
// default action, the emulator removes those.
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
}
if err := p.appendRule(rule); err != nil {
for i := len(devices) - 1; i >= 0; i-- {
if err := p.appendDevice(devices[i]); err != nil {
return nil, "", err
}
}
@ -69,9 +35,9 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
}
type program struct {
insts asm.Instructions
defaultAllow bool
blockID int
insts asm.Instructions
hasWildCard bool
blockID int
}
func (p *program) init() {
@ -101,35 +67,39 @@ func (p *program) init() {
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
}
// appendRule converts an OCI rule to the relevant eBPF block and adds it
// to the in-progress filter program. In order to operate properly, it must be
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
// with any "a" rules removed).
func (p *program) appendRule(rule *devices.Rule) error {
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
func (p *program) appendDevice(dev *devices.Rule) error {
if p.blockID < 0 {
return errors.New("the program is finalized")
}
if p.hasWildCard {
// All entries after wildcard entry are ignored
return nil
}
var bpfType int32
switch rule.Type {
case devices.CharDevice:
bpfType := int32(-1)
hasType := true
switch dev.Type {
case 'c':
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
case devices.BlockDevice:
case 'b':
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
case 'a':
hasType = false
default:
// We do not permit 'a', nor any other types we don't know about.
return errors.Errorf("invalid type %q", string(rule.Type))
// if not specified in OCI json, typ is set to DeviceTypeAll
return errors.Errorf("invalid Type %q", string(dev.Type))
}
if rule.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", rule.Major)
if dev.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", dev.Major)
}
if rule.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", rule.Major)
if dev.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", dev.Major)
}
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := rule.Minor >= 0
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := dev.Minor >= 0
bpfAccess := int32(0)
for _, r := range rule.Permissions {
for _, r := range dev.Permissions {
switch r {
case 'r':
bpfAccess |= unix.BPF_DEVCG_ACC_READ
@ -149,10 +119,12 @@ func (p *program) appendRule(rule *devices.Rule) error {
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
prevBlockLastIdx = len(p.insts) - 1
)
p.insts = append(p.insts,
// if (R2 != bpfType) goto next
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
)
if hasType {
p.insts = append(p.insts,
// if (R2 != bpfType) goto next
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
)
}
if hasAccess {
p.insts = append(p.insts,
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
@ -164,16 +136,19 @@ func (p *program) appendRule(rule *devices.Rule) error {
if hasMajor {
p.insts = append(p.insts,
// if (R4 != major) goto next
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
)
}
if hasMinor {
p.insts = append(p.insts,
// if (R5 != minor) goto next
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
)
}
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
if !hasType && !hasAccess && !hasMajor && !hasMinor {
p.hasWildCard = true
}
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
// set blockSym to the first instruction we added in this iteration
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
p.blockID++
@ -181,14 +156,14 @@ func (p *program) appendRule(rule *devices.Rule) error {
}
func (p *program) finalize() (asm.Instructions, error) {
var v int32
if p.defaultAllow {
v = 1
if p.hasWildCard {
// acceptBlock with asm.Return() is already inserted
return p.insts, nil
}
blockSym := "block-" + strconv.Itoa(p.blockID)
p.insts = append(p.insts,
// R0 <- v
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
// R0 <- 0
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
asm.Return(),
)
p.blockID = -1
@ -196,7 +171,7 @@ func (p *program) finalize() (asm.Instructions, error) {
}
func acceptBlock(accept bool) asm.Instructions {
var v int32
v := int32(0)
if accept {
v = 1
}

View File

@ -0,0 +1,57 @@
package ebpf
import (
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
nilCloser := func() error {
return nil
}
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
// This limit is not inherited into the container.
memlockLimit := &unix.Rlimit{
Cur: unix.RLIM_INFINITY,
Max: unix.RLIM_INFINITY,
}
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
spec := &ebpf.ProgramSpec{
Type: ebpf.CGroupDevice,
Instructions: insts,
License: license,
}
prog, err := ebpf.NewProgram(spec)
if err != nil {
return nilCloser, err
}
err = link.RawAttachProgram(link.RawAttachProgramOptions{
Target: dirFD,
Program: prog,
Attach: ebpf.AttachCGroupDevice,
Flags: unix.BPF_F_ALLOW_MULTI,
})
if err != nil {
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
}
closer := func() error {
err = link.RawDetachProgram(link.RawDetachProgramOptions{
Target: dirFD,
Program: prog,
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
}
return nil
}
return closer, nil
}
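
Putting the two pieces together, roughly as fs2/devices.go further down in this diff does; the rule and the already-open cgroup v2 directory fd are illustrative, and the import paths follow runc's tree layout:

package ebpfdemo

import (
	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
	"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
	"github.com/opencontainers/runc/libcontainer/devices"
)

// attachAllowNullOnly compiles a one-rule device program and attaches it to
// the cgroup directory referred to by dirFD. The returned closer detaches
// the program again; the caller keeps dirFD open for as long as it needs it.
func attachAllowNullOnly(dirFD int) (func() error, error) {
	rules := []*devices.Rule{
		{Type: devices.CharDevice, Major: 1, Minor: 3, Permissions: "rwm", Allow: true},
	}
	insts, license, err := devicefilter.DeviceFilter(rules)
	if err != nil {
		return nil, err
	}
	return ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
}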

View File

@ -1,240 +0,0 @@
package ebpf
import (
"fmt"
"os"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func nilCloser() error {
return nil
}
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
type bpfAttrQuery struct {
TargetFd uint32
AttachType uint32
QueryType uint32
AttachFlags uint32
ProgIds uint64 // __aligned_u64
ProgCnt uint32
}
// Currently you can only have 64 eBPF programs attached to a cgroup.
size := 64
retries := 0
for retries < 10 {
progIds := make([]uint32, size)
query := bpfAttrQuery{
TargetFd: uint32(dirFd),
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
ProgCnt: uint32(len(progIds)),
}
// Fetch the list of program ids.
_, _, errno := unix.Syscall(unix.SYS_BPF,
uintptr(unix.BPF_PROG_QUERY),
uintptr(unsafe.Pointer(&query)),
unsafe.Sizeof(query))
size = int(query.ProgCnt)
runtime.KeepAlive(query)
if errno != 0 {
// On ENOSPC we get the correct number of programs.
if errno == unix.ENOSPC {
retries++
continue
}
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
}
// Convert the ids to program handles.
progIds = progIds[:size]
programs := make([]*ebpf.Program, len(progIds))
for idx, progId := range progIds {
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
if err != nil {
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
}
programs[idx] = program
}
runtime.KeepAlive(progIds)
return programs, nil
}
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
}
var (
haveBpfProgReplaceBool bool
haveBpfProgReplaceOnce sync.Once
)
// Loosely based on the BPF_F_REPLACE support check in
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
//
// TODO: move this logic to cilium/ebpf
func haveBpfProgReplace() bool {
haveBpfProgReplaceOnce.Do(func() {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupDevice,
License: "MIT",
Instructions: asm.Instructions{
asm.Mov.Imm(asm.R0, 0),
asm.Return(),
},
})
if err != nil {
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
return
}
defer prog.Close()
devnull, err := os.Open("/dev/null")
if err != nil {
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
return
}
defer devnull.Close()
// We know that we have BPF_PROG_ATTACH since we can load
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
// we know that the feature isn't present.
err = link.RawAttachProgram(link.RawAttachProgramOptions{
// We rely on this fd being checked after attachFlags.
Target: int(devnull.Fd()),
// Attempt to "replace" bad fds with this program.
Program: prog,
Attach: ebpf.AttachCGroupDevice,
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
})
if errors.Is(err, unix.EINVAL) {
// not supported
return
}
// attach_flags test succeeded.
if !errors.Is(err, unix.EBADF) {
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
}
haveBpfProgReplaceBool = true
})
return haveBpfProgReplaceBool
}
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
// This limit is not inherited into the container.
memlockLimit := &unix.Rlimit{
Cur: unix.RLIM_INFINITY,
Max: unix.RLIM_INFINITY,
}
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
// Get the list of existing programs.
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
if err != nil {
return nilCloser, err
}
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
// Generate new program.
spec := &ebpf.ProgramSpec{
Type: ebpf.CGroupDevice,
Instructions: insts,
License: license,
}
prog, err := ebpf.NewProgram(spec)
if err != nil {
return nilCloser, err
}
// If there is only one old program, we can just replace it directly.
var (
replaceProg *ebpf.Program
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
)
if useReplaceProg {
replaceProg = oldProgs[0]
attachFlags |= unix.BPF_F_REPLACE
}
err = link.RawAttachProgram(link.RawAttachProgramOptions{
Target: dirFd,
Program: prog,
Replace: replaceProg,
Attach: ebpf.AttachCGroupDevice,
Flags: attachFlags,
})
if err != nil {
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
}
closer := func() error {
err = link.RawDetachProgram(link.RawDetachProgramOptions{
Target: dirFd,
Program: prog,
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
}
// TODO: Should we attach the old filters back in this case? Otherwise
// we fail-open on a security feature, which is a bit scary.
return nil
}
if !useReplaceProg {
logLevel := logrus.DebugLevel
// If there was more than one old program, give a warning (since this
// really shouldn't happen with runc-managed cgroups) and then detach
// all the old programs.
if len(oldProgs) > 1 {
// NOTE: Ideally this should be a warning but it turns out that
// systemd-managed cgroups trigger this warning (apparently
// systemd doesn't delete old non-systemd programs when
// setting properties).
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
logLevel = logrus.InfoLevel
}
for idx, oldProg := range oldProgs {
// Output some extra debug info.
if info, err := oldProg.Info(); err == nil {
fields := logrus.Fields{
"type": info.Type.String(),
"tag": info.Tag,
"name": info.Name,
}
if id, ok := info.ID(); ok {
fields["id"] = id
}
if runCount, ok := info.RunCount(); ok {
fields["run_count"] = runCount
}
if runtime, ok := info.Runtime(); ok {
fields["runtime"] = runtime.String()
}
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
}
err = link.RawDetachProgram(link.RawDetachProgramOptions{
Target: dirFd,
Program: oldProg,
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
}
}
}
return closer, nil
}

View File

@ -6,17 +6,15 @@ import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type BlkioGroup struct {
weightFilename string
weightDeviceFilename string
}
func (s *BlkioGroup) Name() string {
@ -28,47 +26,42 @@ func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
}
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
s.detectWeightFilenames(path)
if r.BlkioWeight != 0 {
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
return err
}
}
if r.BlkioLeafWeight != 0 {
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
return err
}
}
for _, wd := range r.BlkioWeightDevice {
if wd.Weight != 0 {
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
return err
}
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
return err
}
if wd.LeafWeight != 0 {
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
return err
}
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
return err
}
}
@ -113,7 +106,7 @@ func splitBlkioStatLine(r rune) bool {
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
var blkioStats []cgroups.BlkioStatEntry
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
f, err := fscommon.OpenFile(dir, file, os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return blkioStats, nil
@ -168,7 +161,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
filename string
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
}
bfqDebugStats := []blkioStatInfo{
var bfqDebugStats = []blkioStatInfo{
{
filename: "blkio.bfq.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@ -202,7 +195,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
bfqStats := []blkioStatInfo{
var bfqStats = []blkioStatInfo{
{
filename: "blkio.bfq.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -212,7 +205,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
cfqStats := []blkioStatInfo{
var cfqStats = []blkioStatInfo{
{
filename: "blkio.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@ -246,7 +239,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
throttleRecursiveStats := []blkioStatInfo{
var throttleRecursiveStats = []blkioStatInfo{
{
filename: "blkio.throttle.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -256,7 +249,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
baseStats := []blkioStatInfo{
var baseStats = []blkioStatInfo{
{
filename: "blkio.throttle.io_serviced",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -266,7 +259,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
},
}
orderedStats := [][]blkioStatInfo{
var orderedStats = [][]blkioStatInfo{
bfqDebugStats,
bfqStats,
cfqStats,
@ -287,7 +280,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
return err
}
*statInfo.blkioStatEntriesPtr = blkioStats
// finish if all stats are gathered
//finish if all stats are gathered
if i == len(statGroup)-1 {
return nil
}
@ -295,17 +288,3 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
}
return nil
}
func (s *BlkioGroup) detectWeightFilenames(path string) {
if s.weightFilename != "" {
// Already detected.
return
}
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
s.weightFilename = "blkio.weight"
s.weightDeviceFilename = "blkio.weight_device"
} else {
s.weightFilename = "blkio.bfq.weight"
s.weightDeviceFilename = "blkio.bfq.weight_device"
}
}
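
The net effect of the weight handling above, with illustrative values (on a BFQ-only kernel the same strings go to blkio.bfq.weight and blkio.bfq.weight_device; cgroups.WriteFile is the 1.0.0 helper, the rc95 side uses fscommon.WriteFile):

package blkiodemo

import "github.com/opencontainers/runc/libcontainer/cgroups"

// setWeights writes a cgroup-wide relative weight and one per-device
// override in the "<major>:<minor> <weight>" form (8:16 is /dev/sdb here).
func setWeights(path string) error {
	if err := cgroups.WriteFile(path, "blkio.weight", "500"); err != nil {
		return err
	}
	return cgroups.WriteFile(path, "blkio.weight_device", "8:16 300")
}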

View File

@ -13,7 +13,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type CpuGroup struct{}
type CpuGroup struct {
}
func (s *CpuGroup) Name() string {
return "cpu"
@ -25,7 +26,7 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// We should set the real-Time group scheduling settings before moving
@ -41,12 +42,12 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
if r.CpuRtPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
return err
}
}
if r.CpuRtRuntime != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
return err
}
}
@ -56,7 +57,7 @@ func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
if r.CpuShares != 0 {
shares := r.CpuShares
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
return err
}
// read it back
@ -72,12 +73,12 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
}
}
if r.CpuPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
return err
}
}
if r.CpuQuota != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
return err
}
}
@ -85,7 +86,7 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
}
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY)
f, err := fscommon.OpenFile(path, "cpu.stat", os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil

View File

@ -32,7 +32,8 @@ const (
clockTicks uint64 = 100
)
type CpuacctGroup struct{}
type CpuacctGroup struct {
}
func (s *CpuacctGroup) Name() string {
return "cpuacct"
@ -90,7 +91,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
// Expected format:
// user <usage in ticks>
// system <usage in ticks>
data, err := cgroups.ReadFile(path, cgroupCpuacctStat)
data, err := fscommon.ReadFile(path, cgroupCpuacctStat)
if err != nil {
return 0, 0, err
}
@ -116,7 +117,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
func getPercpuUsage(path string) ([]uint64, error) {
percpuUsage := []uint64{}
data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu")
data, err := fscommon.ReadFile(path, "cpuacct.usage_percpu")
if err != nil {
return percpuUsage, err
}
@ -134,7 +135,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
usageKernelMode := []uint64{}
usageUserMode := []uint64{}
file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
file, err := fscommon.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
if os.IsNotExist(err) {
return usageKernelMode, usageUserMode, nil
} else if err != nil {
@ -143,7 +144,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
defer file.Close()
scanner := bufio.NewScanner(file)
scanner.Scan() // skipping header line
scanner.Scan() //skipping header line
for scanner.Scan() {
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)

View File

@ -16,7 +16,8 @@ import (
"golang.org/x/sys/unix"
)
type CpusetGroup struct{}
type CpusetGroup struct {
}
func (s *CpusetGroup) Name() string {
return "cpuset"
@ -28,12 +29,12 @@ func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
if r.CpusetCpus != "" {
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
if err := fscommon.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
return err
}
}
if r.CpusetMems != "" {
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
if err := fscommon.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
return err
}
}
@ -155,7 +156,7 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
return err
}
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
if err := os.Mkdir(dir, 0755); err != nil && !os.IsExist(err) {
return err
}
// We didn't inherit cpuset configs from parent, but we have
@ -175,10 +176,10 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
}
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
if cpus, err = fscommon.ReadFile(parent, "cpuset.cpus"); err != nil {
return
}
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
if mems, err = fscommon.ReadFile(parent, "cpuset.mems"); err != nil {
return
}
return cpus, mems, nil
@ -205,7 +206,7 @@ func cpusetEnsureParent(current string) error {
if err := cpusetEnsureParent(parent); err != nil {
return err
}
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err
}
return cpusetCopyIfNeeded(current, parent)
@ -224,12 +225,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
}
if isEmptyCpuset(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err
}
}
if isEmptyCpuset(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err
}
}

View File

@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns"
@ -35,7 +36,7 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
}
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
list, err := cgroups.ReadFile(path, "devices.list")
list, err := fscommon.ReadFile(path, "devices.list")
if err != nil {
return nil, err
}
@ -80,7 +81,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
if rule.Allow {
file = "devices.allow"
}
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil {
return err
}
}

View File

@ -10,12 +10,14 @@ import (
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
type FreezerGroup struct{}
type FreezerGroup struct {
}
func (s *FreezerGroup) Name() string {
return "freezer"
@ -33,7 +35,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Freezing failed, and it is bad and dangerous
// to leave the cgroup in FROZEN or FREEZING
// state, so (try to) thaw it back.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
}
}()
@ -66,11 +68,11 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// the chances to succeed in freezing
// in case new processes keep appearing
// in the cgroup.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
time.Sleep(10 * time.Millisecond)
}
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
return err
}
@ -81,7 +83,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// system.
time.Sleep(10 * time.Microsecond)
}
state, err := cgroups.ReadFile(path, "freezer.state")
state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil {
return err
}
@ -102,7 +104,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Despite our best efforts, it got stuck in FREEZING.
return errors.New("unable to freeze")
case configs.Thawed:
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
case configs.Undefined:
return nil
default:
@ -116,7 +118,7 @@ func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
for {
state, err := cgroups.ReadFile(path, "freezer.state")
state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil {
// If the kernel is too old, then we just treat the freezer as
// being in an "undefined" state.

View File

@ -64,10 +64,8 @@ func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgro
}
// The absolute path to the root of the cgroup hierarchies.
var (
cgroupRootLock sync.Mutex
cgroupRoot string
)
var cgroupRootLock sync.Mutex
var cgroupRoot string
const defaultCgroupRoot = "/sys/fs/cgroup"
@ -395,7 +393,7 @@ func join(path string, pid int) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
return cgroups.WriteCgroupProc(path, pid)

View File

@ -11,7 +11,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type HugetlbGroup struct{}
type HugetlbGroup struct {
}
func (s *HugetlbGroup) Name() string {
return "hugetlb"
@ -23,7 +24,7 @@ func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
if err := fscommon.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

View File

@ -25,7 +25,8 @@ const (
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
)
type MemoryGroup struct{}
type MemoryGroup struct {
}
func (s *MemoryGroup) Name() string {
return "memory"
@ -40,7 +41,7 @@ func setMemory(path string, val int64) error {
return nil
}
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
if !errors.Is(err, unix.EBUSY) {
return err
}
@ -64,7 +65,7 @@ func setSwap(path string, val int64) error {
return nil
}
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
}
func setMemoryAndSwap(path string, r *configs.Resources) error {
@ -117,20 +118,20 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
// ignore KernelMemory and KernelMemoryTCP
if r.MemoryReservation != 0 {
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
return err
}
}
if r.OomKillDisable {
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
return err
}
}
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
return nil
} else if *r.MemorySwappiness <= 100 {
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
return err
}
} else {
@ -142,7 +143,7 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY)
statsFile, err := fscommon.OpenFile(path, "memory.stat", os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil
@ -199,6 +200,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
func memoryAssigned(cgroup *configs.Cgroup) bool {
return cgroup.Resources.Memory != 0 ||
cgroup.Resources.MemoryReservation != 0 ||
cgroup.Resources.MemorySwap > 0 ||
cgroup.Resources.OomKillDisable ||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
memoryData := cgroups.MemoryData{}
@ -249,13 +258,12 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
)
stats := cgroups.PageUsageByNUMA{}
file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY)
file, err := fscommon.OpenFile(cgroupPath, filename, os.O_RDONLY)
if os.IsNotExist(err) {
return stats, nil
} else if err != nil {
return stats, err
}
defer file.Close()
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
// and it looks like this:

View File

@ -6,10 +6,12 @@ import (
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetClsGroup struct{}
type NetClsGroup struct {
}
func (s *NetClsGroup) Name() string {
return "net_cls"
@ -21,7 +23,7 @@ func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
if r.NetClsClassid != 0 {
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
return err
}
}

View File

@ -4,10 +4,12 @@ package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type NetPrioGroup struct{}
type NetPrioGroup struct {
}
func (s *NetPrioGroup) Name() string {
return "net_prio"
@ -19,7 +21,7 @@ func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
for _, prioMap := range r.NetPrioIfpriomap {
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
return err
}
}

View File

@ -7,7 +7,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type PerfEventGroup struct{}
type PerfEventGroup struct {
}
func (s *PerfEventGroup) Name() string {
return "perf_event"

View File

@ -12,7 +12,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
type PidsGroup struct{}
type PidsGroup struct {
}
func (s *PidsGroup) Name() string {
return "pids"
@ -31,7 +32,7 @@ func (s *PidsGroup) Set(path string, r *configs.Resources) error {
limit = strconv.FormatInt(r.PidsLimit, 10)
}
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
return err
}
}

View File

@ -23,7 +23,7 @@ func setCpu(dirPath string, r *configs.Resources) error {
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
if r.CpuWeight != 0 {
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
return err
}
}
@ -40,16 +40,15 @@ func setCpu(dirPath string, r *configs.Resources) error {
period = 100000
}
str += " " + strconv.FormatUint(period, 10)
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
if err := fscommon.WriteFile(dirPath, "cpu.max", str); err != nil {
return err
}
}
return nil
}
func statCpu(dirPath string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
f, err := fscommon.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
if err != nil {
return err
}
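
For reference, the "cpu.max" string assembled above is the standard cgroup v2 "<quota> <period>" pair; a direct write with the rc95-side helper and illustrative values looks like:

package cpudemo

import "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"

// halfCPU caps the cgroup at half a CPU: 50000us of quota per 100000us period.
func halfCPU(dirPath string) error {
	return fscommon.WriteFile(dirPath, "cpu.max", "50000 100000")
}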

View File

@ -3,7 +3,7 @@
package fs2
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
@ -17,12 +17,12 @@ func setCpuset(dirPath string, r *configs.Resources) error {
}
if r.CpusetCpus != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
return err
}
}
if r.CpusetMems != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
if err := fscommon.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
return err
}
}

View File

@ -6,12 +6,12 @@ import (
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
func supportedControllers() (string, error) {
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
return fscommon.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
}
// needAnyControllers returns whether we enable some supported controllers or not,
@ -92,7 +92,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
for i, e := range elements {
current = filepath.Join(current, e)
if i > 0 {
if err := os.Mkdir(current, 0o755); err != nil {
if err := os.Mkdir(current, 0755); err != nil {
if !os.IsExist(err) {
return err
}
@ -105,7 +105,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
}
}()
}
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
cgType, _ := fscommon.ReadFile(current, cgTypeFile)
cgType = strings.TrimSpace(cgType)
switch cgType {
// If the cgroup is in an invalid mode (usually this means there's an internal
@ -122,7 +122,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
// since that means we're a properly delegated cgroup subtree) but in
// this case there's not much we can do and it's better than giving an
// error.
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
_ = fscommon.WriteFile(current, cgTypeFile, "threaded")
}
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
// (and you cannot usually take a cgroup out of threaded mode).
@ -136,11 +136,11 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
}
// enable all supported controllers
if i < len(elements)-1 {
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
if err := fscommon.WriteFile(current, cgStCtlFile, res); err != nil {
// try write one by one
allCtrs := strings.Split(res, " ")
for _, ctr := range allCtrs {
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
_ = fscommon.WriteFile(current, cgStCtlFile, ctr)
}
}
// Some controllers might not be enabled when rootless or containerized,

View File

@ -82,7 +82,9 @@ func parseCgroupFile(path string) (string, error) {
}
func parseCgroupFromReader(r io.Reader) (string, error) {
s := bufio.NewScanner(r)
var (
s = bufio.NewScanner(r)
)
for s.Scan() {
var (
text = s.Text()

View File

@ -58,15 +58,29 @@ func setDevices(dirPath string, r *configs.Resources) error {
if r.SkipDevices {
return nil
}
// XXX: This is currently a white-list (but all callers pass a blacklist of
// devices). This is bad for a whole variety of reasons, but will need
// to be fixed with co-ordinated effort with downstreams.
insts, license, err := devicefilter.DeviceFilter(r.Devices)
if err != nil {
return err
}
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
if err != nil {
return errors.Errorf("cannot get dir FD for %s", dirPath)
}
defer unix.Close(dirFD)
// XXX: This code is currently incorrect when it comes to updating an
// existing cgroup with new rules (new rulesets are just appended to
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
// programs.
//
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
// race-free blacklist because it acts as a whitelist by default, and
// having a deny-everything program cannot be overridden by other
// programs. You could temporarily insert a deny-everything program
// but that would result in spurious failures during updates.
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
if !canSkipEBPFError(r) {
return err

View File

@ -3,20 +3,27 @@
package fs2
import (
"bufio"
stdErrors "errors"
"fmt"
"os"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func setFreezer(dirPath string, state configs.FreezerState) error {
if err := supportsFreezer(dirPath); err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state == configs.Undefined || state == configs.Thawed {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
var stateStr string
switch state {
case configs.Undefined:
@ -29,23 +36,11 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return errors.Errorf("invalid freezer state %q requested", state)
}
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
if err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state != configs.Frozen {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
defer fd.Close()
if _, err := fd.WriteString(stateStr); err != nil {
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil {
return err
}
// Confirm that the cgroup did actually change states.
if actualState, err := readFreezer(dirPath, fd); err != nil {
if actualState, err := getFreezer(dirPath); err != nil {
return err
} else if actualState != state {
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
@ -53,8 +48,13 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return nil
}
func supportsFreezer(dirPath string) error {
_, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
return err
}
func getFreezer(dirPath string) (configs.FreezerState, error) {
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
state, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
if err != nil {
// If the kernel is too old, then we just treat the freezer as being in
// an "undefined" state.
@ -63,67 +63,12 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
}
return configs.Undefined, err
}
defer fd.Close()
return readFreezer(dirPath, fd)
}
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
if _, err := fd.Seek(0, 0); err != nil {
return configs.Undefined, err
}
state := make([]byte, 2)
if _, err := fd.Read(state); err != nil {
return configs.Undefined, err
}
switch string(state) {
case "0\n":
switch strings.TrimSpace(state) {
case "0":
return configs.Thawed, nil
case "1\n":
return waitFrozen(dirPath)
case "1":
return configs.Frozen, nil
default:
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
}
}
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
func waitFrozen(dirPath string) (configs.FreezerState, error) {
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
if err != nil {
return configs.Undefined, err
}
defer fd.Close()
// XXX: Simple wait/read/retry is used here. An implementation
// based on poll(2) or inotify(7) is possible, but it makes the code
// much more complicated. Maybe address this later.
const (
// Perform maxIter with waitTime in between iterations.
waitTime = 10 * time.Millisecond
maxIter = 1000
)
scanner := bufio.NewScanner(fd)
for i := 0; scanner.Scan(); {
if i == maxIter {
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
}
line := scanner.Text()
val := strings.TrimPrefix(line, "frozen ")
if val != line { // got prefix
if val[0] == '1' {
return configs.Frozen, nil
}
i++
// wait, then re-read
time.Sleep(waitTime)
_, err := fd.Seek(0, 0)
if err != nil {
return configs.Undefined, err
}
}
}
// Should only reach here either on read error,
// or if the file does not contain "frozen " line.
return configs.Undefined, scanner.Err()
}
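
A small standalone sketch of the two files this code drives (cgroup.freeze holds the requested state, cgroup.events reports when the freeze has actually settled); the cgroup path is illustrative:

package main

import (
	"fmt"
	"os"
	"strings"
)

func main() {
	dir := "/sys/fs/cgroup/mycontainer"

	// "0\n" means thawed, "1\n" means a freeze has been requested.
	state, _ := os.ReadFile(dir + "/cgroup.freeze")
	fmt.Println("cgroup.freeze:", strings.TrimSpace(string(state)))

	// cgroup.events gains a "frozen 1" line once every task is frozen.
	events, _ := os.ReadFile(dir + "/cgroup.events")
	for _, line := range strings.Split(string(events), "\n") {
		if strings.HasPrefix(line, "frozen ") {
			fmt.Println("cgroup.events:", line)
		}
	}
}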

View File

@ -51,7 +51,7 @@ func (m *manager) getControllers() error {
return nil
}
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
data, err := fscommon.ReadFile(m.dirPath, "cgroup.controllers")
if err != nil {
if m.rootless && m.config.Path == "" {
return nil
@ -98,7 +98,9 @@ func (m *manager) GetAllPids() ([]int, error) {
}
func (m *manager) GetStats() (*cgroups.Stats, error) {
var errs []error
var (
errs []error
)
st := cgroups.NewStats()
@ -197,7 +199,7 @@ func (m *manager) setUnified(res map[string]string) error {
if strings.Contains(k, "/") {
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
}
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
if err := fscommon.WriteFile(m.dirPath, k, v); err != nil {
errC := errors.Cause(err)
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {

View File

@ -21,7 +21,7 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
return nil
}
for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err
}
}

View File

@ -4,95 +4,60 @@ package fs2
import (
"bufio"
"bytes"
"fmt"
"os"
"strconv"
"strings"
"github.com/sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
func isIoSet(r *configs.Resources) bool {
return r.BlkioWeight != 0 ||
len(r.BlkioWeightDevice) > 0 ||
len(r.BlkioThrottleReadBpsDevice) > 0 ||
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
len(r.BlkioThrottleWriteIOPSDevice) > 0
}
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
func bfqDeviceWeightSupported(bfq *os.File) bool {
if bfq == nil {
return false
}
_, _ = bfq.Seek(0, 0)
buf := make([]byte, 32)
_, _ = bfq.Read(buf)
// If only a single number (the default weight) is read back, we have an older kernel.
_, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64)
return err != nil
}
func setIo(dirPath string, r *configs.Resources) error {
if !isIoSet(r) {
return nil
}
// If BFQ IO scheduler is available, use it.
var bfq *os.File
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
var err error
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
if err == nil {
defer bfq.Close()
} else if !os.IsNotExist(err) {
return err
}
}
if r.BlkioWeight != 0 {
if bfq != nil { // Use BFQ.
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
filename := "io.bfq.weight"
if err := fscommon.WriteFile(dirPath, filename,
strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
// if io.bfq.weight does not exist, then the bfq module is not loaded.
// Fall back to io.weight with a conversion scheme
if !os.IsNotExist(err) {
return err
}
} else {
// Fallback to io.weight with a conversion scheme.
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
if err := fscommon.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
return err
}
}
}
if bfqDeviceWeightSupported(bfq) {
for _, wd := range r.BlkioWeightDevice {
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
}
}
}
for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
return err
}
}
for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
return err
}
}
@ -102,7 +67,7 @@ func setIo(dirPath string, r *configs.Resources) error {
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
ret := map[string][]string{}
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
f, err := fscommon.OpenFile(dirPath, name, os.O_RDONLY)
if err != nil {
return nil, err
}
@ -123,22 +88,22 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
}
func statIo(dirPath string, stats *cgroups.Stats) error {
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
values, err := readCgroup2MapFile(dirPath, "io.stat")
if err != nil {
return err
}
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var parsedStats cgroups.BlkioStats
for k, v := range values {
d := strings.Split(k, ":")
if len(d) != 2 {
continue
}
major, err := strconv.ParseUint(d[0], 10, 64)
major, err := strconv.ParseUint(d[0], 10, 0)
if err != nil {
return err
}
minor, err := strconv.ParseUint(d[1], 10, 64)
minor, err := strconv.ParseUint(d[1], 10, 0)
if err != nil {
return err
}
@ -150,32 +115,15 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
}
op := d[0]
// Map to the cgroupv1 naming and layout (in separate tables).
var targetTable *[]cgroups.BlkioStatEntry
// Accommodate the cgroup v1 naming
switch op {
// Equivalent to cgroupv1's blkio.io_service_bytes.
case "rbytes":
op = "Read"
targetTable = &parsedStats.IoServiceBytesRecursive
op = "read"
case "wbytes":
op = "Write"
targetTable = &parsedStats.IoServiceBytesRecursive
// Equivalent to cgroupv1's blkio.io_serviced.
case "rios":
op = "Read"
targetTable = &parsedStats.IoServicedRecursive
case "wios":
op = "Write"
targetTable = &parsedStats.IoServicedRecursive
default:
// Skip over entries we cannot map to cgroupv1 stats for now.
// In the future we should expand the stats struct to include
// them.
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
continue
op = "write"
}
value, err := strconv.ParseUint(d[1], 10, 64)
value, err := strconv.ParseUint(d[1], 10, 0)
if err != nil {
return err
}
@ -186,9 +134,9 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
Minor: minor,
Value: value,
}
*targetTable = append(*targetTable, entry)
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
}
}
stats.BlkioStats = parsedStats
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
return nil
}
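As a sketch of the io.stat format the function above consumes (the sample line and device numbers are made up), the parsing boils down to splitting the device key on ":" and each counter on "=":

package main

import (
    "fmt"
    "strconv"
    "strings"
)

func main() {
    // Hypothetical io.stat line for device 8:0.
    line := "8:0 rbytes=1459200 wbytes=314773504 rios=192 wios=353"
    fields := strings.Fields(line)
    dev := strings.Split(fields[0], ":")
    major, _ := strconv.ParseUint(dev[0], 10, 64)
    minor, _ := strconv.ParseUint(dev[1], 10, 64)
    for _, kv := range fields[1:] {
        d := strings.Split(kv, "=")
        val, _ := strconv.ParseUint(d[1], 10, 64)
        fmt.Printf("%d:%d %s=%d\n", major, minor, d[0], val)
    }
}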

View File

@ -52,13 +52,13 @@ func setMemory(dirPath string, r *configs.Resources) error {
}
// never write empty string to `memory.swap.max`, it means set to 0.
if swapStr != "" {
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
return err
}
}
if val := numToStr(r.Memory); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.max", val); err != nil {
return err
}
}
@ -66,7 +66,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
// cgroup.Resources.KernelMemory is ignored
if val := numToStr(r.MemoryReservation); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
if err := fscommon.WriteFile(dirPath, "memory.low", val); err != nil {
return err
}
}
@ -76,7 +76,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
func statMemory(dirPath string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
statsFile, err := fscommon.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
if err != nil {
return err
}

View File

@ -23,7 +23,7 @@ func setPids(dirPath string, r *configs.Resources) error {
return nil
}
if val := numToStr(r.PidsLimit); val != "" {
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil {
return err
}
}
@ -34,9 +34,9 @@ func setPids(dirPath string, r *configs.Resources) error {
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
// if the controller is not enabled, let's read PIDS from cgroups.procs
// (or threads if cgroup.threads is enabled)
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
contents, err := fscommon.ReadFile(dirPath, "cgroup.procs")
if errors.Is(err, unix.ENOTSUP) {
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
contents, err = fscommon.ReadFile(dirPath, "cgroup.threads")
}
if err != nil {
return err

View File

@ -0,0 +1,51 @@
// +build linux
package fscommon
import (
"bytes"
"os"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// WriteFile writes data to a cgroup file in dir.
// It is supposed to be used for cgroup files only.
func WriteFile(dir, file, data string) error {
fd, err := OpenFile(dir, file, unix.O_WRONLY)
if err != nil {
return err
}
defer fd.Close()
if err := retryingWriteFile(fd, data); err != nil {
return errors.Wrapf(err, "failed to write %q", data)
}
return nil
}
// ReadFile reads data from a cgroup file in dir.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
fd, err := OpenFile(dir, file, unix.O_RDONLY)
if err != nil {
return "", err
}
defer fd.Close()
var buf bytes.Buffer
_, err = buf.ReadFrom(fd)
return buf.String(), err
}
func retryingWriteFile(fd *os.File, data string) error {
for {
_, err := fd.Write([]byte(data))
if errors.Is(err, unix.EINTR) {
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
continue
}
return err
}
}
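A minimal usage sketch for the helpers above, assuming the rc95 import path github.com/opencontainers/runc/libcontainer/cgroups/fscommon; the cgroup directory is illustrative.

package main

import (
    "fmt"

    "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
)

func setPidsLimit() error {
    dir := "/sys/fs/cgroup/mygroup" // illustrative path
    if err := fscommon.WriteFile(dir, "pids.max", "100"); err != nil {
        return err
    }
    limit, err := fscommon.ReadFile(dir, "pids.max")
    if err != nil {
        return err
    }
    fmt.Print(limit)
    return nil
}

func main() {
    if err := setPidsLimit(); err != nil {
        fmt.Println("error:", err)
    }
}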

View File

@ -1,7 +1,6 @@
package cgroups
package fscommon
import (
"bytes"
"os"
"strings"
"sync"
@ -11,54 +10,6 @@ import (
"golang.org/x/sys/unix"
)
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
return openFile(dir, file, flags)
}
// ReadFile reads data from a cgroup file in dir.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
fd, err := OpenFile(dir, file, unix.O_RDONLY)
if err != nil {
return "", err
}
defer fd.Close()
var buf bytes.Buffer
_, err = buf.ReadFrom(fd)
return buf.String(), err
}
// WriteFile writes data to a cgroup file in dir.
// It is supposed to be used for cgroup files only.
func WriteFile(dir, file, data string) error {
fd, err := OpenFile(dir, file, unix.O_WRONLY)
if err != nil {
return err
}
defer fd.Close()
if err := retryingWriteFile(fd, data); err != nil {
return errors.Wrapf(err, "failed to write %q", data)
}
return nil
}
func retryingWriteFile(fd *os.File, data string) error {
for {
_, err := fd.Write([]byte(data))
if errors.Is(err, unix.EINTR) {
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
continue
}
return err
}
}
const (
cgroupfsDir = "/sys/fs/cgroup"
cgroupfsPrefix = cgroupfsDir + "/"
@ -77,8 +28,7 @@ var (
func prepareOpenat2() error {
prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH,
})
Flags: unix.O_DIRECTORY | unix.O_PATH})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
if err != unix.ENOSYS {
@ -102,6 +52,7 @@ func prepareOpenat2() error {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
}
})
return prepErr
@ -109,7 +60,10 @@ func prepareOpenat2() error {
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func openFile(dir, file string, flags int) (*os.File, error) {
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
mode := os.FileMode(0)
if TestMode && flags&os.O_WRONLY != 0 {
// "emulate" cgroup fs for unit tests

View File

@ -8,19 +8,10 @@ import (
"math"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
var (
ErrNotValidFormat = errors.New("line is not a valid key value format")
// Deprecated: use cgroups.OpenFile instead.
OpenFile = cgroups.OpenFile
// Deprecated: use cgroups.ReadFile instead.
ReadFile = cgroups.ReadFile
// Deprecated: use cgroups.WriteFile instead.
WriteFile = cgroups.WriteFile
)
// ParseUint converts a string to an uint64 integer.
@ -66,7 +57,7 @@ func ParseKeyValue(t string) (string, uint64, error) {
// and returns a value of the specified key. ParseUint is used for value
// conversion.
func GetValueByKey(path, file, key string) (uint64, error) {
content, err := cgroups.ReadFile(path, file)
content, err := ReadFile(path, file)
if err != nil {
return 0, err
}
@ -104,7 +95,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
// If the value read is "max", the math.MaxInt64 is returned.
func GetCgroupParamInt(path, file string) (int64, error) {
contents, err := cgroups.ReadFile(path, file)
contents, err := ReadFile(path, file)
if err != nil {
return 0, err
}
@ -122,7 +113,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {
// GetCgroupParamString reads a string from the specified cgroup file.
func GetCgroupParamString(path, file string) (string, error) {
contents, err := cgroups.ReadFile(path, file)
contents, err := ReadFile(path, file)
if err != nil {
return "", err
}

View File

@ -158,27 +158,14 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
return "", nil
}
// DeviceAllow is the dbus type "a(ss)" which means we need a struct
// to represent it in Go.
type deviceAllowEntry struct {
Path string
Perms string
}
func allowAllDevices() []systemdDbus.Property {
// Setting mode to auto and removing all DeviceAllow rules
// results in allowing access to all devices.
return []systemdDbus.Property{
newProp("DevicePolicy", "auto"),
newProp("DeviceAllow", []deviceAllowEntry{}),
}
}
// generateDeviceProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly.
func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) {
if r.SkipDevices {
return nil, nil
func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, error) {
// DeviceAllow is the type "a(ss)" which means we need a temporary struct
// to represent it in Go.
type deviceAllowEntry struct {
Path string
Perms string
}
properties := []systemdDbus.Property{
@ -190,7 +177,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Figure out the set of rules.
configEmu := &cgroupdevices.Emulator{}
for _, rule := range r.Devices {
for _, rule := range rules {
if err := configEmu.Apply(*rule); err != nil {
return nil, errors.Wrap(err, "apply rule for systemd")
}
@ -202,7 +189,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
if configEmu.IsBlacklist() {
// However, if we're dealing with an allow-all rule then we can do it.
if configEmu.IsAllowAll() {
return allowAllDevices(), nil
return []systemdDbus.Property{
// Run in white-list mode by setting to "auto" and removing all
// DeviceAllow rules.
newProp("DevicePolicy", "auto"),
newProp("DeviceAllow", []deviceAllowEntry{}),
}, nil
}
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
return properties, nil
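As a sketch of what the DevicePolicy/DeviceAllow properties above look like when handed to go-systemd (newProp is a local runc helper reproduced here for illustration; the import paths assume go-systemd v22 and godbus v5):

package main

import (
    systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    dbus "github.com/godbus/dbus/v5"
)

// DeviceAllow has the dbus signature "a(ss)", hence the struct of two strings.
type deviceAllowEntry struct {
    Path  string
    Perms string
}

func newProp(name string, v interface{}) systemdDbus.Property {
    return systemdDbus.Property{Name: name, Value: dbus.MakeVariant(v)}
}

// allowAll builds the "allow access to all devices" property set:
// DevicePolicy=auto plus an empty DeviceAllow list.
func allowAll() []systemdDbus.Property {
    return []systemdDbus.Property{
        newProp("DevicePolicy", "auto"),
        newProp("DeviceAllow", []deviceAllowEntry{}),
    }
}

func main() { _ = allowAll() }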
@ -211,7 +203,8 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Now generate the set of rules we actually need to apply. Unlike the
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
// whitelist which is the default for devices.Emulator.
finalRules, err := configEmu.Rules()
baseEmu := &cgroupdevices.Emulator{}
finalRules, err := baseEmu.Transition(configEmu)
if err != nil {
return nil, errors.Wrap(err, "get simplified rules for systemd")
}
@ -313,7 +306,7 @@ func getUnitName(c *configs.Cgroup) string {
// isDbusError returns true if the error is a specific dbus error.
func isDbusError(err error, name string) bool {
if err != nil {
var derr dbus.Error
var derr *dbus.Error
if errors.As(err, &derr) {
return strings.Contains(derr.Name, name)
}
@ -362,9 +355,6 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
return err
})
if err == nil {
timeout := time.NewTimer(30 * time.Second)
defer timeout.Stop()
select {
case s := <-statusChan:
close(statusChan)
@ -372,8 +362,8 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
if s != "done" {
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
}
case <-timeout.C:
return errors.New("Timed out while waiting for systemd to remove " + unitName)
case <-time.After(time.Second):
logrus.Warnf("Timed out while waiting for StopUnit(%s) completion signal from dbus. Continuing...", unitName)
}
}
return nil
@ -486,7 +476,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
}
if cpus != "" {
bits, err := RangeToBits(cpus)
bits, err := rangeToBits(cpus)
if err != nil {
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
cpus, err)
@ -495,7 +485,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
newProp("AllowedCPUs", bits))
}
if mems != "" {
bits, err := RangeToBits(mems)
bits, err := rangeToBits(mems)
if err != nil {
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
mems, err)

View File

@ -5,15 +5,15 @@ import (
"strconv"
"strings"
"github.com/bits-and-blooms/bitset"
"github.com/pkg/errors"
"github.com/willf/bitset"
)
// RangeToBits converts a text representation of a CPU mask (as written to
// rangeToBits converts a text representation of a CPU mask (as written to
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
// with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value).
func RangeToBits(str string) ([]byte, error) {
func rangeToBits(str string) ([]byte, error) {
bits := &bitset.BitSet{}
for _, r := range strings.Split(str, ",") {
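A self-contained sketch of the same conversion for small masks (this is not the function above, which uses the bitset package; byte ordering for masks wider than 8 CPUs is glossed over here). For "1,3-5", bits 1, 3, 4 and 5 are set, giving the single byte 0x3a:

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// rangeToByte handles CPU numbers 0-7 only, enough for a worked example.
func rangeToByte(str string) (byte, error) {
    var mask byte
    for _, r := range strings.Split(str, ",") {
        lo, hi := r, r
        if i := strings.Index(r, "-"); i >= 0 {
            lo, hi = r[:i], r[i+1:]
        }
        start, err := strconv.ParseUint(lo, 10, 8)
        if err != nil {
            return 0, err
        }
        end, err := strconv.ParseUint(hi, 10, 8)
        if err != nil {
            return 0, err
        }
        for b := start; b <= end && b < 8; b++ {
            mask |= 1 << b
        }
    }
    return mask, nil
}

func main() {
    b, err := rangeToByte("1,3-5")
    fmt.Printf("%#x %v\n", b, err) // 0x3a <nil>
}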

View File

@ -17,16 +17,14 @@ var (
dbusRootless bool
)
type dbusConnManager struct{}
type dbusConnManager struct {
}
// newDbusConnManager initializes systemd dbus connection manager.
func newDbusConnManager(rootless bool) *dbusConnManager {
dbusMu.Lock()
defer dbusMu.Unlock()
if dbusInited && rootless != dbusRootless {
panic("can't have both root and rootless dbus")
}
dbusInited = true
dbusRootless = rootless
return &dbusConnManager{}
}

View File

@ -61,7 +61,7 @@ var legacySubsystems = []subsystem{
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
var properties []systemdDbus.Property
deviceProperties, err := generateDeviceProperties(r)
deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil {
return nil, err
}
@ -207,10 +207,9 @@ func (m *legacyManager) Destroy() error {
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
// Both on success and on error, cleanup all the cgroups
// we are aware of, as some of them were created directly
// by Apply() and are not managed by systemd.
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
// Both on success and on error, cleanup all the cgroups we are aware of.
// Some of them were created directly by Apply() and are not managed by systemd.
if err := cgroups.RemovePaths(m.paths); err != nil {
return err
}
@ -238,7 +237,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
}
default:
if path, ok := m.paths[name]; ok {
if err := os.MkdirAll(path, 0o755); err != nil {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
@ -339,24 +338,27 @@ func (m *legacyManager) Set(r *configs.Resources) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@ -96,7 +96,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
newProp("CPUWeight", num))
case "cpuset.cpus", "cpuset.mems":
bits, err := RangeToBits(v)
bits, err := rangeToBits(v)
if err != nil {
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
}
@ -172,7 +172,7 @@ func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
// aren't the end of the world, but it is a bit concerning. However
// it's unclear if systemd removes all eBPF programs attached when
// doing SetUnitProperties...
deviceProperties, err := generateDeviceProperties(r)
deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil {
return nil, err
}
@ -418,24 +418,27 @@ func (m *unifiedManager) Set(r *configs.Resources) error {
return err
}
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
targetFreezerState := configs.Undefined
if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
}
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@ -15,6 +15,7 @@ import (
"sync"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@ -87,7 +88,7 @@ func GetAllSubsystems() ([]string, error) {
// - freezer: implemented in kernel 5.2
// We assume these are always available, as it is hard to detect availability.
pseudo := []string{"devices", "freezer"}
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
data, err := fscommon.ReadFile("/sys/fs/cgroup", "cgroup.controllers")
if err != nil {
return nil, err
}
@ -266,6 +267,7 @@ func RemovePaths(paths map[string]string) (err error) {
case retries - 1:
logrus.WithError(err).Error("Failed to remove cgroup")
}
}
_, err := os.Stat(p)
// We need this strange way of checking cgroups existence because
@ -374,7 +376,7 @@ func WriteCgroupProc(dir string, pid int) error {
return nil
}
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
file, err := fscommon.OpenFile(dir, CgroupProcesses, os.O_WRONLY)
if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
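For context on the cgroup.controllers read in GetAllSubsystems above, a tiny sketch of turning that file's space-separated content into a subsystem list with the pseudo-controllers prepended (the sample content is hypothetical):

package main

import (
    "fmt"
    "strings"
)

func main() {
    // Hypothetical content of /sys/fs/cgroup/cgroup.controllers on a v2 host.
    data := "cpuset cpu io memory hugetlb pids"
    // devices and freezer have no cgroup v2 controller entries, so they are assumed.
    subsystems := append([]string{"devices", "freezer"}, strings.Fields(data)...)
    fmt.Println(subsystems)
}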

View File

@ -13,12 +13,12 @@ const (
Thawed FreezerState = "THAWED"
)
// Cgroup holds properties of a cgroup on Linux.
type Cgroup struct {
// Name specifies the name of the cgroup
// Deprecated, use Path instead
Name string `json:"name,omitempty"`
// Parent specifies the name of parent of cgroup or slice
// name of parent of cgroup or slice
// Deprecated, use Path instead
Parent string `json:"parent,omitempty"`
// Path specifies the path to cgroups that are created and/or joined by the container.
@ -127,8 +127,8 @@ type Resources struct {
// SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
// common for many containers, and by runc update.
// common for many containers.
//
// NOTE it is impossible to start a container which has this flag set.
SkipDevices bool `json:"-"`
SkipDevices bool `json:"skip_devices"`
}

View File

@ -2,7 +2,7 @@
package configs
// Cgroup holds properties of a cgroup on Linux
// TODO Windows: This can ultimately be entirely factored out on Windows as
// cgroups are a Unix-specific construct.
type Cgroup struct{}
type Cgroup struct {
}

View File

@ -208,11 +208,9 @@ type Config struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
type (
HookName string
HookList []Hook
Hooks map[HookName]HookList
)
type HookName string
type HookList []Hook
type Hooks map[HookName]HookList
const (
// Prestart commands are executed after the container namespaces are created,
@ -389,7 +387,7 @@ func (c Command) Run(s *specs.State) error {
case err := <-errC:
return err
case <-timerCh:
_ = cmd.Process.Kill()
cmd.Process.Kill()
<-errC
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
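A standalone sketch of the kill-on-timeout pattern used by Command.Run above; the command and timeout value are illustrative.

package main

import (
    "fmt"
    "os/exec"
    "time"
)

func runWithTimeout(cmd *exec.Cmd, timeout time.Duration) error {
    if err := cmd.Start(); err != nil {
        return err
    }
    errC := make(chan error, 1)
    go func() { errC <- cmd.Wait() }()
    select {
    case err := <-errC:
        return err
    case <-time.After(timeout):
        _ = cmd.Process.Kill()
        <-errC // reap the killed process
        return fmt.Errorf("hook ran past specified timeout of %.1fs", timeout.Seconds())
    }
}

func main() {
    err := runWithTimeout(exec.Command("sleep", "10"), 2*time.Second)
    fmt.Println(err)
}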

View File

@ -0,0 +1,17 @@
package configs
import "github.com/opencontainers/runc/libcontainer/devices"
type (
// Deprecated: use libcontainer/devices.Device
Device = devices.Device
// Deprecated: use libcontainer/devices.Rule
DeviceRule = devices.Rule
// Deprecated: use libcontainer/devices.Type
DeviceType = devices.Type
// Deprecated: use libcontainer/devices.Permissions
DevicePermissions = devices.Permissions
)

View File

@ -3,7 +3,7 @@ package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning
EXT_COPYUP = 1 << iota
)
type Mount struct {

View File

@ -4,4 +4,5 @@ package configs
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct{}
type Namespace struct {
}

View File

@ -50,10 +50,7 @@ type Network struct {
HairpinMode bool `json:"hairpin_mode"`
}
// Route defines a routing table entry.
//
// Routes can be specified to create entries in the routing table as the container
// is started.
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
@ -61,15 +58,15 @@ type Network struct {
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Destination specifies the destination IP address and mask in the CIDR form.
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"`
// Source specifies the source IP address and mask in the CIDR form.
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"`
// Gateway specifies the gateway IP address.
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"`
// InterfaceName specifies the device to set this route up for, for example eth0.
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"`
}
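An illustrative Route value matching the field comments above (addresses and interface are made up):

package main

import "github.com/opencontainers/runc/libcontainer/configs"

// A default IPv4 route via 10.0.0.1 on eth0; all values are illustrative.
var route = &configs.Route{
    Destination:   "0.0.0.0/0",
    Source:        "10.0.0.2/32",
    Gateway:       "10.0.0.1",
    InterfaceName: "eth0",
}

func main() { _ = route }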

View File

@ -12,7 +12,6 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -24,13 +23,13 @@ func New() Validator {
return &ConfigValidator{}
}
type ConfigValidator struct{}
type ConfigValidator struct {
}
type check func(config *configs.Config) error
func (v *ConfigValidator) Validate(config *configs.Config) error {
checks := []check{
v.cgroups,
v.rootfs,
v.network,
v.hostname,
@ -40,21 +39,17 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
v.sysctl,
v.intelrdt,
v.rootlessEUID,
v.mounts,
}
for _, c := range checks {
if err := c(config); err != nil {
return err
}
}
// Relaxed validation rules for backward compatibility
warns := []check{
v.mounts, // TODO (runc v1.x.x): make this an error instead of a warning
}
for _, c := range warns {
if err := c(config); err != nil {
logrus.WithError(err).Warnf("invalid configuration")
}
if err := v.cgroups(config); err != nil {
return err
}
return nil
}

View File

@ -9,7 +9,7 @@ import (
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func mountConsole(slavePath string) error {
oldMask := unix.Umask(0o000)
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {

View File

@ -437,8 +437,8 @@ func (c *linuxContainer) createExecFifo() error {
if _, err := os.Stat(fifoName); err == nil {
return fmt.Errorf("exec fifo %s already exists", fifoName)
}
oldMask := unix.Umask(0o000)
if err := unix.Mkfifo(fifoName, 0o622); err != nil {
oldMask := unix.Umask(0000)
if err := unix.Mkfifo(fifoName, 0622); err != nil {
unix.Umask(oldMask)
return err
}
@ -699,6 +699,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
var criuFeatures *criurpc.CriuFeatures
func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error {
t := criurpc.CriuReqType_FEATURE_CHECK
// make sure the features we are looking for are really not from
@ -760,6 +761,7 @@ func compareCriuVersion(criuVersion int, minVersion int) error {
// checkCriuVersion checks Criu version greater than or equal to minVersion
func (c *linuxContainer) checkCriuVersion(minVersion int) error {
// If the version of criu has already been determined there is no need
// to ask criu for the version again. Use the value from c.criuVersion.
if c.criuVersion != 0 {
@ -968,7 +970,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) {
if err := os.Mkdir(criuOpts.ImagesDirectory, 0700); err != nil && !os.IsExist(err) {
return err
}
@ -976,7 +978,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
return err
}
@ -1046,7 +1048,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
}
}
// pre-dump may need parentImage param to complete iterative migration
//pre-dump may need parentImage param to complete iterative migration
if criuOpts.ParentImage != "" {
rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
rpcOpts.TrackMem = proto.Bool(true)
@ -1144,7 +1146,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
return err
}
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600)
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0600)
if err != nil {
return err
}
@ -1215,7 +1217,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
return err
}
if err := os.MkdirAll(dest, 0o755); err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
@ -1316,7 +1318,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
// Since a container can be C/R'ed multiple times,
// the work directory may already exist.
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) {
if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
@ -1338,7 +1340,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// c.config.Rootfs is bind-mounted to a temporary directory
// to satisfy these requirements.
root := filepath.Join(c.root, "criu-root")
if err := os.Mkdir(root, 0o755); err != nil {
if err := os.Mkdir(root, 0755); err != nil {
return err
}
defer os.Remove(root)
@ -1350,7 +1352,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
if err != nil {
return err
}
defer unix.Unmount(root, unix.MNT_DETACH) //nolint: errcheck
defer unix.Unmount(root, unix.MNT_DETACH)
t := criurpc.CriuReqType_RESTORE
req := &criurpc.CriuReq{
Type: &t,
@ -1375,15 +1377,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
},
}
if criuOpts.LsmProfile != "" {
// CRIU older than 3.16 has a bug which breaks the possibility
// to set a different LSM profile.
if err := c.checkCriuVersion(31600); err != nil {
return errors.New("--lsm-profile requires at least CRIU 3.16")
}
req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile)
}
c.handleCriuConfigurationFile(req.Opts)
if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil {
@ -1672,7 +1665,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
break
}
_ = criuClientCon.CloseWrite()
criuClientCon.CloseWrite()
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
criuProcessState, err = criuProcess.Wait()

View File

@ -29,5 +29,4 @@ type CriuOpts struct {
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd int // fd for feedback when lazy server is ready
LsmProfile string // LSM profile used to restore the container
}

View File

@ -11,8 +11,10 @@ import (
"golang.org/x/sys/unix"
)
// ErrNotADevice denotes that a file is not a valid linux device.
var ErrNotADevice = errors.New("not a device node")
var (
// ErrNotADevice denotes that a file is not a valid linux device.
ErrNotADevice = errors.New("not a device node")
)
// Testing dependencies
var (
@ -27,9 +29,8 @@ func mkDev(d *Rule) (uint64, error) {
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
}
// DeviceFromPath takes the path to a device and its cgroup_permissions (which
// cannot be easily queried) to look up the information about a linux device
// and returns that information as a Device struct.
// Given the path to a device and its cgroup_permissions (which cannot be easily queried) look up the
// information about a linux device and return that information as a Device struct.
func DeviceFromPath(path, permissions string) (*Device, error) {
var stat unix.Stat_t
err := unixLstat(path, &stat)
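A minimal sketch of the stat-based lookup DeviceFromPath performs, reduced to printing a node's major/minor numbers; /dev/null is just an example path.

package main

import (
    "fmt"
    "os"

    "golang.org/x/sys/unix"
)

func main() {
    var stat unix.Stat_t
    if err := unix.Lstat("/dev/null", &stat); err != nil {
        fmt.Fprintln(os.Stderr, err)
        return
    }
    dev := uint64(stat.Rdev) // device number for char/block nodes
    fmt.Printf("major=%d minor=%d type=%#o\n",
        unix.Major(dev), unix.Minor(dev), stat.Mode&unix.S_IFMT)
}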

View File

@ -196,11 +196,7 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
Validator: validate.New(),
CriuPath: "criu",
}
if err := Cgroupfs(l); err != nil {
return nil, err
}
Cgroupfs(l)
for _, opt := range options {
if opt == nil {
continue
@ -291,7 +287,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
// when load, we need to check id is valid or not.
//when load, we need to check id is valid or not.
if err := l.validateID(id); err != nil {
return nil, err
}

View File

@ -112,19 +112,9 @@ func populateProcessEnvironment(env []string) error {
for _, pair := range env {
p := strings.SplitN(pair, "=", 2)
if len(p) < 2 {
return fmt.Errorf("invalid environment variable: %q", pair)
return fmt.Errorf("invalid environment '%v'", pair)
}
name, val := p[0], p[1]
if name == "" {
return fmt.Errorf("environment variable name can't be empty: %q", pair)
}
if strings.IndexByte(name, 0) >= 0 {
return fmt.Errorf("environment variable name can't contain null(\\x00): %q", pair)
}
if strings.IndexByte(val, 0) >= 0 {
return fmt.Errorf("environment variable value can't contain null(\\x00): %q", pair)
}
if err := os.Setenv(name, val); err != nil {
if err := os.Setenv(p[0], p[1]); err != nil {
return err
}
}

View File

@ -1,6 +1,8 @@
package intelrdt
var cmtEnabled bool
var (
cmtEnabled bool
)
// Check if Intel RDT/CMT is enabled.
func IsCMTEnabled() bool {

View File

@ -2,8 +2,10 @@
package intelrdt
// The flag to indicate if Intel RDT/MBM is enabled
var mbmEnabled bool
var (
// The flag to indicate if Intel RDT/MBM is enabled
mbmEnabled bool
)
// Check if Intel RDT/MBM is enabled.
func IsMBMEnabled() bool {

Some files were not shown because too many files have changed in this diff.