Merge pull request #103483 from odinuge/revert-102508-runc-1.0

Revert "Update runc to 1.0.0"
This commit is contained in:
Kubernetes Prow Robot 2021-07-06 10:42:56 -07:00 committed by GitHub
commit eae87bfe7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
146 changed files with 1196 additions and 2702 deletions

View File

@ -1,4 +1,4 @@
= vendor/github.com/bits-and-blooms/bitset licensed under: = = vendor/github.com/willf/bitset licensed under: =
Copyright (c) 2014 Will Fitzgerald. All rights reserved. Copyright (c) 2014 Will Fitzgerald. All rights reserved.
@ -28,4 +28,4 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
= vendor/github.com/bits-and-blooms/bitset/LICENSE 0d2b5d66dabaff0df8e1ffe191a21bd1 = vendor/github.com/willf/bitset/LICENSE 0d2b5d66dabaff0df8e1ffe191a21bd1

14
go.mod
View File

@ -67,8 +67,8 @@ require (
github.com/onsi/ginkgo v1.14.0 github.com/onsi/ginkgo v1.14.0
github.com/onsi/gomega v1.10.1 github.com/onsi/gomega v1.10.1
github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/runc v1.0.0 github.com/opencontainers/runc v1.0.0-rc95
github.com/opencontainers/selinux v1.8.2 github.com/opencontainers/selinux v1.8.0
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0 github.com/pmezard/go-difflib v1.0.0
github.com/prometheus/client_golang v1.11.0 github.com/prometheus/client_golang v1.11.0
@ -179,7 +179,6 @@ replace (
github.com/benbjohnson/clock => github.com/benbjohnson/clock v1.0.3 github.com/benbjohnson/clock => github.com/benbjohnson/clock v1.0.3
github.com/beorn7/perks => github.com/beorn7/perks v1.0.1 github.com/beorn7/perks => github.com/beorn7/perks v1.0.1
github.com/bgentry/speakeasy => github.com/bgentry/speakeasy v0.1.0 github.com/bgentry/speakeasy => github.com/bgentry/speakeasy v0.1.0
github.com/bits-and-blooms/bitset => github.com/bits-and-blooms/bitset v1.2.0
github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c github.com/bketelsen/crypt => github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c
github.com/blang/semver => github.com/blang/semver v3.5.1+incompatible github.com/blang/semver => github.com/blang/semver v3.5.1+incompatible
github.com/boltdb/bolt => github.com/boltdb/bolt v1.3.1 github.com/boltdb/bolt => github.com/boltdb/bolt v1.3.1
@ -191,7 +190,7 @@ replace (
github.com/chzyer/logex => github.com/chzyer/logex v1.1.10 github.com/chzyer/logex => github.com/chzyer/logex v1.1.10
github.com/chzyer/readline => github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e github.com/chzyer/readline => github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
github.com/chzyer/test => github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 github.com/chzyer/test => github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1
github.com/cilium/ebpf => github.com/cilium/ebpf v0.6.1 github.com/cilium/ebpf => github.com/cilium/ebpf v0.5.0
github.com/clusterhq/flocker-go => github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313 github.com/clusterhq/flocker-go => github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313
github.com/cncf/udpa/go => github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403 github.com/cncf/udpa/go => github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403
github.com/cockroachdb/datadriven => github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 github.com/cockroachdb/datadriven => github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5
@ -367,9 +366,9 @@ replace (
github.com/onsi/gomega => github.com/onsi/gomega v1.10.1 github.com/onsi/gomega => github.com/onsi/gomega v1.10.1
github.com/opencontainers/go-digest => github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/go-digest => github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1 github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0 github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc95
github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.2 github.com/opencontainers/selinux => github.com/opencontainers/selinux v1.8.0
github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0 github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0
github.com/pascaldekloe/goe => github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c github.com/pascaldekloe/goe => github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c
github.com/pelletier/go-toml => github.com/pelletier/go-toml v1.2.0 github.com/pelletier/go-toml => github.com/pelletier/go-toml v1.2.0
@ -395,7 +394,7 @@ replace (
github.com/seccomp/libseccomp-golang => github.com/seccomp/libseccomp-golang v0.9.1 github.com/seccomp/libseccomp-golang => github.com/seccomp/libseccomp-golang v0.9.1
github.com/sergi/go-diff => github.com/sergi/go-diff v1.1.0 github.com/sergi/go-diff => github.com/sergi/go-diff v1.1.0
github.com/shurcooL/sanitized_anchor_name => github.com/shurcooL/sanitized_anchor_name v1.0.0 github.com/shurcooL/sanitized_anchor_name => github.com/shurcooL/sanitized_anchor_name v1.0.0
github.com/sirupsen/logrus => github.com/sirupsen/logrus v1.8.1 github.com/sirupsen/logrus => github.com/sirupsen/logrus v1.7.0
github.com/smartystreets/assertions => github.com/smartystreets/assertions v1.1.0 github.com/smartystreets/assertions => github.com/smartystreets/assertions v1.1.0
github.com/smartystreets/goconvey => github.com/smartystreets/goconvey v1.6.4 github.com/smartystreets/goconvey => github.com/smartystreets/goconvey v1.6.4
github.com/soheilhy/cmux => github.com/soheilhy/cmux v0.1.5 github.com/soheilhy/cmux => github.com/soheilhy/cmux v0.1.5
@ -417,6 +416,7 @@ replace (
github.com/vishvananda/netlink => github.com/vishvananda/netlink v1.1.0 github.com/vishvananda/netlink => github.com/vishvananda/netlink v1.1.0
github.com/vishvananda/netns => github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae github.com/vishvananda/netns => github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae
github.com/vmware/govmomi => github.com/vmware/govmomi v0.20.3 github.com/vmware/govmomi => github.com/vmware/govmomi v0.20.3
github.com/willf/bitset => github.com/willf/bitset v1.1.11
github.com/xiang90/probing => github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 github.com/xiang90/probing => github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2
github.com/xlab/treeprint => github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca github.com/xlab/treeprint => github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca
github.com/yuin/goldmark => github.com/yuin/goldmark v1.3.5 github.com/yuin/goldmark => github.com/yuin/goldmark v1.3.5

20
go.sum
View File

@ -67,8 +67,6 @@ github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiU
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
@ -86,8 +84,8 @@ github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d8
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.6.1 h1:n6ZUOkSFi6OwcMeTCFaDQx2Onx2rEikQo69315MNbdc= github.com/cilium/ebpf v0.5.0 h1:E1KshmrMEtkMP2UjlWzfmUV1owWY+BnbL5FxxuatnrU=
github.com/cilium/ebpf v0.6.1/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/cilium/ebpf v0.5.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313 h1:eIHD9GNM3Hp7kcRW5mvcz7WTR3ETeoYYKwpgA04kaXE= github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313 h1:eIHD9GNM3Hp7kcRW5mvcz7WTR3ETeoYYKwpgA04kaXE=
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313/go.mod h1:P1wt9Z3DP8O6W3rvwCt0REIlshg1InHImaLW0t3ObY0= github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313/go.mod h1:P1wt9Z3DP8O6W3rvwCt0REIlshg1InHImaLW0t3ObY0=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
@ -374,12 +372,12 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI=
github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/runc v1.0.0 h1:QOhAQAYUlKeofuyeKdR6ITvOnXLPbEAjPMjz9wCUXcU= github.com/opencontainers/runc v1.0.0-rc95 h1:RMuWVfY3E1ILlVsC3RhIq38n4sJtlOFwU9gfFZSqrd0=
github.com/opencontainers/runc v1.0.0/go.mod h1:MU2S3KEB2ZExnhnAQYbwjdYV6HwKtDlNbA2Z2OeNDeA= github.com/opencontainers/runc v1.0.0-rc95/go.mod h1:z+bZxa/+Tz/FmYVWkhUajJdzFeOqjc5vrqskhVyHGUM=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc= github.com/opencontainers/selinux v1.8.0 h1:+77ba4ar4jsCbL1GLbFL8fFM57w6suPfSS9PDLDY7KM=
github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
@ -421,8 +419,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
@ -457,6 +455,8 @@ github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3C
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vmware/govmomi v0.20.3 h1:gpw/0Ku+6RgF3jsi7fnCLmlcikBHfKBCUcu1qgc16OU= github.com/vmware/govmomi v0.20.3 h1:gpw/0Ku+6RgF3jsi7fnCLmlcikBHfKBCUcu1qgc16OU=
github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU= github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU=
github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca h1:1CFlNzQhALwjS9mBAUkycX616GzgsuYUOCHA5+HSlXI= github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca h1:1CFlNzQhALwjS9mBAUkycX616GzgsuYUOCHA5+HSlXI=

View File

@ -33,6 +33,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
"k8s.io/klog/v2" "k8s.io/klog/v2"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@ -375,6 +376,15 @@ func getSupportedUnifiedControllers() sets.String {
func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
resources := &libcontainerconfigs.Resources{ resources := &libcontainerconfigs.Resources{
Devices: []*libcontainerdevices.Rule{
{
Type: 'a',
Permissions: "rwm",
Allow: true,
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
},
},
SkipDevices: true, SkipDevices: true,
} }
if resourceConfig == nil { if resourceConfig == nil {

View File

@ -38,6 +38,7 @@ import (
utilio "k8s.io/utils/io" utilio "k8s.io/utils/io"
utilpath "k8s.io/utils/path" utilpath "k8s.io/utils/path"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
v1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
utilerrors "k8s.io/apimachinery/pkg/util/errors" utilerrors "k8s.io/apimachinery/pkg/util/errors"
@ -392,6 +393,15 @@ func createManager(containerName string) (cgroups.Manager, error) {
Parent: "/", Parent: "/",
Name: containerName, Name: containerName,
Resources: &configs.Resources{ Resources: &configs.Resources{
Devices: []*libcontainerdevices.Rule{
{
Type: 'a',
Permissions: "rwm",
Allow: true,
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
},
},
SkipDevices: true, SkipDevices: true,
}, },
} }

View File

@ -28,6 +28,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs" cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices"
utilversion "k8s.io/apimachinery/pkg/util/version" utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2" "k8s.io/klog/v2"
@ -126,6 +127,15 @@ func createCgroupManager(name string) (cgroups.Manager, error) {
Memory: int64(memoryLimit), Memory: int64(memoryLimit),
MemorySwap: -1, MemorySwap: -1,
SkipDevices: true, SkipDevices: true,
Devices: []*libcontainerdevices.Rule{
{
Minor: libcontainerdevices.Wildcard,
Major: libcontainerdevices.Wildcard,
Type: 'a',
Permissions: "rwm",
Allow: true,
},
},
}, },
} }
return cgroupfs.NewManager(cg, nil, false), nil return cgroupfs.NewManager(cg, nil, false), nil

View File

@ -383,9 +383,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -23,7 +23,6 @@ require (
github.com/hashicorp/golang-lru v0.5.1 github.com/hashicorp/golang-lru v0.5.1
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021 // indirect github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0 github.com/stretchr/testify v1.7.0
go.etcd.io/etcd/api/v3 v3.5.0 go.etcd.io/etcd/api/v3 v3.5.0

View File

@ -377,9 +377,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -372,9 +372,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -480,9 +480,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -373,9 +373,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -324,7 +324,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -380,7 +380,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -336,7 +336,6 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -370,9 +370,8 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=

View File

@ -1,3 +0,0 @@
module github.com/bits-and-blooms/bitset
go 1.14

View File

@ -18,23 +18,6 @@ reason about the proposed changes.
## Running the tests ## Running the tests
Many of the tests require privileges to set resource limits and load eBPF code. Many of the tests require privileges to set resource limits and load eBPF code.
The easiest way to obtain these is to run the tests with `sudo`. The easiest way to obtain these is to run the tests with `sudo`:
To test the current package with your local kernel you can simply run:
```
go test -exec sudo ./...
```
To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script.
It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed.
Examples:
```bash
# Run all tests on a 5.4 kernel
./run-tests.sh 5.4
# Run a subset of tests:
./run-tests.sh 5.4 go test ./link
```
sudo go test ./...

View File

@ -1,7 +1,7 @@
# The development version of clang is distributed as the 'clang' binary, # The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached. # while stable/released versions have a version number attached.
# Pin the default clang to a stable version. # Pin the default clang to a stable version.
CLANG ?= clang-12 CLANG ?= clang-11
CFLAGS := -target bpf -O2 -g -Wall -Werror $(CFLAGS) CFLAGS := -target bpf -O2 -g -Wall -Werror $(CFLAGS)
# Obtain an absolute path to the directory of the Makefile. # Obtain an absolute path to the directory of the Makefile.
@ -17,7 +17,7 @@ VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)
TARGETS := \ TARGETS := \
testdata/loader-clang-7 \ testdata/loader-clang-7 \
testdata/loader-clang-9 \ testdata/loader-clang-9 \
testdata/loader-$(CLANG) \ testdata/loader-clang-11 \
testdata/invalid_map \ testdata/invalid_map \
testdata/raw_tracepoint \ testdata/raw_tracepoint \
testdata/invalid_map_static \ testdata/invalid_map_static \
@ -33,7 +33,6 @@ TARGETS := \
docker-all: docker-all:
docker run --rm --user "${UIDGID}" \ docker run --rm --user "${UIDGID}" \
-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \ -v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
"${IMAGE}:${VERSION}" \ "${IMAGE}:${VERSION}" \
make all make all
@ -48,8 +47,6 @@ clean:
-$(RM) internal/btf/testdata/*.elf -$(RM) internal/btf/testdata/*.elf
all: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) all: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS))
ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf
testdata/loader-%-el.elf: testdata/loader.c testdata/loader-%-el.elf: testdata/loader.c
$* $(CFLAGS) -mlittle-endian -c $< -o $@ $* $(CFLAGS) -mlittle-endian -c $< -o $@

View File

@ -57,7 +57,7 @@ func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, err
return 0, fmt.Errorf("can't unmarshal registers: %s", err) return 0, fmt.Errorf("can't unmarshal registers: %s", err)
} }
if !bi.OpCode.IsDWordLoad() { if !bi.OpCode.isDWordLoad() {
return InstructionSize, nil return InstructionSize, nil
} }
@ -80,7 +80,7 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
return 0, errors.New("invalid opcode") return 0, errors.New("invalid opcode")
} }
isDWordLoad := ins.OpCode.IsDWordLoad() isDWordLoad := ins.OpCode.isDWordLoad()
cons := int32(ins.Constant) cons := int32(ins.Constant)
if isDWordLoad { if isDWordLoad {
@ -123,7 +123,7 @@ func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error)
// //
// Returns an error if the instruction doesn't load a map. // Returns an error if the instruction doesn't load a map.
func (ins *Instruction) RewriteMapPtr(fd int) error { func (ins *Instruction) RewriteMapPtr(fd int) error {
if !ins.OpCode.IsDWordLoad() { if !ins.OpCode.isDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
} }
@ -138,19 +138,15 @@ func (ins *Instruction) RewriteMapPtr(fd int) error {
return nil return nil
} }
// MapPtr returns the map fd for this instruction. func (ins *Instruction) mapPtr() uint32 {
// return uint32(uint64(ins.Constant) & math.MaxUint32)
// The result is undefined if the instruction is not a load from a map,
// see IsLoadFromMap.
func (ins *Instruction) MapPtr() int {
return int(int32(uint64(ins.Constant) & math.MaxUint32))
} }
// RewriteMapOffset changes the offset of a direct load from a map. // RewriteMapOffset changes the offset of a direct load from a map.
// //
// Returns an error if the instruction is not a direct load. // Returns an error if the instruction is not a direct load.
func (ins *Instruction) RewriteMapOffset(offset uint32) error { func (ins *Instruction) RewriteMapOffset(offset uint32) error {
if !ins.OpCode.IsDWordLoad() { if !ins.OpCode.isDWordLoad() {
return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) return fmt.Errorf("%s is not a 64 bit load", ins.OpCode)
} }
@ -167,10 +163,10 @@ func (ins *Instruction) mapOffset() uint32 {
return uint32(uint64(ins.Constant) >> 32) return uint32(uint64(ins.Constant) >> 32)
} }
// IsLoadFromMap returns true if the instruction loads from a map. // isLoadFromMap returns true if the instruction loads from a map.
// //
// This covers both loading the map pointer and direct map value loads. // This covers both loading the map pointer and direct map value loads.
func (ins *Instruction) IsLoadFromMap() bool { func (ins *Instruction) isLoadFromMap() bool {
return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue) return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue)
} }
@ -181,12 +177,6 @@ func (ins *Instruction) IsFunctionCall() bool {
return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall
} }
// IsConstantLoad reports whether the instruction is an immediate load of
// the given size that uses R0 as its source register and has a zero offset.
func (ins *Instruction) IsConstantLoad(size Size) bool {
	if ins.OpCode != LoadImmOp(size) {
		return false
	}
	if ins.Src != R0 {
		return false
	}
	return ins.Offset == 0
}
// Format implements fmt.Formatter. // Format implements fmt.Formatter.
func (ins Instruction) Format(f fmt.State, c rune) { func (ins Instruction) Format(f fmt.State, c rune) {
if c != 'v' { if c != 'v' {
@ -207,8 +197,8 @@ func (ins Instruction) Format(f fmt.State, c rune) {
return return
} }
if ins.IsLoadFromMap() { if ins.isLoadFromMap() {
fd := ins.MapPtr() fd := int32(ins.mapPtr())
switch ins.Src { switch ins.Src {
case PseudoMapFD: case PseudoMapFD:
fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd) fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd)
@ -413,7 +403,7 @@ func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error {
func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) { func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) {
h := sha1.New() h := sha1.New()
for i, ins := range insns { for i, ins := range insns {
if ins.IsLoadFromMap() { if ins.isLoadFromMap() {
ins.Constant = 0 ins.Constant = 0
} }
_, err := ins.Marshal(h, bo) _, err := ins.Marshal(h, bo)

View File

@ -111,7 +111,7 @@ func LoadMapPtr(dst Register, fd int) Instruction {
OpCode: LoadImmOp(DWord), OpCode: LoadImmOp(DWord),
Dst: dst, Dst: dst,
Src: PseudoMapFD, Src: PseudoMapFD,
Constant: int64(uint32(fd)), Constant: int64(fd),
} }
} }

View File

@ -69,13 +69,13 @@ const InvalidOpCode OpCode = 0xff
// rawInstructions returns the number of BPF instructions required // rawInstructions returns the number of BPF instructions required
// to encode this opcode. // to encode this opcode.
func (op OpCode) rawInstructions() int { func (op OpCode) rawInstructions() int {
if op.IsDWordLoad() { if op.isDWordLoad() {
return 2 return 2
} }
return 1 return 1
} }
func (op OpCode) IsDWordLoad() bool { func (op OpCode) isDWordLoad() bool {
return op == LoadImmOp(DWord) return op == LoadImmOp(DWord)
} }

View File

@ -3,7 +3,6 @@ package ebpf
import ( import (
"errors" "errors"
"fmt" "fmt"
"io"
"math" "math"
"reflect" "reflect"
"strings" "strings"
@ -90,8 +89,8 @@ func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error {
// //
// The constant must be defined like so in the C program: // The constant must be defined like so in the C program:
// //
// volatile const type foobar; // static volatile const type foobar;
// volatile const type foobar = default; // static volatile const type foobar = default;
// //
// Replacement values must be of the same length as the C sizeof(type). // Replacement values must be of the same length as the C sizeof(type).
// If necessary, they are marshalled according to the same rules as // If necessary, they are marshalled according to the same rules as
@ -270,21 +269,11 @@ func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Co
}, nil }, nil
} }
type handleCache struct { type btfHandleCache map[*btf.Spec]*btf.Handle
btfHandles map[*btf.Spec]*btf.Handle
btfSpecs map[io.ReaderAt]*btf.Spec
}
func newHandleCache() *handleCache { func (btfs btfHandleCache) load(spec *btf.Spec) (*btf.Handle, error) {
return &handleCache{ if btfs[spec] != nil {
btfHandles: make(map[*btf.Spec]*btf.Handle), return btfs[spec], nil
btfSpecs: make(map[io.ReaderAt]*btf.Spec),
}
}
func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
if hc.btfHandles[spec] != nil {
return hc.btfHandles[spec], nil
} }
handle, err := btf.NewHandle(spec) handle, err := btf.NewHandle(spec)
@ -292,30 +281,14 @@ func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) {
return nil, err return nil, err
} }
hc.btfHandles[spec] = handle btfs[spec] = handle
return handle, nil return handle, nil
} }
func (hc handleCache) btfSpec(rd io.ReaderAt) (*btf.Spec, error) { func (btfs btfHandleCache) close() {
if hc.btfSpecs[rd] != nil { for _, handle := range btfs {
return hc.btfSpecs[rd], nil
}
spec, err := btf.LoadSpecFromReader(rd)
if err != nil {
return nil, err
}
hc.btfSpecs[rd] = spec
return spec, nil
}
func (hc handleCache) close() {
for _, handle := range hc.btfHandles {
handle.Close() handle.Close()
} }
hc.btfHandles = nil
hc.btfSpecs = nil
} }
func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) ( func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
@ -327,12 +300,12 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
var ( var (
maps = make(map[string]*Map) maps = make(map[string]*Map)
progs = make(map[string]*Program) progs = make(map[string]*Program)
handles = newHandleCache() btfs = make(btfHandleCache)
skipMapsAndProgs = false skipMapsAndProgs = false
) )
cleanup = func() { cleanup = func() {
handles.close() btfs.close()
if skipMapsAndProgs { if skipMapsAndProgs {
return return
@ -362,7 +335,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
return nil, fmt.Errorf("missing map %s", mapName) return nil, fmt.Errorf("missing map %s", mapName)
} }
m, err := newMapWithOptions(mapSpec, opts.Maps, handles) m, err := newMapWithOptions(mapSpec, opts.Maps, btfs)
if err != nil { if err != nil {
return nil, fmt.Errorf("map %s: %w", mapName, err) return nil, fmt.Errorf("map %s: %w", mapName, err)
} }
@ -387,7 +360,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
for i := range progSpec.Instructions { for i := range progSpec.Instructions {
ins := &progSpec.Instructions[i] ins := &progSpec.Instructions[i]
if !ins.IsLoadFromMap() || ins.Reference == "" { if ins.OpCode != asm.LoadImmOp(asm.DWord) || ins.Reference == "" {
continue continue
} }
@ -399,7 +372,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
m, err := loadMap(ins.Reference) m, err := loadMap(ins.Reference)
if err != nil { if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err) return nil, fmt.Errorf("program %s: %s", progName, err)
} }
fd := m.FD() fd := m.FD()
@ -411,7 +384,7 @@ func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) (
} }
} }
prog, err := newProgramWithOptions(progSpec, opts.Programs, handles) prog, err := newProgramWithOptions(progSpec, opts.Programs, btfs)
if err != nil { if err != nil {
return nil, fmt.Errorf("program %s: %w", progName, err) return nil, fmt.Errorf("program %s: %w", progName, err)
} }
@ -561,7 +534,7 @@ func assignValues(to interface{}, valueOf func(reflect.Type, string) (reflect.Va
} }
if err != nil { if err != nil {
return fmt.Errorf("field %s: %w", field.Name, err) return fmt.Errorf("field %s: %s", field.Name, err)
} }
} }

View File

@ -96,7 +96,7 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
} }
btfSpec, err := btf.LoadSpecFromReader(rd) btfSpec, err := btf.LoadSpecFromReader(rd)
if err != nil && !errors.Is(err, btf.ErrNotFound) { if err != nil {
return nil, fmt.Errorf("load BTF: %w", err) return nil, fmt.Errorf("load BTF: %w", err)
} }
@ -159,7 +159,7 @@ func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
} }
if target.Flags&elf.SHF_STRINGS > 0 { if target.Flags&elf.SHF_STRINGS > 0 {
return nil, fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported) return nil, fmt.Errorf("section %q: string %q is not stack allocated: %w", section.Name, rel.Name, ErrNotSupported)
} }
target.references++ target.references++
@ -374,25 +374,17 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
} }
case dataSection: case dataSection:
var offset uint32
switch typ { switch typ {
case elf.STT_SECTION: case elf.STT_SECTION:
if bind != elf.STB_LOCAL { if bind != elf.STB_LOCAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind) return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
} }
// This is really a reference to a static symbol, which clang doesn't
// emit a symbol table entry for. Instead it encodes the offset in
// the instruction itself.
offset = uint32(uint64(ins.Constant))
case elf.STT_OBJECT: case elf.STT_OBJECT:
if bind != elf.STB_GLOBAL { if bind != elf.STB_GLOBAL {
return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind) return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind)
} }
offset = uint32(rel.Value)
default: default:
return fmt.Errorf("incorrect relocation type %v for direct map load", typ) return fmt.Errorf("incorrect relocation type %v for direct map load", typ)
} }
@ -402,8 +394,10 @@ func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) err
// it's not clear how to encode that into Instruction. // it's not clear how to encode that into Instruction.
name = target.Name name = target.Name
// The kernel expects the offset in the second basic BPF instruction. // For some reason, clang encodes the offset of the symbol its
ins.Constant = int64(uint64(offset) << 32) // section in the first basic BPF instruction, while the kernel
// expects it in the second one.
ins.Constant <<= 32
ins.Src = asm.PseudoMapValue ins.Src = asm.PseudoMapValue
// Mark the instruction as needing an update when creating the // Mark the instruction as needing an update when creating the
@ -497,38 +491,33 @@ func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset) return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset)
} }
mapName := mapSym.Name if maps[mapSym.Name] != nil {
if maps[mapName] != nil {
return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym) return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym)
} }
lr := io.LimitReader(r, int64(size)) lr := io.LimitReader(r, int64(size))
spec := MapSpec{ spec := MapSpec{
Name: SanitizeName(mapName, -1), Name: SanitizeName(mapSym.Name, -1),
} }
switch { switch {
case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil: case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil:
return fmt.Errorf("map %s: missing type", mapName) return fmt.Errorf("map %v: missing type", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil: case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil:
return fmt.Errorf("map %s: missing key size", mapName) return fmt.Errorf("map %v: missing key size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil: case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil:
return fmt.Errorf("map %s: missing value size", mapName) return fmt.Errorf("map %v: missing value size", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil: case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil:
return fmt.Errorf("map %s: missing max entries", mapName) return fmt.Errorf("map %v: missing max entries", mapSym)
case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil: case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil:
return fmt.Errorf("map %s: missing flags", mapName) return fmt.Errorf("map %v: missing flags", mapSym)
} }
if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil { if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil {
return fmt.Errorf("map %s: unknown and non-zero fields in definition", mapName) return fmt.Errorf("map %v: unknown and non-zero fields in definition", mapSym)
} }
if err := spec.clampPerfEventArraySize(); err != nil { maps[mapSym.Name] = &spec
return fmt.Errorf("map %s: %w", mapName, err)
}
maps[mapName] = &spec
} }
} }
@ -576,10 +565,6 @@ func (ec *elfCode) loadBTFMaps(maps map[string]*MapSpec) error {
return fmt.Errorf("map %v: %w", name, err) return fmt.Errorf("map %v: %w", name, err)
} }
if err := mapSpec.clampPerfEventArraySize(); err != nil {
return fmt.Errorf("map %v: %w", name, err)
}
maps[name] = mapSpec maps[name] = mapSpec
} }
} }
@ -862,8 +847,6 @@ func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
"uretprobe/": {Kprobe, AttachNone, 0}, "uretprobe/": {Kprobe, AttachNone, 0},
"tracepoint/": {TracePoint, AttachNone, 0}, "tracepoint/": {TracePoint, AttachNone, 0},
"raw_tracepoint/": {RawTracepoint, AttachNone, 0}, "raw_tracepoint/": {RawTracepoint, AttachNone, 0},
"raw_tp/": {RawTracepoint, AttachNone, 0},
"tp_btf/": {Tracing, AttachTraceRawTp, 0},
"xdp": {XDP, AttachNone, 0}, "xdp": {XDP, AttachNone, 0},
"perf_event": {PerfEvent, AttachNone, 0}, "perf_event": {PerfEvent, AttachNone, 0},
"lwt_in": {LWTIn, AttachNone, 0}, "lwt_in": {LWTIn, AttachNone, 0},

View File

@ -35,7 +35,7 @@ type Spec struct {
namedTypes map[string][]namedType namedTypes map[string][]namedType
funcInfos map[string]extInfo funcInfos map[string]extInfo
lineInfos map[string]extInfo lineInfos map[string]extInfo
coreRelos map[string]coreRelos coreRelos map[string]bpfCoreRelos
byteOrder binary.ByteOrder byteOrder binary.ByteOrder
} }
@ -53,7 +53,7 @@ type btfHeader struct {
// LoadSpecFromReader reads BTF sections from an ELF. // LoadSpecFromReader reads BTF sections from an ELF.
// //
// Returns ErrNotFound if the reader contains no BTF. // Returns a nil Spec and no error if no BTF was present.
func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) { func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
file, err := internal.NewSafeELFFile(rd) file, err := internal.NewSafeELFFile(rd)
if err != nil { if err != nil {
@ -67,7 +67,7 @@ func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) {
} }
if btfSection == nil { if btfSection == nil {
return nil, fmt.Errorf("btf: %w", ErrNotFound) return nil, nil
} }
symbols, err := file.Symbols() symbols, err := file.Symbols()
@ -438,13 +438,13 @@ func (s *Spec) Program(name string, length uint64) (*Program, error) {
funcInfos, funcOK := s.funcInfos[name] funcInfos, funcOK := s.funcInfos[name]
lineInfos, lineOK := s.lineInfos[name] lineInfos, lineOK := s.lineInfos[name]
relos, coreOK := s.coreRelos[name] coreRelos, coreOK := s.coreRelos[name]
if !funcOK && !lineOK && !coreOK { if !funcOK && !lineOK && !coreOK {
return nil, fmt.Errorf("no extended BTF info for section %s", name) return nil, fmt.Errorf("no extended BTF info for section %s", name)
} }
return &Program{s, length, funcInfos, lineInfos, relos}, nil return &Program{s, length, funcInfos, lineInfos, coreRelos}, nil
} }
// Datasec returns the BTF required to create maps which represent data sections. // Datasec returns the BTF required to create maps which represent data sections.
@ -491,8 +491,7 @@ func (s *Spec) FindType(name string, typ Type) error {
return fmt.Errorf("type %s: %w", name, ErrNotFound) return fmt.Errorf("type %s: %w", name, ErrNotFound)
} }
cpy, _ := copyType(candidate, nil) value := reflect.Indirect(reflect.ValueOf(copyType(candidate)))
value := reflect.Indirect(reflect.ValueOf(cpy))
reflect.Indirect(reflect.ValueOf(typ)).Set(value) reflect.Indirect(reflect.ValueOf(typ)).Set(value)
return nil return nil
} }
@ -607,7 +606,7 @@ type Program struct {
spec *Spec spec *Spec
length uint64 length uint64
funcInfos, lineInfos extInfo funcInfos, lineInfos extInfo
coreRelos coreRelos coreRelos bpfCoreRelos
} }
// ProgramSpec returns the Spec needed for loading function and line infos into the kernel. // ProgramSpec returns the Spec needed for loading function and line infos into the kernel.
@ -666,23 +665,16 @@ func ProgramLineInfos(s *Program) (recordSize uint32, bytes []byte, err error) {
return s.lineInfos.recordSize, bytes, nil return s.lineInfos.recordSize, bytes, nil
} }
// ProgramFixups returns the changes required to adjust the program to the target. // ProgramRelocations returns the CO-RE relocations required to adjust the
// program to the target.
// //
// This is a free function instead of a method to hide it from users // This is a free function instead of a method to hide it from users
// of package ebpf. // of package ebpf.
func ProgramFixups(s *Program, target *Spec) (COREFixups, error) { func ProgramRelocations(s *Program, target *Spec) (map[uint64]Relocation, error) {
if len(s.coreRelos) == 0 { if len(s.coreRelos) == 0 {
return nil, nil return nil, nil
} }
if target == nil {
var err error
target, err = LoadKernelSpec()
if err != nil {
return nil, err
}
}
return coreRelocate(s.spec, target, s.coreRelos) return coreRelocate(s.spec, target, s.coreRelos)
} }

View File

@ -3,160 +3,43 @@ package btf
import ( import (
"errors" "errors"
"fmt" "fmt"
"math"
"reflect" "reflect"
"sort"
"strconv" "strconv"
"strings" "strings"
"github.com/cilium/ebpf/asm"
) )
// Code in this file is derived from libbpf, which is available under a BSD // Code in this file is derived from libbpf, which is available under a BSD
// 2-Clause license. // 2-Clause license.
// COREFixup is the result of computing a CO-RE relocation for a target. // Relocation describes a CO-RE relocation.
type COREFixup struct { type Relocation struct {
Kind COREKind Current uint32
Local uint32 New uint32
Target uint32
Poison bool
} }
func (f COREFixup) equal(other COREFixup) bool { func (r Relocation) equal(other Relocation) bool {
return f.Local == other.Local && f.Target == other.Target return r.Current == other.Current && r.New == other.New
} }
func (f COREFixup) String() string { // coreReloKind is the type of CO-RE relocation
if f.Poison { type coreReloKind uint32
return fmt.Sprintf("%s=poison", f.Kind)
}
return fmt.Sprintf("%s=%d->%d", f.Kind, f.Local, f.Target)
}
// apply patches ins in place so that it carries the target value of the
// fixup instead of the local one.
//
// Depending on the instruction class either the offset (LdX, St, StX) or the
// immediate (Ld, ALU, ALU64) is rewritten. Before rewriting, the current
// value of the instruction is compared against f.Local; a mismatch is
// reported as an error. Poisoned fixups can't be applied to a single
// instruction and are rejected.
func (f COREFixup) apply(ins *asm.Instruction) error {
	if f.Poison {
		return errors.New("can't poison individual instruction")
	}

	cls := ins.OpCode.Class()
	switch cls {
	case asm.LdXClass, asm.StClass, asm.StXClass:
		// Memory accesses encode the relocated value in the offset field.
		expected := int16(f.Local)
		if expected != ins.Offset {
			return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, expected)
		}
		if f.Target > math.MaxInt16 {
			return fmt.Errorf("offset %d exceeds MaxInt16", f.Target)
		}
		ins.Offset = int16(f.Target)
		return nil

	case asm.LdClass:
		// Only dword-sized constant loads are accepted in this class.
		if !ins.IsConstantLoad(asm.DWord) {
			return fmt.Errorf("not a dword-sized immediate load")
		}
		expected := int64(f.Local)
		if expected != ins.Constant {
			return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, expected)
		}
		ins.Constant = int64(f.Target)
		return nil

	case asm.ALUClass, asm.ALU64Class:
		// The swap check only applies to the 32 bit ALU class; everything
		// else is shared between both ALU classes.
		if cls == asm.ALUClass && ins.OpCode.ALUOp() == asm.Swap {
			return fmt.Errorf("relocation against swap")
		}
		source := ins.OpCode.Source()
		if source != asm.ImmSource {
			return fmt.Errorf("invalid source %s", source)
		}
		expected := int64(f.Local)
		if expected != ins.Constant {
			return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, expected)
		}
		if f.Target > math.MaxInt32 {
			return fmt.Errorf("immediate %d exceeds MaxInt32", f.Target)
		}
		ins.Constant = int64(f.Target)
		return nil
	}

	return fmt.Errorf("invalid class %s", cls)
}
// isNonExistant reports whether the fixup's kind checks for existence and
// its Target is zero.
func (f COREFixup) isNonExistant() bool {
	if !f.Kind.checksForExistence() {
		return false
	}
	return f.Target == 0
}
type COREFixups map[uint64]COREFixup
// Apply applies a set of CO-RE relocations to a BPF program.
//
// Fixups are looked up by the byte offset of each instruction. insns itself
// is not modified: the result is always a freshly allocated slice, including
// when there are no fixups to apply.
//
// Instructions with a poisoned fixup are replaced by a call to an invalid
// builtin function. Returns an error if a fixup can't be applied to its
// instruction.
func (fs COREFixups) Apply(insns asm.Instructions) (asm.Instructions, error) {
	if len(fs) == 0 {
		// Nothing to patch, but still hand out a copy so that the result
		// never aliases the input, consistent with the path below.
		cpy := make(asm.Instructions, len(insns))
		copy(cpy, insns)
		return cpy, nil
	}

	cpy := make(asm.Instructions, 0, len(insns))
	iter := insns.Iterate()
	for iter.Next() {
		fixup, ok := fs[iter.Offset.Bytes()]
		if !ok {
			cpy = append(cpy, *iter.Ins)
			continue
		}

		// Work on a copy of the instruction so the input stays untouched.
		ins := *iter.Ins
		if fixup.Poison {
			const badRelo = asm.BuiltinFunc(0xbad2310)

			cpy = append(cpy, badRelo.Call())
			if ins.OpCode.IsDWordLoad() {
				// 64 bit constant loads occupy two raw bpf instructions, so
				// we need to add another instruction as padding.
				cpy = append(cpy, badRelo.Call())
			}
			continue
		}

		if err := fixup.apply(&ins); err != nil {
			return nil, fmt.Errorf("instruction %d, offset %d: %s: %w", iter.Index, iter.Offset.Bytes(), fixup.Kind, err)
		}

		cpy = append(cpy, ins)
	}

	return cpy, nil
}
// COREKind is the type of CO-RE relocation
type COREKind uint32
const ( const (
reloFieldByteOffset COREKind = iota /* field byte offset */ reloFieldByteOffset coreReloKind = iota /* field byte offset */
reloFieldByteSize /* field size in bytes */ reloFieldByteSize /* field size in bytes */
reloFieldExists /* field existence in target kernel */ reloFieldExists /* field existence in target kernel */
reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */ reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */
reloFieldLShiftU64 /* bitfield-specific left bitshift */ reloFieldLShiftU64 /* bitfield-specific left bitshift */
reloFieldRShiftU64 /* bitfield-specific right bitshift */ reloFieldRShiftU64 /* bitfield-specific right bitshift */
reloTypeIDLocal /* type ID in local BPF object */ reloTypeIDLocal /* type ID in local BPF object */
reloTypeIDTarget /* type ID in target kernel */ reloTypeIDTarget /* type ID in target kernel */
reloTypeExists /* type existence in target kernel */ reloTypeExists /* type existence in target kernel */
reloTypeSize /* type size in bytes */ reloTypeSize /* type size in bytes */
reloEnumvalExists /* enum value existence in target kernel */ reloEnumvalExists /* enum value existence in target kernel */
reloEnumvalValue /* enum value integer value */ reloEnumvalValue /* enum value integer value */
) )
func (k COREKind) String() string { func (k coreReloKind) String() string {
switch k { switch k {
case reloFieldByteOffset: case reloFieldByteOffset:
return "byte_off" return "byte_off"
@ -187,249 +70,103 @@ func (k COREKind) String() string {
} }
} }
func (k COREKind) checksForExistence() bool { func coreRelocate(local, target *Spec, coreRelos bpfCoreRelos) (map[uint64]Relocation, error) {
return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists if target == nil {
} var err error
target, err = loadKernelSpec()
if err != nil {
return nil, err
}
}
func coreRelocate(local, target *Spec, relos coreRelos) (COREFixups, error) {
if local.byteOrder != target.byteOrder { if local.byteOrder != target.byteOrder {
return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder) return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder)
} }
var ids []TypeID relocations := make(map[uint64]Relocation, len(coreRelos))
relosByID := make(map[TypeID]coreRelos) for _, relo := range coreRelos {
result := make(COREFixups, len(relos)) accessorStr, err := local.strings.Lookup(relo.AccessStrOff)
for _, relo := range relos {
if relo.kind == reloTypeIDLocal {
// Filtering out reloTypeIDLocal here makes our lives a lot easier
// down the line, since it doesn't have a target at all.
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
result[uint64(relo.insnOff)] = COREFixup{
relo.kind,
uint32(relo.typeID),
uint32(relo.typeID),
false,
}
continue
}
relos, ok := relosByID[relo.typeID]
if !ok {
ids = append(ids, relo.typeID)
}
relosByID[relo.typeID] = append(relos, relo)
}
// Ensure we work on relocations in a deterministic order.
sort.Slice(ids, func(i, j int) bool {
return ids[i] < ids[j]
})
for _, id := range ids {
if int(id) >= len(local.types) {
return nil, fmt.Errorf("invalid type id %d", id)
}
localType := local.types[id]
named, ok := localType.(namedType)
if !ok || named.name() == "" {
return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported)
}
relos := relosByID[id]
targets := target.namedTypes[named.essentialName()]
fixups, err := coreCalculateFixups(localType, targets, relos)
if err != nil {
return nil, fmt.Errorf("relocate %s: %w", localType, err)
}
for i, relo := range relos {
result[uint64(relo.insnOff)] = fixups[i]
}
}
return result, nil
}
var errAmbiguousRelocation = errors.New("ambiguous relocation")
var errImpossibleRelocation = errors.New("impossible relocation")
// coreCalculateFixups calculates the fixups for the given relocations using
// the "best" target.
//
// The best target is determined by scoring: the less poisoning we have to do
// the better the target is.
func coreCalculateFixups(local Type, targets []namedType, relos coreRelos) ([]COREFixup, error) {
localID := local.ID()
local, err := copyType(local, skipQualifierAndTypedef)
if err != nil {
return nil, err
}
bestScore := len(relos)
var bestFixups []COREFixup
for i := range targets {
targetID := targets[i].ID()
target, err := copyType(targets[i], skipQualifierAndTypedef)
if err != nil { if err != nil {
return nil, err return nil, err
} }
score := 0 // lower is better accessor, err := parseCoreAccessor(accessorStr)
fixups := make([]COREFixup, 0, len(relos)) if err != nil {
for _, relo := range relos { return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
fixup, err := coreCalculateFixup(local, localID, target, targetID, relo)
if err != nil {
return nil, fmt.Errorf("target %s: %w", target, err)
}
if fixup.Poison || fixup.isNonExistant() {
score++
}
fixups = append(fixups, fixup)
} }
if score > bestScore { if int(relo.TypeID) >= len(local.types) {
// We have a better target already, ignore this one. return nil, fmt.Errorf("invalid type id %d", relo.TypeID)
}
typ := local.types[relo.TypeID]
if relo.ReloKind == reloTypeIDLocal {
relocations[uint64(relo.InsnOff)] = Relocation{
uint32(typ.ID()),
uint32(typ.ID()),
}
continue continue
} }
if score < bestScore { named, ok := typ.(namedType)
// This is the best target yet, use it. if !ok || named.name() == "" {
bestScore = score return nil, fmt.Errorf("relocate anonymous type %s: %w", typ.String(), ErrNotSupported)
bestFixups = fixups
continue
} }
// Some other target has the same score as the current one. Make sure name := essentialName(named.name())
// the fixups agree with each other. res, err := coreCalculateRelocation(typ, target.namedTypes[name], relo.ReloKind, accessor)
for i, fixup := range bestFixups { if err != nil {
if !fixup.equal(fixups[i]) { return nil, fmt.Errorf("relocate %s: %w", name, err)
return nil, fmt.Errorf("%s: multiple types match: %w", fixup.Kind, errAmbiguousRelocation)
}
} }
relocations[uint64(relo.InsnOff)] = res
} }
if bestFixups == nil { return relocations, nil
// Nothing at all matched, probably because there are no suitable
// targets at all. Poison everything!
bestFixups = make([]COREFixup, len(relos))
for i, relo := range relos {
bestFixups[i] = COREFixup{Kind: relo.kind, Poison: true}
}
}
return bestFixups, nil
} }
// coreCalculateFixup calculates the fixup for a single local type, target type var errAmbiguousRelocation = errors.New("ambiguous relocation")
// and relocation.
func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID, relo coreRelo) (COREFixup, error) {
fixup := func(local, target uint32) (COREFixup, error) {
return COREFixup{relo.kind, local, target, false}, nil
}
poison := func() (COREFixup, error) {
if relo.kind.checksForExistence() {
return fixup(1, 0)
}
return COREFixup{relo.kind, 0, 0, true}, nil
}
zero := COREFixup{}
switch relo.kind {
case reloTypeIDTarget, reloTypeSize, reloTypeExists:
if len(relo.accessor) > 1 || relo.accessor[0] != 0 {
return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor)
}
err := coreAreTypesCompatible(local, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
switch relo.kind {
case reloTypeExists:
return fixup(1, 1)
func coreCalculateRelocation(local Type, targets []namedType, kind coreReloKind, localAccessor coreAccessor) (Relocation, error) {
var relos []Relocation
var matches []Type
for _, target := range targets {
switch kind {
case reloTypeIDTarget: case reloTypeIDTarget:
return fixup(uint32(localID), uint32(targetID)) if localAccessor[0] != 0 {
return Relocation{}, fmt.Errorf("%s: unexpected non-zero accessor", kind)
case reloTypeSize:
localSize, err := Sizeof(local)
if err != nil {
return zero, err
} }
targetSize, err := Sizeof(target) if compat, err := coreAreTypesCompatible(local, target); err != nil {
if err != nil { return Relocation{}, fmt.Errorf("%s: %s", kind, err)
return zero, err } else if !compat {
continue
} }
return fixup(uint32(localSize), uint32(targetSize)) relos = append(relos, Relocation{uint32(target.ID()), uint32(target.ID())})
default:
return Relocation{}, fmt.Errorf("relocation %s: %w", kind, ErrNotSupported)
} }
matches = append(matches, target)
}
case reloEnumvalValue, reloEnumvalExists: if len(relos) == 0 {
localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target) // TODO: Add switch for existence checks like reloEnumvalExists here.
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("relocation %s: %w", relo.kind, err)
}
switch relo.kind { // TODO: This might have to be poisoned.
case reloEnumvalExists: return Relocation{}, fmt.Errorf("no relocation found, tried %v", targets)
return fixup(1, 1) }
case reloEnumvalValue:
return fixup(uint32(localValue.Value), uint32(targetValue.Value))
}
case reloFieldByteOffset, reloFieldByteSize, reloFieldExists:
if _, ok := target.(*Fwd); ok {
// We can't relocate fields using a forward declaration, so
// skip it. If a non-forward declaration is present in the BTF
// we'll find it in one of the other iterations.
return poison()
}
localField, targetField, err := coreFindField(local, relo.accessor, target)
if errors.Is(err, errImpossibleRelocation) {
return poison()
}
if err != nil {
return zero, fmt.Errorf("target %s: %w", target, err)
}
switch relo.kind {
case reloFieldExists:
return fixup(1, 1)
case reloFieldByteOffset:
return fixup(localField.offset/8, targetField.offset/8)
case reloFieldByteSize:
localSize, err := Sizeof(localField.Type)
if err != nil {
return zero, err
}
targetSize, err := Sizeof(targetField.Type)
if err != nil {
return zero, err
}
return fixup(uint32(localSize), uint32(targetSize))
relo := relos[0]
for _, altRelo := range relos[1:] {
if !altRelo.equal(relo) {
return Relocation{}, fmt.Errorf("multiple types %v match: %w", matches, errAmbiguousRelocation)
} }
} }
return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported) return relo, nil
} }
/* coreAccessor contains a path through a struct. It contains at least one index. /* coreAccessor contains a path through a struct. It contains at least one index.
@ -482,240 +219,6 @@ func parseCoreAccessor(accessor string) (coreAccessor, error) {
return result, nil return result, nil
} }
func (ca coreAccessor) String() string {
strs := make([]string, 0, len(ca))
for _, i := range ca {
strs = append(strs, strconv.Itoa(i))
}
return strings.Join(strs, ":")
}
func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) {
e, ok := t.(*Enum)
if !ok {
return nil, fmt.Errorf("not an enum: %s", t)
}
if len(ca) > 1 {
return nil, fmt.Errorf("invalid accessor %s for enum", ca)
}
i := ca[0]
if i >= len(e.Values) {
return nil, fmt.Errorf("invalid index %d for %s", i, e)
}
return &e.Values[i], nil
}
type coreField struct {
Type Type
offset uint32
}
func adjustOffset(base uint32, t Type, n int) (uint32, error) {
size, err := Sizeof(t)
if err != nil {
return 0, err
}
return base + (uint32(n) * uint32(size) * 8), nil
}
// coreFindField descends into the local type using the accessor and tries to
// find an equivalent field in target at each step.
//
// Returns the field and the offset of the field from the start of
// target in bits.
func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreField, _ error) {
// The first index is used to offset a pointer of the base type like
// when accessing an array.
localOffset, err := adjustOffset(0, local, localAcc[0])
if err != nil {
return coreField{}, coreField{}, err
}
targetOffset, err := adjustOffset(0, target, localAcc[0])
if err != nil {
return coreField{}, coreField{}, err
}
if err := coreAreMembersCompatible(local, target); err != nil {
return coreField{}, coreField{}, fmt.Errorf("fields: %w", err)
}
var localMaybeFlex, targetMaybeFlex bool
for _, acc := range localAcc[1:] {
switch localType := local.(type) {
case composite:
// For composite types acc is used to find the field in the local type,
// and then we try to find a field in target with the same name.
localMembers := localType.members()
if acc >= len(localMembers) {
return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, local)
}
localMember := localMembers[acc]
if localMember.Name == "" {
_, ok := localMember.Type.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported)
}
// This is an anonymous struct or union, ignore it.
local = localMember.Type
localOffset += localMember.Offset
localMaybeFlex = false
continue
}
targetType, ok := target.(composite)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation)
}
targetMember, last, err := coreFindMember(targetType, localMember.Name)
if err != nil {
return coreField{}, coreField{}, err
}
if targetMember.BitfieldSize > 0 {
return coreField{}, coreField{}, fmt.Errorf("field %q is a bitfield: %w", targetMember.Name, ErrNotSupported)
}
local = localMember.Type
localMaybeFlex = acc == len(localMembers)-1
localOffset += localMember.Offset
target = targetMember.Type
targetMaybeFlex = last
targetOffset += targetMember.Offset
case *Array:
// For arrays, acc is the index in the target.
targetType, ok := target.(*Array)
if !ok {
return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation)
}
if localType.Nelems == 0 && !localMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array")
}
if targetType.Nelems == 0 && !targetMaybeFlex {
return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array")
}
if localType.Nelems > 0 && acc >= int(localType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc)
}
if targetType.Nelems > 0 && acc >= int(targetType.Nelems) {
return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation)
}
local = localType.Type
localMaybeFlex = false
localOffset, err = adjustOffset(localOffset, local, acc)
if err != nil {
return coreField{}, coreField{}, err
}
target = targetType.Type
targetMaybeFlex = false
targetOffset, err = adjustOffset(targetOffset, target, acc)
if err != nil {
return coreField{}, coreField{}, err
}
default:
return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported)
}
if err := coreAreMembersCompatible(local, target); err != nil {
return coreField{}, coreField{}, err
}
}
return coreField{local, localOffset}, coreField{target, targetOffset}, nil
}
// coreFindMember finds a member in a composite type while handling anonymous
// structs and unions.
func coreFindMember(typ composite, name Name) (Member, bool, error) {
if name == "" {
return Member{}, false, errors.New("can't search for anonymous member")
}
type offsetTarget struct {
composite
offset uint32
}
targets := []offsetTarget{{typ, 0}}
visited := make(map[composite]bool)
for i := 0; i < len(targets); i++ {
target := targets[i]
// Only visit targets once to prevent infinite recursion.
if visited[target] {
continue
}
if len(visited) >= maxTypeDepth {
// This check is different than libbpf, which restricts the entire
// path to BPF_CORE_SPEC_MAX_LEN items.
return Member{}, false, fmt.Errorf("type is nested too deep")
}
visited[target] = true
members := target.members()
for j, member := range members {
if member.Name == name {
// NB: This is safe because member is a copy.
member.Offset += target.offset
return member, j == len(members)-1, nil
}
// The names don't match, but this member could be an anonymous struct
// or union.
if member.Name != "" {
continue
}
comp, ok := member.Type.(composite)
if !ok {
return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type)
}
targets = append(targets, offsetTarget{comp, target.offset + member.Offset})
}
}
return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation)
}
// coreFindEnumValue follows localAcc to find the equivalent enum value in target.
func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) {
localValue, err := localAcc.enumValue(local)
if err != nil {
return nil, nil, err
}
targetEnum, ok := target.(*Enum)
if !ok {
return nil, nil, errImpossibleRelocation
}
localName := localValue.Name.essentialName()
for i, targetValue := range targetEnum.Values {
if targetValue.Name.essentialName() != localName {
continue
}
return localValue, &targetEnum.Values[i], nil
}
return nil, nil, errImpossibleRelocation
}
/* The comment below is from bpf_core_types_are_compat in libbpf.c: /* The comment below is from bpf_core_types_are_compat in libbpf.c:
* *
* Check local and target types for compatibility. This check is used for * Check local and target types for compatibility. This check is used for
@ -736,10 +239,8 @@ func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localVal
* number of input args and compatible return and argument types. * number of input args and compatible return and argument types.
* These rules are not set in stone and probably will be adjusted as we get * These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations. * more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if types are not compatible.
*/ */
func coreAreTypesCompatible(localType Type, targetType Type) error { func coreAreTypesCompatible(localType Type, targetType Type) (bool, error) {
var ( var (
localTs, targetTs typeDeque localTs, targetTs typeDeque
l, t = &localType, &targetType l, t = &localType, &targetType
@ -748,14 +249,14 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() { for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() {
if depth >= maxTypeDepth { if depth >= maxTypeDepth {
return errors.New("types are nested too deep") return false, errors.New("types are nested too deep")
} }
localType = *l localType = skipQualifierAndTypedef(*l)
targetType = *t targetType = skipQualifierAndTypedef(*t)
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) return false, nil
} }
switch lv := (localType).(type) { switch lv := (localType).(type) {
@ -765,7 +266,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
case *Int: case *Int:
tv := targetType.(*Int) tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() { if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation) return false, nil
} }
case *Pointer, *Array: case *Pointer, *Array:
@ -776,7 +277,7 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
case *FuncProto: case *FuncProto:
tv := targetType.(*FuncProto) tv := targetType.(*FuncProto)
if len(lv.Params) != len(tv.Params) { if len(lv.Params) != len(tv.Params) {
return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation) return false, nil
} }
depth++ depth++
@ -784,24 +285,22 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
targetType.walk(&targetTs) targetType.walk(&targetTs)
default: default:
return fmt.Errorf("unsupported type %T", localType) return false, fmt.Errorf("unsupported type %T", localType)
} }
} }
if l != nil { if l != nil {
return fmt.Errorf("dangling local type %T", *l) return false, fmt.Errorf("dangling local type %T", *l)
} }
if t != nil { if t != nil {
return fmt.Errorf("dangling target type %T", *t) return false, fmt.Errorf("dangling target type %T", *t)
} }
return nil return true, nil
} }
/* coreAreMembersCompatible checks two types for field-based relocation compatibility. /* The comment below is from bpf_core_fields_are_compat in libbpf.c:
*
* The comment below is from bpf_core_fields_are_compat in libbpf.c:
* *
* Check two types for compatibility for the purpose of field access * Check two types for compatibility for the purpose of field access
* relocation. const/volatile/restrict and typedefs are skipped to ensure we * relocation. const/volatile/restrict and typedefs are skipped to ensure we
@ -815,63 +314,65 @@ func coreAreTypesCompatible(localType Type, targetType Type) error {
* - for INT, size and signedness are ignored; * - for INT, size and signedness are ignored;
* - for ARRAY, dimensionality is ignored, element types are checked for * - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively; * compatibility recursively;
* [ NB: coreAreMembersCompatible doesn't recurse, this check is done
* by coreFindField. ]
* - everything else shouldn't be ever a target of relocation. * - everything else shouldn't be ever a target of relocation.
* These rules are not set in stone and probably will be adjusted as we get * These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations. * more experience with using BPF CO-RE relocations.
*
* Returns errImpossibleRelocation if the members are not compatible.
*/ */
func coreAreMembersCompatible(localType Type, targetType Type) error { func coreAreMembersCompatible(localType Type, targetType Type) (bool, error) {
doNamesMatch := func(a, b string) error { doNamesMatch := func(a, b string) bool {
if a == "" || b == "" { if a == "" || b == "" {
// allow anonymous and named type to match // allow anonymous and named type to match
return nil return true
} }
if essentialName(a) == essentialName(b) { return essentialName(a) == essentialName(b)
return nil }
for depth := 0; depth <= maxTypeDepth; depth++ {
localType = skipQualifierAndTypedef(localType)
targetType = skipQualifierAndTypedef(targetType)
_, lok := localType.(composite)
_, tok := targetType.(composite)
if lok && tok {
return true, nil
} }
return fmt.Errorf("names don't match: %w", errImpossibleRelocation) if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
} return false, nil
_, lok := localType.(composite)
_, tok := targetType.(composite)
if lok && tok {
return nil
}
if reflect.TypeOf(localType) != reflect.TypeOf(targetType) {
return fmt.Errorf("type mismatch: %w", errImpossibleRelocation)
}
switch lv := localType.(type) {
case *Array, *Pointer:
return nil
case *Enum:
tv := targetType.(*Enum)
return doNamesMatch(lv.name(), tv.name())
case *Fwd:
tv := targetType.(*Fwd)
return doNamesMatch(lv.name(), tv.name())
case *Int:
tv := targetType.(*Int)
if lv.isBitfield() || tv.isBitfield() {
return fmt.Errorf("bitfield: %w", errImpossibleRelocation)
} }
return nil
default: switch lv := localType.(type) {
return fmt.Errorf("type %s: %w", localType, ErrNotSupported) case *Pointer:
return true, nil
case *Enum:
tv := targetType.(*Enum)
return doNamesMatch(lv.name(), tv.name()), nil
case *Fwd:
tv := targetType.(*Fwd)
return doNamesMatch(lv.name(), tv.name()), nil
case *Int:
tv := targetType.(*Int)
return !lv.isBitfield() && !tv.isBitfield(), nil
case *Array:
tv := targetType.(*Array)
localType = lv.Type
targetType = tv.Type
default:
return false, fmt.Errorf("unsupported type %T", localType)
}
} }
return false, errors.New("types are nested too deep")
} }
func skipQualifierAndTypedef(typ Type) (Type, error) { func skipQualifierAndTypedef(typ Type) Type {
result := typ result := typ
for depth := 0; depth <= maxTypeDepth; depth++ { for depth := 0; depth <= maxTypeDepth; depth++ {
switch v := (result).(type) { switch v := (result).(type) {
@ -880,8 +381,8 @@ func skipQualifierAndTypedef(typ Type) (Type, error) {
case *Typedef: case *Typedef:
result = v.Type result = v.Type
default: default:
return result, nil return result
} }
} }
return nil, errors.New("exceeded type depth") return typ
} }

View File

@ -30,7 +30,7 @@ type btfExtCoreHeader struct {
CoreReloLen uint32 CoreReloLen uint32
} }
func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, relos map[string]coreRelos, err error) { func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, coreRelos map[string]bpfCoreRelos, err error) {
var header btfExtHeader var header btfExtHeader
var coreHeader btfExtCoreHeader var coreHeader btfExtCoreHeader
if err := binary.Read(r, bo, &header); err != nil { if err := binary.Read(r, bo, &header); err != nil {
@ -94,13 +94,13 @@ func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (f
return nil, nil, nil, fmt.Errorf("can't seek to CO-RE relocation section: %v", err) return nil, nil, nil, fmt.Errorf("can't seek to CO-RE relocation section: %v", err)
} }
relos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings) coreRelos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings)
if err != nil { if err != nil {
return nil, nil, nil, fmt.Errorf("CO-RE relocation info: %w", err) return nil, nil, nil, fmt.Errorf("CO-RE relocation info: %w", err)
} }
} }
return funcInfo, lineInfo, relos, nil return funcInfo, lineInfo, coreRelos, nil
} }
type btfExtInfoSec struct { type btfExtInfoSec struct {
@ -208,25 +208,18 @@ type bpfCoreRelo struct {
InsnOff uint32 InsnOff uint32
TypeID TypeID TypeID TypeID
AccessStrOff uint32 AccessStrOff uint32
Kind COREKind ReloKind coreReloKind
} }
type coreRelo struct { type bpfCoreRelos []bpfCoreRelo
insnOff uint32
typeID TypeID
accessor coreAccessor
kind COREKind
}
type coreRelos []coreRelo
// append two slices of extInfoRelo to each other. The InsnOff of b are adjusted // append two slices of extInfoRelo to each other. The InsnOff of b are adjusted
// by offset. // by offset.
func (r coreRelos) append(other coreRelos, offset uint64) coreRelos { func (r bpfCoreRelos) append(other bpfCoreRelos, offset uint64) bpfCoreRelos {
result := make([]coreRelo, 0, len(r)+len(other)) result := make([]bpfCoreRelo, 0, len(r)+len(other))
result = append(result, r...) result = append(result, r...)
for _, relo := range other { for _, relo := range other {
relo.insnOff += uint32(offset) relo.InsnOff += uint32(offset)
result = append(result, relo) result = append(result, relo)
} }
return result return result
@ -234,7 +227,7 @@ func (r coreRelos) append(other coreRelos, offset uint64) coreRelos {
var extInfoReloSize = binary.Size(bpfCoreRelo{}) var extInfoReloSize = binary.Size(bpfCoreRelo{})
func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]coreRelos, error) { func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]bpfCoreRelos, error) {
var recordSize uint32 var recordSize uint32
if err := binary.Read(r, bo, &recordSize); err != nil { if err := binary.Read(r, bo, &recordSize); err != nil {
return nil, fmt.Errorf("read record size: %v", err) return nil, fmt.Errorf("read record size: %v", err)
@ -244,14 +237,14 @@ func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (m
return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize) return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize)
} }
result := make(map[string]coreRelos) result := make(map[string]bpfCoreRelos)
for { for {
secName, infoHeader, err := parseExtInfoHeader(r, bo, strings) secName, infoHeader, err := parseExtInfoHeader(r, bo, strings)
if errors.Is(err, io.EOF) { if errors.Is(err, io.EOF) {
return result, nil return result, nil
} }
var relos coreRelos var relos []bpfCoreRelo
for i := uint32(0); i < infoHeader.NumInfo; i++ { for i := uint32(0); i < infoHeader.NumInfo; i++ {
var relo bpfCoreRelo var relo bpfCoreRelo
if err := binary.Read(r, bo, &relo); err != nil { if err := binary.Read(r, bo, &relo); err != nil {
@ -262,22 +255,7 @@ func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (m
return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, relo.InsnOff) return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, relo.InsnOff)
} }
accessorStr, err := strings.Lookup(relo.AccessStrOff) relos = append(relos, relo)
if err != nil {
return nil, err
}
accessor, err := parseCoreAccessor(accessorStr)
if err != nil {
return nil, fmt.Errorf("accessor %q: %s", accessorStr, err)
}
relos = append(relos, coreRelo{
relo.InsnOff,
relo.TypeID,
accessor,
relo.Kind,
})
} }
result[secName] = relos result[secName] = relos

View File

@ -1,6 +1,7 @@
package btf package btf
import ( import (
"errors"
"fmt" "fmt"
"math" "math"
"strings" "strings"
@ -36,7 +37,6 @@ type Type interface {
type namedType interface { type namedType interface {
Type Type
name() string name() string
essentialName() string
} }
// Name identifies a type. // Name identifies a type.
@ -48,10 +48,6 @@ func (n Name) name() string {
return string(n) return string(n)
} }
func (n Name) essentialName() string {
return essentialName(string(n))
}
// Void is the unit type of BTF. // Void is the unit type of BTF.
type Void struct{} type Void struct{}
@ -178,7 +174,8 @@ func (s *Struct) walk(tdq *typeDeque) {
func (s *Struct) copy() Type { func (s *Struct) copy() Type {
cpy := *s cpy := *s
cpy.Members = copyMembers(s.Members) cpy.Members = make([]Member, len(s.Members))
copy(cpy.Members, s.Members)
return &cpy return &cpy
} }
@ -209,7 +206,8 @@ func (u *Union) walk(tdq *typeDeque) {
func (u *Union) copy() Type { func (u *Union) copy() Type {
cpy := *u cpy := *u
cpy.Members = copyMembers(u.Members) cpy.Members = make([]Member, len(u.Members))
copy(cpy.Members, u.Members)
return &cpy return &cpy
} }
@ -217,12 +215,6 @@ func (u *Union) members() []Member {
return u.Members return u.Members
} }
func copyMembers(orig []Member) []Member {
cpy := make([]Member, len(orig))
copy(cpy, orig)
return cpy
}
type composite interface { type composite interface {
members() []Member members() []Member
} }
@ -519,7 +511,7 @@ func Sizeof(typ Type) (int, error) {
switch v := typ.(type) { switch v := typ.(type) {
case *Array: case *Array:
if n > 0 && int64(v.Nelems) > math.MaxInt64/n { if n > 0 && int64(v.Nelems) > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ) return 0, errors.New("overflow")
} }
// Arrays may be of zero length, which allows // Arrays may be of zero length, which allows
@ -540,30 +532,28 @@ func Sizeof(typ Type) (int, error) {
continue continue
default: default:
return 0, fmt.Errorf("unsized type %T", typ) return 0, fmt.Errorf("unrecognized type %T", typ)
} }
if n > 0 && elem > math.MaxInt64/n { if n > 0 && elem > math.MaxInt64/n {
return 0, fmt.Errorf("type %s: overflow", typ) return 0, errors.New("overflow")
} }
size := n * elem size := n * elem
if int64(int(size)) != size { if int64(int(size)) != size {
return 0, fmt.Errorf("type %s: overflow", typ) return 0, errors.New("overflow")
} }
return int(size), nil return int(size), nil
} }
return 0, fmt.Errorf("type %s: exceeded type depth", typ) return 0, errors.New("exceeded type depth")
} }
// copy a Type recursively. // copy a Type recursively.
// //
// typ may form a cycle. // typ may form a cycle.
// func copyType(typ Type) Type {
// Returns any errors from transform verbatim.
func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
var ( var (
copies = make(map[Type]Type) copies = make(map[Type]Type)
work typeDeque work typeDeque
@ -576,17 +566,7 @@ func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
continue continue
} }
var cpy Type cpy := (*t).copy()
if transform != nil {
tf, err := transform(*t)
if err != nil {
return nil, fmt.Errorf("copy %s: %w", typ, err)
}
cpy = tf.copy()
} else {
cpy = (*t).copy()
}
copies[*t] = cpy copies[*t] = cpy
*t = cpy *t = cpy
@ -594,7 +574,7 @@ func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) {
cpy.walk(&work) cpy.walk(&work)
} }
return typ, nil return typ
} }
// typeDeque keeps track of pointers to types which still // typeDeque keeps track of pointers to types which still

View File

@ -50,19 +50,3 @@ func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) {
syms, err = se.File.Symbols() syms, err = se.File.Symbols()
return return
} }
// DynamicSymbols is the safe version of elf.File.DynamicSymbols.
func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) {
defer func() {
r := recover()
if r == nil {
return
}
syms = nil
err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r)
}()
syms, err = se.File.DynamicSymbols()
return
}

View File

@ -9,16 +9,11 @@ import (
// depending on the host's endianness. // depending on the host's endianness.
var NativeEndian binary.ByteOrder var NativeEndian binary.ByteOrder
// Clang is set to either "el" or "eb" depending on the host's endianness.
var ClangEndian string
func init() { func init() {
if isBigEndian() { if isBigEndian() {
NativeEndian = binary.BigEndian NativeEndian = binary.BigEndian
ClangEndian = "eb"
} else { } else {
NativeEndian = binary.LittleEndian NativeEndian = binary.LittleEndian
ClangEndian = "el"
} }
} }

View File

@ -29,10 +29,6 @@ type VerifierError struct {
log string log string
} }
func (le *VerifierError) Unwrap() error {
return le.cause
}
func (le *VerifierError) Error() string { func (le *VerifierError) Error() string {
if le.log == "" { if le.log == "" {
return le.cause.Error() return le.cause.Error()

View File

@ -22,6 +22,10 @@ func NewSlicePointer(buf []byte) Pointer {
// NewStringPointer creates a 64-bit pointer from a string. // NewStringPointer creates a 64-bit pointer from a string.
func NewStringPointer(str string) Pointer { func NewStringPointer(str string) Pointer {
if str == "" {
return Pointer{}
}
p, err := unix.BytePtrFromString(str) p, err := unix.BytePtrFromString(str)
if err != nil { if err != nil {
return Pointer{} return Pointer{}

View File

@ -42,7 +42,6 @@ const (
PROT_READ = linux.PROT_READ PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE PROT_WRITE = linux.PROT_WRITE
MAP_SHARED = linux.MAP_SHARED MAP_SHARED = linux.MAP_SHARED
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT

View File

@ -43,7 +43,6 @@ const (
PROT_READ = 0x1 PROT_READ = 0x1
PROT_WRITE = 0x2 PROT_WRITE = 0x2
MAP_SHARED = 0x1 MAP_SHARED = 0x1
PERF_ATTR_SIZE_VER1 = 0
PERF_TYPE_SOFTWARE = 0x1 PERF_TYPE_SOFTWARE = 0x1
PERF_TYPE_TRACEPOINT = 0 PERF_TYPE_TRACEPOINT = 0
PERF_COUNT_SW_BPF_OUTPUT = 0xa PERF_COUNT_SW_BPF_OUTPUT = 0xa

View File

@ -1,16 +1,12 @@
package link package link
import ( import (
"bytes"
"crypto/rand" "crypto/rand"
"errors" "errors"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf" "github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal" "github.com/cilium/ebpf/internal"
@ -19,60 +15,13 @@ import (
var ( var (
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
kprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
) )
type probeType uint8
const (
kprobeType probeType = iota
uprobeType
)
func (pt probeType) String() string {
if pt == kprobeType {
return "kprobe"
}
return "uprobe"
}
func (pt probeType) EventsPath() string {
if pt == kprobeType {
return kprobeEventsPath
}
return uprobeEventsPath
}
func (pt probeType) PerfEventType(ret bool) perfEventType {
if pt == kprobeType {
if ret {
return kretprobeEvent
}
return kprobeEvent
}
if ret {
return uretprobeEvent
}
return uprobeEvent
}
func (pt probeType) RetprobeBit() (uint64, error) {
if pt == kprobeType {
return kretprobeBit()
}
return uretprobeBit()
}
// Kprobe attaches the given eBPF program to a perf event that fires when the // Kprobe attaches the given eBPF program to a perf event that fires when the
// given kernel symbol starts executing. See /proc/kallsyms for available // given kernel symbol starts executing. See /proc/kallsyms for available
// symbols. For example, printk(): // symbols. For example, printk():
// //
// Kprobe("printk", prog) // Kprobe("printk")
// //
// The resulting Link must be Closed during program shutdown to avoid leaking // The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. // system resources.
@ -95,7 +44,7 @@ func Kprobe(symbol string, prog *ebpf.Program) (Link, error) {
// before the given kernel symbol exits, with the function stack left intact. // before the given kernel symbol exits, with the function stack left intact.
// See /proc/kallsyms for available symbols. For example, printk(): // See /proc/kallsyms for available symbols. For example, printk():
// //
// Kretprobe("printk", prog) // Kretprobe("printk")
// //
// The resulting Link must be Closed during program shutdown to avoid leaking // The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. // system resources.
@ -131,10 +80,7 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
} }
// Use kprobe PMU if the kernel has it available. // Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(platformPrefix(symbol), ret) tp, err := pmuKprobe(symbol, ret)
if errors.Is(err, os.ErrNotExist) {
tp, err = pmuKprobe(symbol, ret)
}
if err == nil { if err == nil {
return tp, nil return tp, nil
} }
@ -143,10 +89,7 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
} }
// Use tracefs if kprobe PMU is missing. // Use tracefs if kprobe PMU is missing.
tp, err = tracefsKprobe(platformPrefix(symbol), ret) tp, err = tracefsKprobe(symbol, ret)
if errors.Is(err, os.ErrNotExist) {
tp, err = tracefsKprobe(symbol, ret)
}
if err != nil { if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
} }
@ -154,70 +97,36 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
return tp, nil return tp, nil
} }
// pmuKprobe opens a perf event based on the kprobe PMU. // pmuKprobe opens a perf event based on a Performance Monitoring Unit.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel. // Requires at least 4.17 (e12f03d7031a "perf/core: Implement the
// 'perf_kprobe' PMU").
// Returns ErrNotSupported if the kernel doesn't support perf_kprobe PMU,
// or os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(symbol string, ret bool) (*perfEvent, error) { func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
return pmuProbe(kprobeType, symbol, "", 0, ret)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support // Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU. // the perf_kprobe PMU.
et, err := getPMUEventType(typ) et, err := getPMUEventType("kprobe")
if err != nil { if err != nil {
return nil, err return nil, err
} }
var config uint64 // Create a pointer to a NUL-terminated string for the kernel.
if ret { sp, err := unsafeStringPtr(symbol)
bit, err := typ.RetprobeBit() if err != nil {
if err != nil { return nil, err
return nil, err
}
config |= 1 << bit
} }
var ( // TODO: Parse the position of the bit from /sys/bus/event_source/devices/%s/format/retprobe.
attr unix.PerfEventAttr config := 0
sp unsafe.Pointer if ret {
) config = 1
switch typ { }
case kprobeType:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err := unsafeStringPtr(symbol)
if err != nil {
return nil, err
}
attr = unix.PerfEventAttr{ attr := unix.PerfEventAttr{
Type: uint32(et), // PMU event type read from sysfs Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Config: config, // Retprobe flag Config: uint64(config), // perf_kprobe PMU treats config as flags
}
case uprobeType:
sp, err := unsafeStringPtr(path)
if err != nil {
return nil, err
}
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
// size of the internal buffer the kernel allocates for reading the
// perf_event_attr argument from userspace.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: offset, // Uprobe offset
Config: config, // Retprobe flag
}
} }
fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
@ -235,27 +144,22 @@ func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*per
// Ensure the string pointer is not collected before PerfEventOpen returns. // Ensure the string pointer is not collected before PerfEventOpen returns.
runtime.KeepAlive(sp) runtime.KeepAlive(sp)
// Kernel has perf_[k,u]probe PMU available, initialize perf event. // Kernel has perf_kprobe PMU available, initialize perf event.
return &perfEvent{ return &perfEvent{
fd: internal.NewFD(uint32(fd)), fd: internal.NewFD(uint32(fd)),
pmuID: et, pmuID: et,
name: symbol, name: symbol,
typ: typ.PerfEventType(ret), ret: ret,
progType: ebpf.Kprobe,
}, nil }, nil
} }
// tracefsKprobe creates a Kprobe tracefs entry. // tracefsKprobe creates a trace event by writing an entry to <tracefs>/kprobe_events.
func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
return tracefsProbe(kprobeType, symbol, "", 0, ret)
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
// A new trace event group name is generated on every call to support creating // A new trace event group name is generated on every call to support creating
// multiple trace events for the same kernel or userspace symbol. // multiple trace events for the same kernel symbol. A perf event is then opened
// Path and offset are only set in the case of uprobe(s) and are used to set // on the newly-created trace event and returned to the caller.
// the executable/library path on the filesystem and the offset where the probe is inserted. func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
// Generate a random string for each trace event we attempt to create. // Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating // This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name. // multiple kprobe trace events with the same name.
@ -272,13 +176,14 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (
if err == nil { if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol) return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol)
} }
if err != nil && !errors.Is(err, os.ErrNotExist) { // The read is expected to fail with ErrNotSupported due to a non-existing event.
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err) return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err)
} }
// Create the [k,u]probe trace event using tracefs. // Create the kprobe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, group, symbol, path, offset, ret); err != nil { if err := createTraceFSKprobeEvent(group, symbol, ret); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) return nil, fmt.Errorf("creating kprobe event on tracefs: %w", err)
} }
// Get the newly-created trace event's id. // Get the newly-created trace event's id.
@ -297,83 +202,65 @@ func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (
fd: fd, fd: fd,
group: group, group: group,
name: symbol, name: symbol,
ret: ret,
tracefsID: tid, tracefsID: tid,
typ: typ.PerfEventType(ret), progType: ebpf.Kprobe, // kernel only allows attaching kprobe programs to kprobe events
}, nil }, nil
} }
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to // createTraceFSKprobeEvent creates a new ephemeral trace event by writing to
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid // <tracefs>/kprobe_events. Returns ErrNotSupported if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist // kernel symbol, or if it is not traceable with kprobes.
// if a probe with the same group and symbol already exists. func createTraceFSKprobeEvent(group, symbol string, ret bool) error {
func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset uint64, ret bool) error {
// Open the kprobe_events file in tracefs. // Open the kprobe_events file in tracefs.
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
if err != nil { if err != nil {
return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err) return fmt.Errorf("error opening kprobe_events: %w", err)
} }
defer f.Close() defer f.Close()
var pe string // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
switch typ { // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
case kprobeType: // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): // -:[GRP/]EVENT : Clear a probe
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe //
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe // Some examples:
// -:[GRP/]EVENT : Clear a probe // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// // p:ebpf_5678/p_my_kprobe __x64_sys_execve
// Some examples: //
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// p:ebpf_5678/p_my_kprobe __x64_sys_execve // kernel default to NR_CPUS. This is desired in most eBPF cases since
// // subsampling or rate limiting logic can be more accurately implemented in
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the // the eBPF program itself. See Documentation/kprobes.txt for more details.
// kernel default to NR_CPUS. This is desired in most eBPF cases since pe := fmt.Sprintf("%s:%s/%s %s", kprobePrefix(ret), group, symbol, symbol)
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, symbol)
case uprobeType:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345
//
// See Documentation/trace/uprobetracer.txt for more details.
pathOffset := uprobePathOffset(path, offset)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, pathOffset)
}
_, err = f.WriteString(pe) _, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT // when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller. // is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return fmt.Errorf("symbol %s not found: %w", symbol, os.ErrNotExist) return fmt.Errorf("kernel symbol %s not found: %w", symbol, os.ErrNotExist)
} }
if err != nil { if err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
} }
return nil return nil
} }
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol // closeTraceFSKprobeEvent removes the kprobe with the given group, symbol and kind
// from <tracefs>/[k,u]probe_events. // from <tracefs>/kprobe_events.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error { func closeTraceFSKprobeEvent(group, symbol string) error {
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
if err != nil { if err != nil {
return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err) return fmt.Errorf("error opening kprobe_events: %w", err)
} }
defer f.Close() defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified // See kprobe_events syntax above. Kprobe type does not need to be specified
// for removals. // for removals.
pe := fmt.Sprintf("-:%s/%s", group, symbol) pe := fmt.Sprintf("-:%s/%s", group, symbol)
if _, err = f.WriteString(pe); err != nil { if _, err = f.WriteString(pe); err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
} }
return nil return nil
@ -401,38 +288,9 @@ func randomGroup(prefix string) (string, error) {
return group, nil return group, nil
} }
func probePrefix(ret bool) string { func kprobePrefix(ret bool) string {
if ret { if ret {
return "r" return "r"
} }
return "p" return "p"
} }
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
func determineRetprobeBit(typ probeType) (uint64, error) {
p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")
data, err := ioutil.ReadFile(p)
if err != nil {
return 0, err
}
var rp uint64
n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp)
if err != nil {
return 0, fmt.Errorf("parse retprobe bit: %w", err)
}
if n != 1 {
return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
}
return rp, nil
}
func kretprobeBit() (uint64, error) {
kprobeRetprobeBit.once.Do(func() {
kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
})
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}

View File

@ -31,10 +31,6 @@ import (
// exported kernel symbols. kprobe-based (tracefs) trace events can be // exported kernel symbols. kprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/kprobe_events file, or // created system-wide by writing to the <tracefs>/kprobe_events file, or
// they can be scoped to the current process by creating PMU perf events. // they can be scoped to the current process by creating PMU perf events.
// - u(ret)probe: Ephemeral trace events based on user-provided ELF binaries
// and offsets. uprobe-based (tracefs) trace events can be
// created system-wide by writing to the <tracefs>/uprobe_events file, or
// they can be scoped to the current process by creating PMU perf events.
// - perf event: An object instantiated based on an existing trace event or // - perf event: An object instantiated based on an existing trace event or
// kernel symbol. Referred to by fd in userspace. // kernel symbol. Referred to by fd in userspace.
// Exactly one eBPF program can be attached to a perf event. Multiple perf // Exactly one eBPF program can be attached to a perf event. Multiple perf
@ -56,16 +52,6 @@ const (
perfAllThreads = -1 perfAllThreads = -1
) )
type perfEventType uint8
const (
tracepointEvent perfEventType = iota
kprobeEvent
kretprobeEvent
uprobeEvent
uretprobeEvent
)
// A perfEvent represents a perf event kernel object. Exactly one eBPF program // A perfEvent represents a perf event kernel object. Exactly one eBPF program
// can be attached to it. It is created based on a tracefs trace event or a // can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU). // Performance Monitoring Unit (PMU).
@ -80,10 +66,11 @@ type perfEvent struct {
// ID of the trace event read from tracefs. Valid IDs are non-zero. // ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64 tracefsID uint64
// The event type determines the types of programs that can be attached. // True for kretprobes/uretprobes.
typ perfEventType ret bool
fd *internal.FD fd *internal.FD
progType ebpf.ProgramType
} }
func (pe *perfEvent) isLink() {} func (pe *perfEvent) isLink() {}
@ -130,18 +117,13 @@ func (pe *perfEvent) Close() error {
return fmt.Errorf("closing perf event fd: %w", err) return fmt.Errorf("closing perf event fd: %w", err)
} }
switch pe.typ { switch t := pe.progType; t {
case kprobeEvent, kretprobeEvent: case ebpf.Kprobe:
// Clean up kprobe tracefs entry. // For kprobes created using tracefs, clean up the <tracefs>/kprobe_events entry.
if pe.tracefsID != 0 { if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name) return closeTraceFSKprobeEvent(pe.group, pe.name)
} }
case uprobeEvent, uretprobeEvent: case ebpf.TracePoint:
// Clean up uprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
}
case tracepointEvent:
// Tracepoint trace events don't hold any extra resources. // Tracepoint trace events don't hold any extra resources.
return nil return nil
} }
@ -159,21 +141,12 @@ func (pe *perfEvent) attach(prog *ebpf.Program) error {
if pe.fd == nil { if pe.fd == nil {
return errors.New("cannot attach to nil perf event") return errors.New("cannot attach to nil perf event")
} }
if t := prog.Type(); t != pe.progType {
return fmt.Errorf("invalid program type (expected %s): %s", pe.progType, t)
}
if prog.FD() < 0 { if prog.FD() < 0 {
return fmt.Errorf("invalid program: %w", internal.ErrClosedFd) return fmt.Errorf("invalid program: %w", internal.ErrClosedFd)
} }
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return fmt.Errorf("unknown perf event type: %d", pe.typ)
}
// The ioctl below will fail when the fd is invalid. // The ioctl below will fail when the fd is invalid.
kfd, _ := pe.fd.Value() kfd, _ := pe.fd.Value()
@ -207,8 +180,8 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) {
// group and name must be alphanumeric or underscore, as required by the kernel. // group and name must be alphanumeric or underscore, as required by the kernel.
func getTraceEventID(group, name string) (uint64, error) { func getTraceEventID(group, name string) (uint64, error) {
tid, err := uint64FromFile(tracefsPath, "events", group, name, "id") tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
if errors.Is(err, os.ErrNotExist) { if errors.Is(err, ErrNotSupported) {
return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist) return 0, fmt.Errorf("trace event %s/%s: %w", group, name, ErrNotSupported)
} }
if err != nil { if err != nil {
return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
@ -219,22 +192,20 @@ func getTraceEventID(group, name string) (uint64, error) {
// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier) // getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
// from /sys/bus/event_source/devices/<pmu>/type. // from /sys/bus/event_source/devices/<pmu>/type.
// func getPMUEventType(pmu string) (uint64, error) {
// Returns ErrNotSupported if the pmu type is not supported. et, err := uint64FromFile("/sys/bus/event_source/devices", pmu, "type")
func getPMUEventType(typ probeType) (uint64, error) { if errors.Is(err, ErrNotSupported) {
et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type") return 0, fmt.Errorf("pmu type %s: %w", pmu, ErrNotSupported)
if errors.Is(err, os.ErrNotExist) {
return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
} }
if err != nil { if err != nil {
return 0, fmt.Errorf("reading pmu type %s: %w", typ, err) return 0, fmt.Errorf("reading pmu type %s: %w", pmu, err)
} }
return et, nil return et, nil
} }
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide // openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints // kprobes created by writing to <tracefs>/kprobe_events are tracepoints
// behind the scenes, and can be attached to using these perf events. // behind the scenes, and can be attached to using these perf events.
func openTracepointPerfEvent(tid uint64) (*internal.FD, error) { func openTracepointPerfEvent(tid uint64) (*internal.FD, error) {
attr := unix.PerfEventAttr{ attr := unix.PerfEventAttr{
@ -257,13 +228,22 @@ func openTracepointPerfEvent(tid uint64) (*internal.FD, error) {
// and joined onto base. Returns error if base no longer prefixes the path after // and joined onto base. Returns error if base no longer prefixes the path after
// joining all components. // joining all components.
func uint64FromFile(base string, path ...string) (uint64, error) { func uint64FromFile(base string, path ...string) (uint64, error) {
// Resolve leaf path separately for error feedback. Makes the join onto
// base more readable (can't mix with variadic args).
l := filepath.Join(path...) l := filepath.Join(path...)
p := filepath.Join(base, l) p := filepath.Join(base, l)
if !strings.HasPrefix(p, base) { if !strings.HasPrefix(p, base) {
return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput) return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
} }
data, err := ioutil.ReadFile(p) data, err := ioutil.ReadFile(p)
if os.IsNotExist(err) {
// Only echo leaf path, the base path can be prepended at the call site
// if more verbosity is required.
return 0, fmt.Errorf("symbol %s: %w", l, ErrNotSupported)
}
if err != nil { if err != nil {
return 0, fmt.Errorf("reading file %s: %w", p, err) return 0, fmt.Errorf("reading file %s: %w", p, err)
} }

View File

@ -1,25 +0,0 @@
package link
import (
"fmt"
"runtime"
)
// platformPrefix returns symbol wrapped as "__<arch>_<symbol>", where <arch>
// is derived from the GOARCH the binary was built for: "ia32" for 386, "x64"
// for amd64/amd64p32 and "arm64" for arm64/arm64be. On every other
// architecture symbol is returned unchanged.
func platformPrefix(symbol string) string {
	// GOARCH values per https://github.com/golang/go/blob/master/src/go/build/syslist.go
	var arch string
	switch runtime.GOARCH {
	case "386":
		arch = "ia32"
	case "amd64", "amd64p32":
		arch = "x64"
	case "arm64", "arm64be":
		arch = "arm64"
	}

	// No known prefix for this architecture: hand the symbol back untouched.
	if arch == "" {
		return symbol
	}
	return fmt.Sprintf("__%s_%s", arch, symbol)
}

View File

@ -43,7 +43,7 @@ func RawAttachProgram(opts RawAttachProgramOptions) error {
} }
if err := internal.BPFProgAttach(&attr); err != nil { if err := internal.BPFProgAttach(&attr); err != nil {
return fmt.Errorf("can't attach program: %w", err) return fmt.Errorf("can't attach program: %s", err)
} }
return nil return nil
} }
@ -69,7 +69,7 @@ func RawDetachProgram(opts RawDetachProgramOptions) error {
AttachType: uint32(opts.Attach), AttachType: uint32(opts.Attach),
} }
if err := internal.BPFProgDetach(&attr); err != nil { if err := internal.BPFProgDetach(&attr); err != nil {
return fmt.Errorf("can't detach program: %w", err) return fmt.Errorf("can't detach program: %s", err)
} }
return nil return nil

View File

@ -11,7 +11,7 @@ import (
// tracepoints. The top-level directory is the group, the event's subdirectory // tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example: // is the name. Example:
// //
// Tracepoint("syscalls", "sys_enter_fork", prog) // Tracepoint("syscalls", "sys_enter_fork")
// //
// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is // Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is
// only possible as of kernel 4.14 (commit cf5f5ce). // only possible as of kernel 4.14 (commit cf5f5ce).
@ -44,7 +44,7 @@ func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) {
tracefsID: tid, tracefsID: tid,
group: group, group: group,
name: name, name: name,
typ: tracepointEvent, progType: ebpf.TracePoint,
} }
if err := pe.attach(prog); err != nil { if err := pe.attach(prog); err != nil {

View File

@ -1,207 +0,0 @@
package link
import (
"debug/elf"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"sync"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
)
var (
// uprobeEventsPath is the location of the uprobe_events file below tracefsPath.
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
// rgxUprobeSymbol is used to strip invalid characters from the uprobe symbol
// as they are not allowed to be used as the EVENT token in tracefs.
// It matches runs of one or more characters outside [a-zA-Z0-9].
rgxUprobeSymbol = regexp.MustCompile("[^a-zA-Z0-9]+")
// uprobeRetprobeBit caches the outcome (value or error) of looking up the
// uprobe PMU's retprobe bit. It is filled in at most once, guarded by once,
// by uretprobeBit.
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
// Executable defines an executable program on the filesystem.
// Instances are created with OpenExecutable.
type Executable struct {
// Path of the executable on the filesystem.
path string
// Parsed ELF symbols and dynamic symbols, keyed by symbol name.
symbols map[string]elf.Symbol
}
// OpenExecutable opens the ELF file at path and indexes its symbols by name.
// For example:
//
//	OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
//
// An error is returned if path is empty, if the file cannot be opened or
// parsed as ELF, or if loading either symbol table fails.
func OpenExecutable(path string) (*Executable, error) {
	if path == "" {
		return nil, fmt.Errorf("path cannot be empty")
	}

	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("open file '%s': %w", path, err)
	}
	defer file.Close()

	safeELF, err := internal.NewSafeELFFile(file)
	if err != nil {
		return nil, fmt.Errorf("parse ELF file: %w", err)
	}

	ex := &Executable{
		path:    path,
		symbols: make(map[string]elf.Symbol),
	}

	// Regular symbols are loaded first, dynamic symbols second; on a name
	// clash the entry loaded later replaces the earlier one in the map.
	loaders := []func() ([]elf.Symbol, error){
		safeELF.Symbols,
		safeELF.DynamicSymbols,
	}
	for _, load := range loaders {
		if err := ex.addSymbols(load); err != nil {
			return nil, err
		}
	}

	return ex, nil
}
// addSymbols invokes f and records every symbol it yields in ex.symbols,
// keyed by the symbol's name. elf.Symbols and elf.DynamicSymbols return
// elf.ErrNoSymbols if the section is not found; that error is tolerated.
// Any other error from f is returned as-is and nothing is recorded.
func (ex *Executable) addSymbols(f func() ([]elf.Symbol, error)) error {
	list, err := f()
	switch {
	case err == nil, errors.Is(err, elf.ErrNoSymbols):
		// Nothing went wrong, or the symbol section is simply absent.
	default:
		return err
	}

	for i := range list {
		ex.symbols[list[i].Name] = list[i]
	}
	return nil
}
// symbol looks up the given name in ex.symbols. It returns a pointer to a
// copy of the stored elf.Symbol, or an error if no such name was indexed.
func (ex *Executable) symbol(symbol string) (*elf.Symbol, error) {
	found, ok := ex.symbols[symbol]
	if !ok {
		return nil, fmt.Errorf("symbol %s not found", symbol)
	}
	return &found, nil
}
// Uprobe attaches the given eBPF program to a perf event that fires when the
// given symbol starts executing in the given Executable.
// For example, /bin/bash::main():
//
//	ex, _ = OpenExecutable("/bin/bash")
//	ex.Uprobe("main", prog)
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. Functions provided by shared libraries can currently not
// be traced and will result in an ErrNotSupported.
func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program) (Link, error) {
	pe, err := ex.uprobe(symbol, prog, false)
	if err != nil {
		return nil, err
	}

	// Attaching failed: release the perf event again before reporting the error.
	if err := pe.attach(prog); err != nil {
		pe.Close()
		return nil, err
	}

	return pe, nil
}
// Uretprobe attaches the given eBPF program to a perf event that fires right
// before the given symbol exits. For example, /bin/bash::main():
//
//	ex, _ = OpenExecutable("/bin/bash")
//	ex.Uretprobe("main", prog)
//
// The resulting Link must be Closed during program shutdown to avoid leaking
// system resources. Functions provided by shared libraries can currently not
// be traced and will result in an ErrNotSupported.
func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program) (Link, error) {
	pe, err := ex.uprobe(symbol, prog, true)
	if err != nil {
		return nil, err
	}

	// Attaching failed: release the perf event again before reporting the error.
	if err := pe.attach(prog); err != nil {
		pe.Close()
		return nil, err
	}

	return pe, nil
}
// uprobe opens a perf event for the given symbol of the executable. It only
// validates prog (non-nil, of type Kprobe); attaching prog to the returned
// event is left to the caller. If ret is true, a uretprobe is created.
//
// The uprobe PMU is tried first. Only when that reports ErrNotSupported is
// the event created through tracefs instead.
func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
	switch {
	case prog == nil:
		return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
	case prog.Type() != ebpf.Kprobe:
		return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput)
	}

	sym, err := ex.symbol(symbol)
	if err != nil {
		return nil, fmt.Errorf("symbol '%s' not found in '%s': %w", symbol, ex.path, err)
	}

	// Symbols with location 0 from section undef are shared library calls and
	// are relocated before the binary is executed. Dynamic linking is not
	// implemented by the library, so mark this as unsupported for now.
	if sym.Section == elf.SHN_UNDEF && sym.Value == 0 {
		return nil, fmt.Errorf("cannot resolve %s library call '%s': %w", ex.path, symbol, ErrNotSupported)
	}

	// Use uprobe PMU if the kernel has it available.
	pe, err := pmuUprobe(sym.Name, ex.path, sym.Value, ret)
	if err == nil {
		return pe, nil
	}
	// Any failure other than "PMU not supported" is final.
	if !errors.Is(err, ErrNotSupported) {
		return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err)
	}

	// Use tracefs if uprobe PMU is missing.
	pe, err = tracefsUprobe(uprobeSanitizedSymbol(sym.Name), ex.path, sym.Value, ret)
	if err != nil {
		return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
	}

	return pe, nil
}
// pmuUprobe opens a perf event based on the uprobe PMU.
// It forwards symbol, path, offset and ret to pmuProbe with the probe type
// fixed to uprobeType.
func pmuUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
return pmuProbe(uprobeType, symbol, path, offset, ret)
}
// tracefsUprobe creates a Uprobe tracefs entry.
// It forwards symbol, path, offset and ret to tracefsProbe with the probe
// type fixed to uprobeType.
func tracefsUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
return tracefsProbe(uprobeType, symbol, path, offset, ret)
}
// uprobeSanitizedSymbol replaces every run of characters that is invalid for
// the tracefs API (anything matched by rgxUprobeSymbol) with a single underscore.
func uprobeSanitizedSymbol(symbol string) string {
return rgxUprobeSymbol.ReplaceAllString(symbol, "_")
}
// uprobePathOffset builds the PATH:OFFSET token expected by the tracefs api.
// The offset is rendered as lower-case hexadecimal with a leading "0x", so
// ("/bin/bash", 0x1234) yields "/bin/bash:0x1234".
func uprobePathOffset(path string, offset uint64) string {
	const layout = "%s:%#x"
	token := fmt.Sprintf(layout, path, offset)
	return token
}
// uretprobeBit returns the value computed by determineRetprobeBit(uprobeType)
// together with any error from that computation.
//
// The computation is wrapped in uprobeRetprobeBit.once.Do and its outcome is
// stored in uprobeRetprobeBit.value and uprobeRetprobeBit.err; every call
// returns those stored fields.
// NOTE(review): once is presumably a sync.Once, in which case the lookup
// (including a failed one) runs a single time and its result is reused —
// confirm against the declaration of uprobeRetprobeBit.
func uretprobeBit() (uint64, error) {
uprobeRetprobeBit.once.Do(func() {
uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType)
})
return uprobeRetprobeBit.value, uprobeRetprobeBit.err
}

View File

@ -108,16 +108,12 @@ func fixupJumpsAndCalls(insns asm.Instructions) error {
offset := iter.Offset offset := iter.Offset
ins := iter.Ins ins := iter.Ins
if ins.Reference == "" {
continue
}
switch { switch {
case ins.IsFunctionCall() && ins.Constant == -1: case ins.IsFunctionCall() && ins.Constant == -1:
// Rewrite bpf to bpf call // Rewrite bpf to bpf call
callOffset, ok := symbolOffsets[ins.Reference] callOffset, ok := symbolOffsets[ins.Reference]
if !ok { if !ok {
return fmt.Errorf("call at %d: reference to missing symbol %q", i, ins.Reference) return fmt.Errorf("instruction %d: reference to missing symbol %q", i, ins.Reference)
} }
ins.Constant = int64(callOffset - offset - 1) ins.Constant = int64(callOffset - offset - 1)
@ -126,13 +122,10 @@ func fixupJumpsAndCalls(insns asm.Instructions) error {
// Rewrite jump to label // Rewrite jump to label
jumpOffset, ok := symbolOffsets[ins.Reference] jumpOffset, ok := symbolOffsets[ins.Reference]
if !ok { if !ok {
return fmt.Errorf("jump at %d: reference to missing symbol %q", i, ins.Reference) return fmt.Errorf("instruction %d: reference to missing symbol %q", i, ins.Reference)
} }
ins.Offset = int16(jumpOffset - offset - 1) ins.Offset = int16(jumpOffset - offset - 1)
case ins.IsLoadFromMap() && ins.MapPtr() == -1:
return fmt.Errorf("map %s: %w", ins.Reference, errUnsatisfiedReference)
} }
} }

50
vendor/github.com/cilium/ebpf/map.go generated vendored
View File

@ -18,7 +18,6 @@ var (
ErrKeyNotExist = errors.New("key does not exist") ErrKeyNotExist = errors.New("key does not exist")
ErrKeyExist = errors.New("key already exists") ErrKeyExist = errors.New("key already exists")
ErrIterationAborted = errors.New("iteration aborted") ErrIterationAborted = errors.New("iteration aborted")
ErrMapIncompatible = errors.New("map's spec is incompatible with pinned map")
) )
// MapOptions control loading a map into the kernel. // MapOptions control loading a map into the kernel.
@ -88,23 +87,6 @@ func (ms *MapSpec) Copy() *MapSpec {
return &cpy return &cpy
} }
func (ms *MapSpec) clampPerfEventArraySize() error {
if ms.Type != PerfEventArray {
return nil
}
n, err := internal.PossibleCPUs()
if err != nil {
return fmt.Errorf("perf event array: %w", err)
}
if n := uint32(n); ms.MaxEntries > n {
ms.MaxEntries = n
}
return nil
}
// MapKV is used to initialize the contents of a Map. // MapKV is used to initialize the contents of a Map.
type MapKV struct { type MapKV struct {
Key interface{} Key interface{}
@ -114,19 +96,19 @@ type MapKV struct {
func (ms *MapSpec) checkCompatibility(m *Map) error { func (ms *MapSpec) checkCompatibility(m *Map) error {
switch { switch {
case m.typ != ms.Type: case m.typ != ms.Type:
return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) return fmt.Errorf("expected type %v, got %v", ms.Type, m.typ)
case m.keySize != ms.KeySize: case m.keySize != ms.KeySize:
return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) return fmt.Errorf("expected key size %v, got %v", ms.KeySize, m.keySize)
case m.valueSize != ms.ValueSize: case m.valueSize != ms.ValueSize:
return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) return fmt.Errorf("expected value size %v, got %v", ms.ValueSize, m.valueSize)
case m.maxEntries != ms.MaxEntries: case m.maxEntries != ms.MaxEntries:
return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) return fmt.Errorf("expected max entries %v, got %v", ms.MaxEntries, m.maxEntries)
case m.flags != ms.Flags: case m.flags != ms.Flags:
return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) return fmt.Errorf("expected flags %v, got %v", ms.Flags, m.flags)
} }
return nil return nil
} }
@ -189,16 +171,14 @@ func NewMap(spec *MapSpec) (*Map, error) {
// The caller is responsible for ensuring the process' rlimit is set // The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done // sufficiently high for locking memory during map creation. This can be done
// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMapWithOptions. // by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
handles := newHandleCache() btfs := make(btfHandleCache)
defer handles.close() defer btfs.close()
return newMapWithOptions(spec, opts, handles) return newMapWithOptions(spec, opts, btfs)
} }
func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) { func newMapWithOptions(spec *MapSpec, opts MapOptions, btfs btfHandleCache) (_ *Map, err error) {
closeOnError := func(c io.Closer) { closeOnError := func(c io.Closer) {
if err != nil { if err != nil {
c.Close() c.Close()
@ -222,7 +202,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
defer closeOnError(m) defer closeOnError(m)
if err := spec.checkCompatibility(m); err != nil { if err := spec.checkCompatibility(m); err != nil {
return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) return nil, fmt.Errorf("use pinned map %s: %s", spec.Name, err)
} }
return m, nil return m, nil
@ -231,7 +211,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
// Nothing to do here // Nothing to do here
default: default:
return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported) return nil, fmt.Errorf("unsupported pin type %d", int(spec.Pinning))
} }
var innerFd *internal.FD var innerFd *internal.FD
@ -244,7 +224,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return nil, errors.New("inner maps cannot be pinned") return nil, errors.New("inner maps cannot be pinned")
} }
template, err := createMap(spec.InnerMap, nil, opts, handles) template, err := createMap(spec.InnerMap, nil, opts, btfs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -253,7 +233,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
innerFd = template.fd innerFd = template.fd
} }
m, err := createMap(spec, innerFd, opts, handles) m, err := createMap(spec, innerFd, opts, btfs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -269,7 +249,7 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_
return m, nil return m, nil
} }
func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) { func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandleCache) (_ *Map, err error) {
closeOnError := func(closer io.Closer) { closeOnError := func(closer io.Closer) {
if err != nil { if err != nil {
closer.Close() closer.Close()
@ -340,7 +320,7 @@ func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, handles *hand
var btfDisabled bool var btfDisabled bool
if spec.BTF != nil { if spec.BTF != nil {
handle, err := handles.btfHandle(btf.MapSpec(spec.BTF)) handle, err := btfs.load(btf.MapSpec(spec.BTF))
btfDisabled = errors.Is(err, btf.ErrNotSupported) btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled { if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err) return nil, fmt.Errorf("load BTF: %w", err)

149
vendor/github.com/cilium/ebpf/prog.go generated vendored
View File

@ -5,7 +5,6 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
"io"
"math" "math"
"path/filepath" "path/filepath"
"strings" "strings"
@ -20,8 +19,6 @@ import (
// ErrNotSupported is returned whenever the kernel doesn't support a feature. // ErrNotSupported is returned whenever the kernel doesn't support a feature.
var ErrNotSupported = internal.ErrNotSupported var ErrNotSupported = internal.ErrNotSupported
var errUnsatisfiedReference = errors.New("unsatisfied reference")
// ProgramID represents the unique ID of an eBPF program. // ProgramID represents the unique ID of an eBPF program.
type ProgramID uint32 type ProgramID uint32
@ -44,12 +41,6 @@ type ProgramOptions struct {
// Controls the output buffer size for the verifier. Defaults to // Controls the output buffer size for the verifier. Defaults to
// DefaultVerifierLogSize. // DefaultVerifierLogSize.
LogSize int LogSize int
// An ELF containing the target BTF for this program. It is used both to
// find the correct function to trace and to apply CO-RE relocations.
// This is useful in environments where the kernel BTF is not available
// (containers) or where it is in a non-standard location. Defaults to
// use the kernel BTF from a well-known location.
TargetBTF io.ReaderAt
} }
// ProgramSpec defines a Program. // ProgramSpec defines a Program.
@ -134,21 +125,21 @@ func NewProgram(spec *ProgramSpec) (*Program, error) {
// Loading a program for the first time will perform // Loading a program for the first time will perform
// feature detection by loading small, temporary programs. // feature detection by loading small, temporary programs.
func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) {
handles := newHandleCache() btfs := make(btfHandleCache)
defer handles.close() defer btfs.close()
prog, err := newProgramWithOptions(spec, opts, handles) return newProgramWithOptions(spec, opts, btfs)
if errors.Is(err, errUnsatisfiedReference) {
return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err)
}
return prog, err
} }
func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) { func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, btfs btfHandleCache) (*Program, error) {
if len(spec.Instructions) == 0 { if len(spec.Instructions) == 0 {
return nil, errors.New("Instructions cannot be empty") return nil, errors.New("Instructions cannot be empty")
} }
if len(spec.License) == 0 {
return nil, errors.New("License cannot be empty")
}
if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian { if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian {
return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian) return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian)
} }
@ -166,10 +157,27 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
kv = v.Kernel() kv = v.Kernel()
} }
insns := make(asm.Instructions, len(spec.Instructions))
copy(insns, spec.Instructions)
if err := fixupJumpsAndCalls(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err := insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
insCount := uint32(len(bytecode) / asm.InstructionSize)
attr := &bpfProgLoadAttr{ attr := &bpfProgLoadAttr{
progType: spec.Type, progType: spec.Type,
progFlags: spec.Flags, progFlags: spec.Flags,
expectedAttachType: spec.AttachType, expectedAttachType: spec.AttachType,
insCount: insCount,
instructions: internal.NewSlicePointer(bytecode),
license: internal.NewStringPointer(spec.License), license: internal.NewStringPointer(spec.License),
kernelVersion: kv, kernelVersion: kv,
} }
@ -178,24 +186,15 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
attr.progName = newBPFObjName(spec.Name) attr.progName = newBPFObjName(spec.Name)
} }
var err error
var targetBTF *btf.Spec
if opts.TargetBTF != nil {
targetBTF, err = handles.btfSpec(opts.TargetBTF)
if err != nil {
return nil, fmt.Errorf("load target BTF: %w", err)
}
}
var btfDisabled bool var btfDisabled bool
var core btf.COREFixups
if spec.BTF != nil { if spec.BTF != nil {
core, err = btf.ProgramFixups(spec.BTF, targetBTF) if relos, err := btf.ProgramRelocations(spec.BTF, nil); err != nil {
if err != nil { return nil, fmt.Errorf("CO-RE relocations: %s", err)
return nil, fmt.Errorf("CO-RE relocations: %w", err) } else if len(relos) > 0 {
return nil, fmt.Errorf("applying CO-RE relocations: %w", ErrNotSupported)
} }
handle, err := handles.btfHandle(btf.ProgramSpec(spec.BTF)) handle, err := btfs.load(btf.ProgramSpec(spec.BTF))
btfDisabled = errors.Is(err, btf.ErrNotSupported) btfDisabled = errors.Is(err, btf.ErrNotSupported)
if err != nil && !btfDisabled { if err != nil && !btfDisabled {
return nil, fmt.Errorf("load BTF: %w", err) return nil, fmt.Errorf("load BTF: %w", err)
@ -222,27 +221,8 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
} }
} }
insns, err := core.Apply(spec.Instructions)
if err != nil {
return nil, fmt.Errorf("CO-RE fixup: %w", err)
}
if err := fixupJumpsAndCalls(insns); err != nil {
return nil, err
}
buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize))
err = insns.Marshal(buf, internal.NativeEndian)
if err != nil {
return nil, err
}
bytecode := buf.Bytes()
attr.instructions = internal.NewSlicePointer(bytecode)
attr.insCount = uint32(len(bytecode) / asm.InstructionSize)
if spec.AttachTo != "" { if spec.AttachTo != "" {
target, err := resolveBTFType(targetBTF, spec.AttachTo, spec.Type, spec.AttachType) target, err := resolveBTFType(spec.AttachTo, spec.Type, spec.AttachType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -270,7 +250,7 @@ func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *hand
} }
logErr := err logErr := err
if opts.LogLevel == 0 && opts.LogSize >= 0 { if opts.LogLevel == 0 {
// Re-run with the verifier enabled to get better error messages. // Re-run with the verifier enabled to get better error messages.
logBuf = make([]byte, logSize) logBuf = make([]byte, logSize)
attr.logLevel = 1 attr.logLevel = 1
@ -684,45 +664,52 @@ func (p *Program) ID() (ProgramID, error) {
return ProgramID(info.id), nil return ProgramID(info.id), nil
} }
func resolveBTFType(kernel *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.Type, error) { func findKernelType(name string, typ btf.Type) error {
kernel, err := btf.LoadKernelSpec()
if err != nil {
return fmt.Errorf("can't load kernel spec: %w", err)
}
return kernel.FindType(name, typ)
}
func resolveBTFType(name string, progType ProgramType, attachType AttachType) (btf.Type, error) {
type match struct { type match struct {
p ProgramType p ProgramType
a AttachType a AttachType
} }
var target btf.Type target := match{progType, attachType}
var typeName, featureName string switch target {
switch (match{progType, attachType}) {
case match{LSM, AttachLSMMac}: case match{LSM, AttachLSMMac}:
target = new(btf.Func) var target btf.Func
typeName = "bpf_lsm_" + name err := findKernelType("bpf_lsm_"+name, &target)
featureName = name + " LSM hook" if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: name + " LSM hook",
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for LSM hook %s: %w", name, err)
}
return &target, nil
case match{Tracing, AttachTraceIter}: case match{Tracing, AttachTraceIter}:
target = new(btf.Func) var target btf.Func
typeName = "bpf_iter_" + name err := findKernelType("bpf_iter_"+name, &target)
featureName = name + " iterator" if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: name + " iterator",
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for iterator %s: %w", name, err)
}
return &target, nil
default: default:
return nil, nil return nil, nil
} }
if kernel == nil {
var err error
kernel, err = btf.LoadKernelSpec()
if err != nil {
return nil, fmt.Errorf("load kernel spec: %w", err)
}
}
err := kernel.FindType(typeName, target)
if errors.Is(err, btf.ErrNotFound) {
return nil, &internal.UnsupportedFeatureError{
Name: featureName,
}
}
if err != nil {
return nil, fmt.Errorf("resolve BTF for %s: %w", featureName, err)
}
return target, nil
} }

View File

@ -1,95 +1,56 @@
#!/bin/bash #!/bin/bash
# Test the current package under a different kernel. # Test the current package under a different kernel.
# Requires virtme and qemu to be installed. # Requires virtme and qemu to be installed.
# Examples:
# Run all tests on a 5.4 kernel
# $ ./run-tests.sh 5.4
# Run a subset of tests:
# $ ./run-tests.sh 5.4 go test ./link
set -euo pipefail set -eu
set -o pipefail
script="$(realpath "$0")" if [[ "${1:-}" = "--in-vm" ]]; then
readonly script
# This script is a bit like a Matryoshka doll since it keeps re-executing itself
# in various different contexts:
#
# 1. invoked by the user like run-tests.sh 5.4
# 2. invoked by go test like run-tests.sh --exec-vm
# 3. invoked by init in the vm like run-tests.sh --exec-test
#
# This allows us to use all available CPU on the host machine to compile our
# code, and then only use the VM to execute the test. This is because the VM
# is usually slower at compiling than the host.
if [[ "${1:-}" = "--exec-vm" ]]; then
shift
input="$1"
shift
# Use sudo if /dev/kvm isn't accessible by the current user.
sudo=""
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
sudo="sudo"
fi
readonly sudo
testdir="$(dirname "$1")"
output="$(mktemp -d)"
printf -v cmd "%q " "$@"
if [[ "$(stat -c '%t:%T' -L /proc/$$/fd/0)" == "1:3" ]]; then
# stdin is /dev/null, which doesn't play well with qemu. Use a fifo as a
# blocking substitute.
mkfifo "${output}/fake-stdin"
# Open for reading and writing to avoid blocking.
exec 0<> "${output}/fake-stdin"
rm "${output}/fake-stdin"
fi
$sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \
--rwdir="${testdir}=${testdir}" \
--rodir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--script-sh "PATH=\"$PATH\" \"$script\" --exec-test $cmd" \
--qemu-opts -smp 2 # need at least two CPUs for some tests
if [[ ! -e "${output}/success" ]]; then
exit 1
fi
$sudo rm -r "$output"
exit 0
elif [[ "${1:-}" = "--exec-test" ]]; then
shift shift
mount -t bpf bpf /sys/fs/bpf mount -t bpf bpf /sys/fs/bpf
mount -t tracefs tracefs /sys/kernel/debug/tracing mount -t tracefs tracefs /sys/kernel/debug/tracing
export CGO_ENABLED=0
export GOFLAGS=-mod=readonly
export GOPATH=/run/go-path
export GOPROXY=file:///run/go-path/pkg/mod/cache/download
export GOSUMDB=off
export GOCACHE=/run/go-cache
if [[ -d "/run/input/bpf" ]]; then if [[ -d "/run/input/bpf" ]]; then
export KERNEL_SELFTESTS="/run/input/bpf" export KERNEL_SELFTESTS="/run/input/bpf"
fi fi
dmesg -C readonly output="${1}"
if ! "$@"; then shift
dmesg
exit 1 echo Running tests...
fi go test -v -coverpkg=./... -coverprofile="$output/coverage.txt" -count 1 ./...
touch "/run/output/success" touch "$output/success"
exit 0 exit 0
fi fi
# Pull all dependencies, so that we can run tests without the
# vm having network access.
go mod download
# Use sudo if /dev/kvm isn't accessible by the current user.
sudo=""
if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then
sudo="sudo"
fi
readonly sudo
readonly kernel_version="${1:-}" readonly kernel_version="${1:-}"
if [[ -z "${kernel_version}" ]]; then if [[ -z "${kernel_version}" ]]; then
echo "Expecting kernel version as first argument" echo "Expecting kernel version as first argument"
exit 1 exit 1
fi fi
shift
readonly kernel="linux-${kernel_version}.bz" readonly kernel="linux-${kernel_version}.bz"
readonly selftests="linux-${kernel_version}-selftests-bpf.bz" readonly selftests="linux-${kernel_version}-selftests-bpf.bz"
readonly input="$(mktemp -d)" readonly input="$(mktemp -d)"
readonly output="$(mktemp -d)"
readonly tmp_dir="${TMPDIR:-/tmp}" readonly tmp_dir="${TMPDIR:-/tmp}"
readonly branch="${BRANCH:-master}" readonly branch="${BRANCH:-master}"
@ -99,7 +60,6 @@ fetch() {
} }
fetch "${kernel}" fetch "${kernel}"
cp "${tmp_dir}/${kernel}" "${input}/bzImage"
if fetch "${selftests}"; then if fetch "${selftests}"; then
mkdir "${input}/bpf" mkdir "${input}/bpf"
@ -108,16 +68,25 @@ else
echo "No selftests found, disabling" echo "No selftests found, disabling"
fi fi
args=(-v -short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...) echo Testing on "${kernel_version}"
if (( $# > 0 )); then $sudo virtme-run --kimg "${tmp_dir}/${kernel}" --memory 512M --pwd \
args=("$@") --rw \
--rwdir=/run/input="${input}" \
--rwdir=/run/output="${output}" \
--rodir=/run/go-path="$(go env GOPATH)" \
--rwdir=/run/go-cache="$(go env GOCACHE)" \
--script-sh "PATH=\"$PATH\" $(realpath "$0") --in-vm /run/output" \
--qemu-opts -smp 2 # need at least two CPUs for some tests
if [[ ! -e "${output}/success" ]]; then
echo "Test failed on ${kernel_version}"
exit 1
else
echo "Test successful on ${kernel_version}"
if [[ -v COVERALLS_TOKEN ]]; then
goveralls -coverprofile="${output}/coverage.txt" -service=semaphore -repotoken "$COVERALLS_TOKEN"
fi
fi fi
export GOFLAGS=-mod=readonly $sudo rm -r "${input}"
export CGO_ENABLED=0 $sudo rm -r "${output}"
echo Testing on "${kernel_version}"
go test -exec "$script --exec-vm $input" "${args[@]}"
echo "Test successful on ${kernel_version}"
rm -r "${input}"

View File

@ -1,16 +0,0 @@
package apparmor
import "errors"
var (
// IsEnabled returns true if apparmor is enabled for the host.
IsEnabled = isEnabled
// ApplyProfile will apply the profile with the specified name to the process after
// the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled
// on other platforms.
ApplyProfile = applyProfile
// ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported.
ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
)

View File

@ -15,8 +15,8 @@ var (
checkAppArmor sync.Once checkAppArmor sync.Once
) )
// isEnabled returns true if apparmor is enabled for the host. // IsEnabled returns true if apparmor is enabled for the host.
func isEnabled() bool { func IsEnabled() bool {
checkAppArmor.Do(func() { checkAppArmor.Do(func() {
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
@ -57,10 +57,9 @@ func changeOnExec(name string) error {
return nil return nil
} }
// applyProfile will apply the profile with the specified name to the process after // ApplyProfile will apply the profile with the specified name to the process after
// the next exec. It is only supported on Linux and produces an error on other // the next exec.
// platforms. func ApplyProfile(name string) error {
func applyProfile(name string) error {
if name == "" { if name == "" {
return nil return nil
} }

View File

@ -2,11 +2,17 @@
package apparmor package apparmor
func isEnabled() bool { import (
"errors"
)
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
func IsEnabled() bool {
return false return false
} }
func applyProfile(name string) error { func ApplyProfile(name string) error {
if name != "" { if name != "" {
return ErrApparmorNotEnabled return ErrApparmorNotEnabled
} }

View File

@ -258,9 +258,9 @@ func (e *Emulator) Apply(rule devices.Rule) error {
if rule.Allow { if rule.Allow {
return e.allow(innerRule) return e.allow(innerRule)
} else {
return e.deny(innerRule)
} }
return e.deny(innerRule)
} }
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns // EmulatorFromList takes a reader to a "devices.list"-like source, and returns
@ -371,12 +371,3 @@ func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
} }
return transitionRules, nil return transitionRules, nil
} }
// Rules returns the minimum set of rules necessary to convert a *deny-all*
// cgroup to the emulated filter state (note that this is not the same as a
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
// wrapper around Transition() with the source emulator being an empty cgroup.
func (e *Emulator) Rules() ([]*devices.Rule, error) {
defaultCgroup := &Emulator{defaultAllow: false}
return defaultCgroup.Transition(e)
}

View File

@ -11,7 +11,6 @@ import (
"strconv" "strconv"
"github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/asm"
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors" "github.com/pkg/errors"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
@ -23,44 +22,11 @@ const (
) )
// DeviceFilter returns eBPF device filter program and its license string // DeviceFilter returns eBPF device filter program and its license string
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
// Generate the minimum ruleset for the device rules we are given. While we p := &program{}
// don't care about minimum transitions in cgroupv2, using the emulator
// gives us a guarantee that the behaviour of devices filtering is the same
// as cgroupv1, including security hardenings to avoid misconfiguration
// (such as punching holes in wildcard rules).
emu := new(devicesemulator.Emulator)
for _, rule := range rules {
if err := emu.Apply(*rule); err != nil {
return nil, "", err
}
}
cleanRules, err := emu.Rules()
if err != nil {
return nil, "", err
}
p := &program{
defaultAllow: emu.IsBlacklist(),
}
p.init() p.init()
for i := len(devices) - 1; i >= 0; i-- {
for idx, rule := range cleanRules { if err := p.appendDevice(devices[i]); err != nil {
if rule.Type == devices.WildcardDevice {
// We can safely skip over wildcard entries because there should
// only be one (at most) at the very start to instruct cgroupv1 to
// go into allow-list mode. However we do double-check this here.
if idx != 0 || rule.Allow != emu.IsBlacklist() {
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
}
continue
}
if rule.Allow == p.defaultAllow {
// There should be no rules which have an action equal to the
// default action, the emulator removes those.
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
}
if err := p.appendRule(rule); err != nil {
return nil, "", err return nil, "", err
} }
} }
@ -69,9 +35,9 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
} }
type program struct { type program struct {
insts asm.Instructions insts asm.Instructions
defaultAllow bool hasWildCard bool
blockID int blockID int
} }
func (p *program) init() { func (p *program) init() {
@ -101,35 +67,39 @@ func (p *program) init() {
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
} }
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it // appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
// to the in-progress filter program. In order to operate properly, it must be func (p *program) appendDevice(dev *devices.Rule) error {
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
// with any "a" rules removed).
func (p *program) appendRule(rule *devices.Rule) error {
if p.blockID < 0 { if p.blockID < 0 {
return errors.New("the program is finalized") return errors.New("the program is finalized")
} }
if p.hasWildCard {
// All entries after wildcard entry are ignored
return nil
}
var bpfType int32 bpfType := int32(-1)
switch rule.Type { hasType := true
case devices.CharDevice: switch dev.Type {
case 'c':
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
case devices.BlockDevice: case 'b':
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
case 'a':
hasType = false
default: default:
// We do not permit 'a', nor any other types we don't know about. // if not specified in OCI json, typ is set to DeviceTypeAll
return errors.Errorf("invalid type %q", string(rule.Type)) return errors.Errorf("invalid Type %q", string(dev.Type))
} }
if rule.Major > math.MaxUint32 { if dev.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", rule.Major) return errors.Errorf("invalid major %d", dev.Major)
} }
if rule.Minor > math.MaxUint32 { if dev.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", rule.Major) return errors.Errorf("invalid minor %d", dev.Major)
} }
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := rule.Minor >= 0 hasMinor := dev.Minor >= 0
bpfAccess := int32(0) bpfAccess := int32(0)
for _, r := range rule.Permissions { for _, r := range dev.Permissions {
switch r { switch r {
case 'r': case 'r':
bpfAccess |= unix.BPF_DEVCG_ACC_READ bpfAccess |= unix.BPF_DEVCG_ACC_READ
@ -149,10 +119,12 @@ func (p *program) appendRule(rule *devices.Rule) error {
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
prevBlockLastIdx = len(p.insts) - 1 prevBlockLastIdx = len(p.insts) - 1
) )
p.insts = append(p.insts, if hasType {
// if (R2 != bpfType) goto next p.insts = append(p.insts,
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), // if (R2 != bpfType) goto next
) asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
)
}
if hasAccess { if hasAccess {
p.insts = append(p.insts, p.insts = append(p.insts,
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
@ -164,16 +136,19 @@ func (p *program) appendRule(rule *devices.Rule) error {
if hasMajor { if hasMajor {
p.insts = append(p.insts, p.insts = append(p.insts,
// if (R4 != major) goto next // if (R4 != major) goto next
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
) )
} }
if hasMinor { if hasMinor {
p.insts = append(p.insts, p.insts = append(p.insts,
// if (R5 != minor) goto next // if (R5 != minor) goto next
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
) )
} }
p.insts = append(p.insts, acceptBlock(rule.Allow)...) if !hasType && !hasAccess && !hasMajor && !hasMinor {
p.hasWildCard = true
}
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
// set blockSym to the first instruction we added in this iteration // set blockSym to the first instruction we added in this iteration
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
p.blockID++ p.blockID++
@ -181,14 +156,14 @@ func (p *program) appendRule(rule *devices.Rule) error {
} }
func (p *program) finalize() (asm.Instructions, error) { func (p *program) finalize() (asm.Instructions, error) {
var v int32 if p.hasWildCard {
if p.defaultAllow { // acceptBlock with asm.Return() is already inserted
v = 1 return p.insts, nil
} }
blockSym := "block-" + strconv.Itoa(p.blockID) blockSym := "block-" + strconv.Itoa(p.blockID)
p.insts = append(p.insts, p.insts = append(p.insts,
// R0 <- v // R0 <- 0
asm.Mov.Imm32(asm.R0, v).Sym(blockSym), asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
asm.Return(), asm.Return(),
) )
p.blockID = -1 p.blockID = -1
@ -196,7 +171,7 @@ func (p *program) finalize() (asm.Instructions, error) {
} }
func acceptBlock(accept bool) asm.Instructions { func acceptBlock(accept bool) asm.Instructions {
var v int32 v := int32(0)
if accept { if accept {
v = 1 v = 1
} }

View File

@ -0,0 +1,57 @@
package ebpf
import (
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// insts is the program to load, license is its license string, and dirFD is
// the file descriptor used as the attach target.
//
// On success it returns a closer that detaches the program from dirFD again.
// On failure it returns a no-op closer together with the error, so the
// returned closer is always safe to call.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15.
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
	nilCloser := func() error {
		return nil
	}

	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
	// This limit is not inherited into the container.
	memlockLimit := &unix.Rlimit{
		Cur: unix.RLIM_INFINITY,
		Max: unix.RLIM_INFINITY,
	}
	// The result is explicitly discarded: loading is attempted regardless of
	// whether the limit could be raised.
	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)

	spec := &ebpf.ProgramSpec{
		Type:         ebpf.CGroupDevice,
		Instructions: insts,
		License:      license,
	}
	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		return nilCloser, err
	}

	err = link.RawAttachProgram(link.RawAttachProgramOptions{
		Target:  dirFD,
		Program: prog,
		Attach:  ebpf.AttachCGroupDevice,
		Flags:   unix.BPF_F_ALLOW_MULTI,
	})
	if err != nil {
		// The program was loaded but never attached and is not handed to the
		// caller, so release its handle here instead of leaving it open.
		_ = prog.Close()
		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
	}

	closer := func() error {
		// Use a closure-local error so that the closer does not write to the
		// enclosing function's err variable.
		if err := link.RawDetachProgram(link.RawDetachProgramOptions{
			Target:  dirFD,
			Program: prog,
			Attach:  ebpf.AttachCGroupDevice,
		}); err != nil {
			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
		}
		return nil
	}
	return closer, nil
}

View File

@ -1,240 +0,0 @@
package ebpf
import (
"fmt"
"os"
"runtime"
"sync"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// nilCloser is a closer that does nothing and always reports success.
func nilCloser() error { return nil }
// findAttachedCgroupDeviceFilters queries the BPF_CGROUP_DEVICE programs
// attached to dirFd (passed as the query's target fd) and returns a handle for
// each of them.
//
// The query starts with room for 64 program ids. When the kernel answers with
// ENOSPC, the program count it wrote back is used as the new buffer size and
// the query is retried, up to 10 times in total.
//
// On error no program handles are returned; handles that had already been
// opened are closed before returning.
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
	// Argument layout handed to the bpf(2) syscall for BPF_PROG_QUERY.
	type bpfAttrQuery struct {
		TargetFd    uint32
		AttachType  uint32
		QueryType   uint32
		AttachFlags uint32
		ProgIds     uint64 // __aligned_u64
		ProgCnt     uint32
	}

	// Currently you can only have 64 eBPF programs attached to a cgroup.
	size := 64
	retries := 0
	for retries < 10 {
		progIds := make([]uint32, size)
		query := bpfAttrQuery{
			TargetFd:   uint32(dirFd),
			AttachType: uint32(unix.BPF_CGROUP_DEVICE),
			ProgIds:    uint64(uintptr(unsafe.Pointer(&progIds[0]))),
			ProgCnt:    uint32(len(progIds)),
		}

		// Fetch the list of program ids.
		_, _, errno := unix.Syscall(unix.SYS_BPF,
			uintptr(unix.BPF_PROG_QUERY),
			uintptr(unsafe.Pointer(&query)),
			unsafe.Sizeof(query))
		// ProgCnt is read back after the call; it sizes both the retry buffer
		// and the result slice below.
		size = int(query.ProgCnt)
		runtime.KeepAlive(query)
		if errno != 0 {
			// On ENOSPC we get the correct number of programs.
			if errno == unix.ENOSPC {
				retries++
				continue
			}
			return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
		}

		// Convert the ids to program handles.
		progIds = progIds[:size]
		programs := make([]*ebpf.Program, len(progIds))
		for idx, progId := range progIds {
			program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
			if err != nil {
				// The caller only receives nil on this path, so close the
				// handles opened in earlier iterations instead of dropping
				// them while still open.
				for _, opened := range programs[:idx] {
					_ = opened.Close()
				}
				return nil, fmt.Errorf("cannot fetch program from id: %w", err)
			}
			programs[idx] = program
		}
		runtime.KeepAlive(progIds)
		return programs, nil
	}

	return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
}
var (
	// haveBpfProgReplaceBool holds the cached probe result. It is only
	// written inside haveBpfProgReplaceOnce.Do.
	haveBpfProgReplaceBool bool
	// haveBpfProgReplaceOnce makes the probe run a single time.
	haveBpfProgReplaceOnce sync.Once
)

// haveBpfProgReplace reports whether attaching with BPF_F_REPLACE looks
// usable. The probe runs on the first call only; later calls return the
// cached answer.
//
// Loosely based on the BPF_F_REPLACE support check in
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
//
// TODO: move this logic to cilium/ebpf
func haveBpfProgReplace() bool {
	haveBpfProgReplaceOnce.Do(func() {
		haveBpfProgReplaceBool = probeBpfProgReplace()
	})
	return haveBpfProgReplaceBool
}

// probeBpfProgReplace performs the actual BPF_F_REPLACE check: it loads a
// minimal cgroup-device program and tries to attach it, with BPF_F_REPLACE
// set, to a file descriptor for /dev/null. It returns false when the probe
// cannot be set up or the attach reports EINVAL, and true otherwise.
func probeBpfProgReplace() bool {
	dummyProg, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:    ebpf.CGroupDevice,
		License: "MIT",
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
	})
	if err != nil {
		logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
		return false
	}
	defer dummyProg.Close()

	target, err := os.Open("/dev/null")
	if err != nil {
		logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
		return false
	}
	defer target.Close()

	// We know that we have BPF_PROG_ATTACH since we can load
	// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
	// we know that the feature isn't present.
	attachErr := link.RawAttachProgram(link.RawAttachProgramOptions{
		// We rely on this fd being checked after attachFlags.
		Target: int(target.Fd()),
		// Attempt to "replace" bad fds with this program.
		Program: dummyProg,
		Attach:  ebpf.AttachCGroupDevice,
		Flags:   unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
	})
	switch {
	case errors.Is(attachErr, unix.EINVAL):
		// not supported
		return false
	case !errors.Is(attachErr, unix.EBADF):
		// The attach_flags test succeeded, but with an outcome other than
		// the EBADF the /dev/null target is expected to produce.
		logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", attachErr)
	}
	return true
}
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// insts is the program to load, license is its license string, and dirFd is
// the file descriptor used as the attach target.
//
// Filters already attached to dirFd are looked up first. If exactly one is
// present and BPF_F_REPLACE is available, it is replaced in a single attach
// call; otherwise the new program is attached and every old filter is then
// detached.
//
// The returned closer detaches the new program. When loading or attaching
// fails a no-op closer is returned with the error; when detaching an old
// filter fails the real closer is returned with the error.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15.
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
	// This limit is not inherited into the container.
	memlockLimit := &unix.Rlimit{
		Cur: unix.RLIM_INFINITY,
		Max: unix.RLIM_INFINITY,
	}
	// The result is explicitly discarded: loading is attempted regardless of
	// whether the limit could be raised.
	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)

	// Get the list of existing programs.
	oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
	if err != nil {
		return nilCloser, err
	}
	// The old program handles are only used inside this function (the closer
	// returned below references nothing but the new program), so release
	// them on every return path.
	defer func() {
		for _, oldProg := range oldProgs {
			_ = oldProg.Close()
		}
	}()
	useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1

	// Generate new program.
	spec := &ebpf.ProgramSpec{
		Type:         ebpf.CGroupDevice,
		Instructions: insts,
		License:      license,
	}
	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		return nilCloser, err
	}

	// If there is only one old program, we can just replace it directly.
	var (
		replaceProg *ebpf.Program
		attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
	)
	if useReplaceProg {
		replaceProg = oldProgs[0]
		attachFlags |= unix.BPF_F_REPLACE
	}
	err = link.RawAttachProgram(link.RawAttachProgramOptions{
		Target:  dirFd,
		Program: prog,
		Replace: replaceProg,
		Attach:  ebpf.AttachCGroupDevice,
		Flags:   attachFlags,
	})
	if err != nil {
		// The new program was loaded but never attached and is not handed to
		// the caller, so release its handle here.
		_ = prog.Close()
		return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
	}

	closer := func() error {
		// Use a closure-local error so that the closer does not write to the
		// enclosing function's err variable.
		if err := link.RawDetachProgram(link.RawDetachProgramOptions{
			Target:  dirFd,
			Program: prog,
			Attach:  ebpf.AttachCGroupDevice,
		}); err != nil {
			return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
		}
		// TODO: Should we attach the old filters back in this case? Otherwise
		// we fail-open on a security feature, which is a bit scary.
		return nil
	}

	if !useReplaceProg {
		logLevel := logrus.DebugLevel
		// If there was more than one old program, give a warning (since this
		// really shouldn't happen with runc-managed cgroups) and then detach
		// all the old programs.
		if len(oldProgs) > 1 {
			// NOTE: Ideally this should be a warning but it turns out that
			//       systemd-managed cgroups trigger this warning (apparently
			//       systemd doesn't delete old non-systemd programs when
			//       setting properties).
			logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
			logLevel = logrus.InfoLevel
		}
		for idx, oldProg := range oldProgs {
			// Output some extra debug info.
			if info, err := oldProg.Info(); err == nil {
				fields := logrus.Fields{
					"type": info.Type.String(),
					"tag":  info.Tag,
					"name": info.Name,
				}
				if id, ok := info.ID(); ok {
					fields["id"] = id
				}
				if runCount, ok := info.RunCount(); ok {
					fields["run_count"] = runCount
				}
				// Named progRuntime so the local does not shadow the
				// runtime package.
				if progRuntime, ok := info.Runtime(); ok {
					fields["runtime"] = progRuntime.String()
				}
				logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
			}
			if err := link.RawDetachProgram(link.RawDetachProgramOptions{
				Target:  dirFd,
				Program: oldProg,
				Attach:  ebpf.AttachCGroupDevice,
			}); err != nil {
				// The new program is attached at this point, so hand back
				// the real closer to let the caller undo that.
				return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
			}
		}
	}
	return closer, nil
}

View File

@ -6,17 +6,15 @@ import (
"bufio" "bufio"
"fmt" "fmt"
"os" "os"
"path/filepath"
"strconv" "strconv"
"strings" "strings"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type BlkioGroup struct { type BlkioGroup struct {
weightFilename string
weightDeviceFilename string
} }
func (s *BlkioGroup) Name() string { func (s *BlkioGroup) Name() string {
@ -28,47 +26,42 @@ func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
} }
func (s *BlkioGroup) Set(path string, r *configs.Resources) error { func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
s.detectWeightFilenames(path)
if r.BlkioWeight != 0 { if r.BlkioWeight != 0 {
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
return err return err
} }
} }
if r.BlkioLeafWeight != 0 { if r.BlkioLeafWeight != 0 {
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil { if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
return err return err
} }
} }
for _, wd := range r.BlkioWeightDevice { for _, wd := range r.BlkioWeightDevice {
if wd.Weight != 0 { if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { return err
return err
}
} }
if wd.LeafWeight != 0 { if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { return err
return err
}
} }
} }
for _, td := range r.BlkioThrottleReadBpsDevice { for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleWriteBpsDevice { for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleReadIOPSDevice { for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleWriteIOPSDevice { for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
return err return err
} }
} }
@ -113,7 +106,7 @@ func splitBlkioStatLine(r rune) bool {
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) { func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
var blkioStats []cgroups.BlkioStatEntry var blkioStats []cgroups.BlkioStatEntry
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY) f, err := fscommon.OpenFile(dir, file, os.O_RDONLY)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return blkioStats, nil return blkioStats, nil
@ -168,7 +161,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
filename string filename string
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
} }
bfqDebugStats := []blkioStatInfo{ var bfqDebugStats = []blkioStatInfo{
{ {
filename: "blkio.bfq.sectors_recursive", filename: "blkio.bfq.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@ -202,7 +195,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
}, },
} }
bfqStats := []blkioStatInfo{ var bfqStats = []blkioStatInfo{
{ {
filename: "blkio.bfq.io_serviced_recursive", filename: "blkio.bfq.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -212,7 +205,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
}, },
} }
cfqStats := []blkioStatInfo{ var cfqStats = []blkioStatInfo{
{ {
filename: "blkio.sectors_recursive", filename: "blkio.sectors_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
@ -246,7 +239,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
}, },
} }
throttleRecursiveStats := []blkioStatInfo{ var throttleRecursiveStats = []blkioStatInfo{
{ {
filename: "blkio.throttle.io_serviced_recursive", filename: "blkio.throttle.io_serviced_recursive",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -256,7 +249,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
}, },
} }
baseStats := []blkioStatInfo{ var baseStats = []blkioStatInfo{
{ {
filename: "blkio.throttle.io_serviced", filename: "blkio.throttle.io_serviced",
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
@ -266,7 +259,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
}, },
} }
orderedStats := [][]blkioStatInfo{ var orderedStats = [][]blkioStatInfo{
bfqDebugStats, bfqDebugStats,
bfqStats, bfqStats,
cfqStats, cfqStats,
@ -287,7 +280,7 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
return err return err
} }
*statInfo.blkioStatEntriesPtr = blkioStats *statInfo.blkioStatEntriesPtr = blkioStats
// finish if all stats are gathered //finish if all stats are gathered
if i == len(statGroup)-1 { if i == len(statGroup)-1 {
return nil return nil
} }
@ -295,17 +288,3 @@ func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
} }
return nil return nil
} }
// detectWeightFilenames fills in s.weightFilename and s.weightDeviceFilename
// the first time it is called for s. If a "blkio.weight" file exists under
// path, the plain blkio names are used; otherwise the "blkio.bfq." names are.
// Once s.weightFilename is non-empty, further calls do nothing.
func (s *BlkioGroup) detectWeightFilenames(path string) {
	if s.weightFilename != "" {
		// A previous call already picked the names.
		return
	}

	// Start from the bfq names and switch to the plain ones only when the
	// plain weight file is present.
	weightFile, weightDeviceFile := "blkio.bfq.weight", "blkio.bfq.weight_device"
	if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
		weightFile, weightDeviceFile = "blkio.weight", "blkio.weight_device"
	}
	s.weightFilename = weightFile
	s.weightDeviceFilename = weightDeviceFile
}

View File

@ -13,7 +13,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type CpuGroup struct{} type CpuGroup struct {
}
func (s *CpuGroup) Name() string { func (s *CpuGroup) Name() string {
return "cpu" return "cpu"
@ -25,7 +26,7 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
if path == "" { if path == "" {
return nil return nil
} }
if err := os.MkdirAll(path, 0o755); err != nil { if err := os.MkdirAll(path, 0755); err != nil {
return err return err
} }
// We should set the real-Time group scheduling settings before moving // We should set the real-Time group scheduling settings before moving
@ -41,12 +42,12 @@ func (s *CpuGroup) Apply(path string, d *cgroupData) error {
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error { func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
if r.CpuRtPeriod != 0 { if r.CpuRtPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil { if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
return err return err
} }
} }
if r.CpuRtRuntime != 0 { if r.CpuRtRuntime != 0 {
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil { if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
return err return err
} }
} }
@ -56,7 +57,7 @@ func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
func (s *CpuGroup) Set(path string, r *configs.Resources) error { func (s *CpuGroup) Set(path string, r *configs.Resources) error {
if r.CpuShares != 0 { if r.CpuShares != 0 {
shares := r.CpuShares shares := r.CpuShares
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil { if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
return err return err
} }
// read it back // read it back
@ -72,12 +73,12 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
} }
} }
if r.CpuPeriod != 0 { if r.CpuPeriod != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil { if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil {
return err return err
} }
} }
if r.CpuQuota != 0 { if r.CpuQuota != 0 {
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
return err return err
} }
} }
@ -85,7 +86,7 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
} }
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY) f, err := fscommon.OpenFile(path, "cpu.stat", os.O_RDONLY)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil return nil

View File

@ -32,7 +32,8 @@ const (
clockTicks uint64 = 100 clockTicks uint64 = 100
) )
type CpuacctGroup struct{} type CpuacctGroup struct {
}
func (s *CpuacctGroup) Name() string { func (s *CpuacctGroup) Name() string {
return "cpuacct" return "cpuacct"
@ -90,7 +91,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
// Expected format: // Expected format:
// user <usage in ticks> // user <usage in ticks>
// system <usage in ticks> // system <usage in ticks>
data, err := cgroups.ReadFile(path, cgroupCpuacctStat) data, err := fscommon.ReadFile(path, cgroupCpuacctStat)
if err != nil { if err != nil {
return 0, 0, err return 0, 0, err
} }
@ -116,7 +117,7 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
func getPercpuUsage(path string) ([]uint64, error) { func getPercpuUsage(path string) ([]uint64, error) {
percpuUsage := []uint64{} percpuUsage := []uint64{}
data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu") data, err := fscommon.ReadFile(path, "cpuacct.usage_percpu")
if err != nil { if err != nil {
return percpuUsage, err return percpuUsage, err
} }
@ -134,7 +135,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
usageKernelMode := []uint64{} usageKernelMode := []uint64{}
usageUserMode := []uint64{} usageUserMode := []uint64{}
file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY) file, err := fscommon.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
if os.IsNotExist(err) { if os.IsNotExist(err) {
return usageKernelMode, usageUserMode, nil return usageKernelMode, usageUserMode, nil
} else if err != nil { } else if err != nil {
@ -143,7 +144,7 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
defer file.Close() defer file.Close()
scanner := bufio.NewScanner(file) scanner := bufio.NewScanner(file)
scanner.Scan() // skipping header line scanner.Scan() //skipping header line
for scanner.Scan() { for scanner.Scan() {
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1) lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)

View File

@ -16,7 +16,8 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
type CpusetGroup struct{} type CpusetGroup struct {
}
func (s *CpusetGroup) Name() string { func (s *CpusetGroup) Name() string {
return "cpuset" return "cpuset"
@ -28,12 +29,12 @@ func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
func (s *CpusetGroup) Set(path string, r *configs.Resources) error { func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
if r.CpusetCpus != "" { if r.CpusetCpus != "" {
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil { if err := fscommon.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
return err return err
} }
} }
if r.CpusetMems != "" { if r.CpusetMems != "" {
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil { if err := fscommon.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
return err return err
} }
} }
@ -155,7 +156,7 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil { if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
return err return err
} }
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) { if err := os.Mkdir(dir, 0755); err != nil && !os.IsExist(err) {
return err return err
} }
// We didn't inherit cpuset configs from parent, but we have // We didn't inherit cpuset configs from parent, but we have
@ -175,10 +176,10 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
} }
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) { func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil { if cpus, err = fscommon.ReadFile(parent, "cpuset.cpus"); err != nil {
return return
} }
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil { if mems, err = fscommon.ReadFile(parent, "cpuset.mems"); err != nil {
return return
} }
return cpus, mems, nil return cpus, mems, nil
@ -205,7 +206,7 @@ func cpusetEnsureParent(current string) error {
if err := cpusetEnsureParent(parent); err != nil { if err := cpusetEnsureParent(parent); err != nil {
return err return err
} }
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) { if err := os.Mkdir(current, 0755); err != nil && !os.IsExist(err) {
return err return err
} }
return cpusetCopyIfNeeded(current, parent) return cpusetCopyIfNeeded(current, parent)
@ -224,12 +225,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
} }
if isEmptyCpuset(currentCpus) { if isEmptyCpuset(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil { if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
return err return err
} }
} }
if isEmptyCpuset(currentMems) { if isEmptyCpuset(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil { if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
return err return err
} }
} }

View File

@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices" cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns" "github.com/opencontainers/runc/libcontainer/userns"
@ -35,7 +36,7 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
} }
func loadEmulator(path string) (*cgroupdevices.Emulator, error) { func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
list, err := cgroups.ReadFile(path, "devices.list") list, err := fscommon.ReadFile(path, "devices.list")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -80,7 +81,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
if rule.Allow { if rule.Allow {
file = "devices.allow" file = "devices.allow"
} }
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil { if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil {
return err return err
} }
} }

View File

@ -10,12 +10,14 @@ import (
"time" "time"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
type FreezerGroup struct{} type FreezerGroup struct {
}
func (s *FreezerGroup) Name() string { func (s *FreezerGroup) Name() string {
return "freezer" return "freezer"
@ -33,7 +35,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Freezing failed, and it is bad and dangerous // Freezing failed, and it is bad and dangerous
// to leave the cgroup in FROZEN or FREEZING // to leave the cgroup in FROZEN or FREEZING
// state, so (try to) thaw it back. // state, so (try to) thaw it back.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) _ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
} }
}() }()
@ -66,11 +68,11 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// the chances to succeed in freezing // the chances to succeed in freezing
// in case new processes keep appearing // in case new processes keep appearing
// in the cgroup. // in the cgroup.
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) _ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil { if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
return err return err
} }
@ -81,7 +83,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// system. // system.
time.Sleep(10 * time.Microsecond) time.Sleep(10 * time.Microsecond)
} }
state, err := cgroups.ReadFile(path, "freezer.state") state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil { if err != nil {
return err return err
} }
@ -102,7 +104,7 @@ func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
// Despite our best efforts, it got stuck in FREEZING. // Despite our best efforts, it got stuck in FREEZING.
return errors.New("unable to freeze") return errors.New("unable to freeze")
case configs.Thawed: case configs.Thawed:
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
case configs.Undefined: case configs.Undefined:
return nil return nil
default: default:
@ -116,7 +118,7 @@ func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) { func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
for { for {
state, err := cgroups.ReadFile(path, "freezer.state") state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil { if err != nil {
// If the kernel is too old, then we just treat the freezer as // If the kernel is too old, then we just treat the freezer as
// being in an "undefined" state. // being in an "undefined" state.

View File

@ -64,10 +64,8 @@ func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgro
} }
// The absolute path to the root of the cgroup hierarchies. // The absolute path to the root of the cgroup hierarchies.
var ( var cgroupRootLock sync.Mutex
cgroupRootLock sync.Mutex var cgroupRoot string
cgroupRoot string
)
const defaultCgroupRoot = "/sys/fs/cgroup" const defaultCgroupRoot = "/sys/fs/cgroup"
@ -395,7 +393,7 @@ func join(path string, pid int) error {
if path == "" { if path == "" {
return nil return nil
} }
if err := os.MkdirAll(path, 0o755); err != nil { if err := os.MkdirAll(path, 0755); err != nil {
return err return err
} }
return cgroups.WriteCgroupProc(path, pid) return cgroups.WriteCgroupProc(path, pid)

View File

@ -11,7 +11,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type HugetlbGroup struct{} type HugetlbGroup struct {
}
func (s *HugetlbGroup) Name() string { func (s *HugetlbGroup) Name() string {
return "hugetlb" return "hugetlb"
@ -23,7 +24,7 @@ func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error { func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
for _, hugetlb := range r.HugetlbLimit { for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil { if err := fscommon.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err return err
} }
} }

View File

@ -25,7 +25,8 @@ const (
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
) )
type MemoryGroup struct{} type MemoryGroup struct {
}
func (s *MemoryGroup) Name() string { func (s *MemoryGroup) Name() string {
return "memory" return "memory"
@ -40,7 +41,7 @@ func setMemory(path string, val int64) error {
return nil return nil
} }
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
if !errors.Is(err, unix.EBUSY) { if !errors.Is(err, unix.EBUSY) {
return err return err
} }
@ -64,7 +65,7 @@ func setSwap(path string, val int64) error {
return nil return nil
} }
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
} }
func setMemoryAndSwap(path string, r *configs.Resources) error { func setMemoryAndSwap(path string, r *configs.Resources) error {
@ -117,20 +118,20 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
// ignore KernelMemory and KernelMemoryTCP // ignore KernelMemory and KernelMemoryTCP
if r.MemoryReservation != 0 { if r.MemoryReservation != 0 {
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil { if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
return err return err
} }
} }
if r.OomKillDisable { if r.OomKillDisable {
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil { if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
return err return err
} }
} }
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 { if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
return nil return nil
} else if *r.MemorySwappiness <= 100 { } else if *r.MemorySwappiness <= 100 {
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil { if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
return err return err
} }
} else { } else {
@ -142,7 +143,7 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
// Set stats from memory.stat. // Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY) statsFile, err := fscommon.OpenFile(path, "memory.stat", os.O_RDONLY)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil return nil
@ -199,6 +200,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil return nil
} }
// memoryAssigned reports whether cgroup carries any memory setting: a
// non-zero Memory or MemoryReservation, a positive MemorySwap,
// OomKillDisable, or a MemorySwappiness that is set and not equal to -1.
func memoryAssigned(cgroup *configs.Cgroup) bool {
	res := cgroup.Resources
	if res.Memory != 0 || res.MemoryReservation != 0 {
		return true
	}
	if res.MemorySwap > 0 || res.OomKillDisable {
		return true
	}
	// A nil pointer or the value -1 both count as "swappiness not set".
	swappiness := res.MemorySwappiness
	return swappiness != nil && int64(*swappiness) != -1
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) { func getMemoryData(path, name string) (cgroups.MemoryData, error) {
memoryData := cgroups.MemoryData{} memoryData := cgroups.MemoryData{}
@ -249,13 +258,12 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
) )
stats := cgroups.PageUsageByNUMA{} stats := cgroups.PageUsageByNUMA{}
file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY) file, err := fscommon.OpenFile(cgroupPath, filename, os.O_RDONLY)
if os.IsNotExist(err) { if os.IsNotExist(err) {
return stats, nil return stats, nil
} else if err != nil { } else if err != nil {
return stats, err return stats, err
} }
defer file.Close()
// File format is documented in linux/Documentation/cgroup-v1/memory.txt // File format is documented in linux/Documentation/cgroup-v1/memory.txt
// and it looks like this: // and it looks like this:

View File

@ -6,10 +6,12 @@ import (
"strconv" "strconv"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type NetClsGroup struct{} type NetClsGroup struct {
}
func (s *NetClsGroup) Name() string { func (s *NetClsGroup) Name() string {
return "net_cls" return "net_cls"
@ -21,7 +23,7 @@ func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
func (s *NetClsGroup) Set(path string, r *configs.Resources) error { func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
if r.NetClsClassid != 0 { if r.NetClsClassid != 0 {
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil { if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
return err return err
} }
} }

View File

@ -4,10 +4,12 @@ package fs
import ( import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type NetPrioGroup struct{} type NetPrioGroup struct {
}
func (s *NetPrioGroup) Name() string { func (s *NetPrioGroup) Name() string {
return "net_prio" return "net_prio"
@ -19,7 +21,7 @@ func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error { func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
for _, prioMap := range r.NetPrioIfpriomap { for _, prioMap := range r.NetPrioIfpriomap {
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
return err return err
} }
} }

View File

@ -7,7 +7,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type PerfEventGroup struct{} type PerfEventGroup struct {
}
func (s *PerfEventGroup) Name() string { func (s *PerfEventGroup) Name() string {
return "perf_event" return "perf_event"

View File

@ -12,7 +12,8 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
type PidsGroup struct{} type PidsGroup struct {
}
func (s *PidsGroup) Name() string { func (s *PidsGroup) Name() string {
return "pids" return "pids"
@ -31,7 +32,7 @@ func (s *PidsGroup) Set(path string, r *configs.Resources) error {
limit = strconv.FormatInt(r.PidsLimit, 10) limit = strconv.FormatInt(r.PidsLimit, 10)
} }
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil { if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
return err return err
} }
} }

View File

@ -23,7 +23,7 @@ func setCpu(dirPath string, r *configs.Resources) error {
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. // NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
if r.CpuWeight != 0 { if r.CpuWeight != 0 {
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil { if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
return err return err
} }
} }
@ -40,16 +40,15 @@ func setCpu(dirPath string, r *configs.Resources) error {
period = 100000 period = 100000
} }
str += " " + strconv.FormatUint(period, 10) str += " " + strconv.FormatUint(period, 10)
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { if err := fscommon.WriteFile(dirPath, "cpu.max", str); err != nil {
return err return err
} }
} }
return nil return nil
} }
func statCpu(dirPath string, stats *cgroups.Stats) error { func statCpu(dirPath string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY) f, err := fscommon.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
if err != nil { if err != nil {
return err return err
} }

View File

@ -3,7 +3,7 @@
package fs2 package fs2
import ( import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
@ -17,12 +17,12 @@ func setCpuset(dirPath string, r *configs.Resources) error {
} }
if r.CpusetCpus != "" { if r.CpusetCpus != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil { if err := fscommon.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
return err return err
} }
} }
if r.CpusetMems != "" { if r.CpusetMems != "" {
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil { if err := fscommon.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
return err return err
} }
} }

View File

@ -6,12 +6,12 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
func supportedControllers() (string, error) { func supportedControllers() (string, error) {
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers") return fscommon.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
} }
// needAnyControllers returns whether we enable some supported controllers or not, // needAnyControllers returns whether we enable some supported controllers or not,
@ -92,7 +92,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
for i, e := range elements { for i, e := range elements {
current = filepath.Join(current, e) current = filepath.Join(current, e)
if i > 0 { if i > 0 {
if err := os.Mkdir(current, 0o755); err != nil { if err := os.Mkdir(current, 0755); err != nil {
if !os.IsExist(err) { if !os.IsExist(err) {
return err return err
} }
@ -105,7 +105,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
} }
}() }()
} }
cgType, _ := cgroups.ReadFile(current, cgTypeFile) cgType, _ := fscommon.ReadFile(current, cgTypeFile)
cgType = strings.TrimSpace(cgType) cgType = strings.TrimSpace(cgType)
switch cgType { switch cgType {
// If the cgroup is in an invalid mode (usually this means there's an internal // If the cgroup is in an invalid mode (usually this means there's an internal
@ -122,7 +122,7 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
// since that means we're a properly delegated cgroup subtree) but in // since that means we're a properly delegated cgroup subtree) but in
// this case there's not much we can do and it's better than giving an // this case there's not much we can do and it's better than giving an
// error. // error.
_ = cgroups.WriteFile(current, cgTypeFile, "threaded") _ = fscommon.WriteFile(current, cgTypeFile, "threaded")
} }
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers // If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
// (and you cannot usually take a cgroup out of threaded mode). // (and you cannot usually take a cgroup out of threaded mode).
@ -136,11 +136,11 @@ func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
} }
// enable all supported controllers // enable all supported controllers
if i < len(elements)-1 { if i < len(elements)-1 {
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil { if err := fscommon.WriteFile(current, cgStCtlFile, res); err != nil {
// try write one by one // try write one by one
allCtrs := strings.Split(res, " ") allCtrs := strings.Split(res, " ")
for _, ctr := range allCtrs { for _, ctr := range allCtrs {
_ = cgroups.WriteFile(current, cgStCtlFile, ctr) _ = fscommon.WriteFile(current, cgStCtlFile, ctr)
} }
} }
// Some controllers might not be enabled when rootless or containerized, // Some controllers might not be enabled when rootless or containerized,

View File

@ -82,7 +82,9 @@ func parseCgroupFile(path string) (string, error) {
} }
func parseCgroupFromReader(r io.Reader) (string, error) { func parseCgroupFromReader(r io.Reader) (string, error) {
s := bufio.NewScanner(r) var (
s = bufio.NewScanner(r)
)
for s.Scan() { for s.Scan() {
var ( var (
text = s.Text() text = s.Text()

View File

@ -58,15 +58,29 @@ func setDevices(dirPath string, r *configs.Resources) error {
if r.SkipDevices { if r.SkipDevices {
return nil return nil
} }
// XXX: This is currently a white-list (but all callers pass a blacklist of
// devices). This is bad for a whole variety of reasons, but will need
// to be fixed with co-ordinated effort with downstreams.
insts, license, err := devicefilter.DeviceFilter(r.Devices) insts, license, err := devicefilter.DeviceFilter(r.Devices)
if err != nil { if err != nil {
return err return err
} }
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600) dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
if err != nil { if err != nil {
return errors.Errorf("cannot get dir FD for %s", dirPath) return errors.Errorf("cannot get dir FD for %s", dirPath)
} }
defer unix.Close(dirFD) defer unix.Close(dirFD)
// XXX: This code is currently incorrect when it comes to updating an
// existing cgroup with new rules (new rulesets are just appended to
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
// programs.
//
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
// race-free blacklist because it acts as a whitelist by default, and
// having a deny-everything program cannot be overridden by other
// programs. You could temporarily insert a deny-everything program
// but that would result in spurious failures during updates.
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
if !canSkipEBPFError(r) { if !canSkipEBPFError(r) {
return err return err

View File

@ -3,20 +3,27 @@
package fs2 package fs2
import ( import (
"bufio"
stdErrors "errors" stdErrors "errors"
"fmt"
"os" "os"
"strings" "strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors" "github.com/pkg/errors"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
func setFreezer(dirPath string, state configs.FreezerState) error { func setFreezer(dirPath string, state configs.FreezerState) error {
if err := supportsFreezer(dirPath); err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state == configs.Undefined || state == configs.Thawed {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
var stateStr string var stateStr string
switch state { switch state {
case configs.Undefined: case configs.Undefined:
@ -29,23 +36,11 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return errors.Errorf("invalid freezer state %q requested", state) return errors.Errorf("invalid freezer state %q requested", state)
} }
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR) if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil {
if err != nil {
// We can ignore this request as long as the user didn't ask us to
// freeze the container (since without the freezer cgroup, that's a
// no-op).
if state != configs.Frozen {
return nil
}
return errors.Wrap(err, "freezer not supported")
}
defer fd.Close()
if _, err := fd.WriteString(stateStr); err != nil {
return err return err
} }
// Confirm that the cgroup did actually change states. // Confirm that the cgroup did actually change states.
if actualState, err := readFreezer(dirPath, fd); err != nil { if actualState, err := getFreezer(dirPath); err != nil {
return err return err
} else if actualState != state { } else if actualState != state {
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState) return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
@ -53,8 +48,13 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
return nil return nil
} }
// supportsFreezer probes the "cgroup.freeze" file in dirPath. It returns nil
// when the file can be read and the read error otherwise.
func supportsFreezer(dirPath string) error {
	if _, readErr := fscommon.ReadFile(dirPath, "cgroup.freeze"); readErr != nil {
		return readErr
	}
	return nil
}
func getFreezer(dirPath string) (configs.FreezerState, error) { func getFreezer(dirPath string) (configs.FreezerState, error) {
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY) state, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
if err != nil { if err != nil {
// If the kernel is too old, then we just treat the freezer as being in // If the kernel is too old, then we just treat the freezer as being in
// an "undefined" state. // an "undefined" state.
@ -63,67 +63,12 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
} }
return configs.Undefined, err return configs.Undefined, err
} }
defer fd.Close() switch strings.TrimSpace(state) {
case "0":
return readFreezer(dirPath, fd)
}
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
if _, err := fd.Seek(0, 0); err != nil {
return configs.Undefined, err
}
state := make([]byte, 2)
if _, err := fd.Read(state); err != nil {
return configs.Undefined, err
}
switch string(state) {
case "0\n":
return configs.Thawed, nil return configs.Thawed, nil
case "1\n": case "1":
return waitFrozen(dirPath) return configs.Frozen, nil
default: default:
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state) return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
} }
} }
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
//
// It returns configs.Frozen as soon as a "frozen " line whose value starts
// with '1' is read. For any other value it sleeps for waitTime, rewinds the
// file and reads it again; once maxIter such retries have happened, a timeout
// error is returned. Errors from opening or rewinding the file are returned
// as is. If scanning ends without a "frozen " line, the result is
// configs.Undefined together with scanner.Err(), which may be nil.
func waitFrozen(dirPath string) (configs.FreezerState, error) {
	fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
	if err != nil {
		return configs.Undefined, err
	}
	defer fd.Close()

	// XXX: Simple wait/read/retry is used here. An implementation
	// based on poll(2) or inotify(7) is possible, but it makes the code
	// much more complicated. Maybe address this later.
	const (
		// Retry at most maxIter times, sleeping waitTime between attempts.
		waitTime = 10 * time.Millisecond
		maxIter  = 1000
	)
	scanner := bufio.NewScanner(fd)
	for i := 0; scanner.Scan(); {
		if i == maxIter {
			return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
		}
		line := scanner.Text()
		val := strings.TrimPrefix(line, "frozen ")
		if val != line { // got prefix
			// HasPrefix instead of val[0]: a bare "frozen " line leaves val
			// empty, and indexing it would panic.
			if strings.HasPrefix(val, "1") {
				return configs.Frozen, nil
			}

			i++
			// wait, then re-read
			time.Sleep(waitTime)
			_, err := fd.Seek(0, 0)
			if err != nil {
				return configs.Undefined, err
			}
		}
	}
	// Should only reach here either on read error,
	// or if the file does not contain "frozen " line.
	return configs.Undefined, scanner.Err()
}

View File

@ -51,7 +51,7 @@ func (m *manager) getControllers() error {
return nil return nil
} }
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers") data, err := fscommon.ReadFile(m.dirPath, "cgroup.controllers")
if err != nil { if err != nil {
if m.rootless && m.config.Path == "" { if m.rootless && m.config.Path == "" {
return nil return nil
@ -98,7 +98,9 @@ func (m *manager) GetAllPids() ([]int, error) {
} }
func (m *manager) GetStats() (*cgroups.Stats, error) { func (m *manager) GetStats() (*cgroups.Stats, error) {
var errs []error var (
errs []error
)
st := cgroups.NewStats() st := cgroups.NewStats()
@ -197,7 +199,7 @@ func (m *manager) setUnified(res map[string]string) error {
if strings.Contains(k, "/") { if strings.Contains(k, "/") {
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
} }
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil { if err := fscommon.WriteFile(m.dirPath, k, v); err != nil {
errC := errors.Cause(err) errC := errors.Cause(err)
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile. // Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) { if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {

View File

@ -21,7 +21,7 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
return nil return nil
} }
for _, hugetlb := range r.HugetlbLimit { for _, hugetlb := range r.HugetlbLimit {
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil { if err := fscommon.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
return err return err
} }
} }

View File

@ -4,95 +4,60 @@ package fs2
import ( import (
"bufio" "bufio"
"bytes"
"fmt"
"os" "os"
"strconv" "strconv"
"strings" "strings"
"github.com/sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
) )
func isIoSet(r *configs.Resources) bool { func isIoSet(r *configs.Resources) bool {
return r.BlkioWeight != 0 || return r.BlkioWeight != 0 ||
len(r.BlkioWeightDevice) > 0 ||
len(r.BlkioThrottleReadBpsDevice) > 0 || len(r.BlkioThrottleReadBpsDevice) > 0 ||
len(r.BlkioThrottleWriteBpsDevice) > 0 || len(r.BlkioThrottleWriteBpsDevice) > 0 ||
len(r.BlkioThrottleReadIOPSDevice) > 0 || len(r.BlkioThrottleReadIOPSDevice) > 0 ||
len(r.BlkioThrottleWriteIOPSDevice) > 0 len(r.BlkioThrottleWriteIOPSDevice) > 0
} }
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
//
// bfq is expected to be an open, readable handle on that file; a nil handle
// reports false. The file is rewound and its first bytes are read: if they
// parse as a single integer (only the default weight), false is returned,
// otherwise true.
func bfqDeviceWeightSupported(bfq *os.File) bool {
	if bfq == nil {
		return false
	}
	// Seek and Read errors are ignored, as before: when nothing is read,
	// ParseInt fails on the empty input and true is returned.
	_, _ = bfq.Seek(0, 0)
	buf := make([]byte, 32)
	n, _ := bfq.Read(buf)
	// If only a single number (default weight) is read back, we have an older kernel.
	// Parse just the n bytes that were read: the rest of buf is NUL padding,
	// which bytes.TrimSpace does not strip, so parsing the whole buffer would
	// fail for every input and always report support.
	_, err := strconv.ParseInt(string(bytes.TrimSpace(buf[:n])), 10, 64)
	return err != nil
}
func setIo(dirPath string, r *configs.Resources) error { func setIo(dirPath string, r *configs.Resources) error {
if !isIoSet(r) { if !isIoSet(r) {
return nil return nil
} }
// If BFQ IO scheduler is available, use it.
var bfq *os.File
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
var err error
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
if err == nil {
defer bfq.Close()
} else if !os.IsNotExist(err) {
return err
}
}
if r.BlkioWeight != 0 { if r.BlkioWeight != 0 {
if bfq != nil { // Use BFQ. filename := "io.bfq.weight"
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { if err := fscommon.WriteFile(dirPath, filename,
strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
// if io.bfq.weight does not exist, then bfq module is not loaded.
// Fallback to use io.weight with a conversion scheme
if !os.IsNotExist(err) {
return err return err
} }
} else {
// Fallback to io.weight with a conversion scheme.
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight) v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil { if err := fscommon.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
return err return err
} }
} }
} }
if bfqDeviceWeightSupported(bfq) {
for _, wd := range r.BlkioWeightDevice {
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
}
}
}
for _, td := range r.BlkioThrottleReadBpsDevice { for _, td := range r.BlkioThrottleReadBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil { if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleWriteBpsDevice { for _, td := range r.BlkioThrottleWriteBpsDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil { if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleReadIOPSDevice { for _, td := range r.BlkioThrottleReadIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil { if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
return err return err
} }
} }
for _, td := range r.BlkioThrottleWriteIOPSDevice { for _, td := range r.BlkioThrottleWriteIOPSDevice {
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil { if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
return err return err
} }
} }
@ -102,7 +67,7 @@ func setIo(dirPath string, r *configs.Resources) error {
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) { func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
ret := map[string][]string{} ret := map[string][]string{}
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY) f, err := fscommon.OpenFile(dirPath, name, os.O_RDONLY)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -123,22 +88,22 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
} }
func statIo(dirPath string, stats *cgroups.Stats) error { func statIo(dirPath string, stats *cgroups.Stats) error {
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
values, err := readCgroup2MapFile(dirPath, "io.stat") values, err := readCgroup2MapFile(dirPath, "io.stat")
if err != nil { if err != nil {
return err return err
} }
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
var parsedStats cgroups.BlkioStats
for k, v := range values { for k, v := range values {
d := strings.Split(k, ":") d := strings.Split(k, ":")
if len(d) != 2 { if len(d) != 2 {
continue continue
} }
major, err := strconv.ParseUint(d[0], 10, 64) major, err := strconv.ParseUint(d[0], 10, 0)
if err != nil { if err != nil {
return err return err
} }
minor, err := strconv.ParseUint(d[1], 10, 64) minor, err := strconv.ParseUint(d[1], 10, 0)
if err != nil { if err != nil {
return err return err
} }
@ -150,32 +115,15 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
} }
op := d[0] op := d[0]
// Map to the cgroupv1 naming and layout (in separate tables). // Accommodate the cgroup v1 naming
var targetTable *[]cgroups.BlkioStatEntry
switch op { switch op {
// Equivalent to cgroupv1's blkio.io_service_bytes.
case "rbytes": case "rbytes":
op = "Read" op = "read"
targetTable = &parsedStats.IoServiceBytesRecursive
case "wbytes": case "wbytes":
op = "Write" op = "write"
targetTable = &parsedStats.IoServiceBytesRecursive
// Equivalent to cgroupv1's blkio.io_serviced.
case "rios":
op = "Read"
targetTable = &parsedStats.IoServicedRecursive
case "wios":
op = "Write"
targetTable = &parsedStats.IoServicedRecursive
default:
// Skip over entries we cannot map to cgroupv1 stats for now.
// In the future we should expand the stats struct to include
// them.
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
continue
} }
value, err := strconv.ParseUint(d[1], 10, 64) value, err := strconv.ParseUint(d[1], 10, 0)
if err != nil { if err != nil {
return err return err
} }
@ -186,9 +134,9 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
Minor: minor, Minor: minor,
Value: value, Value: value,
} }
*targetTable = append(*targetTable, entry) ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
} }
} }
stats.BlkioStats = parsedStats stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
return nil return nil
} }

View File

@ -52,13 +52,13 @@ func setMemory(dirPath string, r *configs.Resources) error {
} }
// never write empty string to `memory.swap.max`, it means set to 0. // never write empty string to `memory.swap.max`, it means set to 0.
if swapStr != "" { if swapStr != "" {
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil { if err := fscommon.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
return err return err
} }
} }
if val := numToStr(r.Memory); val != "" { if val := numToStr(r.Memory); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil { if err := fscommon.WriteFile(dirPath, "memory.max", val); err != nil {
return err return err
} }
} }
@ -66,7 +66,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
// cgroup.Resources.KernelMemory is ignored // cgroup.Resources.KernelMemory is ignored
if val := numToStr(r.MemoryReservation); val != "" { if val := numToStr(r.MemoryReservation); val != "" {
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil { if err := fscommon.WriteFile(dirPath, "memory.low", val); err != nil {
return err return err
} }
} }
@ -76,7 +76,7 @@ func setMemory(dirPath string, r *configs.Resources) error {
func statMemory(dirPath string, stats *cgroups.Stats) error { func statMemory(dirPath string, stats *cgroups.Stats) error {
// Set stats from memory.stat. // Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY) statsFile, err := fscommon.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
if err != nil { if err != nil {
return err return err
} }

View File

@ -23,7 +23,7 @@ func setPids(dirPath string, r *configs.Resources) error {
return nil return nil
} }
if val := numToStr(r.PidsLimit); val != "" { if val := numToStr(r.PidsLimit); val != "" {
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil { if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil {
return err return err
} }
} }
@ -34,9 +34,9 @@ func setPids(dirPath string, r *configs.Resources) error {
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error { func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
// if the controller is not enabled, let's read PIDS from cgroup.procs // if the controller is not enabled, let's read PIDS from cgroup.procs
// (or threads if cgroup.threads is enabled) // (or threads if cgroup.threads is enabled)
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs") contents, err := fscommon.ReadFile(dirPath, "cgroup.procs")
if errors.Is(err, unix.ENOTSUP) { if errors.Is(err, unix.ENOTSUP) {
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads") contents, err = fscommon.ReadFile(dirPath, "cgroup.threads")
} }
if err != nil { if err != nil {
return err return err

View File

@ -0,0 +1,51 @@
// +build linux
package fscommon
import (
"bytes"
"os"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// WriteFile opens the cgroup file named file inside dir write-only and writes
// data to it. An open error is returned unchanged; a write error is wrapped
// with the data that could not be written.
// It is supposed to be used for cgroup files only.
func WriteFile(dir, file, data string) error {
	fd, openErr := OpenFile(dir, file, unix.O_WRONLY)
	if openErr != nil {
		return openErr
	}
	defer fd.Close()

	writeErr := retryingWriteFile(fd, data)
	if writeErr == nil {
		return nil
	}
	return errors.Wrapf(writeErr, "failed to write %q", data)
}
// ReadFile opens the cgroup file named file inside dir read-only and returns
// everything read from it as a string. If reading fails part-way, the data
// read so far is returned together with the error.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
	fd, err := OpenFile(dir, file, unix.O_RDONLY)
	if err != nil {
		return "", err
	}
	defer fd.Close()

	var contents bytes.Buffer
	if _, err := contents.ReadFrom(fd); err != nil {
		return contents.String(), err
	}
	return contents.String(), nil
}
// retryingWriteFile writes data to fd, repeating the write for as long as it
// fails with EINTR (each interruption is logged). The result of the first
// write that is not interrupted is returned, nil on success.
func retryingWriteFile(fd *os.File, data string) error {
	payload := []byte(data)
	for {
		_, err := fd.Write(payload)
		if !errors.Is(err, unix.EINTR) {
			return err
		}
		logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
	}
}

View File

@ -1,7 +1,6 @@
package cgroups package fscommon
import ( import (
"bytes"
"os" "os"
"strings" "strings"
"sync" "sync"
@ -11,54 +10,6 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
return openFile(dir, file, flags)
}
// ReadFile opens the cgroup file named file inside dir read-only and returns
// everything read from it as a string. If reading fails part-way, the data
// read so far is returned together with the error.
// It is supposed to be used for cgroup files only.
func ReadFile(dir, file string) (string, error) {
	fd, err := OpenFile(dir, file, unix.O_RDONLY)
	if err != nil {
		return "", err
	}
	defer fd.Close()

	var contents bytes.Buffer
	if _, err := contents.ReadFrom(fd); err != nil {
		return contents.String(), err
	}
	return contents.String(), nil
}
// WriteFile writes data into the cgroup file named file inside dir. It is
// meant to be used for cgroup files only.
//
// Open errors are passed through as-is; write errors are wrapped with the
// data that failed to be written.
func WriteFile(dir, file, data string) error {
	dst, openErr := OpenFile(dir, file, unix.O_WRONLY)
	if openErr != nil {
		return openErr
	}
	defer dst.Close()

	if writeErr := retryingWriteFile(dst, data); writeErr != nil {
		wrapped := errors.Wrapf(writeErr, "failed to write %q", data)
		return wrapped
	}
	return nil
}
// retryingWriteFile keeps writing data to fd until the write finishes with
// anything other than EINTR, logging each interruption at info level, and
// returns the error (or nil) of that final attempt.
func retryingWriteFile(fd *os.File, data string) error {
	for {
		_, writeErr := fd.Write([]byte(data))
		interrupted := errors.Is(writeErr, unix.EINTR)
		if !interrupted {
			return writeErr
		}
		logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
	}
}
const ( const (
cgroupfsDir = "/sys/fs/cgroup" cgroupfsDir = "/sys/fs/cgroup"
cgroupfsPrefix = cgroupfsDir + "/" cgroupfsPrefix = cgroupfsDir + "/"
@ -77,8 +28,7 @@ var (
func prepareOpenat2() error { func prepareOpenat2() error {
prepOnce.Do(func() { prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH, Flags: unix.O_DIRECTORY | unix.O_PATH})
})
if err != nil { if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
if err != unix.ENOSYS { if err != unix.ENOSYS {
@ -102,6 +52,7 @@ func prepareOpenat2() error {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
} }
}) })
return prepErr return prepErr
@ -109,7 +60,10 @@ func prepareOpenat2() error {
// OpenFile opens a cgroup file in a given dir with given flags. // OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only. // It is supposed to be used for cgroup files only.
func openFile(dir, file string, flags int) (*os.File, error) { func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
}
mode := os.FileMode(0) mode := os.FileMode(0)
if TestMode && flags&os.O_WRONLY != 0 { if TestMode && flags&os.O_WRONLY != 0 {
// "emulate" cgroup fs for unit tests // "emulate" cgroup fs for unit tests

View File

@ -8,19 +8,10 @@ import (
"math" "math"
"strconv" "strconv"
"strings" "strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
) )
var ( var (
ErrNotValidFormat = errors.New("line is not a valid key value format") ErrNotValidFormat = errors.New("line is not a valid key value format")
// Deprecated: use cgroups.OpenFile instead.
OpenFile = cgroups.OpenFile
// Deprecated: use cgroups.ReadFile instead.
ReadFile = cgroups.ReadFile
// Deprecated: use cgroups.WriteFile instead.
WriteFile = cgroups.WriteFile
) )
// ParseUint converts a string to an uint64 integer. // ParseUint converts a string to an uint64 integer.
@ -66,7 +57,7 @@ func ParseKeyValue(t string) (string, uint64, error) {
// and returns a value of the specified key. ParseUint is used for value // and returns a value of the specified key. ParseUint is used for value
// conversion. // conversion.
func GetValueByKey(path, file, key string) (uint64, error) { func GetValueByKey(path, file, key string) (uint64, error) {
content, err := cgroups.ReadFile(path, file) content, err := ReadFile(path, file)
if err != nil { if err != nil {
return 0, err return 0, err
} }
@ -104,7 +95,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {
// GetCgroupParamInt reads a single int64 value from specified cgroup file. // GetCgroupParamInt reads a single int64 value from specified cgroup file.
// If the value read is "max", the math.MaxInt64 is returned. // If the value read is "max", the math.MaxInt64 is returned.
func GetCgroupParamInt(path, file string) (int64, error) { func GetCgroupParamInt(path, file string) (int64, error) {
contents, err := cgroups.ReadFile(path, file) contents, err := ReadFile(path, file)
if err != nil { if err != nil {
return 0, err return 0, err
} }
@ -122,7 +113,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {
// GetCgroupParamString reads a string from the specified cgroup file. // GetCgroupParamString reads a string from the specified cgroup file.
func GetCgroupParamString(path, file string) (string, error) { func GetCgroupParamString(path, file string) (string, error) {
contents, err := cgroups.ReadFile(path, file) contents, err := ReadFile(path, file)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -158,27 +158,14 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
return "", nil return "", nil
} }
// deviceAllowEntry is one element of the systemd DeviceAllow property.
// DeviceAllow is the dbus type "a(ss)" (an array of two-string structs),
// which means we need a struct to represent it in Go.
type deviceAllowEntry struct {
// Path is the first string of the pair.
// NOTE(review): presumably a device node path or device-group specifier — confirm against callers.
Path string
// Perms is the second string of the pair.
// NOTE(review): presumably the access permissions granted for Path — confirm against callers.
Perms string
}
// allowAllDevices returns the systemd unit properties that grant access to
// every device: the device policy is set to "auto" and the DeviceAllow list
// is replaced with an empty one.
func allowAllDevices() []systemdDbus.Property {
	policy := newProp("DevicePolicy", "auto")
	// An explicitly empty list removes all DeviceAllow rules.
	noRules := newProp("DeviceAllow", []deviceAllowEntry{})
	return []systemdDbus.Property{policy, noRules}
}
// generateDeviceProperties takes the configured device rules and generates a // generateDeviceProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly. // corresponding set of systemd properties to configure the devices correctly.
func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) { func generateDeviceProperties(rules []*devices.Rule) ([]systemdDbus.Property, error) {
if r.SkipDevices { // DeviceAllow is the type "a(ss)" which means we need a temporary struct
return nil, nil // to represent it in Go.
type deviceAllowEntry struct {
Path string
Perms string
} }
properties := []systemdDbus.Property{ properties := []systemdDbus.Property{
@ -190,7 +177,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Figure out the set of rules. // Figure out the set of rules.
configEmu := &cgroupdevices.Emulator{} configEmu := &cgroupdevices.Emulator{}
for _, rule := range r.Devices { for _, rule := range rules {
if err := configEmu.Apply(*rule); err != nil { if err := configEmu.Apply(*rule); err != nil {
return nil, errors.Wrap(err, "apply rule for systemd") return nil, errors.Wrap(err, "apply rule for systemd")
} }
@ -202,7 +189,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
if configEmu.IsBlacklist() { if configEmu.IsBlacklist() {
// However, if we're dealing with an allow-all rule then we can do it. // However, if we're dealing with an allow-all rule then we can do it.
if configEmu.IsAllowAll() { if configEmu.IsAllowAll() {
return allowAllDevices(), nil return []systemdDbus.Property{
// Run in white-list mode by setting to "auto" and removing all
// DeviceAllow rules.
newProp("DevicePolicy", "auto"),
newProp("DeviceAllow", []deviceAllowEntry{}),
}, nil
} }
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
return properties, nil return properties, nil
@ -211,7 +203,8 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// Now generate the set of rules we actually need to apply. Unlike the // Now generate the set of rules we actually need to apply. Unlike the
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all // normal devices cgroup, in "strict" mode systemd defaults to a deny-all
// whitelist which is the default for devices.Emulator. // whitelist which is the default for devices.Emulator.
finalRules, err := configEmu.Rules() baseEmu := &cgroupdevices.Emulator{}
finalRules, err := baseEmu.Transition(configEmu)
if err != nil { if err != nil {
return nil, errors.Wrap(err, "get simplified rules for systemd") return nil, errors.Wrap(err, "get simplified rules for systemd")
} }
@ -313,7 +306,7 @@ func getUnitName(c *configs.Cgroup) string {
// isDbusError returns true if the error is a specific dbus error. // isDbusError returns true if the error is a specific dbus error.
func isDbusError(err error, name string) bool { func isDbusError(err error, name string) bool {
if err != nil { if err != nil {
var derr dbus.Error var derr *dbus.Error
if errors.As(err, &derr) { if errors.As(err, &derr) {
return strings.Contains(derr.Name, name) return strings.Contains(derr.Name, name)
} }
@ -362,9 +355,6 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
return err return err
}) })
if err == nil { if err == nil {
timeout := time.NewTimer(30 * time.Second)
defer timeout.Stop()
select { select {
case s := <-statusChan: case s := <-statusChan:
close(statusChan) close(statusChan)
@ -372,8 +362,8 @@ func stopUnit(cm *dbusConnManager, unitName string) error {
if s != "done" { if s != "done" {
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s) logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
} }
case <-timeout.C: case <-time.After(time.Second):
return errors.New("Timed out while waiting for systemd to remove " + unitName) logrus.Warnf("Timed out while waiting for StopUnit(%s) completion signal from dbus. Continuing...", unitName)
} }
} }
return nil return nil
@ -486,7 +476,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
} }
if cpus != "" { if cpus != "" {
bits, err := RangeToBits(cpus) bits, err := rangeToBits(cpus)
if err != nil { if err != nil {
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w", return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
cpus, err) cpus, err)
@ -495,7 +485,7 @@ func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems st
newProp("AllowedCPUs", bits)) newProp("AllowedCPUs", bits))
} }
if mems != "" { if mems != "" {
bits, err := RangeToBits(mems) bits, err := rangeToBits(mems)
if err != nil { if err != nil {
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w", return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
mems, err) mems, err)

View File

@ -5,15 +5,15 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/bits-and-blooms/bitset"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/willf/bitset"
) )
// RangeToBits converts a text representation of a CPU mask (as written to // rangeToBits converts a text representation of a CPU mask (as written to
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes // or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
// with the corresponding bits set (as consumed by systemd over dbus as // with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value). // AllowedCPUs/AllowedMemoryNodes unit property value).
func RangeToBits(str string) ([]byte, error) { func rangeToBits(str string) ([]byte, error) {
bits := &bitset.BitSet{} bits := &bitset.BitSet{}
for _, r := range strings.Split(str, ",") { for _, r := range strings.Split(str, ",") {

View File

@ -17,16 +17,14 @@ var (
dbusRootless bool dbusRootless bool
) )
type dbusConnManager struct{} type dbusConnManager struct {
}
// newDbusConnManager initializes systemd dbus connection manager. // newDbusConnManager initializes systemd dbus connection manager.
func newDbusConnManager(rootless bool) *dbusConnManager { func newDbusConnManager(rootless bool) *dbusConnManager {
dbusMu.Lock()
defer dbusMu.Unlock()
if dbusInited && rootless != dbusRootless { if dbusInited && rootless != dbusRootless {
panic("can't have both root and rootless dbus") panic("can't have both root and rootless dbus")
} }
dbusInited = true
dbusRootless = rootless dbusRootless = rootless
return &dbusConnManager{} return &dbusConnManager{}
} }

View File

@ -61,7 +61,7 @@ var legacySubsystems = []subsystem{
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
var properties []systemdDbus.Property var properties []systemdDbus.Property
deviceProperties, err := generateDeviceProperties(r) deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -207,10 +207,9 @@ func (m *legacyManager) Destroy() error {
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups)) stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
// Both on success and on error, cleanup all the cgroups // Both on success and on error, cleanup all the cgroups we are aware of.
// we are aware of, as some of them were created directly // Some of them were created directly by Apply() and are not managed by systemd.
// by Apply() and are not managed by systemd. if err := cgroups.RemovePaths(m.paths); err != nil {
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
return err return err
} }
@ -238,7 +237,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
} }
default: default:
if path, ok := m.paths[name]; ok { if path, ok := m.paths[name]; ok {
if err := os.MkdirAll(path, 0o755); err != nil { if err := os.MkdirAll(path, 0755); err != nil {
return err return err
} }
if err := cgroups.WriteCgroupProc(path, pid); err != nil { if err := cgroups.WriteCgroupProc(path, pid); err != nil {
@ -339,24 +338,27 @@ func (m *legacyManager) Set(r *configs.Resources) error {
return err return err
} }
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings. // We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules // The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices // is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running // (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup // containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it. // error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil { targetFreezerState := configs.Undefined
logrus.Infof("freeze container before SetUnitProperties failed: %v", err) if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
} }
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil { if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@ -96,7 +96,7 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
newProp("CPUWeight", num)) newProp("CPUWeight", num))
case "cpuset.cpus", "cpuset.mems": case "cpuset.cpus", "cpuset.mems":
bits, err := RangeToBits(v) bits, err := rangeToBits(v)
if err != nil { if err != nil {
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err) return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
} }
@ -172,7 +172,7 @@ func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
// aren't the end of the world, but it is a bit concerning. However // aren't the end of the world, but it is a bit concerning. However
// it's unclear if systemd removes all eBPF programs attached when // it's unclear if systemd removes all eBPF programs attached when
// doing SetUnitProperties... // doing SetUnitProperties...
deviceProperties, err := generateDeviceProperties(r) deviceProperties, err := generateDeviceProperties(r.Devices)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -418,24 +418,27 @@ func (m *unifiedManager) Set(r *configs.Resources) error {
return err return err
} }
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err := m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
// We have to freeze the container while systemd sets the cgroup settings. // We have to freeze the container while systemd sets the cgroup settings.
// The reason for this is that systemd's application of DeviceAllow rules // The reason for this is that systemd's application of DeviceAllow rules
// is done disruptively, resulting in spurious errors to common devices // is done disruptively, resulting in spurious errors to common devices
// (unlike our fs driver, they will happily write deny-all rules to running // (unlike our fs driver, they will happily write deny-all rules to running
// containers). So we freeze the container to avoid them hitting the cgroup // containers). So we freeze the container to avoid them hitting the cgroup
// error. But if the freezer cgroup isn't supported, we just warn about it. // error. But if the freezer cgroup isn't supported, we just warn about it.
if err := m.Freeze(configs.Frozen); err != nil { targetFreezerState := configs.Undefined
logrus.Infof("freeze container before SetUnitProperties failed: %v", err) if !m.cgroups.SkipDevices {
// Figure out the current freezer state, so we can revert to it after we
// temporarily freeze the container.
targetFreezerState, err = m.GetFreezerState()
if err != nil {
return err
}
if targetFreezerState == configs.Undefined {
targetFreezerState = configs.Thawed
}
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
}
} }
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil { if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {

View File

@ -15,6 +15,7 @@ import (
"sync" "sync"
"time" "time"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/userns" "github.com/opencontainers/runc/libcontainer/userns"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
@ -87,7 +88,7 @@ func GetAllSubsystems() ([]string, error) {
// - freezer: implemented in kernel 5.2 // - freezer: implemented in kernel 5.2
// We assume these are always available, as it is hard to detect availability. // We assume these are always available, as it is hard to detect availability.
pseudo := []string{"devices", "freezer"} pseudo := []string{"devices", "freezer"}
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers") data, err := fscommon.ReadFile("/sys/fs/cgroup", "cgroup.controllers")
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -266,6 +267,7 @@ func RemovePaths(paths map[string]string) (err error) {
case retries - 1: case retries - 1:
logrus.WithError(err).Error("Failed to remove cgroup") logrus.WithError(err).Error("Failed to remove cgroup")
} }
} }
_, err := os.Stat(p) _, err := os.Stat(p)
// We need this strange way of checking cgroups existence because // We need this strange way of checking cgroups existence because
@ -374,7 +376,7 @@ func WriteCgroupProc(dir string, pid int) error {
return nil return nil
} }
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) file, err := fscommon.OpenFile(dir, CgroupProcesses, os.O_WRONLY)
if err != nil { if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
} }

View File

@ -13,12 +13,12 @@ const (
Thawed FreezerState = "THAWED" Thawed FreezerState = "THAWED"
) )
// Cgroup holds properties of a cgroup on Linux.
type Cgroup struct { type Cgroup struct {
// Name specifies the name of the cgroup // Deprecated, use Path instead
Name string `json:"name,omitempty"` Name string `json:"name,omitempty"`
// Parent specifies the name of parent of cgroup or slice // name of parent of cgroup or slice
// Deprecated, use Path instead
Parent string `json:"parent,omitempty"` Parent string `json:"parent,omitempty"`
// Path specifies the path to cgroups that are created and/or joined by the container. // Path specifies the path to cgroups that are created and/or joined by the container.
@ -127,8 +127,8 @@ type Resources struct {
// SkipDevices allows to skip configuring device permissions. // SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods) // Used by e.g. kubelet while creating a parent cgroup (kubepods)
// common for many containers, and by runc update. // common for many containers.
// //
// NOTE it is impossible to start a container which has this flag set. // NOTE it is impossible to start a container which has this flag set.
SkipDevices bool `json:"-"` SkipDevices bool `json:"skip_devices"`
} }

View File

@ -2,7 +2,7 @@
package configs package configs
// Cgroup holds properties of a cgroup on Linux
// TODO Windows: This can ultimately be entirely factored out on Windows as // TODO Windows: This can ultimately be entirely factored out on Windows as
// cgroups are a Unix-specific construct. // cgroups are a Unix-specific construct.
type Cgroup struct{} type Cgroup struct {
}

View File

@ -208,11 +208,9 @@ type Config struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"` RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
} }
type ( type HookName string
HookName string type HookList []Hook
HookList []Hook type Hooks map[HookName]HookList
Hooks map[HookName]HookList
)
const ( const (
// Prestart commands are executed after the container namespaces are created, // Prestart commands are executed after the container namespaces are created,
@ -389,7 +387,7 @@ func (c Command) Run(s *specs.State) error {
case err := <-errC: case err := <-errC:
return err return err
case <-timerCh: case <-timerCh:
_ = cmd.Process.Kill() cmd.Process.Kill()
<-errC <-errC
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds()) return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
} }

View File

@ -0,0 +1,17 @@
package configs
import "github.com/opencontainers/runc/libcontainer/devices"
// Aliases for types declared in the libcontainer/devices package.
type (
// Device is an alias for devices.Device.
//
// Deprecated: use libcontainer/devices.Device instead.
Device = devices.Device
// DeviceRule is an alias for devices.Rule.
//
// Deprecated: use libcontainer/devices.Rule instead.
DeviceRule = devices.Rule
// DeviceType is an alias for devices.Type.
//
// Deprecated: use libcontainer/devices.Type instead.
DeviceType = devices.Type
// DevicePermissions is an alias for devices.Permissions.
//
// Deprecated: use libcontainer/devices.Permissions instead.
DevicePermissions = devices.Permissions
)

View File

@ -3,7 +3,7 @@ package configs
const ( const (
// EXT_COPYUP is a directive to copy up the contents of a directory when // EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it. // a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning EXT_COPYUP = 1 << iota
) )
type Mount struct { type Mount struct {

View File

@ -4,4 +4,5 @@ package configs
// Namespace defines configuration for each namespace. It specifies an // Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns. // alternate path that is able to be joined via setns.
type Namespace struct{} type Namespace struct {
}

View File

@ -50,10 +50,7 @@ type Network struct {
HairpinMode bool `json:"hairpin_mode"` HairpinMode bool `json:"hairpin_mode"`
} }
// Route defines a routing table entry. // Routes can be specified to create entries in the route table as the container is started
//
// Routes can be specified to create entries in the routing table as the container
// is started.
// //
// All of destination, source, and gateway should be either IPv4 or IPv6. // All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their // One of the three options must be present, and omitted entries will use their
@ -61,15 +58,15 @@ type Network struct {
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard // gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table. // destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct { type Route struct {
// Destination specifies the destination IP address and mask in the CIDR form. // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"` Destination string `json:"destination"`
// Source specifies the source IP address and mask in the CIDR form. // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"` Source string `json:"source"`
// Gateway specifies the gateway IP address. // Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"` Gateway string `json:"gateway"`
// InterfaceName specifies the device to set this route up for, for example eth0. // The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"` InterfaceName string `json:"interface_name"`
} }

View File

@ -12,7 +12,6 @@ import (
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux" selinux "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
@ -24,13 +23,13 @@ func New() Validator {
return &ConfigValidator{} return &ConfigValidator{}
} }
type ConfigValidator struct{} type ConfigValidator struct {
}
type check func(config *configs.Config) error type check func(config *configs.Config) error
func (v *ConfigValidator) Validate(config *configs.Config) error { func (v *ConfigValidator) Validate(config *configs.Config) error {
checks := []check{ checks := []check{
v.cgroups,
v.rootfs, v.rootfs,
v.network, v.network,
v.hostname, v.hostname,
@ -40,21 +39,17 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
v.sysctl, v.sysctl,
v.intelrdt, v.intelrdt,
v.rootlessEUID, v.rootlessEUID,
v.mounts,
} }
for _, c := range checks { for _, c := range checks {
if err := c(config); err != nil { if err := c(config); err != nil {
return err return err
} }
} }
// Relaxed validation rules for backward compatibility if err := v.cgroups(config); err != nil {
warns := []check{ return err
v.mounts, // TODO (runc v1.x.x): make this an error instead of a warning
}
for _, c := range warns {
if err := c(config); err != nil {
logrus.WithError(err).Warnf("invalid configuration")
}
} }
return nil return nil
} }

View File

@ -9,7 +9,7 @@ import (
// mount initializes the console inside the rootfs mounting with the specified mount label // mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console. // and applying the correct ownership of the console.
func mountConsole(slavePath string) error { func mountConsole(slavePath string) error {
oldMask := unix.Umask(0o000) oldMask := unix.Umask(0000)
defer unix.Umask(oldMask) defer unix.Umask(oldMask)
f, err := os.Create("/dev/console") f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) { if err != nil && !os.IsExist(err) {

View File

@ -437,8 +437,8 @@ func (c *linuxContainer) createExecFifo() error {
if _, err := os.Stat(fifoName); err == nil { if _, err := os.Stat(fifoName); err == nil {
return fmt.Errorf("exec fifo %s already exists", fifoName) return fmt.Errorf("exec fifo %s already exists", fifoName)
} }
oldMask := unix.Umask(0o000) oldMask := unix.Umask(0000)
if err := unix.Mkfifo(fifoName, 0o622); err != nil { if err := unix.Mkfifo(fifoName, 0622); err != nil {
unix.Umask(oldMask) unix.Umask(oldMask)
return err return err
} }
@ -699,6 +699,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
var criuFeatures *criurpc.CriuFeatures var criuFeatures *criurpc.CriuFeatures
func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error {
t := criurpc.CriuReqType_FEATURE_CHECK t := criurpc.CriuReqType_FEATURE_CHECK
// make sure the features we are looking for are really not from // make sure the features we are looking for are really not from
@ -760,6 +761,7 @@ func compareCriuVersion(criuVersion int, minVersion int) error {
// checkCriuVersion checks Criu version greater than or equal to minVersion // checkCriuVersion checks Criu version greater than or equal to minVersion
func (c *linuxContainer) checkCriuVersion(minVersion int) error { func (c *linuxContainer) checkCriuVersion(minVersion int) error {
// If the version of criu has already been determined there is no need // If the version of criu has already been determined there is no need
// to ask criu for the version again. Use the value from c.criuVersion. // to ask criu for the version again. Use the value from c.criuVersion.
if c.criuVersion != 0 { if c.criuVersion != 0 {
@ -968,7 +970,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// Since a container can be C/R'ed multiple times, // Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist. // the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) { if err := os.Mkdir(criuOpts.ImagesDirectory, 0700); err != nil && !os.IsExist(err) {
return err return err
} }
@ -976,7 +978,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
} }
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
return err return err
} }
@ -1046,7 +1048,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
} }
} }
// pre-dump may need parentImage param to complete iterative migration //pre-dump may need parentImage param to complete iterative migration
if criuOpts.ParentImage != "" { if criuOpts.ParentImage != "" {
rpcOpts.ParentImg = proto.String(criuOpts.ParentImage) rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
rpcOpts.TrackMem = proto.Bool(true) rpcOpts.TrackMem = proto.Bool(true)
@ -1144,7 +1146,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
return err return err
} }
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600) err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0600)
if err != nil { if err != nil {
return err return err
} }
@ -1215,7 +1217,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil { if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil {
return err return err
} }
if err := os.MkdirAll(dest, 0o755); err != nil { if err := os.MkdirAll(dest, 0755); err != nil {
return err return err
} }
} }
@ -1316,7 +1318,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
} }
// Since a container can be C/R'ed multiple times, // Since a container can be C/R'ed multiple times,
// the work directory may already exist. // the work directory may already exist.
if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { if err := os.Mkdir(criuOpts.WorkDirectory, 0700); err != nil && !os.IsExist(err) {
return err return err
} }
workDir, err := os.Open(criuOpts.WorkDirectory) workDir, err := os.Open(criuOpts.WorkDirectory)
@ -1338,7 +1340,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// c.config.Rootfs is bind-mounted to a temporary directory // c.config.Rootfs is bind-mounted to a temporary directory
// to satisfy these requirements. // to satisfy these requirements.
root := filepath.Join(c.root, "criu-root") root := filepath.Join(c.root, "criu-root")
if err := os.Mkdir(root, 0o755); err != nil { if err := os.Mkdir(root, 0755); err != nil {
return err return err
} }
defer os.Remove(root) defer os.Remove(root)
@ -1350,7 +1352,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
if err != nil { if err != nil {
return err return err
} }
defer unix.Unmount(root, unix.MNT_DETACH) //nolint: errcheck defer unix.Unmount(root, unix.MNT_DETACH)
t := criurpc.CriuReqType_RESTORE t := criurpc.CriuReqType_RESTORE
req := &criurpc.CriuReq{ req := &criurpc.CriuReq{
Type: &t, Type: &t,
@ -1375,15 +1377,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}, },
} }
if criuOpts.LsmProfile != "" {
// CRIU older than 3.16 has a bug which breaks the possibility
// to set a different LSM profile.
if err := c.checkCriuVersion(31600); err != nil {
return errors.New("--lsm-profile requires at least CRIU 3.16")
}
req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile)
}
c.handleCriuConfigurationFile(req.Opts) c.handleCriuConfigurationFile(req.Opts)
if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil { if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil {
@ -1672,7 +1665,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
break break
} }
_ = criuClientCon.CloseWrite() criuClientCon.CloseWrite()
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process. // Here we want to wait only the CRIU process.
criuProcessState, err = criuProcess.Wait() criuProcessState, err = criuProcess.Wait()

View File

@ -29,5 +29,4 @@ type CriuOpts struct {
AutoDedup bool // auto deduplication for incremental dumps AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd int // fd for feedback when lazy server is ready StatusFd int // fd for feedback when lazy server is ready
LsmProfile string // LSM profile used to restore the container
} }

View File

@ -11,8 +11,10 @@ import (
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
// ErrNotADevice denotes that a file is not a valid linux device. var (
var ErrNotADevice = errors.New("not a device node") // ErrNotADevice denotes that a file is not a valid linux device.
ErrNotADevice = errors.New("not a device node")
)
// Testing dependencies // Testing dependencies
var ( var (
@ -27,9 +29,8 @@ func mkDev(d *Rule) (uint64, error) {
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
} }
// DeviceFromPath takes the path to a device and its cgroup_permissions (which // Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the
// cannot be easily queried) to look up the information about a linux device // information about a linux device and return that information as a Device struct.
// and returns that information as a Device struct.
func DeviceFromPath(path, permissions string) (*Device, error) { func DeviceFromPath(path, permissions string) (*Device, error) {
var stat unix.Stat_t var stat unix.Stat_t
err := unixLstat(path, &stat) err := unixLstat(path, &stat)

View File

@ -196,11 +196,7 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
Validator: validate.New(), Validator: validate.New(),
CriuPath: "criu", CriuPath: "criu",
} }
Cgroupfs(l)
if err := Cgroupfs(l); err != nil {
return nil, err
}
for _, opt := range options { for _, opt := range options {
if opt == nil { if opt == nil {
continue continue
@ -291,7 +287,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if l.Root == "" { if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
} }
// when load, we need to check id is valid or not. //when load, we need to check id is valid or not.
if err := l.validateID(id); err != nil { if err := l.validateID(id); err != nil {
return nil, err return nil, err
} }

View File

@ -112,19 +112,9 @@ func populateProcessEnvironment(env []string) error {
for _, pair := range env { for _, pair := range env {
p := strings.SplitN(pair, "=", 2) p := strings.SplitN(pair, "=", 2)
if len(p) < 2 { if len(p) < 2 {
return fmt.Errorf("invalid environment variable: %q", pair) return fmt.Errorf("invalid environment '%v'", pair)
} }
name, val := p[0], p[1] if err := os.Setenv(p[0], p[1]); err != nil {
if name == "" {
return fmt.Errorf("environment variable name can't be empty: %q", pair)
}
if strings.IndexByte(name, 0) >= 0 {
return fmt.Errorf("environment variable name can't contain null(\\x00): %q", pair)
}
if strings.IndexByte(val, 0) >= 0 {
return fmt.Errorf("environment variable value can't contain null(\\x00): %q", pair)
}
if err := os.Setenv(name, val); err != nil {
return err return err
} }
} }

View File

@ -1,6 +1,8 @@
package intelrdt package intelrdt
var cmtEnabled bool var (
cmtEnabled bool
)
// Check if Intel RDT/CMT is enabled. // Check if Intel RDT/CMT is enabled.
func IsCMTEnabled() bool { func IsCMTEnabled() bool {

View File

@ -2,8 +2,10 @@
package intelrdt package intelrdt
// The flag to indicate if Intel RDT/MBM is enabled var (
var mbmEnabled bool // The flag to indicate if Intel RDT/MBM is enabled
mbmEnabled bool
)
// Check if Intel RDT/MBM is enabled. // Check if Intel RDT/MBM is enabled.
func IsMBMEnabled() bool { func IsMBMEnabled() bool {

Some files were not shown because too many files have changed in this diff Show More