runtime: update runc dependency

To bring fix to CVE-2022-29162.

Fixes: #5217
Signed-off-by: Peng Tao <bergwolf@hyper.sh>
This commit is contained in:
Peng Tao 2022-09-21 17:19:16 +08:00
parent 7fbc883879
commit 9628c7df0c
85 changed files with 1197 additions and 2476 deletions

View File

@ -62,7 +62,7 @@ require (
replace (
github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.2
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.3
github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.3
github.com/uber-go/atomic => go.uber.org/atomic v1.5.1
google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8
)

View File

@ -188,7 +188,6 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM=
@ -219,7 +218,6 @@ github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XL
github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/checkpoint-restore/checkpointctl v0.0.0-20220321135231-33f4a66335f0/go.mod h1:67kWC1PXQLR3lM/mmNnu3Kzn7K4TSWZAGUuQP1JSngk=
github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M=
github.com/checkpoint-restore/go-criu/v5 v5.2.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
@ -229,7 +227,6 @@ github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmE
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc=
github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs=
github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k=
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
@ -414,7 +411,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ=
@ -1201,8 +1197,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/runc v1.0.3 h1:1hbqejyQWCJBvtKAfdO0b1FmaEf2z/bxnjqbARass5k=
github.com/opencontainers/runc v1.0.3/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0=
github.com/opencontainers/runc v1.1.3 h1:vIXrkId+0/J2Ymu2m7VjGvbSlAId9XNRPhn2p4b+d8w=
github.com/opencontainers/runc v1.1.3/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg=
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
@ -1324,7 +1320,7 @@ github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZ
github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24q7p+U=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/seccomp/libseccomp-golang v0.10.0 h1:aA4bp+/Zzi0BnWZ2F1wgNBs5gTpm+na2rWM6M9YjLpY=
github.com/seccomp/libseccomp-golang v0.10.0/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
github.com/secure-systems-lab/go-securesystemslib v0.3.1/go.mod h1:o8hhjkbNl2gOamKUA/eNW3xUrntHT9L4W89W1nfj43U=

View File

@ -1,26 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
target

View File

@ -1,37 +0,0 @@
language: go
sudo: false
branches:
except:
- release
branches:
only:
- master
- travis
go:
- "1.11.x"
- tip
matrix:
allow_failures:
- go: tip
before_install:
- if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
- if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
- go get github.com/mattn/goveralls
before_script:
- make deps
script:
- make qa
after_failure:
- cat ./target/test/report.xml
after_success:
- if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;

View File

@ -1,27 +0,0 @@
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,93 +0,0 @@
# bitset
*Go language library to map between non-negative integers and boolean values*
[![Test](https://github.com/bits-and-blooms/bitset/workflows/Test/badge.svg)](https://github.com/willf/bitset/actions?query=workflow%3ATest)
[![Go Report Card](https://goreportcard.com/badge/github.com/willf/bitset)](https://goreportcard.com/report/github.com/willf/bitset)
[![PkgGoDev](https://pkg.go.dev/badge/github.com/bits-and-blooms/bitset?tab=doc)](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc)
## Description
Package bitset implements bitsets, a mapping between non-negative integers and boolean values.
It should be more efficient than map[uint] bool.
It provides methods for setting, clearing, flipping, and testing individual integers.
But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits.
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk. On creation, a hint can be given for the number of bits that will be used.
Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining.
### Example use:
```go
package main
import (
"fmt"
"math/rand"
"github.com/bits-and-blooms/bitset"
)
func main() {
fmt.Printf("Hello from BitSet!\n")
var b bitset.BitSet
// play some Go Fish
for i := 0; i < 100; i++ {
card1 := uint(rand.Intn(52))
card2 := uint(rand.Intn(52))
b.Set(card1)
if b.Test(card2) {
fmt.Println("Go Fish!")
}
b.Clear(card1)
}
// Chaining
b.Set(10).Set(11)
for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) {
fmt.Println("The following bit is set:", i)
}
if b.Intersection(bitset.New(100).Set(10)).Count() == 1 {
fmt.Println("Intersection works.")
} else {
fmt.Println("Intersection doesn't work???")
}
}
```
As an alternative to BitSets, one should check out the 'big' package, which provides a (less set-theoretical) view of bitsets.
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
## Memory Usage
The memory usage of a bitset using N bits is at least N/8 bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](http://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
## Implementation Note
Go 1.9 introduced a native `math/bits` library. We provide backward compatibility to Go 1.7, which might be removed.
It is possible that a later version will match the `math/bits` return signature for counts (which is `int`, rather than our library's `unit64`). If so, the version will be bumped.
## Installation
```bash
go get github.com/bits-and-blooms/bitset
```
## Contributing
If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
## Running all tests
Before committing the code, please check if it passes tests, has adequate coverage, etc.
```bash
go test
go test -cover
```

View File

@ -1,39 +0,0 @@
# Go
# Build your Go project.
# Add steps that test, save build artifacts, deploy, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/go
trigger:
- master
pool:
vmImage: 'Ubuntu-16.04'
variables:
GOBIN: '$(GOPATH)/bin' # Go binaries path
GOROOT: '/usr/local/go1.11' # Go installation path
GOPATH: '$(system.defaultWorkingDirectory)/gopath' # Go workspace path
modulePath: '$(GOPATH)/src/github.com/$(build.repository.name)' # Path to the module's code
steps:
- script: |
mkdir -p '$(GOBIN)'
mkdir -p '$(GOPATH)/pkg'
mkdir -p '$(modulePath)'
shopt -s extglob
shopt -s dotglob
mv !(gopath) '$(modulePath)'
echo '##vso[task.prependpath]$(GOBIN)'
echo '##vso[task.prependpath]$(GOROOT)/bin'
displayName: 'Set up the Go workspace'
- script: |
go version
go get -v -t -d ./...
if [ -f Gopkg.toml ]; then
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
dep ensure
fi
go build -v .
workingDirectory: '$(modulePath)'
displayName: 'Get dependencies, then build'

View File

@ -1,952 +0,0 @@
/*
Package bitset implements bitsets, a mapping
between non-negative integers and boolean values. It should be more
efficient than map[uint] bool.
It provides methods for setting, clearing, flipping, and testing
individual integers.
But it also provides set intersection, union, difference,
complement, and symmetric operations, as well as tests to
check whether any, all, or no bits are set, and querying a
bitset's current length and number of positive bits.
BitSets are expanded to the size of the largest set bit; the
memory allocation is approximately Max bits, where Max is
the largest set bit. BitSets are never shrunk. On creation,
a hint can be given for the number of bits that will be used.
Many of the methods, including Set,Clear, and Flip, return
a BitSet pointer, which allows for chaining.
Example use:
import "bitset"
var b BitSet
b.Set(10).Set(11)
if b.Test(1000) {
b.Clear(1000)
}
if B.Intersection(bitset.New(100).Set(10)).Count() > 1 {
fmt.Println("Intersection works.")
}
As an alternative to BitSets, one should check out the 'big' package,
which provides a (less set-theoretical) view of bitsets.
*/
package bitset
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"io"
"strconv"
)
// the wordSize of a bit set
const wordSize = uint(64)
// log2WordSize is lg(wordSize)
const log2WordSize = uint(6)
// allBits has every bit set
const allBits uint64 = 0xffffffffffffffff
// default binary BigEndian
var binaryOrder binary.ByteOrder = binary.BigEndian
// default json encoding base64.URLEncoding
var base64Encoding = base64.URLEncoding
// Base64StdEncoding Marshal/Unmarshal BitSet with base64.StdEncoding(Default: base64.URLEncoding)
func Base64StdEncoding() { base64Encoding = base64.StdEncoding }
// LittleEndian Marshal/Unmarshal Binary as Little Endian(Default: binary.BigEndian)
func LittleEndian() { binaryOrder = binary.LittleEndian }
// A BitSet is a set of bits. The zero value of a BitSet is an empty set of length 0.
type BitSet struct {
length uint
set []uint64
}
// Error is used to distinguish errors (panics) generated in this package.
type Error string
// safeSet will fixup b.set to be non-nil and return the field value
func (b *BitSet) safeSet() []uint64 {
if b.set == nil {
b.set = make([]uint64, wordsNeeded(0))
}
return b.set
}
// From is a constructor used to create a BitSet from an array of integers
func From(buf []uint64) *BitSet {
return &BitSet{uint(len(buf)) * 64, buf}
}
// Bytes returns the bitset as array of integers
func (b *BitSet) Bytes() []uint64 {
return b.set
}
// wordsNeeded calculates the number of words needed for i bits
func wordsNeeded(i uint) int {
if i > (Cap() - wordSize + 1) {
return int(Cap() >> log2WordSize)
}
return int((i + (wordSize - 1)) >> log2WordSize)
}
// New creates a new BitSet with a hint that length bits will be required
func New(length uint) (bset *BitSet) {
defer func() {
if r := recover(); r != nil {
bset = &BitSet{
0,
make([]uint64, 0),
}
}
}()
bset = &BitSet{
length,
make([]uint64, wordsNeeded(length)),
}
return bset
}
// Cap returns the total possible capacity, or number of bits
func Cap() uint {
return ^uint(0)
}
// Len returns the number of bits in the BitSet.
// Note the difference to method Count, see example.
func (b *BitSet) Len() uint {
return b.length
}
// extendSetMaybe adds additional words to incorporate new bits if needed
func (b *BitSet) extendSetMaybe(i uint) {
if i >= b.length { // if we need more bits, make 'em
if i >= Cap() {
panic("You are exceeding the capacity")
}
nsize := wordsNeeded(i + 1)
if b.set == nil {
b.set = make([]uint64, nsize)
} else if cap(b.set) >= nsize {
b.set = b.set[:nsize] // fast resize
} else if len(b.set) < nsize {
newset := make([]uint64, nsize, 2*nsize) // increase capacity 2x
copy(newset, b.set)
b.set = newset
}
b.length = i + 1
}
}
// Test whether bit i is set.
func (b *BitSet) Test(i uint) bool {
if i >= b.length {
return false
}
return b.set[i>>log2WordSize]&(1<<(i&(wordSize-1))) != 0
}
// Set bit i to 1, the capacity of the bitset is automatically
// increased accordingly.
// If i>= Cap(), this function will panic.
// Warning: using a very large value for 'i'
// may lead to a memory shortage and a panic: the caller is responsible
// for providing sensible parameters in line with their memory capacity.
func (b *BitSet) Set(i uint) *BitSet {
b.extendSetMaybe(i)
b.set[i>>log2WordSize] |= 1 << (i & (wordSize - 1))
return b
}
// Clear bit i to 0
func (b *BitSet) Clear(i uint) *BitSet {
if i >= b.length {
return b
}
b.set[i>>log2WordSize] &^= 1 << (i & (wordSize - 1))
return b
}
// SetTo sets bit i to value.
// If i>= Cap(), this function will panic.
// Warning: using a very large value for 'i'
// may lead to a memory shortage and a panic: the caller is responsible
// for providing sensible parameters in line with their memory capacity.
func (b *BitSet) SetTo(i uint, value bool) *BitSet {
if value {
return b.Set(i)
}
return b.Clear(i)
}
// Flip bit at i.
// If i>= Cap(), this function will panic.
// Warning: using a very large value for 'i'
// may lead to a memory shortage and a panic: the caller is responsible
// for providing sensible parameters in line with their memory capacity.
func (b *BitSet) Flip(i uint) *BitSet {
if i >= b.length {
return b.Set(i)
}
b.set[i>>log2WordSize] ^= 1 << (i & (wordSize - 1))
return b
}
// FlipRange bit in [start, end).
// If end>= Cap(), this function will panic.
// Warning: using a very large value for 'end'
// may lead to a memory shortage and a panic: the caller is responsible
// for providing sensible parameters in line with their memory capacity.
func (b *BitSet) FlipRange(start, end uint) *BitSet {
if start >= end {
return b
}
b.extendSetMaybe(end - 1)
var startWord uint = start >> log2WordSize
var endWord uint = end >> log2WordSize
b.set[startWord] ^= ^(^uint64(0) << (start & (wordSize - 1)))
for i := startWord; i < endWord; i++ {
b.set[i] = ^b.set[i]
}
b.set[endWord] ^= ^uint64(0) >> (-end & (wordSize - 1))
return b
}
// Shrink shrinks BitSet so that the provided value is the last possible
// set value. It clears all bits > the provided index and reduces the size
// and length of the set.
//
// Note that the parameter value is not the new length in bits: it is the
// maximal value that can be stored in the bitset after the function call.
// The new length in bits is the parameter value + 1. Thus it is not possible
// to use this function to set the length to 0, the minimal value of the length
// after this function call is 1.
//
// A new slice is allocated to store the new bits, so you may see an increase in
// memory usage until the GC runs. Normally this should not be a problem, but if you
// have an extremely large BitSet its important to understand that the old BitSet will
// remain in memory until the GC frees it.
func (b *BitSet) Shrink(lastbitindex uint) *BitSet {
length := lastbitindex + 1
idx := wordsNeeded(length)
if idx > len(b.set) {
return b
}
shrunk := make([]uint64, idx)
copy(shrunk, b.set[:idx])
b.set = shrunk
b.length = length
b.set[idx-1] &= (allBits >> (uint64(64) - uint64(length&(wordSize-1))))
return b
}
// Compact shrinks BitSet to so that we preserve all set bits, while minimizing
// memory usage. Compact calls Shrink.
func (b *BitSet) Compact() *BitSet {
idx := len(b.set) - 1
for ; idx >= 0 && b.set[idx] == 0; idx-- {
}
newlength := uint((idx + 1) << log2WordSize)
if newlength >= b.length {
return b // nothing to do
}
if newlength > 0 {
return b.Shrink(newlength - 1)
}
// We preserve one word
return b.Shrink(63)
}
// InsertAt takes an index which indicates where a bit should be
// inserted. Then it shifts all the bits in the set to the left by 1, starting
// from the given index position, and sets the index position to 0.
//
// Depending on the size of your BitSet, and where you are inserting the new entry,
// this method could be extremely slow and in some cases might cause the entire BitSet
// to be recopied.
func (b *BitSet) InsertAt(idx uint) *BitSet {
insertAtElement := (idx >> log2WordSize)
// if length of set is a multiple of wordSize we need to allocate more space first
if b.isLenExactMultiple() {
b.set = append(b.set, uint64(0))
}
var i uint
for i = uint(len(b.set) - 1); i > insertAtElement; i-- {
// all elements above the position where we want to insert can simply by shifted
b.set[i] <<= 1
// we take the most significant bit of the previous element and set it as
// the least significant bit of the current element
b.set[i] |= (b.set[i-1] & 0x8000000000000000) >> 63
}
// generate a mask to extract the data that we need to shift left
// within the element where we insert a bit
dataMask := ^(uint64(1)<<uint64(idx&(wordSize-1)) - 1)
// extract that data that we'll shift
data := b.set[i] & dataMask
// set the positions of the data mask to 0 in the element where we insert
b.set[i] &= ^dataMask
// shift data mask to the left and insert its data to the slice element
b.set[i] |= data << 1
// add 1 to length of BitSet
b.length++
return b
}
// String creates a string representation of the Bitmap
func (b *BitSet) String() string {
// follows code from https://github.com/RoaringBitmap/roaring
var buffer bytes.Buffer
start := []byte("{")
buffer.Write(start)
counter := 0
i, e := b.NextSet(0)
for e {
counter = counter + 1
// to avoid exhausting the memory
if counter > 0x40000 {
buffer.WriteString("...")
break
}
buffer.WriteString(strconv.FormatInt(int64(i), 10))
i, e = b.NextSet(i + 1)
if e {
buffer.WriteString(",")
}
}
buffer.WriteString("}")
return buffer.String()
}
// DeleteAt deletes the bit at the given index position from
// within the bitset
// All the bits residing on the left of the deleted bit get
// shifted right by 1
// The running time of this operation may potentially be
// relatively slow, O(length)
func (b *BitSet) DeleteAt(i uint) *BitSet {
// the index of the slice element where we'll delete a bit
deleteAtElement := i >> log2WordSize
// generate a mask for the data that needs to be shifted right
// within that slice element that gets modified
dataMask := ^((uint64(1) << (i & (wordSize - 1))) - 1)
// extract the data that we'll shift right from the slice element
data := b.set[deleteAtElement] & dataMask
// set the masked area to 0 while leaving the rest as it is
b.set[deleteAtElement] &= ^dataMask
// shift the previously extracted data to the right and then
// set it in the previously masked area
b.set[deleteAtElement] |= (data >> 1) & dataMask
// loop over all the consecutive slice elements to copy each
// lowest bit into the highest position of the previous element,
// then shift the entire content to the right by 1
for i := int(deleteAtElement) + 1; i < len(b.set); i++ {
b.set[i-1] |= (b.set[i] & 1) << 63
b.set[i] >>= 1
}
b.length = b.length - 1
return b
}
// NextSet returns the next bit set from the specified index,
// including possibly the current index
// along with an error code (true = valid, false = no set bit found)
// for i,e := v.NextSet(0); e; i,e = v.NextSet(i + 1) {...}
//
// Users concerned with performance may want to use NextSetMany to
// retrieve several values at once.
func (b *BitSet) NextSet(i uint) (uint, bool) {
x := int(i >> log2WordSize)
if x >= len(b.set) {
return 0, false
}
w := b.set[x]
w = w >> (i & (wordSize - 1))
if w != 0 {
return i + trailingZeroes64(w), true
}
x = x + 1
for x < len(b.set) {
if b.set[x] != 0 {
return uint(x)*wordSize + trailingZeroes64(b.set[x]), true
}
x = x + 1
}
return 0, false
}
// NextSetMany returns many next bit sets from the specified index,
// including possibly the current index and up to cap(buffer).
// If the returned slice has len zero, then no more set bits were found
//
// buffer := make([]uint, 256) // this should be reused
// j := uint(0)
// j, buffer = bitmap.NextSetMany(j, buffer)
// for ; len(buffer) > 0; j, buffer = bitmap.NextSetMany(j,buffer) {
// for k := range buffer {
// do something with buffer[k]
// }
// j += 1
// }
//
//
// It is possible to retrieve all set bits as follow:
//
// indices := make([]uint, bitmap.Count())
// bitmap.NextSetMany(0, indices)
//
// However if bitmap.Count() is large, it might be preferable to
// use several calls to NextSetMany, for performance reasons.
func (b *BitSet) NextSetMany(i uint, buffer []uint) (uint, []uint) {
myanswer := buffer
capacity := cap(buffer)
x := int(i >> log2WordSize)
if x >= len(b.set) || capacity == 0 {
return 0, myanswer[:0]
}
skip := i & (wordSize - 1)
word := b.set[x] >> skip
myanswer = myanswer[:capacity]
size := int(0)
for word != 0 {
r := trailingZeroes64(word)
t := word & ((^word) + 1)
myanswer[size] = r + i
size++
if size == capacity {
goto End
}
word = word ^ t
}
x++
for idx, word := range b.set[x:] {
for word != 0 {
r := trailingZeroes64(word)
t := word & ((^word) + 1)
myanswer[size] = r + (uint(x+idx) << 6)
size++
if size == capacity {
goto End
}
word = word ^ t
}
}
End:
if size > 0 {
return myanswer[size-1], myanswer[:size]
}
return 0, myanswer[:0]
}
// NextClear returns the next clear bit from the specified index,
// including possibly the current index
// along with an error code (true = valid, false = no bit found i.e. all bits are set)
func (b *BitSet) NextClear(i uint) (uint, bool) {
x := int(i >> log2WordSize)
if x >= len(b.set) {
return 0, false
}
w := b.set[x]
w = w >> (i & (wordSize - 1))
wA := allBits >> (i & (wordSize - 1))
index := i + trailingZeroes64(^w)
if w != wA && index < b.length {
return index, true
}
x++
for x < len(b.set) {
index = uint(x)*wordSize + trailingZeroes64(^b.set[x])
if b.set[x] != allBits && index < b.length {
return index, true
}
x++
}
return 0, false
}
// ClearAll clears the entire BitSet
func (b *BitSet) ClearAll() *BitSet {
if b != nil && b.set != nil {
for i := range b.set {
b.set[i] = 0
}
}
return b
}
// wordCount returns the number of words used in a bit set
func (b *BitSet) wordCount() int {
return len(b.set)
}
// Clone this BitSet
func (b *BitSet) Clone() *BitSet {
c := New(b.length)
if b.set != nil { // Clone should not modify current object
copy(c.set, b.set)
}
return c
}
// Copy into a destination BitSet
// Returning the size of the destination BitSet
// like array copy
func (b *BitSet) Copy(c *BitSet) (count uint) {
if c == nil {
return
}
if b.set != nil { // Copy should not modify current object
copy(c.set, b.set)
}
count = c.length
if b.length < c.length {
count = b.length
}
return
}
// Count (number of set bits).
// Also known as "popcount" or "population count".
func (b *BitSet) Count() uint {
if b != nil && b.set != nil {
return uint(popcntSlice(b.set))
}
return 0
}
// Equal tests the equivalence of two BitSets.
// False if they are of different sizes, otherwise true
// only if all the same bits are set
func (b *BitSet) Equal(c *BitSet) bool {
if c == nil || b == nil {
return c == b
}
if b.length != c.length {
return false
}
if b.length == 0 { // if they have both length == 0, then could have nil set
return true
}
// testing for equality shoud not transform the bitset (no call to safeSet)
for p, v := range b.set {
if c.set[p] != v {
return false
}
}
return true
}
func panicIfNull(b *BitSet) {
if b == nil {
panic(Error("BitSet must not be null"))
}
}
// Difference of base set and other set
// This is the BitSet equivalent of &^ (and not)
func (b *BitSet) Difference(compare *BitSet) (result *BitSet) {
panicIfNull(b)
panicIfNull(compare)
result = b.Clone() // clone b (in case b is bigger than compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
for i := 0; i < l; i++ {
result.set[i] = b.set[i] &^ compare.set[i]
}
return
}
// DifferenceCardinality computes the cardinality of the differnce
func (b *BitSet) DifferenceCardinality(compare *BitSet) uint {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
cnt := uint64(0)
cnt += popcntMaskSlice(b.set[:l], compare.set[:l])
cnt += popcntSlice(b.set[l:])
return uint(cnt)
}
// InPlaceDifference computes the difference of base set and other set
// This is the BitSet equivalent of &^ (and not)
func (b *BitSet) InPlaceDifference(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
for i := 0; i < l; i++ {
b.set[i] &^= compare.set[i]
}
}
// Convenience function: return two bitsets ordered by
// increasing length. Note: neither can be nil
func sortByLength(a *BitSet, b *BitSet) (ap *BitSet, bp *BitSet) {
if a.length <= b.length {
ap, bp = a, b
} else {
ap, bp = b, a
}
return
}
// Intersection of base set and other set
// This is the BitSet equivalent of & (and)
func (b *BitSet) Intersection(compare *BitSet) (result *BitSet) {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
result = New(b.length)
for i, word := range b.set {
result.set[i] = word & compare.set[i]
}
return
}
// IntersectionCardinality computes the cardinality of the union
func (b *BitSet) IntersectionCardinality(compare *BitSet) uint {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
cnt := popcntAndSlice(b.set, compare.set)
return uint(cnt)
}
// InPlaceIntersection destructively computes the intersection of
// base set and the compare set.
// This is the BitSet equivalent of & (and)
func (b *BitSet) InPlaceIntersection(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
for i := 0; i < l; i++ {
b.set[i] &= compare.set[i]
}
for i := l; i < len(b.set); i++ {
b.set[i] = 0
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
}
}
// Union of base set and other set
// This is the BitSet equivalent of | (or)
func (b *BitSet) Union(compare *BitSet) (result *BitSet) {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
result = compare.Clone()
for i, word := range b.set {
result.set[i] = word | compare.set[i]
}
return
}
// UnionCardinality computes the cardinality of the uniton of the base set
// and the compare set.
func (b *BitSet) UnionCardinality(compare *BitSet) uint {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
cnt := popcntOrSlice(b.set, compare.set)
if len(compare.set) > len(b.set) {
cnt += popcntSlice(compare.set[len(b.set):])
}
return uint(cnt)
}
// InPlaceUnion creates the destructive union of base set and compare set.
// This is the BitSet equivalent of | (or).
func (b *BitSet) InPlaceUnion(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
}
for i := 0; i < l; i++ {
b.set[i] |= compare.set[i]
}
if len(compare.set) > l {
for i := l; i < len(compare.set); i++ {
b.set[i] = compare.set[i]
}
}
}
// SymmetricDifference of base set and other set
// This is the BitSet equivalent of ^ (xor)
func (b *BitSet) SymmetricDifference(compare *BitSet) (result *BitSet) {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
// compare is bigger, so clone it
result = compare.Clone()
for i, word := range b.set {
result.set[i] = word ^ compare.set[i]
}
return
}
// SymmetricDifferenceCardinality computes the cardinality of the symmetric difference
func (b *BitSet) SymmetricDifferenceCardinality(compare *BitSet) uint {
panicIfNull(b)
panicIfNull(compare)
b, compare = sortByLength(b, compare)
cnt := popcntXorSlice(b.set, compare.set)
if len(compare.set) > len(b.set) {
cnt += popcntSlice(compare.set[len(b.set):])
}
return uint(cnt)
}
// InPlaceSymmetricDifference creates the destructive SymmetricDifference of base set and other set
// This is the BitSet equivalent of ^ (xor)
func (b *BitSet) InPlaceSymmetricDifference(compare *BitSet) {
panicIfNull(b)
panicIfNull(compare)
l := int(compare.wordCount())
if l > int(b.wordCount()) {
l = int(b.wordCount())
}
if compare.length > 0 {
b.extendSetMaybe(compare.length - 1)
}
for i := 0; i < l; i++ {
b.set[i] ^= compare.set[i]
}
if len(compare.set) > l {
for i := l; i < len(compare.set); i++ {
b.set[i] = compare.set[i]
}
}
}
// Is the length an exact multiple of word sizes?
func (b *BitSet) isLenExactMultiple() bool {
return b.length%wordSize == 0
}
// Clean last word by setting unused bits to 0
func (b *BitSet) cleanLastWord() {
if !b.isLenExactMultiple() {
b.set[len(b.set)-1] &= allBits >> (wordSize - b.length%wordSize)
}
}
// Complement computes the (local) complement of a biset (up to length bits)
func (b *BitSet) Complement() (result *BitSet) {
panicIfNull(b)
result = New(b.length)
for i, word := range b.set {
result.set[i] = ^word
}
result.cleanLastWord()
return
}
// All returns true if all bits are set, false otherwise. Returns true for
// empty sets.
func (b *BitSet) All() bool {
panicIfNull(b)
return b.Count() == b.length
}
// None returns true if no bit is set, false otherwise. Returns true for
// empty sets.
func (b *BitSet) None() bool {
panicIfNull(b)
if b != nil && b.set != nil {
for _, word := range b.set {
if word > 0 {
return false
}
}
return true
}
return true
}
// Any returns true if any bit is set, false otherwise
func (b *BitSet) Any() bool {
panicIfNull(b)
return !b.None()
}
// IsSuperSet returns true if this is a superset of the other set
func (b *BitSet) IsSuperSet(other *BitSet) bool {
for i, e := other.NextSet(0); e; i, e = other.NextSet(i + 1) {
if !b.Test(i) {
return false
}
}
return true
}
// IsStrictSuperSet returns true if this is a strict superset of the other set
func (b *BitSet) IsStrictSuperSet(other *BitSet) bool {
return b.Count() > other.Count() && b.IsSuperSet(other)
}
// DumpAsBits dumps a bit set as a string of bits
func (b *BitSet) DumpAsBits() string {
if b.set == nil {
return "."
}
buffer := bytes.NewBufferString("")
i := len(b.set) - 1
for ; i >= 0; i-- {
fmt.Fprintf(buffer, "%064b.", b.set[i])
}
return buffer.String()
}
// BinaryStorageSize returns the binary storage requirements
func (b *BitSet) BinaryStorageSize() int {
return binary.Size(uint64(0)) + binary.Size(b.set)
}
// WriteTo writes a BitSet to a stream
func (b *BitSet) WriteTo(stream io.Writer) (int64, error) {
length := uint64(b.length)
// Write length
err := binary.Write(stream, binaryOrder, length)
if err != nil {
return 0, err
}
// Write set
err = binary.Write(stream, binaryOrder, b.set)
return int64(b.BinaryStorageSize()), err
}
// ReadFrom reads a BitSet from a stream written using WriteTo
func (b *BitSet) ReadFrom(stream io.Reader) (int64, error) {
var length uint64
// Read length first
err := binary.Read(stream, binaryOrder, &length)
if err != nil {
return 0, err
}
newset := New(uint(length))
if uint64(newset.length) != length {
return 0, errors.New("unmarshalling error: type mismatch")
}
// Read remaining bytes as set
err = binary.Read(stream, binaryOrder, newset.set)
if err != nil {
return 0, err
}
*b = *newset
return int64(b.BinaryStorageSize()), nil
}
// MarshalBinary encodes a BitSet into a binary form and returns the result.
func (b *BitSet) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
_, err := b.WriteTo(writer)
if err != nil {
return []byte{}, err
}
err = writer.Flush()
return buf.Bytes(), err
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
func (b *BitSet) UnmarshalBinary(data []byte) error {
buf := bytes.NewReader(data)
reader := bufio.NewReader(buf)
_, err := b.ReadFrom(reader)
return err
}
// MarshalJSON marshals a BitSet as a JSON structure
func (b *BitSet) MarshalJSON() ([]byte, error) {
buffer := bytes.NewBuffer(make([]byte, 0, b.BinaryStorageSize()))
_, err := b.WriteTo(buffer)
if err != nil {
return nil, err
}
// URLEncode all bytes
return json.Marshal(base64Encoding.EncodeToString(buffer.Bytes()))
}
// UnmarshalJSON unmarshals a BitSet from JSON created using MarshalJSON
func (b *BitSet) UnmarshalJSON(data []byte) error {
// Unmarshal as string
var s string
err := json.Unmarshal(data, &s)
if err != nil {
return err
}
// URLDecode string
buf, err := base64Encoding.DecodeString(s)
if err != nil {
return err
}
_, err = b.ReadFrom(bytes.NewReader(buf))
return err
}

View File

@ -1,3 +0,0 @@
module github.com/bits-and-blooms/bitset
go 1.14

View File

@ -1,53 +0,0 @@
package bitset
// bit population count, take from
// https://code.google.com/p/go/issues/detail?id=4988#c11
// credit: https://code.google.com/u/arnehormann/
func popcount(x uint64) (n uint64) {
x -= (x >> 1) & 0x5555555555555555
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
x += x >> 4
x &= 0x0f0f0f0f0f0f0f0f
x *= 0x0101010101010101
return x >> 56
}
func popcntSliceGo(s []uint64) uint64 {
cnt := uint64(0)
for _, x := range s {
cnt += popcount(x)
}
return cnt
}
func popcntMaskSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] &^ m[i])
}
return cnt
}
func popcntAndSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] & m[i])
}
return cnt
}
func popcntOrSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] | m[i])
}
return cnt
}
func popcntXorSliceGo(s, m []uint64) uint64 {
cnt := uint64(0)
for i := range s {
cnt += popcount(s[i] ^ m[i])
}
return cnt
}

View File

@ -1,45 +0,0 @@
// +build go1.9
package bitset
import "math/bits"
func popcntSlice(s []uint64) uint64 {
var cnt int
for _, x := range s {
cnt += bits.OnesCount64(x)
}
return uint64(cnt)
}
func popcntMaskSlice(s, m []uint64) uint64 {
var cnt int
for i := range s {
cnt += bits.OnesCount64(s[i] &^ m[i])
}
return uint64(cnt)
}
func popcntAndSlice(s, m []uint64) uint64 {
var cnt int
for i := range s {
cnt += bits.OnesCount64(s[i] & m[i])
}
return uint64(cnt)
}
func popcntOrSlice(s, m []uint64) uint64 {
var cnt int
for i := range s {
cnt += bits.OnesCount64(s[i] | m[i])
}
return uint64(cnt)
}
func popcntXorSlice(s, m []uint64) uint64 {
var cnt int
for i := range s {
cnt += bits.OnesCount64(s[i] ^ m[i])
}
return uint64(cnt)
}

View File

@ -1,68 +0,0 @@
// +build !go1.9
// +build amd64,!appengine
package bitset
// *** the following functions are defined in popcnt_amd64.s
//go:noescape
func hasAsm() bool
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
var useAsm = hasAsm()
//go:noescape
func popcntSliceAsm(s []uint64) uint64
//go:noescape
func popcntMaskSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntAndSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntOrSliceAsm(s, m []uint64) uint64
//go:noescape
func popcntXorSliceAsm(s, m []uint64) uint64
func popcntSlice(s []uint64) uint64 {
if useAsm {
return popcntSliceAsm(s)
}
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
if useAsm {
return popcntMaskSliceAsm(s, m)
}
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
if useAsm {
return popcntAndSliceAsm(s, m)
}
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
if useAsm {
return popcntOrSliceAsm(s, m)
}
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
if useAsm {
return popcntXorSliceAsm(s, m)
}
return popcntXorSliceGo(s, m)
}

View File

@ -1,104 +0,0 @@
// +build !go1.9
// +build amd64,!appengine
TEXT ·hasAsm(SB),4,$0-1
MOVQ $1, AX
CPUID
SHRQ $23, CX
ANDQ $1, CX
MOVB CX, ret+0(FP)
RET
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
TEXT ·popcntSliceAsm(SB),4,$0-32
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntSliceEnd
popcntSliceLoop:
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
ADDQ DX, AX
ADDQ $8, SI
LOOP popcntSliceLoop
popcntSliceEnd:
MOVQ AX, ret+24(FP)
RET
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntMaskSliceEnd
MOVQ m+24(FP), DI
popcntMaskSliceLoop:
MOVQ (DI), DX
NOTQ DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntMaskSliceLoop
popcntMaskSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntAndSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntAndSliceEnd
MOVQ m+24(FP), DI
popcntAndSliceLoop:
MOVQ (DI), DX
ANDQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntAndSliceLoop
popcntAndSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntOrSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntOrSliceEnd
MOVQ m+24(FP), DI
popcntOrSliceLoop:
MOVQ (DI), DX
ORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntOrSliceLoop
popcntOrSliceEnd:
MOVQ AX, ret+48(FP)
RET
TEXT ·popcntXorSliceAsm(SB),4,$0-56
XORQ AX, AX
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), CX
TESTQ CX, CX
JZ popcntXorSliceEnd
MOVQ m+24(FP), DI
popcntXorSliceLoop:
MOVQ (DI), DX
XORQ (SI), DX
POPCNTQ_DX_DX
ADDQ DX, AX
ADDQ $8, SI
ADDQ $8, DI
LOOP popcntXorSliceLoop
popcntXorSliceEnd:
MOVQ AX, ret+48(FP)
RET

View File

@ -1,24 +0,0 @@
// +build !go1.9
// +build !amd64 appengine
package bitset
func popcntSlice(s []uint64) uint64 {
return popcntSliceGo(s)
}
func popcntMaskSlice(s, m []uint64) uint64 {
return popcntMaskSliceGo(s, m)
}
func popcntAndSlice(s, m []uint64) uint64 {
return popcntAndSliceGo(s, m)
}
func popcntOrSlice(s, m []uint64) uint64 {
return popcntOrSliceGo(s, m)
}
func popcntXorSlice(s, m []uint64) uint64 {
return popcntXorSliceGo(s, m)
}

View File

@ -1,14 +0,0 @@
// +build !go1.9
package bitset
var deBruijn = [...]byte{
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
func trailingZeroes64(v uint64) uint {
return uint(deBruijn[((v&-v)*0x03f79d71b4ca8b09)>>58])
}

View File

@ -1,9 +0,0 @@
// +build go1.9
package bitset
import "math/bits"
func trailingZeroes64(v uint64) uint {
return uint(bits.TrailingZeros64(v))
}

View File

@ -1,5 +1,3 @@
// +build linux
package cgroups
import (

View File

@ -1,3 +0,0 @@
// +build !linux
package cgroups

View File

@ -1,5 +1,3 @@
// +build linux
// SPDX-License-Identifier: Apache-2.0
/*
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
@ -22,14 +20,13 @@ package devices
import (
"bufio"
"fmt"
"io"
"regexp"
"sort"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
)
// deviceMeta is a Rule without the Allow or Permissions fields, and no
@ -79,19 +76,21 @@ func (e *Emulator) IsAllowAll() bool {
return e.IsBlacklist() && len(e.rules) == 0
}
var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`)
func parseLine(line string) (*deviceRule, error) {
matches := devicesListRegexp.FindStringSubmatch(line)
if matches == nil {
return nil, errors.Errorf("line doesn't match devices.list format")
// Input: node major:minor perms.
fields := strings.FieldsFunc(line, func(r rune) bool {
return r == ' ' || r == ':'
})
if len(fields) != 4 {
return nil, fmt.Errorf("malformed devices.list rule %s", line)
}
var (
rule deviceRule
node = matches[1]
major = matches[2]
minor = matches[3]
perms = matches[4]
node = fields[0]
major = fields[1]
minor = fields[2]
perms = fields[3]
)
// Parse the node type.
@ -107,8 +106,7 @@ func parseLine(line string) (*deviceRule, error) {
case "c":
rule.meta.node = devices.CharDevice
default:
// Should never happen!
return nil, errors.Errorf("unknown device type %q", node)
return nil, fmt.Errorf("unknown device type %q", node)
}
// Parse the major number.
@ -117,7 +115,7 @@ func parseLine(line string) (*deviceRule, error) {
} else {
val, err := strconv.ParseUint(major, 10, 32)
if err != nil {
return nil, errors.Wrap(err, "parse major number")
return nil, fmt.Errorf("invalid major number: %w", err)
}
rule.meta.major = int64(val)
}
@ -128,7 +126,7 @@ func parseLine(line string) (*deviceRule, error) {
} else {
val, err := strconv.ParseUint(minor, 10, 32)
if err != nil {
return nil, errors.Wrap(err, "parse minor number")
return nil, fmt.Errorf("invalid minor number: %w", err)
}
rule.meta.minor = int64(val)
}
@ -136,13 +134,12 @@ func parseLine(line string) (*deviceRule, error) {
// Parse the access permissions.
rule.perms = devices.Permissions(perms)
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
// Should never happen!
return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
}
return &rule, nil
}
func (e *Emulator) addRule(rule deviceRule) error {
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
if e.rules == nil {
e.rules = make(map[deviceMeta]devices.Permissions)
}
@ -180,7 +177,7 @@ func (e *Emulator) rmRule(rule deviceRule) error {
// Only give an error if the set of permissions overlap.
partialPerms := e.rules[partialMeta]
if !partialPerms.Intersection(rule.perms).IsEmpty() {
return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
}
}
@ -212,9 +209,9 @@ func (e *Emulator) allow(rule *deviceRule) error {
var err error
if e.defaultAllow {
err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception")
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
} else {
err = errors.Wrap(e.addRule(*rule), "add 'allow' exception")
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
}
return err
}
@ -232,16 +229,16 @@ func (e *Emulator) deny(rule *deviceRule) error {
var err error
if e.defaultAllow {
err = errors.Wrap(e.addRule(*rule), "add 'deny' exception")
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
} else {
err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception")
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
}
return err
}
func (e *Emulator) Apply(rule devices.Rule) error {
if !rule.Type.CanCgroup() {
return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
}
innerRule := &deviceRule{
@ -283,17 +280,17 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
line := s.Text()
deviceRule, err := parseLine(line)
if err != nil {
return nil, errors.Wrapf(err, "parsing line %q", line)
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
}
// "devices.list" is an allow list. Note that this means that in
// black-list mode, we have no idea what rules are in play. As a
// result, we need to be very careful in Transition().
if err := e.allow(deviceRule); err != nil {
return nil, errors.Wrapf(err, "adding devices.list rule")
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
}
}
if err := s.Err(); err != nil {
return nil, errors.Wrap(err, "reading devices.list lines")
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
}
return e, nil
}
@ -305,7 +302,7 @@ func EmulatorFromList(list io.Reader) (*Emulator, error) {
// necessary.
//
// This function is the sole reason for all of Emulator -- to allow us
// to figure out how to update a containers' cgroups without causing spurrious
// to figure out how to update a containers' cgroups without causing spurious
// device errors (if possible).
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
var transitionRules []*devices.Rule
@ -380,3 +377,10 @@ func (e *Emulator) Rules() ([]*devices.Rule, error) {
defaultCgroup := &Emulator{defaultAllow: false}
return defaultCgroup.Transition(e)
}
func wrapErr(err error, text string) error {
if err == nil {
return nil
}
return fmt.Errorf(text+": %w", err)
}

View File

@ -7,13 +7,14 @@
package devicefilter
import (
"errors"
"fmt"
"math"
"strconv"
"github.com/cilium/ebpf/asm"
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
@ -51,21 +52,20 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
// only be one (at most) at the very start to instruct cgroupv1 to
// go into allow-list mode. However we do double-check this here.
if idx != 0 || rule.Allow != emu.IsBlacklist() {
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
}
continue
}
if rule.Allow == p.defaultAllow {
// There should be no rules which have an action equal to the
// default action, the emulator removes those.
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
}
if err := p.appendRule(rule); err != nil {
return nil, "", err
}
}
insts, err := p.finalize()
return insts, license, err
return p.finalize(), license, nil
}
type program struct {
@ -118,13 +118,13 @@ func (p *program) appendRule(rule *devices.Rule) error {
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
default:
// We do not permit 'a', nor any other types we don't know about.
return errors.Errorf("invalid type %q", string(rule.Type))
return fmt.Errorf("invalid type %q", string(rule.Type))
}
if rule.Major > math.MaxUint32 {
return errors.Errorf("invalid major %d", rule.Major)
return fmt.Errorf("invalid major %d", rule.Major)
}
if rule.Minor > math.MaxUint32 {
return errors.Errorf("invalid minor %d", rule.Major)
return fmt.Errorf("invalid minor %d", rule.Major)
}
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
hasMinor := rule.Minor >= 0
@ -138,7 +138,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
case 'm':
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
default:
return errors.Errorf("unknown device access %v", r)
return fmt.Errorf("unknown device access %v", r)
}
}
// If the access is rwm, skip the check.
@ -180,7 +180,7 @@ func (p *program) appendRule(rule *devices.Rule) error {
return nil
}
func (p *program) finalize() (asm.Instructions, error) {
func (p *program) finalize() asm.Instructions {
var v int32
if p.defaultAllow {
v = 1
@ -192,7 +192,7 @@ func (p *program) finalize() (asm.Instructions, error) {
asm.Return(),
)
p.blockID = -1
return p.insts, nil
return p.insts
}
func acceptBlock(accept bool) asm.Instructions {

View File

@ -1,6 +1,7 @@
package ebpf
import (
"errors"
"fmt"
"os"
"runtime"
@ -10,7 +11,6 @@ import (
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -134,7 +134,7 @@ func haveBpfProgReplace() bool {
// not supported
return
}
// attach_flags test succeded.
// attach_flags test succeeded.
if !errors.Is(err, unix.EBADF) {
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
}

View File

@ -2,20 +2,27 @@ package cgroups
import (
"bytes"
"errors"
"fmt"
"os"
"path"
"strconv"
"strings"
"sync"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
// It is supposed to be used for cgroup files only, and returns
// an error if the file is not a cgroup file.
//
// Arguments dir and file are joined together to form an absolute path
// to a file being opened.
func OpenFile(dir, file string, flags int) (*os.File, error) {
if dir == "" {
return nil, errors.Errorf("no directory specified for %s", file)
return nil, fmt.Errorf("no directory specified for %s", file)
}
return openFile(dir, file, flags)
}
@ -43,7 +50,8 @@ func WriteFile(dir, file, data string) error {
}
defer fd.Close()
if err := retryingWriteFile(fd, data); err != nil {
return errors.Wrapf(err, "failed to write %q", data)
// Having data in the error message helps in debugging.
return fmt.Errorf("failed to write %q: %w", data, err)
}
return nil
}
@ -81,7 +89,7 @@ func prepareOpenat2() error {
})
if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
if err != unix.ENOSYS {
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
logrus.Warnf("falling back to securejoin: %s", prepErr)
} else {
logrus.Debug("openat2 not available, falling back to securejoin")
@ -107,8 +115,6 @@ func prepareOpenat2() error {
return prepErr
}
// OpenFile opens a cgroup file in a given dir with given flags.
// It is supposed to be used for cgroup files only.
func openFile(dir, file string, flags int) (*os.File, error) {
mode := os.FileMode(0)
if TestMode && flags&os.O_WRONLY != 0 {
@ -116,34 +122,52 @@ func openFile(dir, file string, flags int) (*os.File, error) {
flags |= os.O_TRUNC | os.O_CREATE
mode = 0o600
}
path := path.Join(dir, file)
if prepareOpenat2() != nil {
return openFallback(dir, file, flags, mode)
return openFallback(path, flags, mode)
}
reldir := strings.TrimPrefix(dir, cgroupfsPrefix)
if len(reldir) == len(dir) { // non-standard path, old system?
return openFallback(dir, file, flags, mode)
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
if len(relPath) == len(path) { // non-standard path, old system?
return openFallback(path, flags, mode)
}
relname := reldir + "/" + file
fd, err := unix.Openat2(cgroupFd, relname,
fd, err := unix.Openat2(cgroupFd, relPath,
&unix.OpenHow{
Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC,
Mode: uint64(mode),
})
if err != nil {
return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err}
err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir
// (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted).
//
// TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2.
fdStr := strconv.Itoa(cgroupFd)
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd
// is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
fdStr, fdDest, cgroupfsDir, err)
}
return nil, err
}
return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil
return os.NewFile(uintptr(fd), path), nil
}
var errNotCgroupfs = errors.New("not a cgroup file")
// openFallback is used when openat2(2) is not available. It checks the opened
// Can be changed by unit tests.
var openFallback = openAndCheck
// openAndCheck is used when openat2(2) is not available. It checks the opened
// file is on cgroupfs, returning an error otherwise.
func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) {
path := dir + "/" + file
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
fd, err := os.OpenFile(path, flags, mode)
if err != nil {
return nil, err

View File

@ -1,10 +1,7 @@
// +build linux
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
@ -23,8 +20,8 @@ func (s *BlkioGroup) Name() string {
return "blkio"
}
func (s *BlkioGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
@ -131,19 +128,19 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
// skip total line
continue
} else {
return nil, fmt.Errorf("Invalid line found while parsing %s/%s: %s", dir, file, sc.Text())
return nil, malformedLine(dir, file, sc.Text())
}
}
v, err := strconv.ParseUint(fields[0], 10, 64)
if err != nil {
return nil, err
return nil, &parseError{Path: dir, File: file, Err: err}
}
major := v
v, err = strconv.ParseUint(fields[1], 10, 64)
if err != nil {
return nil, err
return nil, &parseError{Path: dir, File: file, Err: err}
}
minor := v
@ -155,10 +152,13 @@ func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
}
v, err = strconv.ParseUint(fields[valueField], 10, 64)
if err != nil {
return nil, err
return nil, &parseError{Path: dir, File: file, Err: err}
}
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
}
if err := sc.Err(); err != nil {
return nil, &parseError{Path: dir, File: file, Err: err}
}
return blkioStats, nil
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -21,24 +19,19 @@ func (s *CpuGroup) Name() string {
return "cpu"
}
func (s *CpuGroup) Apply(path string, d *cgroupData) error {
// This might happen if we have no cpu cgroup mounted.
// Just do nothing and don't fail.
if path == "" {
return nil
}
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
if err := os.MkdirAll(path, 0o755); err != nil {
return err
}
// We should set the real-Time group scheduling settings before moving
// in the process because if the process is already in SCHED_RR mode
// and no RT bandwidth is set, adding it will fail.
if err := s.SetRtSched(path, d.config.Resources); err != nil {
if err := s.SetRtSched(path, r); err != nil {
return err
}
// Since we are not using join(), we need to place the pid
// into the procs file unlike other subsystems.
return cgroups.WriteCgroupProc(path, d.pid)
// Since we are not using apply(), we need to place the pid
// into the procs file.
return cgroups.WriteCgroupProc(path, pid)
}
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
@ -105,7 +98,8 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error {
}
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY)
const file = "cpu.stat"
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil
@ -118,7 +112,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
for sc.Scan() {
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return err
return &parseError{Path: path, File: file, Err: err}
}
switch t {
case "nr_periods":

View File

@ -1,12 +1,8 @@
// +build linux
package fs
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
@ -38,8 +34,8 @@ func (s *CpuacctGroup) Name() string {
return "cpuacct"
}
func (s *CpuacctGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
@ -85,45 +81,43 @@ func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
const (
userField = "user"
systemField = "system"
file = cgroupCpuacctStat
)
// Expected format:
// user <usage in ticks>
// system <usage in ticks>
data, err := cgroups.ReadFile(path, cgroupCpuacctStat)
data, err := cgroups.ReadFile(path, file)
if err != nil {
return 0, 0, err
}
// TODO: use strings.SplitN instead.
fields := strings.Fields(data)
if len(fields) < 4 {
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
}
if fields[0] != userField {
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
}
if fields[2] != systemField {
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
return 0, 0, malformedLine(path, file, data)
}
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
return 0, 0, err
return 0, 0, &parseError{Path: path, File: file, Err: err}
}
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
return 0, 0, err
return 0, 0, &parseError{Path: path, File: file, Err: err}
}
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
}
func getPercpuUsage(path string) ([]uint64, error) {
const file = "cpuacct.usage_percpu"
percpuUsage := []uint64{}
data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu")
data, err := cgroups.ReadFile(path, file)
if err != nil {
return percpuUsage, err
}
// TODO: use strings.SplitN instead.
for _, value := range strings.Fields(data) {
value, err := strconv.ParseUint(value, 10, 64)
if err != nil {
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
return percpuUsage, &parseError{Path: path, File: file, Err: err}
}
percpuUsage = append(percpuUsage, value)
}
@ -133,16 +127,17 @@ func getPercpuUsage(path string) ([]uint64, error) {
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
usageKernelMode := []uint64{}
usageUserMode := []uint64{}
const file = cgroupCpuacctUsageAll
file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY)
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
if os.IsNotExist(err) {
return usageKernelMode, usageUserMode, nil
} else if err != nil {
return nil, nil, err
}
defer file.Close()
defer fd.Close()
scanner := bufio.NewScanner(file)
scanner := bufio.NewScanner(fd)
scanner.Scan() // skipping header line
for scanner.Scan() {
@ -153,19 +148,18 @@ func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
if err != nil {
return nil, nil, fmt.Errorf("Unable to convert CPU usage in kernel mode to uint64: %s", err)
return nil, nil, &parseError{Path: path, File: file, Err: err}
}
usageKernelMode = append(usageKernelMode, usageInKernelMode)
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
if err != nil {
return nil, nil, fmt.Errorf("Unable to convert CPU usage in user mode to uint64: %s", err)
return nil, nil, &parseError{Path: path, File: file, Err: err}
}
usageUserMode = append(usageUserMode, usageInUserMode)
}
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("Problem in reading %s line by line, %s", cgroupCpuacctUsageAll, err)
return nil, nil, &parseError{Path: path, File: file, Err: err}
}
return usageKernelMode, usageUserMode, nil

View File

@ -1,19 +1,17 @@
// +build linux
package fs
import (
"fmt"
"errors"
"os"
"path/filepath"
"strconv"
"strings"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
type CpusetGroup struct{}
@ -22,8 +20,8 @@ func (s *CpusetGroup) Name() string {
return "cpuset"
}
func (s *CpusetGroup) Apply(path string, d *cgroupData) error {
return s.ApplyDir(path, d.config.Resources, d.pid)
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
return s.ApplyDir(path, r, pid)
}
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
@ -40,32 +38,32 @@ func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
return nil
}
func getCpusetStat(path string, filename string) ([]uint16, error) {
func getCpusetStat(path string, file string) ([]uint16, error) {
var extracted []uint16
fileContent, err := fscommon.GetCgroupParamString(path, filename)
fileContent, err := fscommon.GetCgroupParamString(path, file)
if err != nil {
return extracted, err
}
if len(fileContent) == 0 {
return extracted, fmt.Errorf("%s found to be empty", filepath.Join(path, filename))
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
}
for _, s := range strings.Split(fileContent, ",") {
splitted := strings.SplitN(s, "-", 3)
switch len(splitted) {
sp := strings.SplitN(s, "-", 3)
switch len(sp) {
case 3:
return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
case 2:
min, err := strconv.ParseUint(splitted[0], 10, 16)
min, err := strconv.ParseUint(sp[0], 10, 16)
if err != nil {
return extracted, err
return extracted, &parseError{Path: path, File: file, Err: err}
}
max, err := strconv.ParseUint(splitted[1], 10, 16)
max, err := strconv.ParseUint(sp[1], 10, 16)
if err != nil {
return extracted, err
return extracted, &parseError{Path: path, File: file, Err: err}
}
if min > max {
return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename))
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
}
for i := min; i <= max; i++ {
extracted = append(extracted, uint16(i))
@ -73,7 +71,7 @@ func getCpusetStat(path string, filename string) ([]uint16, error) {
case 1:
value, err := strconv.ParseUint(s, 10, 16)
if err != nil {
return extracted, err
return extracted, &parseError{Path: path, File: file, Err: err}
}
extracted = append(extracted, uint16(value))
}
@ -168,9 +166,8 @@ func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error
if err := s.ensureCpusAndMems(dir, r); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
// Since we are not using apply(), we need to place the pid
// into the procs file.
return cgroups.WriteCgroupProc(dir, pid)
}
@ -198,7 +195,7 @@ func cpusetEnsureParent(current string) error {
}
// Treat non-existing directory as cgroupfs as it will be created,
// and the root cpuset directory obviously exists.
if err != nil && err != unix.ENOENT {
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
return &os.PathError{Op: "statfs", Path: parent, Err: err}
}
@ -224,12 +221,12 @@ func cpusetCopyIfNeeded(current, parent string) error {
}
if isEmptyCpuset(currentCpus) {
if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
return err
}
}
if isEmptyCpuset(currentMems) {
if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
return err
}
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -15,15 +13,15 @@ import (
)
type DevicesGroup struct {
testingSkipFinalCheck bool
TestingSkipFinalCheck bool
}
func (s *DevicesGroup) Name() string {
return "devices"
}
func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
if d.config.SkipDevices {
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
if r.SkipDevices {
return nil
}
if path == "" {
@ -31,7 +29,8 @@ func (s *DevicesGroup) Apply(path string, d *cgroupData) error {
// is a hard requirement for container's security.
return errSubsystemDoesNotExist
}
return join(path, d.pid)
return apply(path, pid)
}
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
@ -91,7 +90,7 @@ func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
//
// This safety-check is skipped for the unit tests because we cannot
// currently mock devices.list correctly.
if !s.testingSkipFinalCheck {
if !s.TestingSkipFinalCheck {
currentAfter, err := loadEmulator(path)
if err != nil {
return err

View File

@ -0,0 +1,15 @@
package fs
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
)
type parseError = fscommon.ParseError
// malformedLine is used by all cgroupfs file parsers that expect a line
// in a particular format but get some garbage instead.
func malformedLine(path, file, line string) error {
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -21,8 +19,8 @@ func (s *FreezerGroup) Name() string {
return "freezer"
}
func (s *FreezerGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {

View File

@ -1,165 +1,86 @@
// +build linux
package fs
import (
"errors"
"fmt"
"os"
"path/filepath"
"sync"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
var (
subsystems = []subsystem{
&CpusetGroup{},
&DevicesGroup{},
&MemoryGroup{},
&CpuGroup{},
&CpuacctGroup{},
&PidsGroup{},
&BlkioGroup{},
&HugetlbGroup{},
&NetClsGroup{},
&NetPrioGroup{},
&PerfEventGroup{},
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
HugePageSizes, _ = cgroups.GetHugePageSize()
)
var subsystems = []subsystem{
&CpusetGroup{},
&DevicesGroup{},
&MemoryGroup{},
&CpuGroup{},
&CpuacctGroup{},
&PidsGroup{},
&BlkioGroup{},
&HugetlbGroup{},
&NetClsGroup{},
&NetPrioGroup{},
&PerfEventGroup{},
&FreezerGroup{},
&RdmaGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
func init() {
// If using cgroups-hybrid mode then add a "" controller indicating
// it should join the cgroups v2.
if cgroups.IsCgroup2HybridMode() {
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
}
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
// GetStats fills in the stats for the subsystem.
GetStats(path string, stats *cgroups.Stats) error
// Creates and joins the cgroup represented by 'cgroupData'.
Apply(path string, c *cgroupData) error
// Apply creates and joins a cgroup, adding pid into it. Some
// subsystems use resources to pre-configure the cgroup parents
// before creating or joining it.
Apply(path string, r *configs.Resources, pid int) error
// Set sets the cgroup resources.
Set(path string, r *configs.Resources) error
}
type manager struct {
mu sync.Mutex
cgroups *configs.Cgroup
rootless bool // ignore permission-related errors
paths map[string]string
mu sync.Mutex
cgroups *configs.Cgroup
paths map[string]string
}
func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgroups.Manager {
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
// Some v1 controllers (cpu, cpuset, and devices) expect
// cgroups.Resources to not be nil in Apply.
if cg.Resources == nil {
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
}
if cg.Resources.Unified != nil {
return nil, cgroups.ErrV1NoUnified
}
if paths == nil {
var err error
paths, err = initPaths(cg)
if err != nil {
return nil, err
}
}
return &manager{
cgroups: cg,
paths: paths,
rootless: rootless,
}
}
// The absolute path to the root of the cgroup hierarchies.
var (
cgroupRootLock sync.Mutex
cgroupRoot string
)
const defaultCgroupRoot = "/sys/fs/cgroup"
func tryDefaultCgroupRoot() string {
var st, pst unix.Stat_t
// (1) it should be a directory...
err := unix.Lstat(defaultCgroupRoot, &st)
if err != nil || st.Mode&unix.S_IFDIR == 0 {
return ""
}
// (2) ... and a mount point ...
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
if err != nil {
return ""
}
if st.Dev == pst.Dev {
// parent dir has the same dev -- not a mount point
return ""
}
// (3) ... of 'tmpfs' fs type.
var fst unix.Statfs_t
err = unix.Statfs(defaultCgroupRoot, &fst)
if err != nil || fst.Type != unix.TMPFS_MAGIC {
return ""
}
// (4) it should have at least 1 entry ...
dir, err := os.Open(defaultCgroupRoot)
if err != nil {
return ""
}
names, err := dir.Readdirnames(1)
if err != nil {
return ""
}
if len(names) < 1 {
return ""
}
// ... which is a cgroup mount point.
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
return ""
}
return defaultCgroupRoot
}
// Gets the cgroupRoot.
func getCgroupRoot() (string, error) {
cgroupRootLock.Lock()
defer cgroupRootLock.Unlock()
if cgroupRoot != "" {
return cgroupRoot, nil
}
// fast path
cgroupRoot = tryDefaultCgroupRoot()
if cgroupRoot != "" {
return cgroupRoot, nil
}
// slow path: parse mountinfo
mi, err := cgroups.GetCgroupMounts(false)
if err != nil {
return "", err
}
if len(mi) < 1 {
return "", errors.New("no cgroup mount found in mountinfo")
}
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
// use its parent directory.
root := filepath.Dir(mi[0].Mountpoint)
if _, err := os.Stat(root); err != nil {
return "", err
}
cgroupRoot = root
return cgroupRoot, nil
}
type cgroupData struct {
root string
innerPath string
config *configs.Cgroup
pid int
cgroups: cg,
paths: paths,
}, nil
}
// isIgnorableError returns whether err is a permission error (in the loose
@ -171,8 +92,6 @@ func isIgnorableError(rootless bool, err error) bool {
if !rootless {
return false
}
// TODO: rm errors.Cause once we switch to %w everywhere
err = errors.Cause(err)
// Is it an ordinary EPERM?
if errors.Is(err, os.ErrPermission) {
return true
@ -186,56 +105,30 @@ func isIgnorableError(rootless bool, err error) bool {
}
func (m *manager) Apply(pid int) (err error) {
if m.cgroups == nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
c := m.cgroups
if c.Resources.Unified != nil {
return cgroups.ErrV1NoUnified
}
m.paths = make(map[string]string)
if c.Paths != nil {
cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return err
}
for name, path := range c.Paths {
// XXX(kolyshkin@): why this check is needed?
if _, ok := cgMap[name]; ok {
m.paths[name] = path
}
}
return cgroups.EnterPid(m.paths, pid)
}
d, err := getCgroupData(m.cgroups, pid)
if err != nil {
return err
}
for _, sys := range subsystems {
p, err := d.path(sys.Name())
if err != nil {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") {
continue
}
return err
name := sys.Name()
p, ok := m.paths[name]
if !ok {
continue
}
m.paths[sys.Name()] = p
if err := sys.Apply(p, d); err != nil {
if err := sys.Apply(p, c.Resources, pid); err != nil {
// In the case of rootless (including euid=0 in userns), where an
// explicit cgroup path hasn't been set, we don't bail on error in
// case of permission problems. Cases where limits have been set
// (and we couldn't create our own cgroup) are handled by Set.
if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" {
delete(m.paths, sys.Name())
// case of permission problems here, but do delete the path from
// the m.paths map, since it is either non-existent and could not
// be created, or the pid could not be added to it.
//
// Cases where limits for the subsystem have been set are handled
// later by Set, which fails with a friendly error (see
// if path == "" in Set).
if isIgnorableError(c.Rootless, err) && c.Path == "" {
delete(m.paths, name)
continue
}
return err
@ -246,9 +139,6 @@ func (m *manager) Apply(pid int) (err error) {
}
func (m *manager) Destroy() error {
if m.cgroups == nil || m.cgroups.Paths != nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
return cgroups.RemovePaths(m.paths)
@ -281,11 +171,6 @@ func (m *manager) Set(r *configs.Resources) error {
return nil
}
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.cgroups != nil && m.cgroups.Paths != nil {
return nil
}
if r.Unified != nil {
return cgroups.ErrV1NoUnified
}
@ -295,10 +180,11 @@ func (m *manager) Set(r *configs.Resources) error {
for _, sys := range subsystems {
path := m.paths[sys.Name()]
if err := sys.Set(path, r); err != nil {
if m.rootless && sys.Name() == "devices" {
// When rootless is true, errors from the device subsystem
// are ignored, as it is really not expected to work.
if m.cgroups.Rootless && sys.Name() == "devices" {
continue
}
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
// However, errors from other subsystems are not ignored.
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
if path == "" {
@ -317,7 +203,7 @@ func (m *manager) Set(r *configs.Resources) error {
// provided
func (m *manager) Freeze(state configs.FreezerState) error {
path := m.Path("freezer")
if m.cgroups == nil || path == "" {
if path == "" {
return errors.New("cannot toggle freezer: cgroups not configured for container")
}
@ -339,68 +225,6 @@ func (m *manager) GetAllPids() ([]int, error) {
return cgroups.GetAllPids(m.Path("devices"))
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
root, err := getCgroupRoot()
if err != nil {
return nil, err
}
if (c.Name != "" || c.Parent != "") && c.Path != "" {
return nil, errors.New("cgroup: either Path or Name and Parent should be used")
}
// XXX: Do not remove this code. Path safety is important! -- cyphar
cgPath := libcontainerUtils.CleanPath(c.Path)
cgParent := libcontainerUtils.CleanPath(c.Parent)
cgName := libcontainerUtils.CleanPath(c.Name)
innerPath := cgPath
if innerPath == "" {
innerPath = filepath.Join(cgParent, cgName)
}
return &cgroupData{
root: root,
innerPath: innerPath,
config: c,
pid: pid,
}, nil
}
func (raw *cgroupData) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.innerPath) {
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
}
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
return filepath.Join(parentPath, raw.innerPath), nil
}
func join(path string, pid int) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
return err
}
return cgroups.WriteCgroupProc(path, pid)
}
func (m *manager) GetPaths() map[string]string {
m.mu.Lock()
defer m.mu.Unlock()
@ -432,7 +256,7 @@ func OOMKillCount(path string) (uint64, error) {
func (m *manager) OOMKillCount() (uint64, error) {
c, err := OOMKillCount(m.Path("memory"))
// Ignore ENOENT when rootless as it couldn't create cgroup.
if err != nil && m.rootless && os.IsNotExist(err) {
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
err = nil
}

View File

@ -1,9 +1,6 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
@ -17,8 +14,8 @@ func (s *HugetlbGroup) Name() string {
return "hugetlb"
}
func (s *HugetlbGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
@ -32,29 +29,29 @@ func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
}
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
hugetlbStats := cgroups.HugetlbStats{}
if !cgroups.PathExists(path) {
return nil
}
for _, pageSize := range HugePageSizes {
hugetlbStats := cgroups.HugetlbStats{}
for _, pageSize := range cgroups.HugePageSizes() {
usage := "hugetlb." + pageSize + ".usage_in_bytes"
value, err := fscommon.GetCgroupParamUint(path, usage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", usage, err)
return err
}
hugetlbStats.Usage = value
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
return err
}
hugetlbStats.MaxUsage = value
failcnt := "hugetlb." + pageSize + ".failcnt"
value, err = fscommon.GetCgroupParamUint(path, failcnt)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
return err
}
hugetlbStats.Failcnt = value

View File

@ -1,9 +1,8 @@
// +build linux
package fs
import (
"bufio"
"errors"
"fmt"
"math"
"os"
@ -11,11 +10,11 @@ import (
"strconv"
"strings"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
const (
@ -31,8 +30,8 @@ func (s *MemoryGroup) Name() string {
return "memory"
}
func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
return join(path, d.pid)
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func setMemory(path string, val int64) error {
@ -56,7 +55,7 @@ func setMemory(path string, val int64) error {
return err
}
return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
}
func setSwap(path string, val int64) error {
@ -134,15 +133,15 @@ func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
return err
}
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *r.MemorySwappiness)
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
}
return nil
}
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY)
const file = "memory.stat"
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
if err != nil {
if os.IsNotExist(err) {
return nil
@ -155,7 +154,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
for sc.Scan() {
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
return &parseError{Path: path, File: file, Err: err}
}
stats.MemoryStats.Stats[t] = v
}
@ -220,42 +219,42 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
// are optional in the kernel.
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
return cgroups.MemoryData{}, err
}
memoryData.Usage = value
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
if err != nil {
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
return cgroups.MemoryData{}, err
}
memoryData.MaxUsage = value
value, err = fscommon.GetCgroupParamUint(path, failcnt)
if err != nil {
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
return cgroups.MemoryData{}, err
}
memoryData.Failcnt = value
value, err = fscommon.GetCgroupParamUint(path, limit)
if err != nil {
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
return cgroups.MemoryData{}, err
}
memoryData.Limit = value
return memoryData, nil
}
func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
const (
maxColumns = math.MaxUint8 + 1
filename = "memory.numa_stat"
file = "memory.numa_stat"
)
stats := cgroups.PageUsageByNUMA{}
file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY)
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
if os.IsNotExist(err) {
return stats, nil
} else if err != nil {
return stats, err
}
defer file.Close()
defer fd.Close()
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
// and it looks like this:
@ -266,7 +265,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
scanner := bufio.NewScanner(file)
scanner := bufio.NewScanner(fd)
for scanner.Scan() {
var field *cgroups.PageStats
@ -284,8 +283,7 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
} else {
// The first column was already validated,
// so be strict to the rest.
return stats, fmt.Errorf("malformed line %q in %s",
line, filename)
return stats, malformedLine(path, file, line)
}
}
key, val := byNode[0], byNode[1]
@ -296,24 +294,23 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
}
field.Total, err = strconv.ParseUint(val, 0, 64)
if err != nil {
return stats, err
return stats, &parseError{Path: path, File: file, Err: err}
}
field.Nodes = map[uint8]uint64{}
} else { // Subsequent columns: key is N<id>, val is usage.
if len(key) < 2 || key[0] != 'N' {
// This is definitely an error.
return stats, fmt.Errorf("malformed line %q in %s",
line, filename)
return stats, malformedLine(path, file, line)
}
n, err := strconv.ParseUint(key[1:], 10, 8)
if err != nil {
return cgroups.PageUsageByNUMA{}, err
return stats, &parseError{Path: path, File: file, Err: err}
}
usage, err := strconv.ParseUint(val, 10, 64)
if err != nil {
return cgroups.PageUsageByNUMA{}, err
return stats, &parseError{Path: path, File: file, Err: err}
}
field.Nodes[uint8(n)] = usage
@ -321,9 +318,8 @@ func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
}
}
err = scanner.Err()
if err != nil {
return cgroups.PageUsageByNUMA{}, err
if err := scanner.Err(); err != nil {
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
}
return stats, nil

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -16,10 +14,10 @@ func (s *NameGroup) Name() string {
return s.GroupName
}
func (s *NameGroup) Apply(path string, d *cgroupData) error {
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
if s.Join {
// ignore errors if the named cgroup does not exist
_ = join(path, d.pid)
// Ignore errors if the named cgroup does not exist.
_ = apply(path, pid)
}
return nil
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -15,8 +13,8 @@ func (s *NetClsGroup) Name() string {
return "net_cls"
}
func (s *NetClsGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -13,8 +11,8 @@ func (s *NetPrioGroup) Name() string {
return "net_prio"
}
func (s *NetPrioGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {

View File

@ -0,0 +1,186 @@
package fs
import (
"errors"
"os"
"path/filepath"
"sync"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
)
// The absolute path to the root of the cgroup hierarchies.
var (
cgroupRootLock sync.Mutex
cgroupRoot string
)
const defaultCgroupRoot = "/sys/fs/cgroup"
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
root, err := rootPath()
if err != nil {
return nil, err
}
inner, err := innerPath(cg)
if err != nil {
return nil, err
}
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := subsysPath(root, inner, name)
if err != nil {
// The non-presence of the devices subsystem
// is considered fatal for security reasons.
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
continue
}
return nil, err
}
paths[name] = path
}
return paths, nil
}
func tryDefaultCgroupRoot() string {
var st, pst unix.Stat_t
// (1) it should be a directory...
err := unix.Lstat(defaultCgroupRoot, &st)
if err != nil || st.Mode&unix.S_IFDIR == 0 {
return ""
}
// (2) ... and a mount point ...
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
if err != nil {
return ""
}
if st.Dev == pst.Dev {
// parent dir has the same dev -- not a mount point
return ""
}
// (3) ... of 'tmpfs' fs type.
var fst unix.Statfs_t
err = unix.Statfs(defaultCgroupRoot, &fst)
if err != nil || fst.Type != unix.TMPFS_MAGIC {
return ""
}
// (4) it should have at least 1 entry ...
dir, err := os.Open(defaultCgroupRoot)
if err != nil {
return ""
}
names, err := dir.Readdirnames(1)
if err != nil {
return ""
}
if len(names) < 1 {
return ""
}
// ... which is a cgroup mount point.
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
return ""
}
return defaultCgroupRoot
}
// rootPath finds and returns path to the root of the cgroup hierarchies.
func rootPath() (string, error) {
cgroupRootLock.Lock()
defer cgroupRootLock.Unlock()
if cgroupRoot != "" {
return cgroupRoot, nil
}
// fast path
cgroupRoot = tryDefaultCgroupRoot()
if cgroupRoot != "" {
return cgroupRoot, nil
}
// slow path: parse mountinfo
mi, err := cgroups.GetCgroupMounts(false)
if err != nil {
return "", err
}
if len(mi) < 1 {
return "", errors.New("no cgroup mount found in mountinfo")
}
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
// use its parent directory.
root := filepath.Dir(mi[0].Mountpoint)
if _, err := os.Stat(root); err != nil {
return "", err
}
cgroupRoot = root
return cgroupRoot, nil
}
func innerPath(c *configs.Cgroup) (string, error) {
if (c.Name != "" || c.Parent != "") && c.Path != "" {
return "", errors.New("cgroup: either Path or Name and Parent should be used")
}
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
innerPath := utils.CleanPath(c.Path)
if innerPath == "" {
cgParent := utils.CleanPath(c.Parent)
cgName := utils.CleanPath(c.Name)
innerPath = filepath.Join(cgParent, cgName)
}
return innerPath, nil
}
func subsysPath(root, inner, subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(inner) {
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
}
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
return filepath.Join(root, filepath.Base(mnt), inner), nil
}
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
return filepath.Join(parentPath, inner), nil
}
func apply(path string, pid int) error {
if path == "" {
return nil
}
if err := os.MkdirAll(path, 0o755); err != nil {
return err
}
return cgroups.WriteCgroupProc(path, pid)
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs
import (
@ -13,8 +11,8 @@ func (s *PerfEventGroup) Name() string {
return "perf_event"
}
func (s *PerfEventGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {

View File

@ -1,10 +1,7 @@
// +build linux
package fs
import (
"fmt"
"path/filepath"
"math"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
@ -18,8 +15,8 @@ func (s *PidsGroup) Name() string {
return "pids"
}
func (s *PidsGroup) Apply(path string, d *cgroupData) error {
return join(path, d.pid)
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
@ -45,21 +42,18 @@ func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
}
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
if err != nil {
return fmt.Errorf("failed to parse pids.current - %s", err)
return err
}
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
if err != nil {
return fmt.Errorf("failed to parse pids.max - %s", err)
return err
}
// Default if pids.max == "max" is 0 -- which represents "no limit".
var max uint64
if maxString != "max" {
max, err = fscommon.ParseUint(maxString, 10, 64)
if err != nil {
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
}
// If no limit is set, read from pids.max returns "max", which is
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
// represent "no limit" for pids as 0, thus this conversion.
if max == math.MaxUint64 {
max = 0
}
stats.PidsStats.Current = current

View File

@ -0,0 +1,25 @@
package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
)
type RdmaGroup struct{}
func (s *RdmaGroup) Name() string {
return "rdma"
}
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
return apply(path, pid)
}
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
return fscommon.RdmaSet(path, r)
}
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
return fscommon.RdmaGetStats(path, stats)
}

View File

@ -1,3 +0,0 @@
// +build !linux
package fs

View File

@ -1,5 +1,3 @@
// +build linux
package fs2
import (
@ -49,7 +47,8 @@ func setCpu(dirPath string, r *configs.Resources) error {
}
func statCpu(dirPath string, stats *cgroups.Stats) error {
f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY)
const file = "cpu.stat"
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
if err != nil {
return err
}
@ -59,7 +58,7 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
for sc.Scan() {
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return err
return &parseError{Path: dirPath, File: file, Err: err}
}
switch t {
case "usage_usec":
@ -81,5 +80,8 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
}
}
if err := sc.Err(); err != nil {
return &parseError{Path: dirPath, File: file, Err: err}
}
return nil
}

View File

@ -1,5 +1,3 @@
// +build linux
package fs2
import (

View File

@ -18,41 +18,37 @@ package fs2
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/pkg/errors"
"github.com/opencontainers/runc/libcontainer/utils"
)
const UnifiedMountpoint = "/sys/fs/cgroup"
func defaultDirPath(c *configs.Cgroup) (string, error) {
if (c.Name != "" || c.Parent != "") && c.Path != "" {
return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
}
if len(c.Paths) != 0 {
// never set by specconv
return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
}
// XXX: Do not remove this code. Path safety is important! -- cyphar
cgPath := libcontainerUtils.CleanPath(c.Path)
cgParent := libcontainerUtils.CleanPath(c.Parent)
cgName := libcontainerUtils.CleanPath(c.Name)
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName)
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
}
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
if (cgName != "" || cgParent != "") && cgPath != "" {
return "", errors.New("cgroup: either Path or Name and Parent should be used")
}
innerPath := cgPath
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
innerPath := utils.CleanPath(cgPath)
if innerPath == "" {
cgParent := utils.CleanPath(cgParent)
cgName := utils.CleanPath(cgName)
innerPath = filepath.Join(cgParent, cgName)
}
if filepath.IsAbs(innerPath) {
@ -89,7 +85,7 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
parts = strings.SplitN(text, ":", 3)
)
if len(parts) < 3 {
return "", errors.Errorf("invalid cgroup entry: %q", text)
return "", fmt.Errorf("invalid cgroup entry: %q", text)
}
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
if parts[0] == "0" && parts[1] == "" {

View File

@ -1,16 +1,15 @@
// +build linux
package fs2
import (
"fmt"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func isRWM(perms devices.Permissions) bool {
@ -64,7 +63,7 @@ func setDevices(dirPath string, r *configs.Resources) error {
}
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
if err != nil {
return errors.Errorf("cannot get dir FD for %s", dirPath)
return fmt.Errorf("cannot get dir FD for %s", dirPath)
}
defer unix.Close(dirFD)
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {

View File

@ -1,19 +1,17 @@
// +build linux
package fs2
import (
"bufio"
stdErrors "errors"
"errors"
"fmt"
"os"
"strings"
"time"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func setFreezer(dirPath string, state configs.FreezerState) error {
@ -26,7 +24,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
case configs.Thawed:
stateStr = "0"
default:
return errors.Errorf("invalid freezer state %q requested", state)
return fmt.Errorf("invalid freezer state %q requested", state)
}
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
@ -37,7 +35,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
if state != configs.Frozen {
return nil
}
return errors.Wrap(err, "freezer not supported")
return fmt.Errorf("freezer not supported: %w", err)
}
defer fd.Close()
@ -48,7 +46,7 @@ func setFreezer(dirPath string, state configs.FreezerState) error {
if actualState, err := readFreezer(dirPath, fd); err != nil {
return err
} else if actualState != state {
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
}
return nil
}
@ -58,7 +56,7 @@ func getFreezer(dirPath string) (configs.FreezerState, error) {
if err != nil {
// If the kernel is too old, then we just treat the freezer as being in
// an "undefined" state.
if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) {
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
err = nil
}
return configs.Undefined, err
@ -82,7 +80,7 @@ func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
case "1\n":
return waitFrozen(dirPath)
default:
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
}
}

View File

@ -1,8 +1,7 @@
// +build linux
package fs2
import (
"errors"
"fmt"
"os"
"strings"
@ -10,9 +9,10 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
)
type parseError = fscommon.ParseError
type manager struct {
config *configs.Cgroup
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
@ -20,16 +20,12 @@ type manager struct {
// controllers is content of "cgroup.controllers" file.
// excludes pseudo-controllers ("devices" and "freezer").
controllers map[string]struct{}
rootless bool
}
// NewManager creates a manager for cgroup v2 unified hierarchy.
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
// If dirPath is empty, it is automatically set using config.
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
if config == nil {
config = &configs.Cgroup{}
}
func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
if dirPath == "" {
var err error
dirPath, err = defaultDirPath(config)
@ -39,9 +35,8 @@ func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.
}
m := &manager{
config: config,
dirPath: dirPath,
rootless: rootless,
config: config,
dirPath: dirPath,
}
return m, nil
}
@ -53,7 +48,7 @@ func (m *manager) getControllers() error {
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
if err != nil {
if m.rootless && m.config.Path == "" {
if m.config.Rootless && m.config.Path == "" {
return nil
}
return err
@ -73,12 +68,12 @@ func (m *manager) Apply(pid int) error {
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
if m.rootless {
if m.config.Rootless {
if m.config.Path == "" {
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
return nil
}
return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups")
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
}
}
return err
@ -123,13 +118,20 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
if len(errs) > 0 && !m.rootless {
return st, errors.Errorf("error while statting cgroup v2: %+v", errs)
// rdma (since kernel 4.11)
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
if len(errs) > 0 && !m.config.Rootless {
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
}
return st, nil
}
func (m *manager) Freeze(state configs.FreezerState) error {
if m.config.Resources == nil {
return errors.New("cannot toggle freezer: cgroups not configured for container")
}
if err := setFreezer(m.dirPath, state); err != nil {
return err
}
@ -146,6 +148,9 @@ func (m *manager) Path(_ string) string {
}
func (m *manager) Set(r *configs.Resources) error {
if r == nil {
return nil
}
if err := m.getControllers(); err != nil {
return err
}
@ -167,10 +172,10 @@ func (m *manager) Set(r *configs.Resources) error {
}
// devices (since kernel 4.15, pseudo-controller)
//
// When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
// However, errors from other subsystems are not ignored.
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
if err := setDevices(m.dirPath, r); err != nil && !m.rootless {
if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless {
return err
}
// cpuset (since kernel 5.0)
@ -181,6 +186,10 @@ func (m *manager) Set(r *configs.Resources) error {
if err := setHugeTlb(m.dirPath, r); err != nil {
return err
}
// rdma (since kernel 4.11)
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
return err
}
// freezer (since kernel 5.2, pseudo-controller)
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
return err
@ -198,9 +207,8 @@ func (m *manager) setUnified(res map[string]string) error {
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
}
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
errC := errors.Cause(err)
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) {
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
// Check if a controller is available,
// to give more specific error if not.
sk := strings.SplitN(k, ".", 2)
@ -212,7 +220,7 @@ func (m *manager) setUnified(res map[string]string) error {
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
}
}
return errors.Wrapf(err, "can't set unified resource %q", k)
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
}
}
@ -243,7 +251,7 @@ func OOMKillCount(path string) (uint64, error) {
func (m *manager) OOMKillCount() (uint64, error) {
c, err := OOMKillCount(m.dirPath)
if err != nil && m.rootless && os.IsNotExist(err) {
if err != nil && m.config.Rootless && os.IsNotExist(err) {
err = nil
}

View File

@ -1,12 +1,8 @@
// +build linux
package fs2
import (
"strconv"
"github.com/pkg/errors"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
@ -30,10 +26,8 @@ func setHugeTlb(dirPath string, r *configs.Resources) error {
}
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
hugePageSizes, _ := cgroups.GetHugePageSize()
hugetlbStats := cgroups.HugetlbStats{}
for _, pagesize := range hugePageSizes {
for _, pagesize := range cgroups.HugePageSizes() {
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
if err != nil {
return err
@ -43,7 +37,7 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
fileName := "hugetlb." + pagesize + ".events"
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
if err != nil {
return errors.Wrap(err, "failed to read stats")
return err
}
hugetlbStats.Failcnt = value

View File

@ -1,5 +1,3 @@
// +build linux
package fs2
import (
@ -117,13 +115,14 @@ func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error
ret[parts[0]] = parts[1:]
}
if err := scanner.Err(); err != nil {
return nil, err
return nil, &parseError{Path: dirPath, File: name, Err: err}
}
return ret, nil
}
func statIo(dirPath string, stats *cgroups.Stats) error {
values, err := readCgroup2MapFile(dirPath, "io.stat")
const file = "io.stat"
values, err := readCgroup2MapFile(dirPath, file)
if err != nil {
return err
}
@ -136,11 +135,11 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
}
major, err := strconv.ParseUint(d[0], 10, 64)
if err != nil {
return err
return &parseError{Path: dirPath, File: file, Err: err}
}
minor, err := strconv.ParseUint(d[1], 10, 64)
if err != nil {
return err
return &parseError{Path: dirPath, File: file, Err: err}
}
for _, item := range v {
@ -177,7 +176,7 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
value, err := strconv.ParseUint(d[1], 10, 64)
if err != nil {
return err
return &parseError{Path: dirPath, File: file, Err: err}
}
entry := cgroups.BlkioStatEntry{

View File

@ -1,19 +1,18 @@
// +build linux
package fs2
import (
"bufio"
"errors"
"math"
"os"
"strconv"
"strings"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// numToStr converts an int64 value to a string for writing to a
@ -75,8 +74,8 @@ func setMemory(dirPath string, r *configs.Resources) error {
}
func statMemory(dirPath string, stats *cgroups.Stats) error {
// Set stats from memory.stat.
statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY)
const file = "memory.stat"
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
if err != nil {
return err
}
@ -86,10 +85,13 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
for sc.Scan() {
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
return &parseError{Path: dirPath, File: file, Err: err}
}
stats.MemoryStats.Stats[t] = v
}
if err := sc.Err(); err != nil {
return &parseError{Path: dirPath, File: file, Err: err}
}
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
// cgroup v2 is always hierarchical.
@ -139,13 +141,13 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
// swapaccount=0 kernel boot parameter is given.
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
return cgroups.MemoryData{}, err
}
memoryData.Usage = value
value, err = fscommon.GetCgroupParamUint(path, limit)
if err != nil {
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
return cgroups.MemoryData{}, err
}
memoryData.Limit = value
@ -153,7 +155,8 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
}
func statsFromMeminfo(stats *cgroups.Stats) error {
f, err := os.Open("/proc/meminfo")
const file = "/proc/meminfo"
f, err := os.Open(file)
if err != nil {
return err
}
@ -190,7 +193,7 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
*p, err = strconv.ParseUint(vStr, 10, 64)
if err != nil {
return errors.Wrap(err, "parsing /proc/meminfo "+k)
return &parseError{File: file, Err: errors.New("bad value for " + k)}
}
found++
@ -199,8 +202,8 @@ func statsFromMeminfo(stats *cgroups.Stats) error {
break
}
}
if sc.Err() != nil {
return sc.Err()
if err := sc.Err(); err != nil {
return &parseError{Path: "", File: file, Err: err}
}
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024

View File

@ -1,17 +1,16 @@
// +build linux
package fs2
import (
"errors"
"math"
"os"
"path/filepath"
"strings"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func isPidsSet(r *configs.Resources) bool {
@ -53,22 +52,18 @@ func statPids(dirPath string, stats *cgroups.Stats) error {
if os.IsNotExist(err) {
return statPidsFromCgroupProcs(dirPath, stats)
}
return errors.Wrap(err, "failed to parse pids.current")
return err
}
maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
if err != nil {
return errors.Wrap(err, "failed to parse pids.max")
return err
}
// Default if pids.max == "max" is 0 -- which represents "no limit".
var max uint64
if maxString != "max" {
max, err = fscommon.ParseUint(maxString, 10, 64)
if err != nil {
return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
maxString, filepath.Join(dirPath, "pids.max"))
}
// If no limit is set, read from pids.max returns "max", which is
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
// represent "no limit" for pids as 0, thus this conversion.
if max == math.MaxUint64 {
max = 0
}
stats.PidsStats.Current = current

View File

@ -0,0 +1,121 @@
package fscommon
import (
"bufio"
"errors"
"math"
"os"
"strconv"
"strings"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
// parseRdmaKV parses raw string to RdmaEntry.
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
var value uint32
parts := strings.SplitN(raw, "=", 3)
if len(parts) != 2 {
return errors.New("Unable to parse RDMA entry")
}
k, v := parts[0], parts[1]
if v == "max" {
value = math.MaxUint32
} else {
val64, err := strconv.ParseUint(v, 10, 32)
if err != nil {
return err
}
value = uint32(val64)
}
if k == "hca_handle" {
entry.HcaHandles = value
} else if k == "hca_object" {
entry.HcaObjects = value
}
return nil
}
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
// example entry: mlx4_0 hca_handle=2 hca_object=2000
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
rdmaEntries := make([]cgroups.RdmaEntry, 0)
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
if err != nil {
return nil, err
}
defer fd.Close() //nolint:errorlint
scanner := bufio.NewScanner(fd)
for scanner.Scan() {
parts := strings.SplitN(scanner.Text(), " ", 4)
if len(parts) == 3 {
entry := new(cgroups.RdmaEntry)
entry.Device = parts[0]
err = parseRdmaKV(parts[1], entry)
if err != nil {
continue
}
err = parseRdmaKV(parts[2], entry)
if err != nil {
continue
}
rdmaEntries = append(rdmaEntries, *entry)
}
}
return rdmaEntries, scanner.Err()
}
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
func RdmaGetStats(path string, stats *cgroups.Stats) error {
currentEntries, err := readRdmaEntries(path, "rdma.current")
if err != nil {
if errors.Is(err, os.ErrNotExist) {
err = nil
}
return err
}
maxEntries, err := readRdmaEntries(path, "rdma.max")
if err != nil {
return err
}
// If device got removed between reading two files, ignore returning stats.
if len(currentEntries) != len(maxEntries) {
return nil
}
stats.RdmaStats = cgroups.RdmaStats{
RdmaLimit: maxEntries,
RdmaCurrent: currentEntries,
}
return nil
}
func createCmdString(device string, limits configs.LinuxRdma) string {
cmdString := device
if limits.HcaHandles != nil {
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
}
if limits.HcaObjects != nil {
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
}
return cmdString
}
// RdmaSet sets RDMA resources.
func RdmaSet(path string, r *configs.Resources) error {
for device, limits := range r.Rdma {
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
return err
}
}
return nil
}

View File

@ -1,11 +1,10 @@
// +build linux
package fscommon
import (
"errors"
"fmt"
"math"
"path"
"strconv"
"strings"
@ -13,8 +12,6 @@ import (
)
var (
ErrNotValidFormat = errors.New("line is not a valid key value format")
// Deprecated: use cgroups.OpenFile instead.
OpenFile = cgroups.OpenFile
// Deprecated: use cgroups.ReadFile instead.
@ -23,6 +20,19 @@ var (
WriteFile = cgroups.WriteFile
)
// ParseError records a parse error details, including the file path.
type ParseError struct {
Path string
File string
Err error
}
func (e *ParseError) Error() string {
return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error()
}
func (e *ParseError) Unwrap() error { return e.Err }
// ParseUint converts a string to an uint64 integer.
// Negative values are returned at zero as, due to kernel bugs,
// some of the memory cgroup stats can be negative.
@ -34,7 +44,7 @@ func ParseUint(s string, base, bitSize int) (uint64, error) {
// 2. Handle negative values lesser than MinInt64
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
} else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 {
return 0, nil
}
@ -56,7 +66,7 @@ func ParseKeyValue(t string) (string, uint64, error) {
value, err := ParseUint(parts[1], 10, 64)
if err != nil {
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
return "", 0, err
}
return parts[0], value, nil
@ -71,11 +81,15 @@ func GetValueByKey(path, file, key string) (uint64, error) {
return 0, err
}
lines := strings.Split(string(content), "\n")
lines := strings.Split(content, "\n")
for _, line := range lines {
arr := strings.Split(line, " ")
if len(arr) == 2 && arr[0] == key {
return ParseUint(arr[1], 10, 64)
val, err := ParseUint(arr[1], 10, 64)
if err != nil {
err = &ParseError{Path: path, File: file, Err: err}
}
return val, err
}
}
@ -96,7 +110,7 @@ func GetCgroupParamUint(path, file string) (uint64, error) {
res, err := ParseUint(contents, 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse file %q", path+"/"+file)
return res, &ParseError{Path: path, File: file, Err: err}
}
return res, nil
}
@ -115,7 +129,7 @@ func GetCgroupParamInt(path, file string) (int64, error) {
res, err := strconv.ParseInt(contents, 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse %q as a int from Cgroup file %q", contents, path+"/"+file)
return res, &ParseError{Path: path, File: file, Err: err}
}
return res, nil
}

View File

@ -0,0 +1,27 @@
package cgroups
import (
"io/fs"
"path/filepath"
)
// GetAllPids returns all pids from the cgroup identified by path, and all its
// sub-cgroups.
func GetAllPids(path string) ([]int, error) {
var pids []int
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
if iErr != nil {
return iErr
}
if !d.IsDir() {
return nil
}
cPids, err := readProcsFile(p)
if err != nil {
return err
}
pids = append(pids, cPids...)
return nil
})
return pids, err
}

View File

@ -1,5 +1,3 @@
// +build linux
package cgroups
type ThrottlingData struct {
@ -126,7 +124,7 @@ type BlkioStatEntry struct {
}
type BlkioStats struct {
// number of bytes tranferred to and from the block device
// number of bytes transferred to and from the block device
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
@ -146,6 +144,17 @@ type HugetlbStats struct {
Failcnt uint64 `json:"failcnt"`
}
type RdmaEntry struct {
Device string `json:"device,omitempty"`
HcaHandles uint32 `json:"hca_handles,omitempty"`
HcaObjects uint32 `json:"hca_objects,omitempty"`
}
type RdmaStats struct {
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
@ -154,6 +163,7 @@ type Stats struct {
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
}
func NewStats() *Stats {

View File

@ -3,6 +3,7 @@ package systemd
import (
"bufio"
"context"
"errors"
"fmt"
"math"
"os"
@ -14,11 +15,11 @@ import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
dbus "github.com/godbus/dbus/v5"
"github.com/sirupsen/logrus"
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
const (
@ -92,7 +93,7 @@ func groupPrefix(ruleType devices.Type) (string, error) {
case devices.CharDevice:
return "char-", nil
default:
return "", errors.Errorf("device type %v has no group prefix", ruleType)
return "", fmt.Errorf("device type %v has no group prefix", ruleType)
}
}
@ -142,9 +143,9 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
)
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
if err == nil {
err = errors.Errorf("wrong number of fields")
err = errors.New("wrong number of fields")
}
return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
}
if currMajor == ruleMajor {
@ -152,7 +153,7 @@ func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
}
}
if err := scanner.Err(); err != nil {
return "", errors.Wrap(err, "reading /proc/devices")
return "", fmt.Errorf("reading /proc/devices: %w", err)
}
// Couldn't find the device group.
return "", nil
@ -192,12 +193,12 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
configEmu := &cgroupdevices.Emulator{}
for _, rule := range r.Devices {
if err := configEmu.Apply(*rule); err != nil {
return nil, errors.Wrap(err, "apply rule for systemd")
return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
}
}
// systemd doesn't support blacklists. So we log a warning, and tell
// systemd to act as a deny-all whitelist. This ruleset will be replaced
// with our normal fallback code. This may result in spurrious errors, but
// with our normal fallback code. This may result in spurious errors, but
// the only other option is to error out here.
if configEmu.IsBlacklist() {
// However, if we're dealing with an allow-all rule then we can do it.
@ -213,19 +214,19 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// whitelist which is the default for devices.Emulator.
finalRules, err := configEmu.Rules()
if err != nil {
return nil, errors.Wrap(err, "get simplified rules for systemd")
return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
}
var deviceAllowList []deviceAllowEntry
for _, rule := range finalRules {
if !rule.Allow {
// Should never happen.
return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
}
switch rule.Type {
case devices.BlockDevice, devices.CharDevice:
default:
// Should never happen.
return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
}
entry := deviceAllowEntry{
@ -271,7 +272,7 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
// "_ n:* _" rules require a device group from /proc/devices.
group, err := findDeviceGroup(rule.Type, rule.Major)
if err != nil {
return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
}
if group == "" {
// Couldn't find a group.
@ -288,7 +289,13 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err
entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
}
}
deviceAllowList = append(deviceAllowList, entry)
// systemd will issue a warning if the path we give here doesn't exist.
// Since all of this logic is best-effort anyway (we manually set these
// rules separately to systemd) we can safely skip entries that don't
// have a corresponding path.
if _, err := os.Stat(entry.Path); err == nil {
deviceAllowList = append(deviceAllowList, entry)
}
}
properties = append(properties, newProp("DeviceAllow", deviceAllowList))
@ -350,7 +357,7 @@ func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Pr
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
if s != "done" {
resetFailedUnit(cm, unitName)
return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
}
case <-timeout.C:
resetFailedUnit(cm, unitName)
@ -449,10 +456,13 @@ func systemdVersionAtoi(verStr string) (int, error) {
re := regexp.MustCompile(`v?([0-9]+)`)
matches := re.FindStringSubmatch(verStr)
if len(matches) < 2 {
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
return 0, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
}
ver, err := strconv.Atoi(matches[1])
return ver, errors.Wrapf(err, "can't parse version %s", verStr)
if err != nil {
return -1, fmt.Errorf("can't parse version: %w", err)
}
return ver, nil
}
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {

View File

@ -1,12 +1,10 @@
package systemd
import (
"encoding/binary"
"errors"
"math/big"
"strconv"
"strings"
"github.com/bits-and-blooms/bitset"
"github.com/pkg/errors"
)
// RangeToBits converts a text representation of a CPU mask (as written to
@ -14,7 +12,7 @@ import (
// with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value).
func RangeToBits(str string) ([]byte, error) {
bits := &bitset.BitSet{}
bits := new(big.Int)
for _, r := range strings.Split(str, ",") {
// allow extra spaces around
@ -36,32 +34,22 @@ func RangeToBits(str string) ([]byte, error) {
if start > end {
return nil, errors.New("invalid range: " + r)
}
for i := uint(start); i <= uint(end); i++ {
bits.Set(i)
for i := start; i <= end; i++ {
bits.SetBit(bits, int(i), 1)
}
} else {
val, err := strconv.ParseUint(ranges[0], 10, 32)
if err != nil {
return nil, err
}
bits.Set(uint(val))
bits.SetBit(bits, int(val), 1)
}
}
val := bits.Bytes()
if len(val) == 0 {
ret := bits.Bytes()
if len(ret) == 0 {
// do not allow empty values
return nil, errors.New("empty value")
}
ret := make([]byte, len(val)*8)
for i := range val {
// bitset uses BigEndian internally
binary.BigEndian.PutUint64(ret[i*8:], val[len(val)-1-i])
}
// remove upper all-zero bytes
for ret[0] == 0 {
ret = ret[1:]
}
return ret, nil
}

View File

@ -1,9 +1,8 @@
// +build linux
package systemd
import (
"context"
"errors"
"fmt"
"sync"
@ -82,8 +81,6 @@ func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
}
}
var errDbusConnClosed = dbus.ErrClosed.Error()
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
// connection, the connection is re-established and the op is retried. This helps
// with the situation when dbus is restarted and we have a stale connection.
@ -94,7 +91,10 @@ func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) er
return err
}
err = op(conn)
if !isDbusError(err, errDbusConnClosed) {
if err == nil {
return nil
}
if !errors.Is(err, dbus.ErrClosed) {
return err
}
d.resetConnection(conn)

View File

@ -1,71 +0,0 @@
// +build !linux
package systemd
import (
"errors"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func IsRunningSystemd() bool {
return false
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
return nil, errors.New("Systemd not supported")
}
func (m *Manager) Apply(pid int) error {
return errors.New("Systemd not supported")
}
func (m *Manager) GetPids() ([]int, error) {
return nil, errors.New("Systemd not supported")
}
func (m *Manager) GetAllPids() ([]int, error) {
return nil, errors.New("Systemd not supported")
}
func (m *Manager) Destroy() error {
return errors.New("Systemd not supported")
}
func (m *Manager) GetPaths() map[string]string {
return nil
}
func (m *Manager) Path(_ string) string {
return ""
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
return nil, errors.New("Systemd not supported")
}
func (m *Manager) Set(container *configs.Config) error {
return errors.New("Systemd not supported")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
return errors.New("Systemd not supported")
}
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
return errors.New("Systemd not supported")
}
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
return nil, errors.New("Systemd not supported")
}
func (m *Manager) Exists() bool {
return false
}

View File

@ -1,10 +1,10 @@
// +build linux
package systemd
import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
@ -13,8 +13,8 @@ import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
dbus "github.com/godbus/dbus/v5"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/pkg/errors"
)
// newUserSystemdDbus creates a connection for systemd user-instance.
@ -31,17 +31,17 @@ func newUserSystemdDbus() (*systemdDbus.Conn, error) {
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
conn, err := dbus.Dial(addr)
if err != nil {
return nil, errors.Wrapf(err, "error while dialing %q", addr)
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
}
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
err = conn.Auth(methods)
if err != nil {
conn.Close()
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
}
if err = conn.Hello(); err != nil {
conn.Close()
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
}
return conn, nil
})
@ -57,7 +57,7 @@ func DetectUID() (int, error) {
}
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
if err != nil {
return -1, errors.Wrapf(err, "could not execute `busctl --user --no-pager status`: %q", string(b))
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
}
scanner := bufio.NewScanner(bytes.NewReader(b))
for scanner.Scan() {
@ -66,7 +66,7 @@ func DetectUID() (int, error) {
uidStr := strings.TrimPrefix(s, "OwnerUID=")
i, err := strconv.Atoi(uidStr)
if err != nil {
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
}
return i, nil
}
@ -93,7 +93,7 @@ func DetectUserDbusSessionBusAddress() (string, error) {
}
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
if err != nil {
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
return "", fmt.Errorf("could not execute `systemctl --user --no-pager show-environment` (output=%q): %w", string(b), err)
}
scanner := bufio.NewScanner(bytes.NewReader(b))
for scanner.Scan() {

View File

@ -1,5 +1,3 @@
// +build linux
package systemd
import (
@ -26,12 +24,25 @@ type legacyManager struct {
dbus *dbusConnManager
}
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
if cg.Rootless {
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
}
if cg.Resources != nil && cg.Resources.Unified != nil {
return nil, cgroups.ErrV1NoUnified
}
if paths == nil {
var err error
paths, err = initPaths(cg)
if err != nil {
return nil, err
}
}
return &legacyManager{
cgroups: cg,
paths: paths,
dbus: newDbusConnManager(false),
}
}, nil
}
type subsystem interface {
@ -59,6 +70,7 @@ var legacySubsystems = []subsystem{
&fs.NetPrioGroup{},
&fs.NetClsGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
&fs.RdmaGroup{},
}
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
@ -100,6 +112,53 @@ func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]syst
return properties, nil
}
// initPaths figures out and returns paths to cgroups.
func initPaths(c *configs.Cgroup) (map[string]string, error) {
slice := "system.slice"
if c.Parent != "" {
var err error
slice, err = ExpandSlice(c.Parent)
if err != nil {
return nil, err
}
}
unit := getUnitName(c)
paths := make(map[string]string)
for _, s := range legacySubsystems {
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
if err != nil {
// Even if it's `not found` error, we'll return err
// because devices cgroup is hard requirement for
// container security.
if s.Name() == "devices" {
return nil, err
}
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
paths[s.Name()] = subsystemPath
}
// If systemd is using cgroups-hybrid mode then add the slice path of
// this container to the paths so the following process executed with
// "runc exec" joins that cgroup as well.
if cgroups.IsCgroup2HybridMode() {
// "" means cgroup-hybrid path
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
if err != nil && cgroups.IsNotFound(err) {
return nil, err
}
paths[""] = cgroupsHybridPath
}
return paths, nil
}
func (m *legacyManager) Apply(pid int) error {
var (
c = m.cgroups
@ -108,27 +167,8 @@ func (m *legacyManager) Apply(pid int) error {
properties []systemdDbus.Property
)
if c.Resources.Unified != nil {
return cgroups.ErrV1NoUnified
}
m.mu.Lock()
defer m.mu.Unlock()
if c.Paths != nil {
paths := make(map[string]string)
cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return err
}
// XXX(kolyshkin@): why this check is needed?
for name, path := range c.Paths {
if _, ok := cgMap[name]; ok {
paths[name] = path
}
}
m.paths = paths
return cgroups.EnterPid(m.paths, pid)
}
if c.Parent != "" {
slice = c.Parent
@ -136,12 +176,14 @@ func (m *legacyManager) Apply(pid int) error {
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
// If we create a slice, the parent is defined via a Wants=.
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
// Otherwise it's a scope, which we put into a Slice=.
properties = append(properties, systemdDbus.PropSlice(slice))
// Assume scopes always support delegation (supported since systemd v218).
properties = append(properties, newProp("Delegate", true))
}
// only add pid if its valid, -1 is used w/ general slice creation.
@ -149,12 +191,6 @@ func (m *legacyManager) Apply(pid int) error {
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
}
// Check if we can delegate. This is only supported on systemd versions 218 and above.
if !strings.HasSuffix(unitName, ".slice") {
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
@ -174,26 +210,6 @@ func (m *legacyManager) Apply(pid int) error {
return err
}
paths := make(map[string]string)
for _, s := range legacySubsystems {
subsystemPath, err := getSubsystemPath(m.cgroups, s.Name())
if err != nil {
// Even if it's `not found` error, we'll return err
// because devices cgroup is hard requirement for
// container security.
if s.Name() == "devices" {
return err
}
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return err
}
paths[s.Name()] = subsystemPath
}
m.paths = paths
if err := m.joinCgroups(pid); err != nil {
return err
}
@ -202,9 +218,6 @@ func (m *legacyManager) Apply(pid int) error {
}
func (m *legacyManager) Destroy() error {
if m.cgroups.Paths != nil {
return nil
}
m.mu.Lock()
defer m.mu.Unlock()
@ -254,7 +267,7 @@ func (m *legacyManager) joinCgroups(pid int) error {
return nil
}
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
if err != nil {
return "", err
@ -267,17 +280,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
slice := "system.slice"
if c.Parent != "" {
slice = c.Parent
}
slice, err = ExpandSlice(slice)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
return filepath.Join(mountpoint, initPath, slice, unit), nil
}
func (m *legacyManager) Freeze(state configs.FreezerState) error {
@ -399,9 +402,7 @@ func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (
}
func (m *legacyManager) Set(r *configs.Resources) error {
// If Paths are set, then we are just joining cgroups paths
// and there is no need to set any values.
if m.cgroups.Paths != nil {
if r == nil {
return nil
}
if r.Unified != nil {

View File

@ -1,10 +1,11 @@
// +build linux
package systemd
import (
"bufio"
"errors"
"fmt"
"math"
"os"
"path/filepath"
"strconv"
"strings"
@ -12,29 +13,39 @@ import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
type unifiedManager struct {
mu sync.Mutex
cgroups *configs.Cgroup
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
path string
rootless bool
dbus *dbusConnManager
path string
dbus *dbusConnManager
fsMgr cgroups.Manager
}
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
return &unifiedManager{
cgroups: config,
path: path,
rootless: rootless,
dbus: newDbusConnManager(rootless),
func NewUnifiedManager(config *configs.Cgroup, path string) (cgroups.Manager, error) {
m := &unifiedManager{
cgroups: config,
path: path,
dbus: newDbusConnManager(config.Rootless),
}
if err := m.initPath(); err != nil {
return nil, err
}
fsMgr, err := fs2.NewManager(config, m.path)
if err != nil {
return nil, err
}
m.fsMgr = fsMgr
return m, nil
}
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
@ -233,12 +244,8 @@ func (m *unifiedManager) Apply(pid int) error {
properties []systemdDbus.Property
)
if c.Paths != nil {
return cgroups.WriteCgroupProc(m.path, pid)
}
slice := "system.slice"
if m.rootless {
if m.cgroups.Rootless {
slice = "user.slice"
}
if c.Parent != "" {
@ -247,12 +254,14 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
// if we create a slice, the parent is defined via a Wants=
if strings.HasSuffix(unitName, ".slice") {
// If we create a slice, the parent is defined via a Wants=.
properties = append(properties, systemdDbus.PropWants(slice))
} else {
// otherwise, we use Slice=
// Otherwise it's a scope, which we put into a Slice=.
properties = append(properties, systemdDbus.PropSlice(slice))
// Assume scopes always support delegation (supported since systemd v218).
properties = append(properties, newProp("Delegate", true))
}
// only add pid if its valid, -1 is used w/ general slice creation.
@ -260,12 +269,6 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
}
// Check if we can delegate. This is only supported on systemd versions 218 and above.
if !strings.HasSuffix(unitName, ".slice") {
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
@ -282,22 +285,62 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties, c.SystemdProps...)
if err := startUnit(m.dbus, unitName, properties); err != nil {
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
}
if err := m.initPath(); err != nil {
return err
}
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
return err
}
if c.OwnerUID != nil {
// The directory itself must be chowned.
err := os.Chown(m.path, *c.OwnerUID, -1)
if err != nil {
return err
}
filesToChown, err := cgroupFilesToChown()
if err != nil {
return err
}
for _, v := range filesToChown {
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
// Some files might not be present.
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
}
}
return nil
}
func (m *unifiedManager) Destroy() error {
if m.cgroups.Paths != nil {
return nil
// The kernel exposes a list of files that should be chowned to the delegate
// uid in /sys/kernel/cgroup/delegate. If the file is not present
// (Linux < 4.15), use the initial values mentioned in cgroups(7).
func cgroupFilesToChown() ([]string, error) {
const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"
f, err := os.Open(cgroupDelegateFile)
if err != nil {
return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
}
defer f.Close()
filesToChown := []string{}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
filesToChown = append(filesToChown, scanner.Text())
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
}
return filesToChown, nil
}
func (m *unifiedManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
@ -307,8 +350,8 @@ func (m *unifiedManager) Destroy() error {
}
// systemd 239 do not remove sub-cgroups.
err := cgroups.RemovePath(m.path)
// cgroups.RemovePath has handled ErrNotExist
err := m.fsMgr.Destroy()
// fsMgr.Destroy has handled ErrNotExist
if err != nil {
return err
}
@ -317,7 +360,6 @@ func (m *unifiedManager) Destroy() error {
}
func (m *unifiedManager) Path(_ string) string {
_ = m.initPath()
return m.path
}
@ -326,7 +368,7 @@ func (m *unifiedManager) Path(_ string) string {
func (m *unifiedManager) getSliceFull() (string, error) {
c := m.cgroups
slice := "system.slice"
if m.rootless {
if c.Rootless {
slice = "user.slice"
}
if c.Parent != "" {
@ -337,7 +379,7 @@ func (m *unifiedManager) getSliceFull() (string, error) {
}
}
if m.rootless {
if c.Rootless {
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
if err != nil {
@ -375,58 +417,36 @@ func (m *unifiedManager) initPath() error {
return nil
}
func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
if err := m.initPath(); err != nil {
return nil, err
}
return fs2.NewManager(m.cgroups, m.path, m.rootless)
}
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
fsMgr, err := m.fsManager()
if err != nil {
return err
}
return fsMgr.Freeze(state)
return m.fsMgr.Freeze(state)
}
func (m *unifiedManager) GetPids() ([]int, error) {
if err := m.initPath(); err != nil {
return nil, err
}
return cgroups.GetPids(m.path)
}
func (m *unifiedManager) GetAllPids() ([]int, error) {
if err := m.initPath(); err != nil {
return nil, err
}
return cgroups.GetAllPids(m.path)
}
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
fsMgr, err := m.fsManager()
if err != nil {
return nil, err
}
return fsMgr.GetStats()
return m.fsMgr.GetStats()
}
func (m *unifiedManager) Set(r *configs.Resources) error {
if r == nil {
return nil
}
properties, err := genV2ResourcesProperties(r, m.dbus)
if err != nil {
return err
}
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
return errors.Wrap(err, "error while setting unit properties")
return fmt.Errorf("unable to set unit properties: %w", err)
}
fsMgr, err := m.fsManager()
if err != nil {
return err
}
return fsMgr.Set(r)
return m.fsMgr.Set(r)
}
func (m *unifiedManager) GetPaths() map[string]string {
@ -440,11 +460,7 @@ func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
}
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
fsMgr, err := m.fsManager()
if err != nil {
return configs.Undefined, err
}
return fsMgr.GetFreezerState()
return m.fsMgr.GetFreezerState()
}
func (m *unifiedManager) Exists() bool {
@ -452,9 +468,5 @@ func (m *unifiedManager) Exists() bool {
}
func (m *unifiedManager) OOMKillCount() (uint64, error) {
fsMgr, err := m.fsManager()
if err != nil {
return 0, err
}
return fsMgr.OOMKillCount()
return m.fsMgr.OOMKillCount()
}

View File

@ -1,5 +1,3 @@
// +build linux
package cgroups
import (
@ -7,7 +5,6 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
@ -23,11 +20,14 @@ import (
const (
CgroupProcesses = "cgroup.procs"
unifiedMountpoint = "/sys/fs/cgroup"
hybridMountpoint = "/sys/fs/cgroup/unified"
)
var (
isUnifiedOnce sync.Once
isUnified bool
isHybridOnce sync.Once
isHybrid bool
)
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
@ -49,6 +49,24 @@ func IsCgroup2UnifiedMode() bool {
return isUnified
}
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
func IsCgroup2HybridMode() bool {
isHybridOnce.Do(func() {
var st unix.Statfs_t
err := unix.Statfs(hybridMountpoint, &st)
if err != nil {
isHybrid = false
if !os.IsNotExist(err) {
// Report unexpected errors.
logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
}
return
}
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
})
return isHybrid
}
type Mount struct {
Mountpoint string
Root string
@ -118,8 +136,8 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
func readProcsFile(file string) ([]int, error) {
f, err := os.Open(file)
func readProcsFile(dir string) ([]int, error) {
f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY)
if err != nil {
return nil, err
}
@ -210,7 +228,7 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
func rmdir(path string) error {
err := unix.Rmdir(path)
if err == nil || err == unix.ENOENT {
if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
return nil
}
return &os.PathError{Op: "rmdir", Path: path, Err: err}
@ -224,7 +242,7 @@ func RemovePath(path string) error {
return nil
}
infos, err := ioutil.ReadDir(path)
infos, err := os.ReadDir(path)
if err != nil {
if os.IsNotExist(err) {
err = nil
@ -284,40 +302,61 @@ func RemovePaths(paths map[string]string) (err error) {
return fmt.Errorf("Failed to remove paths: %v", paths)
}
func GetHugePageSize() ([]string, error) {
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return nil, err
}
files, err := dir.Readdirnames(0)
dir.Close()
if err != nil {
return nil, err
}
var (
hugePageSizes []string
initHPSOnce sync.Once
)
return getHugePageSizeFromFilenames(files)
func HugePageSizes() []string {
initHPSOnce.Do(func() {
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return
}
files, err := dir.Readdirnames(0)
dir.Close()
if err != nil {
return
}
hugePageSizes, err = getHugePageSizeFromFilenames(files)
if err != nil {
logrus.Warn("HugePageSizes: ", err)
}
})
return hugePageSizes
}
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
pageSizes := make([]string, 0, len(fileNames))
var warn error
for _, file := range fileNames {
// example: hugepages-1048576kB
val := strings.TrimPrefix(file, "hugepages-")
if len(val) == len(file) {
// unexpected file name: no prefix found
// Unexpected file name: no prefix found, ignore it.
continue
}
// The suffix is always "kB" (as of Linux 5.9)
// The suffix is always "kB" (as of Linux 5.13). If we find
// something else, produce an error but keep going.
eLen := len(val) - 2
val = strings.TrimSuffix(val, "kB")
if len(val) != eLen {
logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file)
// Highly unlikely.
if warn == nil {
warn = errors.New(file + `: invalid suffix (expected "kB")`)
}
continue
}
size, err := strconv.Atoi(val)
if err != nil {
return nil, err
// Highly unlikely.
if warn == nil {
warn = fmt.Errorf("%s: %w", file, err)
}
continue
}
// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
// but in our case the size is in KB already.
@ -331,34 +370,12 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
pageSizes = append(pageSizes, val)
}
return pageSizes, nil
return pageSizes, warn
}
// GetPids returns all pids, that were added to cgroup at path.
func GetPids(dir string) ([]int, error) {
return readProcsFile(filepath.Join(dir, CgroupProcesses))
}
// GetAllPids returns all pids, that were added to cgroup at path and to all its
// subcgroups.
func GetAllPids(path string) ([]int, error) {
var pids []int
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
if iErr != nil {
return iErr
}
if info.IsDir() || info.Name() != CgroupProcesses {
return nil
}
cPids, err := readProcsFile(p)
if err != nil {
return err
}
pids = append(pids, cPids...)
return nil
})
return pids, err
return readProcsFile(dir)
}
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
@ -376,7 +393,7 @@ func WriteCgroupProc(dir string, pid int) error {
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
return fmt.Errorf("failed to write %v: %w", pid, err)
}
defer file.Close()
@ -393,7 +410,7 @@ func WriteCgroupProc(dir string, pid int) error {
continue
}
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
return fmt.Errorf("failed to write %v: %w", pid, err)
}
return err
}
@ -446,5 +463,5 @@ func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
if blkIoWeight == 0 {
return 0
}
return uint64(1 + (uint64(blkIoWeight)-10)*9999/990)
return 1 + (uint64(blkIoWeight)-10)*9999/990
}

View File

@ -46,11 +46,8 @@ func NewNotFoundError(sub string) error {
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
var nfErr *NotFoundError
return errors.As(err, &nfErr)
}
func tryDefaultPath(cgroupPath, subsystem string) string {
@ -116,6 +113,11 @@ func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
return "", errUnified
}
// If subsystem is empty, we look for the cgroupv2 hybrid path.
if len(subsystem) == 0 {
return hybridMountpoint, nil
}
// Avoid parsing mountinfo by trying the default path first, if possible.
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
return path, nil
@ -154,7 +156,7 @@ func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, sub
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
return "", errors.New("no subsystem for mount")
}
return getControllerPath(m.Subsystems[0], cgroups)
@ -226,6 +228,11 @@ func GetOwnCgroupPath(subsystem string) (string, error) {
return "", err
}
// If subsystem is empty, we look for the cgroupv2 hybrid path.
if len(subsystem) == 0 {
return hybridMountpoint, nil
}
return getCgroupPathHelper(subsystem, cgroup)
}

View File

@ -28,17 +28,26 @@ type Cgroup struct {
// ScopePrefix describes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the absolute cgroups paths to join.
// This takes precedence over Path.
Paths map[string]string
// Resources contains various cgroups settings to apply
*Resources
// Systemd tells if systemd should be used to manage cgroups.
Systemd bool
// SystemdProps are any additional properties for systemd,
// derived from org.systemd.property.xxx annotations.
// Ignored unless systemd is used for managing cgroups.
SystemdProps []systemdDbus.Property `json:"-"`
// Rootless tells if rootless cgroups should be used.
Rootless bool
// The host UID that should own the cgroup, or nil to accept
// the default ownership. This should only be set when the
// cgroupfs is to be mounted read/write.
// Not all cgroup manager implementations support changing
// the ownership.
OwnerUID *int `json:"owner_uid,omitempty"`
}
type Resources struct {
@ -117,6 +126,9 @@ type Resources struct {
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`
// Rdma resource restriction configuration
Rdma map[string]LinuxRdma `json:"rdma"`
// Used on cgroups v2:
// CpuWeight sets a proportional bandwidth limit.

View File

@ -1,3 +1,4 @@
//go:build !linux
// +build !linux
package configs

View File

@ -7,10 +7,10 @@ import (
"os/exec"
"time"
"github.com/sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
type Rlimit struct {
@ -31,10 +31,12 @@ type IDMap struct {
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
DefaultErrnoRet *uint `json:"default_errno_ret"`
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
DefaultErrnoRet *uint `json:"default_errno_ret"`
ListenerPath string `json:"listener_path,omitempty"`
ListenerMetadata string `json:"listener_metadata,omitempty"`
}
// Action is taken upon rule match in Seccomp
@ -47,6 +49,9 @@ const (
Allow
Trace
Log
Notify
KillThread
KillProcess
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
@ -246,6 +251,19 @@ const (
Poststop HookName = "poststop"
)
// KnownHookNames returns the known hook names.
// Used by `runc features`.
func KnownHookNames() []string {
return []string{
string(Prestart), // deprecated
string(CreateRuntime),
string(CreateContainer),
string(StartContainer),
string(Poststart),
string(Poststop),
}
}
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
@ -262,7 +280,7 @@ type Capabilities struct {
func (hooks HookList) RunHooks(state *specs.State) error {
for i, h := range hooks {
if err := h.Run(state); err != nil {
return errors.Wrapf(err, "Running hook #%d:", i)
return fmt.Errorf("error running hook #%d: %w", i, err)
}
}
@ -375,7 +393,7 @@ func (c Command) Run(s *specs.State) error {
go func() {
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
}()

View File

@ -1,17 +1,24 @@
package configs
import "fmt"
import "errors"
var (
errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
)
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
return -1, errNoUIDMap
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
return -1, errNoUserMap
}
return id, nil
}
@ -30,11 +37,11 @@ func (c Config) HostRootUID() (int, error) {
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
return -1, errNoGIDMap
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
return -1, errNoGroupMap
}
return id, nil
}

View File

@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz
package configs

View File

@ -1,6 +1,9 @@
package configs
type IntelRdt struct {
// The identity for RDT Class of Service
ClosID string `json:"closID,omitempty"`
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`

View File

@ -1,5 +1,7 @@
package configs
import "golang.org/x/sys/unix"
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
@ -28,6 +30,9 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
RecAttr *unix.MountAttr `json:"rec_attr"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
@ -37,3 +42,7 @@ type Mount struct {
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}
func (m *Mount) IsBind() bool {
return m.Flags&unix.MS_BIND != 0
}

View File

@ -1,3 +1,4 @@
//go:build linux
// +build linux
package configs

View File

@ -1,3 +1,4 @@
//go:build !linux && !windows
// +build !linux,!windows
package configs

View File

@ -1,3 +1,4 @@
//go:build !linux
// +build !linux
package configs

View File

@ -0,0 +1,9 @@
package configs
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
type LinuxRdma struct {
// Maximum number of HCA handles that can be opened. Default is "no limit".
HcaHandles *uint32 `json:"hca_handles,omitempty"`
// Maximum number of HCA objects that can be created. Default is "no limit".
HcaObjects *uint32 `json:"hca_objects,omitempty"`
}

View File

@ -1,10 +1,10 @@
//go:build !windows
// +build !windows
package devices
import (
"errors"
"io/ioutil"
"os"
"path/filepath"
@ -16,8 +16,8 @@ var ErrNotADevice = errors.New("not a device node")
// Testing dependencies
var (
unixLstat = unix.Lstat
ioutilReadDir = ioutil.ReadDir
unixLstat = unix.Lstat
osReadDir = os.ReadDir
)
func mkDev(d *Rule) (uint64, error) {
@ -40,7 +40,7 @@ func DeviceFromPath(path, permissions string) (*Device, error) {
var (
devType Type
mode = stat.Mode
devNumber = uint64(stat.Rdev)
devNumber = uint64(stat.Rdev) //nolint:unconvert // Rdev is uint32 on e.g. MIPS.
major = unix.Major(devNumber)
minor = unix.Minor(devNumber)
)
@ -76,7 +76,7 @@ func HostDevices() ([]*Device, error) {
// GetDevices recursively traverses a directory specified by path
// and returns all devices found there.
func GetDevices(path string) ([]*Device, error) {
files, err := ioutilReadDir(path)
files, err := osReadDir(path)
if err != nil {
return nil, err
}
@ -103,7 +103,7 @@ func GetDevices(path string) ([]*Device, error) {
}
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
if err != nil {
if err == ErrNotADevice {
if errors.Is(err, ErrNotADevice) {
continue
}
if os.IsNotExist(err) {

View File

@ -1,3 +1,4 @@
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package user

View File

@ -120,7 +120,7 @@ func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error)
func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
if r == nil {
return nil, fmt.Errorf("nil source for passwd-formatted data")
return nil, errors.New("nil source for passwd-formatted data")
}
var (
@ -178,7 +178,7 @@ func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error)
func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
if r == nil {
return nil, fmt.Errorf("nil source for group-formatted data")
return nil, errors.New("nil source for group-formatted data")
}
rd := bufio.NewReader(r)
out := []Group{}
@ -339,7 +339,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
if userArg == "" {
userArg = strconv.Itoa(user.Uid)
}
return nil, fmt.Errorf("unable to find user %s: %v", userArg, err)
return nil, fmt.Errorf("unable to find user %s: %w", userArg, err)
}
var matchedUserName string
@ -355,7 +355,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
if uidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find user %s: %v", userArg, ErrNoPasswdEntries)
return nil, fmt.Errorf("unable to find user %s: %w", userArg, ErrNoPasswdEntries)
}
user.Uid = uidArg
@ -390,7 +390,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
return g.Name == groupArg
})
if err != nil && group != nil {
return nil, fmt.Errorf("unable to find groups for spec %v: %v", matchedUserName, err)
return nil, fmt.Errorf("unable to find groups for spec %v: %w", matchedUserName, err)
}
// Only start modifying user.Gid if it is in explicit form.
@ -404,7 +404,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
if gidErr != nil {
// Not numeric.
return nil, fmt.Errorf("unable to find group %s: %v", groupArg, ErrNoGroupEntries)
return nil, fmt.Errorf("unable to find group %s: %w", groupArg, ErrNoGroupEntries)
}
user.Gid = gidArg
@ -445,7 +445,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
return false
})
if err != nil {
return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
return nil, fmt.Errorf("Unable to find additional groups %v: %w", additionalGroups, err)
}
}
@ -468,7 +468,8 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
if !found {
gid, err := strconv.ParseInt(ag, 10, 64)
if err != nil {
return nil, fmt.Errorf("Unable to find group %s", ag)
// Not a numeric ID either.
return nil, fmt.Errorf("Unable to find group %s: %w", ag, ErrNoGroupEntries)
}
// Ensure gid is inside gid range.
if gid < minID || gid > maxID {
@ -521,7 +522,7 @@ func ParseSubIDFileFilter(path string, filter func(SubID) bool) ([]SubID, error)
func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
if r == nil {
return nil, fmt.Errorf("nil source for subid-formatted data")
return nil, errors.New("nil source for subid-formatted data")
}
var (
@ -574,7 +575,7 @@ func ParseIDMapFileFilter(path string, filter func(IDMap) bool) ([]IDMap, error)
func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
if r == nil {
return nil, fmt.Errorf("nil source for idmap-formatted data")
return nil, errors.New("nil source for idmap-formatted data")
}
var (

View File

@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz
package user

View File

@ -1,3 +1,4 @@
//go:build gofuzz
// +build gofuzz
package userns

View File

@ -1,3 +1,4 @@
//go:build !linux
// +build !linux
package userns

View File

@ -1,5 +1,3 @@
// +build linux
package utils
/*
@ -88,6 +86,11 @@ func SendFd(socket *os.File, name string, fd uintptr) error {
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", name)
}
oob := unix.UnixRights(int(fd))
return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0)
return SendFds(socket, []byte(name), int(fd))
}
// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
func SendFds(socket *os.File, msg []byte, fds ...int) error {
oob := unix.UnixRights(fds...)
return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
}

View File

@ -11,7 +11,7 @@ import (
"strings"
"unsafe"
"github.com/cyphar/filepath-securejoin"
securejoin "github.com/cyphar/filepath-securejoin"
"golang.org/x/sys/unix"
)
@ -33,16 +33,6 @@ func init() {
}
}
// ResolveRootfs ensures that the current working directory is
// not a symlink and returns the absolute path to the rootfs
func ResolveRootfs(uncleanRootfs string) (string, error) {
rootfs, err := filepath.Abs(uncleanRootfs)
if err != nil {
return "", err
}
return filepath.EvalSymlinks(rootfs)
}
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status unix.WaitStatus) int {
@ -120,7 +110,7 @@ func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
unsafePath = stripRoot(root, unsafePath)
path, err := securejoin.SecureJoin(root, unsafePath)
if err != nil {
return fmt.Errorf("resolving path inside rootfs failed: %v", err)
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
}
// Open the target path.

View File

@ -1,3 +1,4 @@
//go:build !windows
// +build !windows
package utils
@ -14,7 +15,7 @@ import (
func EnsureProcHandle(fh *os.File) error {
var buf unix.Statfs_t
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err)
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
}
if buf.Type != unix.PROC_SUPER_MAGIC {
return fmt.Errorf("%s is not on procfs", fh.Name())
@ -52,7 +53,7 @@ func CloseExecFrom(minFd int) error {
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
// this might fail are basically file descriptors that have already
// been closed (including and especially the one that was created when
// ioutil.ReadDir did the "opendir" syscall).
// os.ReadDir did the "opendir" syscall).
unix.CloseOnExec(fd)
}
return nil

View File

@ -41,8 +41,6 @@ github.com/PuerkitoBio/urlesc
github.com/asaskevich/govalidator
# github.com/beorn7/perks v1.0.1
github.com/beorn7/perks/quantile
# github.com/bits-and-blooms/bitset v1.2.0
github.com/bits-and-blooms/bitset
# github.com/blang/semver v3.5.1+incompatible
## explicit
github.com/blang/semver
@ -248,7 +246,7 @@ github.com/opencontainers/go-digest
# github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 => github.com/opencontainers/image-spec v1.0.2
github.com/opencontainers/image-spec/specs-go
github.com/opencontainers/image-spec/specs-go/v1
# github.com/opencontainers/runc v1.1.3 => github.com/opencontainers/runc v1.0.3
# github.com/opencontainers/runc v1.1.3 => github.com/opencontainers/runc v1.1.3
## explicit
github.com/opencontainers/runc/libcontainer/cgroups
github.com/opencontainers/runc/libcontainer/cgroups/devices
@ -504,6 +502,6 @@ k8s.io/apimachinery/pkg/api/resource
## explicit
k8s.io/cri-api/pkg/apis/runtime/v1alpha2
# github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.2
# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.3
# github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.3
# github.com/uber-go/atomic => go.uber.org/atomic v1.5.1
# google.golang.org/genproto => google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8