mirror of
				https://github.com/k3s-io/kubernetes.git
				synced 2025-10-25 10:00:53 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			936 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			936 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // +build linux
 | |
| 
 | |
| // Public API specification for libseccomp Go bindings
 | |
| // Contains public API for the bindings
 | |
| 
 | |
| // Package seccomp provides bindings for libseccomp, a library wrapping the Linux
 | |
| // seccomp syscall. Seccomp enables an application to restrict system call use
 | |
| // for itself and its children.
 | |
| package seccomp
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"runtime"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"syscall"
 | |
| 	"unsafe"
 | |
| )
 | |
| 
 | |
| // C wrapping code
 | |
| 
 | |
| // #cgo pkg-config: libseccomp
 | |
| // #include <stdlib.h>
 | |
| // #include <seccomp.h>
 | |
| import "C"
 | |
| 
 | |
| // Exported types
 | |
| 
 | |
| // VersionError denotes that the system libseccomp version is incompatible
 | |
| // with this package.
 | |
| type VersionError struct {
 | |
| 	message string
 | |
| 	minimum string
 | |
| }
 | |
| 
 | |
| func (e VersionError) Error() string {
 | |
| 	format := "Libseccomp version too low: "
 | |
| 	if e.message != "" {
 | |
| 		format += e.message + ": "
 | |
| 	}
 | |
| 	format += "minimum supported is "
 | |
| 	if e.minimum != "" {
 | |
| 		format += e.minimum + ": "
 | |
| 	} else {
 | |
| 		format += "2.2.0: "
 | |
| 	}
 | |
| 	format += "detected %d.%d.%d"
 | |
| 	return fmt.Sprintf(format, verMajor, verMinor, verMicro)
 | |
| }
 | |
| 
 | |
| // ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
 | |
| // per-architecture basis.
 | |
| type ScmpArch uint
 | |
| 
 | |
| // ScmpAction represents an action to be taken on a filter rule match in
 | |
| // libseccomp
 | |
| type ScmpAction uint
 | |
| 
 | |
| // ScmpCompareOp represents a comparison operator which can be used in a filter
 | |
| // rule
 | |
| type ScmpCompareOp uint
 | |
| 
 | |
| // ScmpCondition represents a rule in a libseccomp filter context
 | |
| type ScmpCondition struct {
 | |
| 	Argument uint          `json:"argument,omitempty"`
 | |
| 	Op       ScmpCompareOp `json:"operator,omitempty"`
 | |
| 	Operand1 uint64        `json:"operand_one,omitempty"`
 | |
| 	Operand2 uint64        `json:"operand_two,omitempty"`
 | |
| }
 | |
| 
 | |
| // ScmpSyscall represents a Linux System Call
 | |
| type ScmpSyscall int32
 | |
| 
 | |
| // Exported Constants
 | |
| 
 | |
| const (
 | |
| 	// Valid architectures recognized by libseccomp
 | |
| 	// PowerPC and S390(x) architectures are unavailable below library version
 | |
| 	// v2.3.0 and will returns errors if used with incompatible libraries
 | |
| 
 | |
| 	// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
 | |
| 	// variables are invalid
 | |
| 	ArchInvalid ScmpArch = iota
 | |
| 	// ArchNative is the native architecture of the kernel
 | |
| 	ArchNative ScmpArch = iota
 | |
| 	// ArchX86 represents 32-bit x86 syscalls
 | |
| 	ArchX86 ScmpArch = iota
 | |
| 	// ArchAMD64 represents 64-bit x86-64 syscalls
 | |
| 	ArchAMD64 ScmpArch = iota
 | |
| 	// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
 | |
| 	ArchX32 ScmpArch = iota
 | |
| 	// ArchARM represents 32-bit ARM syscalls
 | |
| 	ArchARM ScmpArch = iota
 | |
| 	// ArchARM64 represents 64-bit ARM syscalls
 | |
| 	ArchARM64 ScmpArch = iota
 | |
| 	// ArchMIPS represents 32-bit MIPS syscalls
 | |
| 	ArchMIPS ScmpArch = iota
 | |
| 	// ArchMIPS64 represents 64-bit MIPS syscalls
 | |
| 	ArchMIPS64 ScmpArch = iota
 | |
| 	// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
 | |
| 	ArchMIPS64N32 ScmpArch = iota
 | |
| 	// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
 | |
| 	ArchMIPSEL ScmpArch = iota
 | |
| 	// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
 | |
| 	ArchMIPSEL64 ScmpArch = iota
 | |
| 	// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
 | |
| 	// 32-bit pointers)
 | |
| 	ArchMIPSEL64N32 ScmpArch = iota
 | |
| 	// ArchPPC represents 32-bit POWERPC syscalls
 | |
| 	ArchPPC ScmpArch = iota
 | |
| 	// ArchPPC64 represents 64-bit POWER syscalls (big endian)
 | |
| 	ArchPPC64 ScmpArch = iota
 | |
| 	// ArchPPC64LE represents 64-bit POWER syscalls (little endian)
 | |
| 	ArchPPC64LE ScmpArch = iota
 | |
| 	// ArchS390 represents 31-bit System z/390 syscalls
 | |
| 	ArchS390 ScmpArch = iota
 | |
| 	// ArchS390X represents 64-bit System z/390 syscalls
 | |
| 	ArchS390X ScmpArch = iota
 | |
| )
 | |
| 
 | |
| const (
 | |
| 	// Supported actions on filter match
 | |
| 
 | |
| 	// ActInvalid is a placeholder to ensure uninitialized ScmpAction
 | |
| 	// variables are invalid
 | |
| 	ActInvalid ScmpAction = iota
 | |
| 	// ActKill kills the process
 | |
| 	ActKill ScmpAction = iota
 | |
| 	// ActTrap throws SIGSYS
 | |
| 	ActTrap ScmpAction = iota
 | |
| 	// ActErrno causes the syscall to return a negative error code. This
 | |
| 	// code can be set with the SetReturnCode method
 | |
| 	ActErrno ScmpAction = iota
 | |
| 	// ActTrace causes the syscall to notify tracing processes with the
 | |
| 	// given error code. This code can be set with the SetReturnCode method
 | |
| 	ActTrace ScmpAction = iota
 | |
| 	// ActAllow permits the syscall to continue execution
 | |
| 	ActAllow ScmpAction = iota
 | |
| 	// ActLog permits the syscall to continue execution after logging it.
 | |
| 	// This action is only usable when libseccomp API level 3 or higher is
 | |
| 	// supported.
 | |
| 	ActLog ScmpAction = iota
 | |
| )
 | |
| 
 | |
| const (
 | |
| 	// These are comparison operators used in conditional seccomp rules
 | |
| 	// They are used to compare the value of a single argument of a syscall
 | |
| 	// against a user-defined constant
 | |
| 
 | |
| 	// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
 | |
| 	// variables are invalid
 | |
| 	CompareInvalid ScmpCompareOp = iota
 | |
| 	// CompareNotEqual returns true if the argument is not equal to the
 | |
| 	// given value
 | |
| 	CompareNotEqual ScmpCompareOp = iota
 | |
| 	// CompareLess returns true if the argument is less than the given value
 | |
| 	CompareLess ScmpCompareOp = iota
 | |
| 	// CompareLessOrEqual returns true if the argument is less than or equal
 | |
| 	// to the given value
 | |
| 	CompareLessOrEqual ScmpCompareOp = iota
 | |
| 	// CompareEqual returns true if the argument is equal to the given value
 | |
| 	CompareEqual ScmpCompareOp = iota
 | |
| 	// CompareGreaterEqual returns true if the argument is greater than or
 | |
| 	// equal to the given value
 | |
| 	CompareGreaterEqual ScmpCompareOp = iota
 | |
| 	// CompareGreater returns true if the argument is greater than the given
 | |
| 	// value
 | |
| 	CompareGreater ScmpCompareOp = iota
 | |
| 	// CompareMaskedEqual returns true if the argument is equal to the given
 | |
| 	// value, when masked (bitwise &) against the second given value
 | |
| 	CompareMaskedEqual ScmpCompareOp = iota
 | |
| )
 | |
| 
 | |
| // Helpers for types
 | |
| 
 | |
| // GetArchFromString returns an ScmpArch constant from a string representing an
 | |
| // architecture
 | |
| func GetArchFromString(arch string) (ScmpArch, error) {
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return ArchInvalid, err
 | |
| 	}
 | |
| 
 | |
| 	switch strings.ToLower(arch) {
 | |
| 	case "x86":
 | |
| 		return ArchX86, nil
 | |
| 	case "amd64", "x86-64", "x86_64", "x64":
 | |
| 		return ArchAMD64, nil
 | |
| 	case "x32":
 | |
| 		return ArchX32, nil
 | |
| 	case "arm":
 | |
| 		return ArchARM, nil
 | |
| 	case "arm64", "aarch64":
 | |
| 		return ArchARM64, nil
 | |
| 	case "mips":
 | |
| 		return ArchMIPS, nil
 | |
| 	case "mips64":
 | |
| 		return ArchMIPS64, nil
 | |
| 	case "mips64n32":
 | |
| 		return ArchMIPS64N32, nil
 | |
| 	case "mipsel":
 | |
| 		return ArchMIPSEL, nil
 | |
| 	case "mipsel64":
 | |
| 		return ArchMIPSEL64, nil
 | |
| 	case "mipsel64n32":
 | |
| 		return ArchMIPSEL64N32, nil
 | |
| 	case "ppc":
 | |
| 		return ArchPPC, nil
 | |
| 	case "ppc64":
 | |
| 		return ArchPPC64, nil
 | |
| 	case "ppc64le":
 | |
| 		return ArchPPC64LE, nil
 | |
| 	case "s390":
 | |
| 		return ArchS390, nil
 | |
| 	case "s390x":
 | |
| 		return ArchS390X, nil
 | |
| 	default:
 | |
| 		return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // String returns a string representation of an architecture constant
 | |
| func (a ScmpArch) String() string {
 | |
| 	switch a {
 | |
| 	case ArchX86:
 | |
| 		return "x86"
 | |
| 	case ArchAMD64:
 | |
| 		return "amd64"
 | |
| 	case ArchX32:
 | |
| 		return "x32"
 | |
| 	case ArchARM:
 | |
| 		return "arm"
 | |
| 	case ArchARM64:
 | |
| 		return "arm64"
 | |
| 	case ArchMIPS:
 | |
| 		return "mips"
 | |
| 	case ArchMIPS64:
 | |
| 		return "mips64"
 | |
| 	case ArchMIPS64N32:
 | |
| 		return "mips64n32"
 | |
| 	case ArchMIPSEL:
 | |
| 		return "mipsel"
 | |
| 	case ArchMIPSEL64:
 | |
| 		return "mipsel64"
 | |
| 	case ArchMIPSEL64N32:
 | |
| 		return "mipsel64n32"
 | |
| 	case ArchPPC:
 | |
| 		return "ppc"
 | |
| 	case ArchPPC64:
 | |
| 		return "ppc64"
 | |
| 	case ArchPPC64LE:
 | |
| 		return "ppc64le"
 | |
| 	case ArchS390:
 | |
| 		return "s390"
 | |
| 	case ArchS390X:
 | |
| 		return "s390x"
 | |
| 	case ArchNative:
 | |
| 		return "native"
 | |
| 	case ArchInvalid:
 | |
| 		return "Invalid architecture"
 | |
| 	default:
 | |
| 		return fmt.Sprintf("Unknown architecture %#x", uint(a))
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // String returns a string representation of a comparison operator constant
 | |
| func (a ScmpCompareOp) String() string {
 | |
| 	switch a {
 | |
| 	case CompareNotEqual:
 | |
| 		return "Not equal"
 | |
| 	case CompareLess:
 | |
| 		return "Less than"
 | |
| 	case CompareLessOrEqual:
 | |
| 		return "Less than or equal to"
 | |
| 	case CompareEqual:
 | |
| 		return "Equal"
 | |
| 	case CompareGreaterEqual:
 | |
| 		return "Greater than or equal to"
 | |
| 	case CompareGreater:
 | |
| 		return "Greater than"
 | |
| 	case CompareMaskedEqual:
 | |
| 		return "Masked equality"
 | |
| 	case CompareInvalid:
 | |
| 		return "Invalid comparison operator"
 | |
| 	default:
 | |
| 		return fmt.Sprintf("Unrecognized comparison operator %#x", uint(a))
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // String returns a string representation of a seccomp match action
 | |
| func (a ScmpAction) String() string {
 | |
| 	switch a & 0xFFFF {
 | |
| 	case ActKill:
 | |
| 		return "Action: Kill Process"
 | |
| 	case ActTrap:
 | |
| 		return "Action: Send SIGSYS"
 | |
| 	case ActErrno:
 | |
| 		return fmt.Sprintf("Action: Return error code %d", (a >> 16))
 | |
| 	case ActTrace:
 | |
| 		return fmt.Sprintf("Action: Notify tracing processes with code %d",
 | |
| 			(a >> 16))
 | |
| 	case ActLog:
 | |
| 		return "Action: Log system call"
 | |
| 	case ActAllow:
 | |
| 		return "Action: Allow system call"
 | |
| 	default:
 | |
| 		return fmt.Sprintf("Unrecognized Action %#x", uint(a))
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // SetReturnCode adds a return code to a supporting ScmpAction, clearing any
 | |
| // existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
 | |
| // Accepts 16-bit return code as argument.
 | |
| // Returns a valid ScmpAction of the original type with the new error code set.
 | |
| func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
 | |
| 	aTmp := a & 0x0000FFFF
 | |
| 	if aTmp == ActErrno || aTmp == ActTrace {
 | |
| 		return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
 | |
| 	}
 | |
| 	return a
 | |
| }
 | |
| 
 | |
| // GetReturnCode returns the return code of an ScmpAction
 | |
| func (a ScmpAction) GetReturnCode() int16 {
 | |
| 	return int16(a >> 16)
 | |
| }
 | |
| 
 | |
| // General utility functions
 | |
| 
 | |
| // GetLibraryVersion returns the version of the library the bindings are built
 | |
| // against.
 | |
| // The version is formatted as follows: Major.Minor.Micro
 | |
| func GetLibraryVersion() (major, minor, micro uint) {
 | |
| 	return verMajor, verMinor, verMicro
 | |
| }
 | |
| 
 | |
| // GetApi returns the API level supported by the system.
 | |
| // Returns a positive int containing the API level, or 0 with an error if the
 | |
| // API level could not be detected due to the library being older than v2.4.0.
 | |
| // See the seccomp_api_get(3) man page for details on available API levels:
 | |
| // https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3
 | |
| func GetApi() (uint, error) {
 | |
| 	return getApi()
 | |
| }
 | |
| 
 | |
| // SetApi forcibly sets the API level. General use of this function is strongly
 | |
| // discouraged.
 | |
| // Returns an error if the API level could not be set. An error is always
 | |
| // returned if the library is older than v2.4.0
 | |
| // See the seccomp_api_get(3) man page for details on available API levels:
 | |
| // https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3
 | |
| func SetApi(api uint) error {
 | |
| 	return setApi(api)
 | |
| }
 | |
| 
 | |
| // Syscall functions
 | |
| 
 | |
| // GetName retrieves the name of a syscall from its number.
 | |
| // Acts on any syscall number.
 | |
| // Returns either a string containing the name of the syscall, or an error.
 | |
| func (s ScmpSyscall) GetName() (string, error) {
 | |
| 	return s.GetNameByArch(ArchNative)
 | |
| }
 | |
| 
 | |
| // GetNameByArch retrieves the name of a syscall from its number for a given
 | |
| // architecture.
 | |
| // Acts on any syscall number.
 | |
| // Accepts a valid architecture constant.
 | |
| // Returns either a string containing the name of the syscall, or an error.
 | |
| // if the syscall is unrecognized or an issue occurred.
 | |
| func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
 | |
| 	if err := sanitizeArch(arch); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
 | |
| 	if cString == nil {
 | |
| 		return "", fmt.Errorf("could not resolve syscall name for %#x", int32(s))
 | |
| 	}
 | |
| 	defer C.free(unsafe.Pointer(cString))
 | |
| 
 | |
| 	finalStr := C.GoString(cString)
 | |
| 	return finalStr, nil
 | |
| }
 | |
| 
 | |
| // GetSyscallFromName returns the number of a syscall by name on the kernel's
 | |
| // native architecture.
 | |
| // Accepts a string containing the name of a syscall.
 | |
| // Returns the number of the syscall, or an error if no syscall with that name
 | |
| // was found.
 | |
| func GetSyscallFromName(name string) (ScmpSyscall, error) {
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return 0, err
 | |
| 	}
 | |
| 
 | |
| 	cString := C.CString(name)
 | |
| 	defer C.free(unsafe.Pointer(cString))
 | |
| 
 | |
| 	result := C.seccomp_syscall_resolve_name(cString)
 | |
| 	if result == scmpError {
 | |
| 		return 0, fmt.Errorf("could not resolve name to syscall: %q", name)
 | |
| 	}
 | |
| 
 | |
| 	return ScmpSyscall(result), nil
 | |
| }
 | |
| 
 | |
| // GetSyscallFromNameByArch returns the number of a syscall by name for a given
 | |
| // architecture's ABI.
 | |
| // Accepts the name of a syscall and an architecture constant.
 | |
| // Returns the number of the syscall, or an error if an invalid architecture is
 | |
| // passed or a syscall with that name was not found.
 | |
| func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return 0, err
 | |
| 	}
 | |
| 	if err := sanitizeArch(arch); err != nil {
 | |
| 		return 0, err
 | |
| 	}
 | |
| 
 | |
| 	cString := C.CString(name)
 | |
| 	defer C.free(unsafe.Pointer(cString))
 | |
| 
 | |
| 	result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString)
 | |
| 	if result == scmpError {
 | |
| 		return 0, fmt.Errorf("could not resolve name to syscall: %q on %v", name, arch)
 | |
| 	}
 | |
| 
 | |
| 	return ScmpSyscall(result), nil
 | |
| }
 | |
| 
 | |
| // MakeCondition creates and returns a new condition to attach to a filter rule.
 | |
| // Associated rules will only match if this condition is true.
 | |
| // Accepts the number the argument we are checking, and a comparison operator
 | |
| // and value to compare to.
 | |
| // The rule will match if argument $arg (zero-indexed) of the syscall is
 | |
| // $COMPARE_OP the provided comparison value.
 | |
| // Some comparison operators accept two values. Masked equals, for example,
 | |
| // will mask $arg of the syscall with the second value provided (via bitwise
 | |
| // AND) and then compare against the first value provided.
 | |
| // For example, in the less than or equal case, if the syscall argument was
 | |
| // 0 and the value provided was 1, the condition would match, as 0 is less
 | |
| // than or equal to 1.
 | |
| // Return either an error on bad argument or a valid ScmpCondition struct.
 | |
| func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
 | |
| 	var condStruct ScmpCondition
 | |
| 
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return condStruct, err
 | |
| 	}
 | |
| 
 | |
| 	if comparison == CompareInvalid {
 | |
| 		return condStruct, fmt.Errorf("invalid comparison operator")
 | |
| 	} else if arg > 5 {
 | |
| 		return condStruct, fmt.Errorf("syscalls only have up to 6 arguments (%d given)", arg)
 | |
| 	} else if len(values) > 2 {
 | |
| 		return condStruct, fmt.Errorf("conditions can have at most 2 arguments (%d given)", len(values))
 | |
| 	} else if len(values) == 0 {
 | |
| 		return condStruct, fmt.Errorf("must provide at least one value to compare against")
 | |
| 	}
 | |
| 
 | |
| 	condStruct.Argument = arg
 | |
| 	condStruct.Op = comparison
 | |
| 	condStruct.Operand1 = values[0]
 | |
| 	if len(values) == 2 {
 | |
| 		condStruct.Operand2 = values[1]
 | |
| 	} else {
 | |
| 		condStruct.Operand2 = 0 // Unused
 | |
| 	}
 | |
| 
 | |
| 	return condStruct, nil
 | |
| }
 | |
| 
 | |
| // Utility Functions
 | |
| 
 | |
| // GetNativeArch returns architecture token representing the native kernel
 | |
| // architecture
 | |
| func GetNativeArch() (ScmpArch, error) {
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return ArchInvalid, err
 | |
| 	}
 | |
| 
 | |
| 	arch := C.seccomp_arch_native()
 | |
| 
 | |
| 	return archFromNative(arch)
 | |
| }
 | |
| 
 | |
| // Public Filter API
 | |
| 
 | |
| // ScmpFilter represents a filter context in libseccomp.
 | |
| // A filter context is initially empty. Rules can be added to it, and it can
 | |
| // then be loaded into the kernel.
 | |
| type ScmpFilter struct {
 | |
| 	filterCtx C.scmp_filter_ctx
 | |
| 	valid     bool
 | |
| 	lock      sync.Mutex
 | |
| }
 | |
| 
 | |
| // NewFilter creates and returns a new filter context.
 | |
| // Accepts a default action to be taken for syscalls which match no rules in
 | |
| // the filter.
 | |
| // Returns a reference to a valid filter context, or nil and an error if the
 | |
| // filter context could not be created or an invalid default action was given.
 | |
| func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
 | |
| 	if err := ensureSupportedVersion(); err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	if err := sanitizeAction(defaultAction); err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	fPtr := C.seccomp_init(defaultAction.toNative())
 | |
| 	if fPtr == nil {
 | |
| 		return nil, fmt.Errorf("could not create filter")
 | |
| 	}
 | |
| 
 | |
| 	filter := new(ScmpFilter)
 | |
| 	filter.filterCtx = fPtr
 | |
| 	filter.valid = true
 | |
| 	runtime.SetFinalizer(filter, filterFinalizer)
 | |
| 
 | |
| 	// Enable TSync so all goroutines will receive the same rules
 | |
| 	// If the kernel does not support TSYNC, allow us to continue without error
 | |
| 	if err := filter.setFilterAttr(filterAttrTsync, 0x1); err != nil && err != syscall.ENOTSUP {
 | |
| 		filter.Release()
 | |
| 		return nil, fmt.Errorf("could not create filter - error setting tsync bit: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	return filter, nil
 | |
| }
 | |
| 
 | |
| // IsValid determines whether a filter context is valid to use.
 | |
| // Some operations (Release and Merge) render filter contexts invalid and
 | |
| // consequently prevent further use.
 | |
| func (f *ScmpFilter) IsValid() bool {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	return f.valid
 | |
| }
 | |
| 
 | |
| // Reset resets a filter context, removing all its existing state.
 | |
| // Accepts a new default action to be taken for syscalls which do not match.
 | |
| // Returns an error if the filter or action provided are invalid.
 | |
| func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if err := sanitizeAction(defaultAction); err != nil {
 | |
| 		return err
 | |
| 	} else if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative())
 | |
| 	if retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Release releases a filter context, freeing its memory. Should be called after
 | |
| // loading into the kernel, when the filter is no longer needed.
 | |
| // After calling this function, the given filter is no longer valid and cannot
 | |
| // be used.
 | |
| // Release() will be invoked automatically when a filter context is garbage
 | |
| // collected, but can also be called manually to free memory.
 | |
| func (f *ScmpFilter) Release() {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if !f.valid {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	f.valid = false
 | |
| 	C.seccomp_release(f.filterCtx)
 | |
| }
 | |
| 
 | |
| // Merge merges two filter contexts.
 | |
| // The source filter src will be released as part of the process, and will no
 | |
| // longer be usable or valid after this call.
 | |
| // To be merged, filters must NOT share any architectures, and all their
 | |
| // attributes (Default Action, Bad Arch Action, and No New Privs bools)
 | |
| // must match.
 | |
| // The filter src will be merged into the filter this is called on.
 | |
| // The architectures of the src filter not present in the destination, and all
 | |
| // associated rules, will be added to the destination.
 | |
| // Returns an error if merging the filters failed.
 | |
| func (f *ScmpFilter) Merge(src *ScmpFilter) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	src.lock.Lock()
 | |
| 	defer src.lock.Unlock()
 | |
| 
 | |
| 	if !src.valid || !f.valid {
 | |
| 		return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
 | |
| 	}
 | |
| 
 | |
| 	// Merge the filters
 | |
| 	retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
 | |
| 	if syscall.Errno(-1*retCode) == syscall.EINVAL {
 | |
| 		return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
 | |
| 	} else if retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	src.valid = false
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // IsArchPresent checks if an architecture is present in a filter.
 | |
| // If a filter contains an architecture, it uses its default action for
 | |
| // syscalls which do not match rules in it, and its rules can match syscalls
 | |
| // for that ABI.
 | |
| // If a filter does not contain an architecture, all syscalls made to that
 | |
| // kernel ABI will fail with the filter's default Bad Architecture Action
 | |
| // (by default, killing the process).
 | |
| // Accepts an architecture constant.
 | |
| // Returns true if the architecture is present in the filter, false otherwise,
 | |
| // and an error on an invalid filter context, architecture constant, or an
 | |
| // issue with the call to libseccomp.
 | |
| func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if err := sanitizeArch(arch); err != nil {
 | |
| 		return false, err
 | |
| 	} else if !f.valid {
 | |
| 		return false, errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative())
 | |
| 	if syscall.Errno(-1*retCode) == syscall.EEXIST {
 | |
| 		// -EEXIST is "arch not present"
 | |
| 		return false, nil
 | |
| 	} else if retCode != 0 {
 | |
| 		return false, syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return true, nil
 | |
| }
 | |
| 
 | |
| // AddArch adds an architecture to the filter.
 | |
| // Accepts an architecture constant.
 | |
| // Returns an error on invalid filter context or architecture token, or an
 | |
| // issue with the call to libseccomp.
 | |
| func (f *ScmpFilter) AddArch(arch ScmpArch) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if err := sanitizeArch(arch); err != nil {
 | |
| 		return err
 | |
| 	} else if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	// Libseccomp returns -EEXIST if the specified architecture is already
 | |
| 	// present. Succeed silently in this case, as it's not fatal, and the
 | |
| 	// architecture is present already.
 | |
| 	retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative())
 | |
| 	if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // RemoveArch removes an architecture from the filter.
 | |
| // Accepts an architecture constant.
 | |
| // Returns an error on invalid filter context or architecture token, or an
 | |
| // issue with the call to libseccomp.
 | |
| func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if err := sanitizeArch(arch); err != nil {
 | |
| 		return err
 | |
| 	} else if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	// Similar to AddArch, -EEXIST is returned if the arch is not present
 | |
| 	// Succeed silently in that case, this is not fatal and the architecture
 | |
| 	// is not present in the filter after RemoveArch
 | |
| 	retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative())
 | |
| 	if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Load loads a filter context into the kernel.
 | |
| // Returns an error if the filter context is invalid or the syscall failed.
 | |
| func (f *ScmpFilter) Load() error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	if retCode := C.seccomp_load(f.filterCtx); retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // GetDefaultAction returns the default action taken on a syscall which does not
 | |
| // match a rule in the filter, or an error if an issue was encountered
 | |
| // retrieving the value.
 | |
| func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
 | |
| 	action, err := f.getFilterAttr(filterAttrActDefault)
 | |
| 	if err != nil {
 | |
| 		return 0x0, err
 | |
| 	}
 | |
| 
 | |
| 	return actionFromNative(action)
 | |
| }
 | |
| 
 | |
| // GetBadArchAction returns the default action taken on a syscall for an
 | |
| // architecture not in the filter, or an error if an issue was encountered
 | |
| // retrieving the value.
 | |
| func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
 | |
| 	action, err := f.getFilterAttr(filterAttrActBadArch)
 | |
| 	if err != nil {
 | |
| 		return 0x0, err
 | |
| 	}
 | |
| 
 | |
| 	return actionFromNative(action)
 | |
| }
 | |
| 
 | |
| // GetNoNewPrivsBit returns the current state the No New Privileges bit will be set
 | |
| // to on the filter being loaded, or an error if an issue was encountered
 | |
| // retrieving the value.
 | |
| // The No New Privileges bit tells the kernel that new processes run with exec()
 | |
| // cannot gain more privileges than the process that ran exec().
 | |
| // For example, a process with No New Privileges set would be unable to exec
 | |
| // setuid/setgid executables.
 | |
| func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
 | |
| 	noNewPrivs, err := f.getFilterAttr(filterAttrNNP)
 | |
| 	if err != nil {
 | |
| 		return false, err
 | |
| 	}
 | |
| 
 | |
| 	if noNewPrivs == 0 {
 | |
| 		return false, nil
 | |
| 	}
 | |
| 
 | |
| 	return true, nil
 | |
| }
 | |
| 
 | |
| // GetLogBit returns the current state the Log bit will be set to on the filter
 | |
| // being loaded, or an error if an issue was encountered retrieving the value.
 | |
| // The Log bit tells the kernel that all actions taken by the filter, with the
 | |
| // exception of ActAllow, should be logged.
 | |
| // The Log bit is only usable when libseccomp API level 3 or higher is
 | |
| // supported.
 | |
| func (f *ScmpFilter) GetLogBit() (bool, error) {
 | |
| 	log, err := f.getFilterAttr(filterAttrLog)
 | |
| 	if err != nil {
 | |
| 		api, apiErr := getApi()
 | |
| 		if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) {
 | |
| 			return false, fmt.Errorf("getting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher")
 | |
| 		}
 | |
| 
 | |
| 		return false, err
 | |
| 	}
 | |
| 
 | |
| 	if log == 0 {
 | |
| 		return false, nil
 | |
| 	}
 | |
| 
 | |
| 	return true, nil
 | |
| }
 | |
| 
 | |
| // SetBadArchAction sets the default action taken on a syscall for an
 | |
| // architecture not in the filter, or an error if an issue was encountered
 | |
| // setting the value.
 | |
| func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
 | |
| 	if err := sanitizeAction(action); err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	return f.setFilterAttr(filterAttrActBadArch, action.toNative())
 | |
| }
 | |
| 
 | |
| // SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
 | |
| // applied on filter load, or an error if an issue was encountered setting the
 | |
| // value.
 | |
| // Filters with No New Privileges set to 0 can only be loaded if the process
 | |
| // has the CAP_SYS_ADMIN capability.
 | |
| func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
 | |
| 	var toSet C.uint32_t = 0x0
 | |
| 
 | |
| 	if state {
 | |
| 		toSet = 0x1
 | |
| 	}
 | |
| 
 | |
| 	return f.setFilterAttr(filterAttrNNP, toSet)
 | |
| }
 | |
| 
 | |
| // SetLogBit sets the state of the Log bit, which will be applied on filter
 | |
| // load, or an error if an issue was encountered setting the value.
 | |
| // The Log bit is only usable when libseccomp API level 3 or higher is
 | |
| // supported.
 | |
| func (f *ScmpFilter) SetLogBit(state bool) error {
 | |
| 	var toSet C.uint32_t = 0x0
 | |
| 
 | |
| 	if state {
 | |
| 		toSet = 0x1
 | |
| 	}
 | |
| 
 | |
| 	err := f.setFilterAttr(filterAttrLog, toSet)
 | |
| 	if err != nil {
 | |
| 		api, apiErr := getApi()
 | |
| 		if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) {
 | |
| 			return fmt.Errorf("setting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher")
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| // SetSyscallPriority sets a syscall's priority.
 | |
| // This provides a hint to the filter generator in libseccomp about the
 | |
| // importance of this syscall. High-priority syscalls are placed
 | |
| // first in the filter code, and incur less overhead (at the expense of
 | |
| // lower-priority syscalls).
 | |
| func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call),
 | |
| 		C.uint8_t(priority)); retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // AddRule adds a single rule for an unconditional action on a syscall.
 | |
| // Accepts the number of the syscall and the action to be taken on the call
 | |
| // being made.
 | |
| // Returns an error if an issue was encountered adding the rule.
 | |
| func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
 | |
| 	return f.addRuleGeneric(call, action, false, nil)
 | |
| }
 | |
| 
 | |
| // AddRuleExact adds a single rule for an unconditional action on a syscall.
 | |
| // Accepts the number of the syscall and the action to be taken on the call
 | |
| // being made.
 | |
| // No modifications will be made to the rule, and it will fail to add if it
 | |
| // cannot be applied to the current architecture without modification.
 | |
| // The rule will function exactly as described, but it may not function identically
 | |
| // (or be able to be applied to) all architectures.
 | |
| // Returns an error if an issue was encountered adding the rule.
 | |
| func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
 | |
| 	return f.addRuleGeneric(call, action, true, nil)
 | |
| }
 | |
| 
 | |
| // AddRuleConditional adds a single rule for a conditional action on a syscall.
 | |
| // Returns an error if an issue was encountered adding the rule.
 | |
| // All conditions must match for the rule to match.
 | |
| // There is a bug in library versions below v2.2.1 which can, in some cases,
 | |
| // cause conditions to be lost when more than one are used. Consequently,
 | |
| // AddRuleConditional is disabled on library versions lower than v2.2.1
 | |
| func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
 | |
| 	return f.addRuleGeneric(call, action, false, conds)
 | |
| }
 | |
| 
 | |
| // AddRuleConditionalExact adds a single rule for a conditional action on a
 | |
| // syscall.
 | |
| // No modifications will be made to the rule, and it will fail to add if it
 | |
| // cannot be applied to the current architecture without modification.
 | |
| // The rule will function exactly as described, but it may not function identically
 | |
| // (or be able to be applied to) all architectures.
 | |
| // Returns an error if an issue was encountered adding the rule.
 | |
| // There is a bug in library versions below v2.2.1 which can, in some cases,
 | |
| // cause conditions to be lost when more than one are used. Consequently,
 | |
| // AddRuleConditionalExact is disabled on library versions lower than v2.2.1
 | |
| func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
 | |
| 	return f.addRuleGeneric(call, action, true, conds)
 | |
| }
 | |
| 
 | |
| // ExportPFC output PFC-formatted, human-readable dump of a filter context's
 | |
| // rules to a file.
 | |
| // Accepts file to write to (must be open for writing).
 | |
| // Returns an error if writing to the file fails.
 | |
| func (f *ScmpFilter) ExportPFC(file *os.File) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	fd := file.Fd()
 | |
| 
 | |
| 	if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a
 | |
| // filter context's rules to a file.
 | |
| // Accepts file to write to (must be open for writing).
 | |
| // Returns an error if writing to the file fails.
 | |
| func (f *ScmpFilter) ExportBPF(file *os.File) error {
 | |
| 	f.lock.Lock()
 | |
| 	defer f.lock.Unlock()
 | |
| 
 | |
| 	fd := file.Fd()
 | |
| 
 | |
| 	if !f.valid {
 | |
| 		return errBadFilter
 | |
| 	}
 | |
| 
 | |
| 	if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
 | |
| 		return syscall.Errno(-1 * retCode)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 |