Merge pull request #116377 from kinvolk/rata/userns

KEP-127: user namespace support for stateless pods
This commit is contained in:
Kubernetes Prow Robot 2023-03-14 10:40:43 -07:00 committed by GitHub
commit 6a111bebe2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 600 additions and 705 deletions

View File

@ -816,10 +816,6 @@ func (adc *attachDetachController) GetPodVolumeDir(podUID types.UID, pluginName,
return ""
}
// GetHostIDsForPod is a no-op on the attach/detach controller: regardless of
// the pod or container IDs passed in, it yields nil host IDs and a nil error.
func (adc *attachDetachController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	// The named results are never assigned, so all three come back as nil.
	return
}
// GetPodPluginDir always returns the empty string; podUID and pluginName are
// ignored.
func (adc *attachDetachController) GetPodPluginDir(podUID types.UID, pluginName string) string {
return ""
}

View File

@ -393,10 +393,6 @@ func (expc *expandController) GetPodsDir() string {
return ""
}
// GetHostIDsForPod is a no-op on the expand controller: regardless of the pod
// or container IDs passed in, it yields nil host IDs and a nil error.
func (expc *expandController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	// The named results are never assigned, so all three come back as nil.
	return
}
// GetPodVolumeDir always returns the empty string; podUID, pluginName and
// volumeName are ignored.
func (expc *expandController) GetPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
return ""
}

View File

@ -55,10 +55,6 @@ func (ctrl *PersistentVolumeController) GetPodVolumeDir(podUID types.UID, plugin
return ""
}
// GetHostIDsForPod is a no-op on the persistent volume controller: regardless
// of the pod or container IDs passed in, it yields nil host IDs and a nil
// error.
func (ctrl *PersistentVolumeController) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	// The named results are never assigned, so all three come back as nil.
	return
}
// GetPodPluginDir always returns the empty string; podUID and pluginName are
// ignored.
func (ctrl *PersistentVolumeController) GetPodPluginDir(podUID types.UID, pluginName string) string {
return ""
}

View File

@ -110,6 +110,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/sysctl"
"k8s.io/kubernetes/pkg/kubelet/token"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/userns"
"k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/kubelet/util/manager"
"k8s.io/kubernetes/pkg/kubelet/util/queue"
@ -908,7 +909,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
StateDirectory: rootDirectory,
})
klet.shutdownManager = shutdownManager
klet.usernsManager, err = MakeUserNsManager(klet)
klet.usernsManager, err = userns.MakeUserNsManager(klet)
if err != nil {
return nil, err
}
@ -1256,7 +1257,7 @@ type Kubelet struct {
shutdownManager nodeshutdown.Manager
// Manage user namespaces
usernsManager *usernsManager
usernsManager *userns.UsernsManager
// Mutex to serialize new pod admission and existing pod resizing
podResizeMutex sync.Mutex

View File

@ -104,6 +104,11 @@ func (kl *Kubelet) GetPodDir(podUID types.UID) string {
return kl.getPodDir(podUID)
}
// ListPodsFromDisk gets a list of pods that have data directories.
// It is the exported entry point for kl.listPodsFromDisk and returns that
// method's UID list and error unchanged.
func (kl *Kubelet) ListPodsFromDisk() ([]types.UID, error) {
return kl.listPodsFromDisk()
}
// getPodDir returns the full path to the per-pod directory for the pod with
// the given UID.
func (kl *Kubelet) getPodDir(podUID types.UID) string {

View File

@ -426,10 +426,6 @@ func (kl *Kubelet) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.Us
return kl.usernsManager.GetOrCreateUserNamespaceMappings(pod)
}
// getHostIDsForPod forwards the request to the kubelet's user namespace
// manager and hands back its host UID, host GID and error untouched.
func (kl *Kubelet) getHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	hostUID, hostGID, err = kl.usernsManager.getHostIDsForPod(pod, containerUID, containerGID)
	return hostUID, hostGID, err
}
// GeneratePodHostNameAndDomain creates a hostname and domain name for a pod,
// given that pod's spec and annotations or returns an error.
func (kl *Kubelet) GeneratePodHostNameAndDomain(pod *v1.Pod) (string, string, error) {

View File

@ -54,6 +54,15 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
return err
}
config.Linux = cl
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.UserNamespacesStatelessPodsSupport) {
if cl.SecurityContext.NamespaceOptions.UsernsOptions != nil {
for _, mount := range config.Mounts {
mount.UidMappings = cl.SecurityContext.NamespaceOptions.UsernsOptions.Uids
mount.GidMappings = cl.SecurityContext.NamespaceOptions.UsernsOptions.Gids
}
}
}
return nil
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package kubelet
package userns
import (
"encoding/json"
@ -49,11 +49,11 @@ const maxPods = 1024
const mapReInitializeThreshold = 1000
type userNsPodsManager interface {
getPodDir(podUID types.UID) string
listPodsFromDisk() ([]types.UID, error)
GetPodDir(podUID types.UID) string
ListPodsFromDisk() ([]types.UID, error)
}
type usernsManager struct {
type UsernsManager struct {
used *allocator.AllocationBitmap
usedBy map[types.UID]uint32 // Map pod.UID to range used
removed int
@ -86,8 +86,8 @@ const mappingsFile = "userns"
// writeMappingsToFile writes the specified user namespace configuration to the pod
// directory.
func (m *usernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace) error {
dir := m.kl.getPodDir(pod)
func (m *UsernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace) error {
dir := m.kl.GetPodDir(pod)
data, err := json.Marshal(userNs)
if err != nil {
@ -119,8 +119,8 @@ func (m *usernsManager) writeMappingsToFile(pod types.UID, userNs userNamespace)
}
// readMappingsFromFile reads the user namespace configuration from the pod directory.
func (m *usernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
dir := m.kl.getPodDir(pod)
func (m *UsernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
dir := m.kl.GetPodDir(pod)
fstore, err := utilstore.NewFileStore(dir, &utilfs.DefaultFs{})
if err != nil {
return nil, err
@ -128,8 +128,8 @@ func (m *usernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
return fstore.Read(mappingsFile)
}
func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {
m := usernsManager{
func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) {
m := UsernsManager{
// Create a bitArray for all the UID space (2^32).
// As a by product of that, no index param to bitArray can be out of bounds (index is uint32).
used: allocator.NewAllocationMap((math.MaxUint32+1)/userNsLength, "user namespaces"),
@ -141,17 +141,12 @@ func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {
return nil, err
}
// Second block will be used for phase II. Don't assign that range for now.
if _, err := m.used.Allocate(1); err != nil {
return nil, err
}
// do not bother reading the list of pods if user namespaces are not enabled.
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return &m, nil
}
found, err := kl.listPodsFromDisk()
found, err := kl.ListPodsFromDisk()
if err != nil {
if os.IsNotExist(err) {
return &m, nil
@ -171,7 +166,7 @@ func MakeUserNsManager(kl userNsPodsManager) (*usernsManager, error) {
// recordPodMappings registers the range used for the user namespace if the
// usernsConfFile exists in the pod directory.
func (m *usernsManager) recordPodMappings(pod types.UID) error {
func (m *UsernsManager) recordPodMappings(pod types.UID) error {
content, err := m.readMappingsFromFile(pod)
if err != nil && err != utilstore.ErrKeyNotFound {
return err
@ -187,7 +182,7 @@ func (m *usernsManager) recordPodMappings(pod types.UID) error {
}
// isSet checks if the specified index is already set.
func (m *usernsManager) isSet(v uint32) bool {
func (m *UsernsManager) isSet(v uint32) bool {
index := int(v / userNsLength)
return m.used.Has(index)
}
@ -195,7 +190,7 @@ func (m *usernsManager) isSet(v uint32) bool {
// allocateOne finds a free user namespace and allocates it to the specified pod.
// The first return value is the first ID in the user namespace, the second is
// the length of the user namespace range.
func (m *usernsManager) allocateOne(pod types.UID) (firstID uint32, length uint32, err error) {
func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint32, err error) {
if m.numAllocated >= maxPods {
return 0, 0, fmt.Errorf("limit on count of pods with user namespaces exceeded (limit is %v, current pods with userns: %v)", maxPods, m.numAllocated)
}
@ -222,7 +217,7 @@ func (m *usernsManager) allocateOne(pod types.UID) (firstID uint32, length uint3
}
// record stores the user namespace [from; from+length] to the specified pod.
func (m *usernsManager) record(pod types.UID, from, length uint32) (err error) {
func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) {
if length != userNsLength {
return fmt.Errorf("wrong user namespace length %v", length)
}
@ -262,7 +257,7 @@ func (m *usernsManager) record(pod types.UID, from, length uint32) (err error) {
}
// Release releases the user namespace allocated to the specified pod.
func (m *usernsManager) Release(podUID types.UID) {
func (m *UsernsManager) Release(podUID types.UID) {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return
}
@ -273,7 +268,7 @@ func (m *usernsManager) Release(podUID types.UID) {
m.releaseWithLock(podUID)
}
func (m *usernsManager) releaseWithLock(pod types.UID) {
func (m *UsernsManager) releaseWithLock(pod types.UID) {
v, ok := m.usedBy[pod]
if !ok {
klog.V(5).InfoS("pod user namespace allocation not present", "podUID", pod)
@ -285,7 +280,7 @@ func (m *usernsManager) releaseWithLock(pod types.UID) {
m.numAllocated--
m.removed++
_ = os.Remove(filepath.Join(m.kl.getPodDir(pod), mappingsFile))
_ = os.Remove(filepath.Join(m.kl.GetPodDir(pod), mappingsFile))
if m.removed%mapReInitializeThreshold == 0 {
n := make(map[types.UID]uint32)
@ -298,7 +293,7 @@ func (m *usernsManager) releaseWithLock(pod types.UID) {
m.used.Release(int(v / userNsLength))
}
func (m *usernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) {
func (m *UsernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) {
if err = json.Unmarshal([]byte(content), &userNs); err != nil {
err = fmt.Errorf("can't parse file: %w", err)
return
@ -338,7 +333,7 @@ func (m *usernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte)
return
}
func (m *usernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err error) {
func (m *UsernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err error) {
firstID, length, err := m.allocateOne(pod.UID)
if err != nil {
return
@ -371,7 +366,7 @@ func (m *usernsManager) createUserNs(pod *v1.Pod) (userNs userNamespace, err err
}
// GetOrCreateUserNamespaceMappings returns the configuration for the sandbox user namespace
func (m *usernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
func (m *UsernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimeapi.UserNamespace, error) {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return nil, nil
}
@ -431,7 +426,7 @@ func (m *usernsManager) GetOrCreateUserNamespaceMappings(pod *v1.Pod) (*runtimea
// CleanupOrphanedPodUsernsAllocations reconciles the state of user namespace
// allocations with the pods actually running. It frees any user namespace
// allocation for orphaned pods.
func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error {
func (m *UsernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runningPods []*kubecontainer.Pod) error {
if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
return nil
}
@ -448,7 +443,7 @@ func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runn
}
allFound := sets.NewString()
found, err := m.kl.listPodsFromDisk()
found, err := m.kl.ListPodsFromDisk()
if err != nil {
return err
}
@ -479,68 +474,3 @@ func (m *usernsManager) CleanupOrphanedPodUsernsAllocations(pods []*v1.Pod, runn
return nil
}
// getHostIDsForPod translates the UID and GID used inside a container into the
// UID and GID they are mapped to on the host.
//
// The container IDs are returned unchanged, with no lookup, when the
// UserNamespacesStatelessPodsSupport feature gate is off, when pod is nil, or
// when pod.Spec.HostUsers is unset or true. Otherwise the pod's user namespace
// mapping is fetched (or created) and each ID is resolved through it; a nil
// containerUID/containerGID is resolved as ID 0 inside the container.
//
// On failure both returned pointers are nil and the error wraps the cause.
func (m *usernsManager) getHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	// Nothing to translate unless the gate is on and the pod has HostUsers set to false.
	gateOn := utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport)
	if !gateOn || pod == nil || pod.Spec.HostUsers == nil || *pod.Spec.HostUsers {
		return containerUID, containerGID, nil
	}

	userNs, nsErr := m.GetOrCreateUserNamespaceMappings(pod)
	if nsErr != nil {
		return nil, nil, fmt.Errorf("Error getting pod user namespace mapping: %w", nsErr)
	}

	mappedUID, uidErr := hostIDFromMapping(userNs.Uids, containerUID)
	if uidErr != nil {
		return nil, nil, fmt.Errorf("Error getting host UID: %w", uidErr)
	}

	mappedGID, gidErr := hostIDFromMapping(userNs.Gids, containerGID)
	if gidErr != nil {
		return nil, nil, fmt.Errorf("Error getting host GID: %w", gidErr)
	}

	return &mappedUID, &mappedGID, nil
}
// hostIDFromMapping resolves an ID inside the container to the matching host
// ID using the given list of ID mappings.
//
// A nil containerId means "root inside the container" (ID 0). Nil entries in
// mapping are skipped. The first entry whose container range
// [ContainerId, ContainerId+Length-1] contains the ID wins, and the result is
// that entry's HostId plus the ID's offset within the range.
//
// An error is returned when mapping is empty or when no entry covers the ID.
func hostIDFromMapping(mapping []*runtimeapi.IDMapping, containerId *int64) (int64, error) {
	if len(mapping) == 0 {
		return 0, fmt.Errorf("can't use empty user namespace mapping")
	}

	// Default to root inside the container when no specific ID was requested.
	var wanted int64
	if containerId != nil {
		wanted = *containerId
	}

	for _, entry := range mapping {
		if entry == nil {
			continue
		}

		rangeStart := int64(entry.ContainerId)
		rangeEnd := rangeStart + int64(entry.Length) - 1
		if wanted < rangeStart || wanted > rangeEnd {
			// This entry does not cover the requested ID; try the next one.
			continue
		}

		// Same offset into the host range as into the container range.
		return int64(entry.HostId) + wanted - rangeStart, nil
	}

	return 0, fmt.Errorf("ID: %v not present in pod user namespace", wanted)
}

View File

@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package kubelet
package userns
import (
"fmt"
@ -25,18 +25,17 @@ import (
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
pkgfeatures "k8s.io/kubernetes/pkg/features"
)
// testUserNsPodsManager is a field-less test double; its methods supply the
// pod directory and on-disk pod list to the manager under test.
type testUserNsPodsManager struct{}
func (m *testUserNsPodsManager) getPodDir(podUID types.UID) string {
func (m *testUserNsPodsManager) GetPodDir(podUID types.UID) string {
return "/tmp/non-existant-dir.This-is-not-used-in-tests"
}
func (m *testUserNsPodsManager) listPodsFromDisk() ([]types.UID, error) {
func (m *testUserNsPodsManager) ListPodsFromDisk() ([]types.UID, error) {
return nil, nil
}
@ -47,8 +46,7 @@ func TestUserNsManagerAllocate(t *testing.T) {
m, err := MakeUserNsManager(testUserNsPodsManager)
require.NoError(t, err)
assert.Equal(t, true, m.isSet(0), "m.isSet(0) should be true")
assert.Equal(t, true, m.isSet(1), "m.isSet(1) should be true")
assert.Equal(t, true, m.isSet(0*65536), "m.isSet(0) should be true")
allocated, length, err := m.allocateOne("one")
assert.NoError(t, err)
@ -173,133 +171,3 @@ func TestUserNsManagerParseUserNsFile(t *testing.T) {
})
}
}
// TestUserNsManagerHostIDFromMapping runs hostIDFromMapping over a table of
// ID mappings and container IDs, checking for each case either the resolved
// host ID or that an error is reported.
func TestUserNsManagerHostIDFromMapping(t *testing.T) {
	// id builds the pointer argument for an explicit container ID; a nil
	// pointer in the table means "no ID requested".
	id := func(v int64) *int64 { return &v }

	// Fixtures shared by several cases. hostIDFromMapping only reads them.
	fullUnprivileged := []*runtimeapi.IDMapping{
		{HostId: userNsLength * 2, ContainerId: 0, Length: userNsLength},
	}
	twoSingleIDs := []*runtimeapi.IDMapping{
		{HostId: userNsLength * 2, ContainerId: 0, Length: 1},
		{HostId: userNsLength*2 + 10, ContainerId: 1, Length: 1},
	}

	cases := []struct {
		name        string
		success     bool
		containerId *int64 // nil means no container ID is passed.
		expHostId   int64
		m           []*runtimeapi.IDMapping
	}{
		{
			name:        "one basic mapping",
			success:     true,
			containerId: nil,
			expHostId:   0,
			m: []*runtimeapi.IDMapping{
				{HostId: 0, ContainerId: 0, Length: userNsLength},
			},
		},
		{
			name:        "one unprivileged mapping",
			success:     true,
			containerId: nil,
			expHostId:   userNsLength * 2,
			m:           fullUnprivileged,
		},
		{
			name:        "one unprivileged mapping random id",
			success:     true,
			containerId: id(3),
			expHostId:   userNsLength*2 + 3,
			m:           fullUnprivileged,
		},
		{
			name:        "two unprivileged mapping",
			success:     true,
			containerId: id(0),
			expHostId:   userNsLength*2 + 0,
			m:           twoSingleIDs,
		},
		{
			name:        "two unprivileged mapping - random id",
			success:     true,
			containerId: id(1),
			expHostId:   userNsLength*2 + 10,
			m:           twoSingleIDs,
		},
		{
			name:        "two unprivileged mapping - not mapped user",
			success:     false,
			containerId: id(3),
			m: []*runtimeapi.IDMapping{
				{HostId: userNsLength * 2, ContainerId: 0, Length: 1},
				{HostId: userNsLength*2 + 1, ContainerId: 1, Length: 1},
			},
		},
		{
			name:        "no mappings",
			success:     false,
			containerId: id(0),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := hostIDFromMapping(tc.m, tc.containerId)

			// The outcome (error or not) must match the expectation.
			if tc.success != (err == nil) {
				t.Fatalf("%v: expected success: %v - got error: %v", tc.name, tc.success, err)
			}
			// An expected failure has no host ID to compare.
			if err != nil {
				return
			}
			if got != tc.expHostId {
				t.Errorf("expected: %v - got: %v", tc.expHostId, got)
			}
		})
	}
}

View File

@ -128,16 +128,6 @@ func (kvh *kubeletVolumeHost) GetPodsDir() string {
return kvh.kubelet.getPodsDir()
}
// GetHostIDsForPod takes the UID and GID used inside the container and, if the
// pod uses user namespaces, returns the host UID and GID they are mapped to.
// A nil containerUID/containerGID asks for the host ID of ID 0 inside the
// container. If the pod is not using user namespaces no mapping is needed and
// the same containerUID and containerGID params are returned.
//
// The call is forwarded as-is to the kubelet's getHostIDsForPod.
func (kvh *kubeletVolumeHost) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	hostUID, hostGID, err = kvh.kubelet.getHostIDsForPod(pod, containerUID, containerGID)
	return hostUID, hostGID, err
}
func (kvh *kubeletVolumeHost) GetPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
dir := kvh.kubelet.getPodVolumeDir(podUID, pluginName, volumeName)
if runtime.GOOS == "windows" {

View File

@ -334,13 +334,6 @@ type KubeletVolumeHost interface {
WaitForCacheSync() error
// Returns hostutil.HostUtils
GetHostUtil() hostutil.HostUtils
// GetHostIDsForPod if the pod uses user namespaces, takes the uid and
// gid inside the container and returns the host UID and GID those are
// mapped to on the host. If containerUID/containerGID is nil, then it
// returns the host UID/GID for ID 0 inside the container.
// If the pod is not using user namespaces, as there is no mapping needed, the
// same containerUID and containerGID params are returned.
GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error)
}
// AttachDetachVolumeHost is a AttachDetach Controller specific interface that plugins can use

View File

@ -123,10 +123,6 @@ func (f *fakeVolumeHost) GetPodsDir() string {
return filepath.Join(f.rootDir, "pods")
}
// GetHostIDsForPod is the fake host's identity mapping: the container UID and
// GID pointers are handed back as the host UID and GID, and the error is
// always nil. The pod argument is not consulted.
func (f *fakeVolumeHost) GetHostIDsForPod(pod *v1.Pod, containerUID, containerGID *int64) (hostUID, hostGID *int64, err error) {
	hostUID, hostGID = containerUID, containerGID
	return hostUID, hostGID, nil
}
// GetPodVolumeDir builds the path
// <rootDir>/pods/<podUID>/volumes/<pluginName>/<volumeName> under the fake
// host's root directory.
func (f *fakeVolumeHost) GetPodVolumeDir(podUID types.UID, pluginName, volumeName string) string {
	segments := []string{
		f.rootDir,
		"pods",
		string(podUID),
		"volumes",
		pluginName,
		volumeName,
	}
	return filepath.Join(segments...)
}

View File

@ -682,35 +682,10 @@ func (og *operationGenerator) GenerateMountVolumeFunc(
resizeOptions.DeviceStagePath = deviceStagePath
}
// No mapping is needed for hostUID/hostGID if userns is not used.
// Therefore, just assign the container users to host UID/GID.
hostUID := util.FsUserFrom(volumeToMount.Pod)
hostGID := fsGroup
if utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesStatelessPodsSupport) {
// Without userns hostUID/GID was the user inside the container too.
containerUID, containerGID := hostUID, hostGID
kvh, ok := og.GetVolumePluginMgr().Host.(volume.KubeletVolumeHost)
if !ok {
msg := fmt.Errorf("volume host does not implement KubeletVolumeHost interface")
eventErr, detailedErr := volumeToMount.GenerateError("MountVolume type assertion error", msg)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}
// This pod _might_ use userns. GetHostIDsForPod() will give us the right
// UID/GID to use for this pod (no matter if the pod uses userns or not).
hostUID, hostGID, err = kvh.GetHostIDsForPod(volumeToMount.Pod, containerUID, containerGID)
if err != nil {
msg := fmt.Sprintf("MountVolume.GetHostIDsForPod failed to find host ID in user namespace (UID: %v GID: %v)", containerUID, containerGID)
eventErr, detailedErr := volumeToMount.GenerateError(msg, err)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}
}
// Execute mount
mountErr := volumeMounter.SetUp(volume.MounterArgs{
FsUser: hostUID,
FsGroup: hostGID,
FsUser: util.FsUserFrom(volumeToMount.Pod),
FsGroup: fsGroup,
DesiredSize: volumeToMount.DesiredSizeLimit,
FSGroupChangePolicy: fsGroupChangePolicy,
SELinuxLabel: volumeToMount.SELinuxLabel,

File diff suppressed because it is too large Load Diff

View File

@ -222,6 +222,10 @@ message Mount {
bool selinux_relabel = 4;
// Requested propagation mode.
MountPropagation propagation = 5;
// UidMappings specifies the runtime UID mappings for the mount.
repeated IDMapping uidMappings = 6;
// GidMappings specifies the runtime GID mappings for the mount.
repeated IDMapping gidMappings = 7;
}
// IDMapping describes host to container ID mappings for a pod sandbox.