collect ephemeral storage capacity on initialization

David Ashpole 2018-02-15 17:33:22 -08:00
parent 271c267fff
commit b259543985
4 changed files with 55 additions and 46 deletions
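In short: instead of polling for rootfs information in a background goroutine after startup, the container manager now reads it synchronously from cadvisor when it starts, and the kubelet's node-ready check refuses to report Ready until the expected capacities (including ephemeral-storage when the LocalStorageCapacityIsolation feature gate is enabled) are present in node status. The helper called in the new Start path, cadvisor.EphemeralStorageCapacityFromFsInfo, is not part of this diff; the following is only a minimal sketch of what such a conversion looks like (package aliases and the exact field handling are assumptions, not part of this commit):

// Sketch only: convert cadvisor's rootfs information into the node's
// ephemeral-storage capacity.
package main

import (
	"fmt"

	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// ephemeralStorageCapacityFromFsInfo maps a filesystem's total capacity
// onto the v1.ResourceEphemeralStorage resource.
func ephemeralStorageCapacityFromFsInfo(info cadvisorapiv2.FsInfo) v1.ResourceList {
	return v1.ResourceList{
		v1.ResourceEphemeralStorage: *resource.NewQuantity(int64(info.Capacity), resource.BinarySI),
	}
}

func main() {
	rootfs := cadvisorapiv2.FsInfo{Capacity: 100 * 1024 * 1024 * 1024} // 100Gi root filesystem
	for name, quantity := range ephemeralStorageCapacityFromFsInfo(rootfs) {
		fmt.Printf("%s = %s\n", name, quantity.String()) // ephemeral-storage = 100Gi
	}
}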


@@ -533,6 +533,14 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
// allocatable of the node
cm.nodeInfo = node
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("failed to get rootfs info: %v", err)
}
for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
cm.capacity[rName] = rCap
}
// Ensure that node allocatable configuration is valid.
if err := cm.validateNodeAllocatable(); err != nil {
return err
@@ -575,36 +583,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node,
}, 5*time.Minute, wait.NeverStop)
}
// Local storage filesystem information from `RootFsInfo` and `ImagesFsInfo` is available at a later time
// depending on the time when cadvisor manager updates container stats. Therefore use a go routine to keep
// retrieving the information until it is available.
stopChan := make(chan struct{})
go wait.Until(func() {
if err := cm.setFsCapacity(); err != nil {
glog.Errorf("[ContainerManager]: %v", err)
return
}
close(stopChan)
}, time.Second, stopChan)
// Starts device manager.
if err := cm.deviceManager.Start(devicemanager.ActivePodsFunc(activePods), sourcesReady); err != nil {
return err
}
return nil
}
func (cm *containerManagerImpl) setFsCapacity() error {
rootfs, err := cm.cadvisorInterface.RootFsInfo()
if err != nil {
return fmt.Errorf("Fail to get rootfs information %v", err)
}
cm.Lock()
for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) {
cm.capacity[rName] = rCap
}
cm.Unlock()
return nil
}
@@ -884,8 +867,6 @@ func getDockerAPIVersion(cadvisor cadvisor.Interface) *utilversion.Version {
}
func (cm *containerManagerImpl) GetCapacity() v1.ResourceList {
cm.RLock()
defer cm.RUnlock()
return cm.capacity
}
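Net effect in the container manager: Start now fetches RootFsInfo directly and fails fast if it is unavailable, the setFsCapacity retry goroutine and its locking disappear, and GetCapacity can return the map without taking the read lock because capacity is only written during Start. A minimal sketch of why the lock-free read is safe, using simplified stand-in types rather than the real containerManagerImpl:

// Sketch: the capacity map is fully populated inside Start, before any other
// goroutine can call GetCapacity, so reads never race with writes.
package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

type containerManager struct {
	capacity v1.ResourceList // written only during Start
}

func (cm *containerManager) Start(rootfsCapacityBytes int64) {
	// All writes to cm.capacity happen here, before Start returns and before
	// any reader goroutines exist, so GetCapacity needs no lock afterwards.
	cm.capacity = v1.ResourceList{
		v1.ResourceEphemeralStorage: *resource.NewQuantity(rootfsCapacityBytes, resource.BinarySI),
	}
}

func (cm *containerManager) GetCapacity() v1.ResourceList {
	return cm.capacity // read-only after Start, so no RLock is required
}

func main() {
	cm := &containerManager{}
	cm.Start(100 * 1024 * 1024 * 1024) // pretend the rootfs is 100Gi
	for name, q := range cm.GetCapacity() {
		fmt.Printf("%s = %s\n", name, q.String()) // ephemeral-storage = 100Gi
	}
}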


@@ -1293,16 +1293,6 @@ func (kl *Kubelet) initializeModules() error {
kl.serverCertificateManager.Start()
}
// Start container manager.
node, err := kl.getNodeAnyWay()
if err != nil {
return fmt.Errorf("Kubelet failed to get node info: %v", err)
}
if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
return fmt.Errorf("Failed to start ContainerManager %v", err)
}
// Start out of memory watcher.
if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
return fmt.Errorf("Failed to start OOM watcher %v", err)
@@ -1328,6 +1318,21 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
}
// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
kl.evictionManager.Start(kl.StatsProvider, kl.GetActivePods, kl.podResourcesAreReclaimed, kl.containerManager, evictionMonitoringPeriod)
// trigger on-demand stats collection once so that we have capacity information for ephemeral storage.
// ignore any errors, since if stats collection is not successful, the container manager will fail to start below.
kl.StatsProvider.GetCgroupStats("/", true)
// Start container manager.
node, err := kl.getNodeAnyWay()
if err != nil {
// Fail kubelet and rely on the babysitter to retry starting kubelet.
glog.Fatalf("Kubelet failed to get node info: %v", err)
}
// containerManager must start after cAdvisor because it needs filesystem capacity information
if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil {
// Fail kubelet and rely on the babysitter to retry starting kubelet.
glog.Fatalf("Failed to start ContainerManager %v", err)
}
}
// Run starts the kubelet reacting to config updates
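On the kubelet side, starting the container manager moves out of initializeModules (which runs before cadvisor) and into initializeRuntimeDependentModules, after cadvisor is running and after a single on-demand GetCgroupStats("/", true) call primes filesystem statistics; a failure there is now fatal rather than returned, relying on the babysitter (service manager) to restart the kubelet. Below is a small sketch of that ordering dependency, built from hypothetical stand-in types rather than the real kubelet:

// Sketch with hypothetical stand-ins for cadvisor and the container manager,
// only to illustrate why the on-demand stats collection must happen before
// containerManager.Start in the new ordering.
package main

import (
	"errors"
	"log"
)

type fakeCadvisor struct{ primed bool }

func (c *fakeCadvisor) Start()           {}                  // background stats collection begins here
func (c *fakeCadvisor) CollectOnDemand() { c.primed = true } // mirrors kl.StatsProvider.GetCgroupStats("/", true)

func (c *fakeCadvisor) RootFsInfo() error {
	if !c.primed {
		return errors.New("failed to get rootfs info")
	}
	return nil
}

type fakeContainerManager struct{}

// Start fails immediately when rootfs info is not yet available, matching the
// fail-fast behavior Start has after this commit.
func (cm *fakeContainerManager) Start(c *fakeCadvisor) error { return c.RootFsInfo() }

func main() {
	c := &fakeCadvisor{}
	c.Start()
	c.CollectOnDemand() // skip this and Start below fails, killing the kubelet
	if err := (&fakeContainerManager{}).Start(c); err != nil {
		log.Fatalf("Failed to start ContainerManager %v", err)
	}
	log.Println("container manager started; ephemeral storage capacity is known")
}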


@@ -735,17 +735,28 @@ func (kl *Kubelet) setNodeReadyCondition(node *v1.Node) {
// This is due to an issue with version skewed kubelet and master components.
// ref: https://github.com/kubernetes/kubernetes/issues/16961
currentTime := metav1.NewTime(kl.clock.Now())
var newNodeReadyCondition v1.NodeCondition
newNodeReadyCondition := v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: "KubeletReady",
Message: "kubelet is posting ready status",
LastHeartbeatTime: currentTime,
}
rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
if len(rs) == 0 {
newNodeReadyCondition = v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
Reason: "KubeletReady",
Message: "kubelet is posting ready status",
LastHeartbeatTime: currentTime,
}
} else {
requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
}
missingCapacities := []string{}
for _, resource := range requiredCapacities {
if _, found := node.Status.Capacity[resource]; !found {
missingCapacities = append(missingCapacities, string(resource))
}
}
if len(missingCapacities) > 0 {
rs = append(rs, fmt.Sprintf("Missing node capacity for resources: %s", strings.Join(missingCapacities, ", ")))
}
if len(rs) > 0 {
newNodeReadyCondition = v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
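The readiness check above means a node whose status is still missing an expected capacity reports NotReady instead of silently advertising zero ephemeral storage. A small standalone example of the check and the message it produces (the helper function is illustrative only; in the real code the loop lives inline in setNodeReadyCondition):

// Sketch: what the new readiness check reports when a required capacity is missing.
package main

import (
	"fmt"
	"strings"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// missingCapacities returns the names of required resources that are absent
// from the node's reported capacity.
func missingCapacities(capacity v1.ResourceList, required []v1.ResourceName) []string {
	missing := []string{}
	for _, r := range required {
		if _, found := capacity[r]; !found {
			missing = append(missing, string(r))
		}
	}
	return missing
}

func main() {
	// Node capacity before cadvisor has reported rootfs info: no ephemeral-storage yet.
	capacity := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("4"),
		v1.ResourceMemory: resource.MustParse("8Gi"),
		v1.ResourcePods:   resource.MustParse("110"),
	}
	required := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods, v1.ResourceEphemeralStorage}
	if missing := missingCapacities(capacity, required); len(missing) > 0 {
		// The kubelet appends this to the runtime errors, flipping NodeReady to ConditionFalse.
		fmt.Printf("Missing node capacity for resources: %s\n", strings.Join(missing, ", "))
	}
}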


@@ -249,6 +249,9 @@ func TestUpdateNewNodeStatus(t *testing.T) {
ContainerOsVersion: "Debian GNU/Linux 7 (wheezy)",
}
mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
maxAge := 0 * time.Second
options := cadvisorapiv2.RequestOptions{IdType: cadvisorapiv2.TypeName, Count: 2, Recursive: false, MaxAge: &maxAge}
mockCadvisor.On("ContainerInfoV2", "/", options).Return(map[string]cadvisorapiv2.ContainerInfo{}, nil)
expectedNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
@@ -424,6 +427,9 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
ContainerOsVersion: "Debian GNU/Linux 7 (wheezy)",
}
mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
maxAge := 0 * time.Second
options := cadvisorapiv2.RequestOptions{IdType: cadvisorapiv2.TypeName, Count: 2, Recursive: false, MaxAge: &maxAge}
mockCadvisor.On("ContainerInfoV2", "/", options).Return(map[string]cadvisorapiv2.ContainerInfo{}, nil)
expectedNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
@@ -620,6 +626,9 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
ContainerOsVersion: "Debian GNU/Linux 7 (wheezy)",
}
mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
maxAge := 0 * time.Second
options := cadvisorapiv2.RequestOptions{IdType: cadvisorapiv2.TypeName, Count: 2, Recursive: false, MaxAge: &maxAge}
mockCadvisor.On("ContainerInfoV2", "/", options).Return(map[string]cadvisorapiv2.ContainerInfo{}, nil)
expectedNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
@@ -1089,6 +1098,9 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
ContainerOsVersion: "Debian GNU/Linux 7 (wheezy)",
}
mockCadvisor.On("VersionInfo").Return(versionInfo, nil)
maxAge := 0 * time.Second
options := cadvisorapiv2.RequestOptions{IdType: cadvisorapiv2.TypeName, Count: 2, Recursive: false, MaxAge: &maxAge}
mockCadvisor.On("ContainerInfoV2", "/", options).Return(map[string]cadvisorapiv2.ContainerInfo{}, nil)
expectedNode := &v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
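The test updates follow from the new on-demand collection: GetCgroupStats("/", true) now issues a ContainerInfoV2 request against cadvisor with MaxAge set to zero (forcing a fresh sample rather than a cached one), so each node-status test has to register that expectation on the cadvisor mock. The same three lines are repeated in four tests; a hypothetical helper like the one below could factor them out (the cadvisortest.Mock type and its package path are assumptions based on the kubelet's testing packages, not part of this diff):

// Sketch: register the cadvisor expectation that the kubelet's one-off
// GetCgroupStats("/", true) call will hit during startup.
package kubelet

import (
	"time"

	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
)

func expectOnDemandRootStats(mockCadvisor *cadvisortest.Mock) {
	maxAge := 0 * time.Second // MaxAge of zero asks cadvisor for a fresh, uncached sample
	options := cadvisorapiv2.RequestOptions{
		IdType:    cadvisorapiv2.TypeName,
		Count:     2,
		Recursive: false,
		MaxAge:    &maxAge,
	}
	mockCadvisor.On("ContainerInfoV2", "/", options).Return(map[string]cadvisorapiv2.ContainerInfo{}, nil)
}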