// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"flag"
	"fmt"
	"io/ioutil"
	"math"
	"math/rand"
	"os/exec"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/google/cadvisor/accelerators"
	"github.com/google/cadvisor/cache/memory"
	"github.com/google/cadvisor/collector"
	"github.com/google/cadvisor/container"
	info "github.com/google/cadvisor/info/v1"
	"github.com/google/cadvisor/info/v2"
	"github.com/google/cadvisor/summary"
	"github.com/google/cadvisor/utils/cpuload"

	units "github.com/docker/go-units"
	"k8s.io/klog"
	"k8s.io/utils/clock"
)

var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader")

// Housekeeping interval.
var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings")

// cgroup type chosen to fetch the cgroup path of a process.
// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers.
var cgroupPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)

type containerInfo struct {
	info.ContainerReference
	Subcontainers []info.ContainerReference
	Spec          info.ContainerSpec
}

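// containerData holds everything needed to track a single container: its handler,
// cached spec and subcontainer list, and the state of its periodic housekeeping loop.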
type containerData struct {
	handler                  container.ContainerHandler
	info                     containerInfo
	memoryCache              *memory.InMemoryCache
	lock                     sync.Mutex
	loadReader               cpuload.CpuLoadReader
	summaryReader            *summary.StatsSummary
	loadAvg                  float64 // smoothed load average seen so far.
	housekeepingInterval     time.Duration
	maxHousekeepingInterval  time.Duration
	allowDynamicHousekeeping bool
	infoLastUpdatedTime      time.Time
	statsLastUpdatedTime     time.Time
	lastErrorTime            time.Time
	// clock is used to track time.
	clock clock.Clock

	// Decay value used for load average smoothing. Interval length of 10 seconds is used.
	loadDecay float64

	// Whether to log the usage of this container when it is updated.
	logUsage bool

	// Tells the container to stop.
	stop chan bool

	// Tells the container to immediately collect stats.
	onDemandChan chan chan struct{}

	// Runs custom metric collectors.
	collectorManager collector.CollectorManager

	// nvidiaCollector updates stats for Nvidia GPUs attached to the container.
	nvidiaCollector accelerators.AcceleratorCollector
}

// jitter returns a time.Duration between duration and duration + maxFactor * duration,
// to allow clients to avoid converging on periodic behavior. If maxFactor is 0.0, a
// suggested default value will be chosen.
func jitter(duration time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		maxFactor = 1.0
	}
	wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
	return wait
}

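// Start kicks off the container's periodic housekeeping in a background goroutine.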
func (c *containerData) Start() error {
	go c.housekeeping()
	return nil
}

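// Stop removes the container's cached stats and signals the housekeeping goroutine to exit.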
func (c *containerData) Stop() error {
	err := c.memoryCache.RemoveContainer(c.info.Name)
	if err != nil {
		return err
	}
	c.stop <- true
	return nil
}

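// allowErrorLogging rate-limits error logging to at most once per minute per container.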
func (c *containerData) allowErrorLogging() bool {
	if c.clock.Since(c.lastErrorTime) > time.Minute {
		c.lastErrorTime = c.clock.Now()
		return true
	}
	return false
}

// OnDemandHousekeeping performs housekeeping on the container and blocks until it has completed.
// It is designed to be used in conjunction with periodic housekeeping, and will cause the timer for
// periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
// can have serious performance costs.
func (c *containerData) OnDemandHousekeeping(maxAge time.Duration) {
	if c.clock.Since(c.statsLastUpdatedTime) > maxAge {
		housekeepingFinishedChan := make(chan struct{})
		c.onDemandChan <- housekeepingFinishedChan
		select {
		case <-c.stop:
		case <-housekeepingFinishedChan:
		}
	}
}

// notifyOnDemand notifies all calls to OnDemandHousekeeping that housekeeping is finished.
func (c *containerData) notifyOnDemand() {
	for {
		select {
		case finishedChan := <-c.onDemandChan:
			close(finishedChan)
		default:
			return
		}
	}
}

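// GetInfo returns the container's cached info, refreshing the spec (and, if requested,
// the subcontainer list) from the handler at most once every 5 seconds.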
func (c *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
	// Get spec and subcontainers.
	if c.clock.Since(c.infoLastUpdatedTime) > 5*time.Second {
		err := c.updateSpec()
		if err != nil {
			return nil, err
		}
		if shouldUpdateSubcontainers {
			err = c.updateSubcontainers()
			if err != nil {
				return nil, err
			}
		}
		c.infoLastUpdatedTime = c.clock.Now()
	}
	// Make a copy of the info for the user.
	c.lock.Lock()
	defer c.lock.Unlock()
	cInfo := c.info
	return &cInfo, nil
}

func (c *containerData) DerivedStats() (v2.DerivedStats, error) {
	if c.summaryReader == nil {
		return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", c.info.Name)
	}
	return c.summaryReader.DerivedStats()
}

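// getCgroupPath parses a single "cgroup" field as reported by ps and returns the
// process's memory cgroup path. "-" (no cgroups) and unparseable entries map to the
// root, and cgroup v2 unified entries ("0::<path>") return the path directly.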
func (c *containerData) getCgroupPath(cgroups string) (string, error) {
	if cgroups == "-" {
		return "/", nil
	}
	if strings.HasPrefix(cgroups, "0::") {
		return cgroups[3:], nil
	}
	matches := cgroupPathRegExp.FindSubmatch([]byte(cgroups))
	if len(matches) != 2 {
		klog.V(3).Infof("failed to get memory cgroup path from %q", cgroups)
		// return root in case of failures - memory hierarchy might not be enabled.
		return "/", nil
	}
	return string(matches[1]), nil
}

// Returns contents of a file inside the container root.
// Takes in a path relative to container root.
func (c *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
	pids, err := c.getContainerPids(inHostNamespace)
	if err != nil {
		return nil, err
	}
	// TODO(rjnagal): Optimize by just reading container's cgroup.proc file when in host namespace.
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	for _, pid := range pids {
		filePath := path.Join(rootfs, "/proc", pid, "/root", filepath)
		klog.V(3).Infof("Trying path %q", filePath)
		data, err := ioutil.ReadFile(filePath)
		if err == nil {
			return data, err
		}
	}
	// No process paths could be found. Declare config non-existent.
	return nil, fmt.Errorf("file %q does not exist", filepath)
}

// Return output for ps command in host /proc with specified format.
func (c *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
	args := []string{}
	command := "ps"
	if !inHostNamespace {
		command = "/usr/sbin/chroot"
		args = append(args, "/rootfs", "ps")
	}
	args = append(args, "-e", "-o", format)
	out, err := exec.Command(command, args...).Output()
	if err != nil {
		return nil, fmt.Errorf("failed to execute %q command: %v", command, err)
	}
	return out, err
}

// Get pids of processes in this container.
// A slightly lighter-weight call than GetProcessList if other details are not required.
func (c *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
	format := "pid,cgroup"
	out, err := c.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	expectedFields := 2
	lines := strings.Split(string(out), "\n")
	pids := []string{}
	for _, line := range lines[1:] {
		if len(line) == 0 {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < expectedFields {
			return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
		}
		pid := fields[0]
		cgroup, err := c.getCgroupPath(fields[1])
		if err != nil {
			return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[1], err)
		}
		if c.info.Name == cgroup {
			pids = append(pids, pid)
		}
	}
	return pids, nil
}

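// GetProcessList returns the processes belonging to this container (or every process
// when called on the root container), as reported by ps, including per-process fd counts.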
func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
	// report all processes for root.
	isRoot := c.info.Name == "/"
	rootfs := "/"
	if !inHostNamespace {
		rootfs = "/rootfs"
	}
	format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,cgroup"
	out, err := c.getPsOutput(inHostNamespace, format)
	if err != nil {
		return nil, err
	}
	expectedFields := 12
	processes := []v2.ProcessInfo{}
	lines := strings.Split(string(out), "\n")
	for _, line := range lines[1:] {
		if len(line) == 0 {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < expectedFields {
			return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
		}
		pid, err := strconv.Atoi(fields[1])
		if err != nil {
			return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
		}
		ppid, err := strconv.Atoi(fields[2])
		if err != nil {
			return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
		}
		percentCpu, err := strconv.ParseFloat(fields[4], 32)
		if err != nil {
			return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
		}
		percentMem, err := strconv.ParseFloat(fields[5], 32)
		if err != nil {
			return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
		}
		rss, err := strconv.ParseUint(fields[6], 0, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
		}
		// convert to bytes
		rss *= 1024
		vs, err := strconv.ParseUint(fields[7], 0, 64)
		if err != nil {
			return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
		}
		// convert to bytes
		vs *= 1024
		cgroup, err := c.getCgroupPath(fields[11])
		if err != nil {
			return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err)
		}
		// Remove the ps command we just ran from cadvisor container.
		// Not necessary, but makes the cadvisor page look cleaner.
		if !inHostNamespace && cadvisorContainer == cgroup && fields[10] == "ps" {
			continue
		}
		var cgroupPath string
		if isRoot {
			cgroupPath = cgroup
		}

		var fdCount int
		dirPath := path.Join(rootfs, "/proc", strconv.Itoa(pid), "fd")
		fds, err := ioutil.ReadDir(dirPath)
		if err != nil {
			klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
			continue
		}
		fdCount = len(fds)

		if isRoot || c.info.Name == cgroup {
			processes = append(processes, v2.ProcessInfo{
				User:          fields[0],
				Pid:           pid,
				Ppid:          ppid,
				StartTime:     fields[3],
				PercentCpu:    float32(percentCpu),
				PercentMemory: float32(percentMem),
				RSS:           rss,
				VirtualSize:   vs,
				Status:        fields[8],
				RunningTime:   fields[9],
				Cmd:           fields[10],
				CgroupPath:    cgroupPath,
				FdCount:       fdCount,
			})
		}
	}
	return processes, nil
}

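// newContainerData builds the bookkeeping structure for a single container, wiring up
// the memory cache, optional cpu load reader, and summary reader, and fetching the
// initial spec. Housekeeping is not started here; callers must invoke Start.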
func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("nil memory storage")
	}
	if handler == nil {
		return nil, fmt.Errorf("nil container handler")
	}
	ref, err := handler.ContainerReference()
	if err != nil {
		return nil, err
	}

	cont := &containerData{
		handler:                  handler,
		memoryCache:              memoryCache,
		housekeepingInterval:     *HousekeepingInterval,
		maxHousekeepingInterval:  maxHousekeepingInterval,
		allowDynamicHousekeeping: allowDynamicHousekeeping,
		logUsage:                 logUsage,
		loadAvg:                  -1.0, // negative value indicates uninitialized.
		stop:                     make(chan bool, 1),
		collectorManager:         collectorManager,
		onDemandChan:             make(chan chan struct{}, 100),
		clock:                    clock,
	}
	cont.info.ContainerReference = ref

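	// loadDecay = exp(-housekeepingInterval / 10s): load samples are smoothed over a roughly 10-second window.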
	cont.loadDecay = math.Exp(float64(-cont.housekeepingInterval.Seconds() / 10))

	if *enableLoadReader {
		// Create cpu load reader.
		loadReader, err := cpuload.New()
		if err != nil {
			klog.Warningf("Could not initialize cpu load reader for %q: %s", ref.Name, err)
		} else {
			cont.loadReader = loadReader
		}
	}

	err = cont.updateSpec()
	if err != nil {
		return nil, err
	}
	cont.summaryReader, err = summary.New(cont.info.Spec)
	if err != nil {
		cont.summaryReader = nil
		klog.Warningf("Failed to create summary reader for %q: %v", ref.Name, err)
	}

	return cont, nil
}

// Determine when the next housekeeping should occur.
func (c *containerData) nextHousekeepingInterval() time.Duration {
	if c.allowDynamicHousekeeping {
		var empty time.Time
		stats, err := c.memoryCache.RecentStats(c.info.Name, empty, empty, 2)
		if err != nil {
			if c.allowErrorLogging() {
				klog.Warningf("Failed to get RecentStats(%q) while determining the next housekeeping: %v", c.info.Name, err)
			}
		} else if len(stats) == 2 {
			// TODO(vishnuk): Use no processes as a signal.
			// Raise the interval if usage hasn't changed in the last housekeeping.
			if stats[0].StatsEq(stats[1]) && (c.housekeepingInterval < c.maxHousekeepingInterval) {
				c.housekeepingInterval *= 2
				if c.housekeepingInterval > c.maxHousekeepingInterval {
					c.housekeepingInterval = c.maxHousekeepingInterval
				}
			} else if c.housekeepingInterval != *HousekeepingInterval {
				// Lower interval back to the baseline.
				c.housekeepingInterval = *HousekeepingInterval
			}
		}
	}

	return jitter(c.housekeepingInterval, 1.0)
}

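// housekeeping is the container's periodic collection loop; it runs until Stop is called,
// collecting stats on each tick and optionally logging usage.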
// TODO(vmarmol): Implement stats collecting as a custom collector.
func (c *containerData) housekeeping() {
	// Start any background goroutines - must be cleaned up in c.handler.Cleanup().
	c.handler.Start()
	defer c.handler.Cleanup()

	// Initialize cpuload reader - must be cleaned up in c.loadReader.Stop()
	if c.loadReader != nil {
		err := c.loadReader.Start()
		if err != nil {
			klog.Warningf("Could not start cpu load stat collector for %q: %s", c.info.Name, err)
		}
		defer c.loadReader.Stop()
	}

	// Long housekeeping is 100ms or half of the housekeeping interval, whichever is smaller.
	longHousekeeping := 100 * time.Millisecond
	if *HousekeepingInterval/2 < longHousekeeping {
		longHousekeeping = *HousekeepingInterval / 2
	}

	// Housekeep on every housekeeping interval.
	klog.V(3).Infof("Start housekeeping for container %q\n", c.info.Name)
	houseKeepingTimer := c.clock.NewTimer(0 * time.Second)
	defer houseKeepingTimer.Stop()
	for {
		if !c.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
			return
		}
		// Stop and drain the timer so that it is safe to reset it
		if !houseKeepingTimer.Stop() {
			select {
			case <-houseKeepingTimer.C():
			default:
			}
		}
		// Log usage if asked to do so.
		if c.logUsage {
			const numSamples = 60
			var empty time.Time
			stats, err := c.memoryCache.RecentStats(c.info.Name, empty, empty, numSamples)
			if err != nil {
				if c.allowErrorLogging() {
					klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", c.info.Name, err)
				}
			} else if len(stats) < numSamples {
				// Ignore, not enough stats yet.
			} else {
				usageCpuNs := uint64(0)
				for i := range stats {
					if i > 0 {
						usageCpuNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total)
					}
				}
				usageMemory := stats[numSamples-1].Memory.Usage

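				// Instantaneous usage is derived from the two most recent samples; the average covers the whole window.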
				instantUsageInCores := float64(stats[numSamples-1].Cpu.Usage.Total-stats[numSamples-2].Cpu.Usage.Total) / float64(stats[numSamples-1].Timestamp.Sub(stats[numSamples-2].Timestamp).Nanoseconds())
				usageInCores := float64(usageCpuNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
				usageInHuman := units.HumanSize(float64(usageMemory))
				// Don't set verbosity since this is already protected by the logUsage flag.
				klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", c.info.Name, instantUsageInCores, usageInCores, usageInHuman)
			}
		}
		houseKeepingTimer.Reset(c.nextHousekeepingInterval())
	}
}

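// housekeepingTick waits for the next trigger (timer tick, on-demand request, or stop signal),
// performs one stats update, and reports whether housekeeping should continue.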
func (c *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
	select {
	case <-c.stop:
		// Stop housekeeping when signaled.
		return false
	case finishedChan := <-c.onDemandChan:
		// notify the calling function once housekeeping has completed
		defer close(finishedChan)
	case <-timer:
	}
	start := c.clock.Now()
	err := c.updateStats()
	if err != nil {
		if c.allowErrorLogging() {
			klog.Warningf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
		}
	}
	// Log if housekeeping took too long.
	duration := c.clock.Since(start)
	if duration >= longHousekeeping {
		klog.V(3).Infof("[%s] Housekeeping took %s", c.info.Name, duration)
	}
	c.notifyOnDemand()
	c.statsLastUpdatedTime = c.clock.Now()
	return true
}

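// updateSpec refreshes the cached container spec from the handler and merges in any
// custom metric specs exposed by the collector manager.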
func (c *containerData) updateSpec() error {
	spec, err := c.handler.GetSpec()
	if err != nil {
		// Ignore errors if the container is dead.
		if !c.handler.Exists() {
			return nil
		}
		return err
	}

	customMetrics, err := c.collectorManager.GetSpec()
	if err != nil {
		return err
	}
	if len(customMetrics) > 0 {
		spec.HasCustomMetrics = true
		spec.CustomMetrics = customMetrics
	}
	c.lock.Lock()
	defer c.lock.Unlock()
	c.info.Spec = spec
	return nil
}

// Calculate new smoothed load average using the new sample of runnable threads.
// The decay used ensures that the load will stabilize on a new constant value within
// 10 seconds.
func (c *containerData) updateLoad(newLoad uint64) {
	if c.loadAvg < 0 {
		c.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
	} else {
		c.loadAvg = c.loadAvg*c.loadDecay + float64(newLoad)*(1.0-c.loadDecay)
	}
}

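// updateStats collects a new stats sample from the handler, augments it with cpu load,
// summary, custom, and GPU metrics where enabled, and pushes the result to the memory
// cache. Partially populated stats are still pushed when collection fails.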
func (c *containerData) updateStats() error {
	stats, statsErr := c.handler.GetStats()
	if statsErr != nil {
		// Ignore errors if the container is dead.
		if !c.handler.Exists() {
			return nil
		}

		// Stats may be partially populated, push those before we return an error.
		statsErr = fmt.Errorf("%v, continuing to push stats", statsErr)
	}
	if stats == nil {
		return statsErr
	}
	if c.loadReader != nil {
		// TODO(vmarmol): Cache this path.
		path, err := c.handler.GetCgroupPath("cpu")
		if err == nil {
			loadStats, err := c.loadReader.GetCpuLoad(c.info.Name, path)
			if err != nil {
				return fmt.Errorf("failed to get load stat for %q - path %q, error %s", c.info.Name, path, err)
			}
			stats.TaskStats = loadStats
			c.updateLoad(loadStats.NrRunning)
			// convert to 'milliLoad' to avoid floats and preserve precision.
			stats.Cpu.LoadAverage = int32(c.loadAvg * 1000)
		}
	}
	if c.summaryReader != nil {
		err := c.summaryReader.AddSample(*stats)
		if err != nil {
			// Ignore summary errors for now.
			klog.V(2).Infof("Failed to add summary stats for %q: %v", c.info.Name, err)
		}
	}
	var customStatsErr error
	cm := c.collectorManager.(*collector.GenericCollectorManager)
	if len(cm.Collectors) > 0 {
		if cm.NextCollectionTime.Before(c.clock.Now()) {
			customStats, err := c.updateCustomStats()
			if customStats != nil {
				stats.CustomMetrics = customStats
			}
			if err != nil {
				customStatsErr = err
			}
		}
	}

	var nvidiaStatsErr error
	if c.nvidiaCollector != nil {
		// This updates the Accelerators field of the stats struct
		nvidiaStatsErr = c.nvidiaCollector.UpdateStats(stats)
	}

	ref, err := c.handler.ContainerReference()
	if err != nil {
		// Ignore errors if the container is dead.
		if !c.handler.Exists() {
			return nil
		}
		return err
	}

	cInfo := info.ContainerInfo{
		ContainerReference: ref,
	}

	err = c.memoryCache.AddStats(&cInfo, stats)
	if err != nil {
		return err
	}
	if statsErr != nil {
		return statsErr
	}
	if nvidiaStatsErr != nil {
		return nvidiaStatsErr
	}
	return customStatsErr
}

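// updateCustomStats runs the registered custom collectors and returns whatever metrics
// they produced, even when some collectors fail.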
func (c *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
	_, customStats, customStatsErr := c.collectorManager.Collect()
	if customStatsErr != nil {
		if !c.handler.Exists() {
			return customStats, nil
		}
		customStatsErr = fmt.Errorf("%v, continuing to push custom stats", customStatsErr)
	}
	return customStats, customStatsErr
}

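// updateSubcontainers refreshes the cached, sorted list of this container's subcontainers.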
func (c *containerData) updateSubcontainers() error {
	var subcontainers info.ContainerReferenceSlice
	subcontainers, err := c.handler.ListContainers(container.ListSelf)
	if err != nil {
		// Ignore errors if the container is dead.
		if !c.handler.Exists() {
			return nil
		}
		return err
	}
	sort.Sort(subcontainers)
	c.lock.Lock()
	defer c.lock.Unlock()
	c.info.Subcontainers = subcontainers
	return nil
}