Refactor profile-gatherer to work across all master components

This commit is contained in:
Shyam Jeedigunta
2018-06-21 18:13:58 +02:00
parent 83ad4d9e96
commit 457548ef7d
3 changed files with 64 additions and 49 deletions

View File

@@ -61,25 +61,54 @@ func checkProfileGatheringPrerequisites() error {
return nil
}
func gatherProfileOfKind(profileBaseName, kind string) error {
func getPortForComponent(componentName string) (int, error) {
switch componentName {
case "kube-apiserver":
return 8080, nil
case "kube-scheduler":
return 10251, nil
case "kube-controller-manager":
return 10252, nil
}
return -1, fmt.Errorf("Port for component %v unknown", componentName)
}
// Gathers profiles from a master component through SSH. E.g. usages:
// - gatherProfile("kube-apiserver", "someTest", "heap")
// - gatherProfile("kube-scheduler", "someTest", "profile")
// - gatherProfile("kube-controller-manager", "someTest", "profile?seconds=20")
//
// We don't export this method but wrappers around it (see below).
func gatherProfile(componentName, profileBaseName, profileKind string) error {
if err := checkProfileGatheringPrerequisites(); err != nil {
return fmt.Errorf("Profile gathering pre-requisite failed: %v", err)
}
profilePort, err := getPortForComponent(componentName)
if err != nil {
return fmt.Errorf("Profile gathering failed finding component port: %v", err)
}
if profileBaseName == "" {
profileBaseName = time.Now().Format(time.RFC3339)
}
// Get the profile data over SSH.
getCommand := fmt.Sprintf("curl -s localhost:8080/debug/pprof/%s", kind)
getCommand := fmt.Sprintf("curl -s localhost:%v/debug/pprof/%s", profilePort, profileKind)
sshResult, err := SSH(getCommand, GetMasterHost()+":22", TestContext.Provider)
if err != nil {
return fmt.Errorf("Failed to execute curl command on master through SSH: %v", err)
}
var profilePrefix string
profilePrefix := componentName
switch {
case kind == "heap":
profilePrefix = "ApiserverMemoryProfile_"
case strings.HasPrefix(kind, "profile"):
profilePrefix = "ApiserverCPUProfile_"
case profileKind == "heap":
profilePrefix += "_MemoryProfile_"
case strings.HasPrefix(profileKind, "profile"):
profilePrefix += "_CPUProfile_"
default:
return fmt.Errorf("Unknown profile kind provided: %s", kind)
return fmt.Errorf("Unknown profile kind provided: %s", profileKind)
}
// Write the data to a file.
// Write the profile data to a file.
rawprofilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pprof")
rawprofile, err := os.Create(rawprofilePath)
if err != nil {
@@ -97,12 +126,12 @@ func gatherProfileOfKind(profileBaseName, kind string) error {
var cmd *exec.Cmd
switch {
// TODO: Support other profile kinds if needed (e.g inuse_space, alloc_objects, mutex, etc)
case kind == "heap":
case profileKind == "heap":
cmd = exec.Command("go", "tool", "pprof", "-pdf", "-symbolize=none", "--alloc_space", rawprofile.Name())
case strings.HasPrefix(kind, "profile"):
case strings.HasPrefix(profileKind, "profile"):
cmd = exec.Command("go", "tool", "pprof", "-pdf", "-symbolize=none", rawprofile.Name())
default:
return fmt.Errorf("Unknown profile kind provided: %s", kind)
return fmt.Errorf("Unknown profile kind provided: %s", profileKind)
}
outfilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pdf")
outfile, err := os.Create(outfilePath)
@@ -124,67 +153,53 @@ func gatherProfileOfKind(profileBaseName, kind string) error {
// finish before the parent goroutine itself finishes, we accept a sync.WaitGroup
// argument in these functions. Typically you would use the following pattern:
//
// func TestFooBar() {
// func TestFoo() {
// var wg sync.WaitGroup
// wg.Add(3)
// go framework.GatherApiserverCPUProfile(&wg, "doing_foo")
// go framework.GatherApiserverMemoryProfile(&wg, "doing_foo")
// go framework.GatherCPUProfile("kube-apiserver", "before_foo", &wg)
// go framework.GatherMemoryProfile("kube-apiserver", "before_foo", &wg)
// <<<< some code doing foo >>>>>>
// go framework.GatherApiserverCPUProfile(&wg, "doing_bar")
// <<<< some code doing bar >>>>>>
// go framework.GatherCPUProfile("kube-scheduler", "after_foo", &wg)
// wg.Wait()
// }
//
// If you do not wish to exercise the waiting logic, pass a nil value for the
// waitgroup argument instead. However, then you would be responsible for ensuring
// that the function finishes.
// that the function finishes. There's also a polling-based gatherer utility for
// CPU profiles available below.
func GatherApiserverCPUProfile(wg *sync.WaitGroup, profileBaseName string) {
GatherApiserverCPUProfileForNSeconds(wg, profileBaseName, DefaultCPUProfileSeconds)
func GatherCPUProfile(componentName string, profileBaseName string, wg *sync.WaitGroup) {
GatherCPUProfileForSeconds(componentName, profileBaseName, DefaultCPUProfileSeconds, wg)
}
func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName string, n int) {
func GatherCPUProfileForSeconds(componentName string, profileBaseName string, seconds int, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done()
}
if err := checkProfileGatheringPrerequisites(); err != nil {
Logf("Profile gathering pre-requisite failed: %v", err)
return
}
if profileBaseName == "" {
profileBaseName = time.Now().Format(time.RFC3339)
}
if err := gatherProfileOfKind(profileBaseName, fmt.Sprintf("profile?seconds=%v", n)); err != nil {
Logf("Failed to gather apiserver CPU profile: %v", err)
if err := gatherProfile(componentName, profileBaseName, fmt.Sprintf("profile?seconds=%v", seconds)); err != nil {
Logf("Failed to gather %v CPU profile: %v", componentName, err)
}
}
func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) {
func GatherMemoryProfile(componentName string, profileBaseName string, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done()
}
if err := checkProfileGatheringPrerequisites(); err != nil {
Logf("Profile gathering pre-requisite failed: %v", err)
return
}
if profileBaseName == "" {
profileBaseName = time.Now().Format(time.RFC3339)
}
if err := gatherProfileOfKind(profileBaseName, "heap"); err != nil {
Logf("Failed to gather apiserver memory profile: %v", err)
if err := gatherProfile(componentName, profileBaseName, "heap"); err != nil {
Logf("Failed to gather %v memory profile: %v", componentName, err)
}
}
// StartApiserverCPUProfileGatherer is a polling-based gatherer of the apiserver's
// CPU profile. It takes the delay b/w consecutive gatherings as an argument and
// StartCPUProfileGatherer performs polling-based gathering of the component's CPU
// profile. It takes the interval between consecutive gatherings as an argument and
// starts the gathering goroutine. To stop the gatherer, close the returned channel.
func StartApiserverCPUProfileGatherer(delay time.Duration) chan struct{} {
func StartCPUProfileGatherer(componentName string, profileBaseName string, interval time.Duration) chan struct{} {
stopCh := make(chan struct{})
go func() {
for {
select {
case <-time.After(delay):
GatherApiserverCPUProfile(nil, "")
case <-time.After(interval):
GatherCPUProfile(componentName, profileBaseName+"_"+time.Now().Format(time.RFC3339), nil)
case <-stopCh:
return
}

View File

@@ -388,7 +388,7 @@ var _ = SIGDescribe("Density", func() {
close(profileGathererStopCh)
wg := sync.WaitGroup{}
wg.Add(1)
framework.GatherApiserverMemoryProfile(&wg, "density")
framework.GatherMemoryProfile("kube-apiserver", "density", &wg)
wg.Wait()
saturationThreshold := time.Duration((totalPods / MinPodsPerSecondThroughput)) * time.Second
@@ -487,7 +487,7 @@ var _ = SIGDescribe("Density", func() {
// Start apiserver CPU profile gatherer with frequency based on cluster size.
profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute
profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay)
profileGathererStopCh = framework.StartCPUProfileGatherer("kube-apiserver", "density", profileGatheringDelay)
})
type Density struct {

View File

@@ -106,7 +106,7 @@ var _ = SIGDescribe("Load capacity", func() {
close(profileGathererStopCh)
wg := sync.WaitGroup{}
wg.Add(1)
framework.GatherApiserverMemoryProfile(&wg, "load")
framework.GatherMemoryProfile("kube-apiserver", "load", &wg)
wg.Wait()
// Verify latency metrics
@@ -159,7 +159,7 @@ var _ = SIGDescribe("Load capacity", func() {
// Start apiserver CPU profile gatherer with frequency based on cluster size.
profileGatheringDelay := time.Duration(5+nodeCount/100) * time.Minute
profileGathererStopCh = framework.StartApiserverCPUProfileGatherer(profileGatheringDelay)
profileGathererStopCh = framework.StartCPUProfileGatherer("kube-apiserver", "load", profileGatheringDelay)
})
type Load struct {