runtime: add pprof interface for shim

Add new http interfaces to support pprof:

- /sandboxes
- /debug/vars
- /debug/pprof/
- /debug/pprof/cmdline
- /debug/pprof/profile
- /debug/pprof/symbol
- /debug/pprof/trace

Fixes: #397

Signed-off-by: bin liu <bin@hyper.sh>
This commit is contained in:
bin liu 2020-07-09 01:10:02 +08:00
parent e3a3818f7a
commit bbf8517050
18 changed files with 336 additions and 21 deletions

View File

@ -235,3 +235,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true

View File

@ -236,3 +236,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true

View File

@ -355,3 +355,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true

View File

@ -462,3 +462,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true

View File

@ -480,3 +480,7 @@ sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
# Supported experimental features:
# (default: [])
experimental=@DEFAULTEXPFEATURES@
# If enabled, users can run pprof tools against the shim v2 process through kata-monitor.
# (default: false)
# enable_pprof = true

View File

@ -15,7 +15,7 @@ import (
"github.com/sirupsen/logrus"
)
var metricListenAddr = flag.String("listen-address", ":8090", "The address to listen on for HTTP requests.")
var monitorListenAddr = flag.String("listen-address", ":8090", "The address to listen on for HTTP requests.")
var containerdAddr = flag.String("containerd-address", "/run/containerd/containerd.sock", "Containerd address to accept client requests.")
var containerdConfig = flag.String("containerd-conf", "/etc/containerd/config.toml", "Containerd config file.")
var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug/info/warn/error/fatal/panic).")
@ -26,17 +26,31 @@ func main() {
// init logrus
initLog()
// create new MAgent
ma, err := kataMonitor.NewKataMonitor(*containerdAddr, *containerdConfig)
// create new kataMonitor
km, err := kataMonitor.NewKataMonitor(*containerdAddr, *containerdConfig)
if err != nil {
panic(err)
}
// setup handlers for metrics, sandbox listing and the shim debug endpoints
http.HandleFunc("/metrics", ma.ProcessMetricsRequest)
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(km.ProcessMetricsRequest))
m.Handle("/sandboxes", http.HandlerFunc(km.ListSandboxes))
// for debug shim process
m.Handle("/debug/vars", http.HandlerFunc(km.ExpvarHandler))
m.Handle("/debug/pprof/", http.HandlerFunc(km.PprofIndex))
m.Handle("/debug/pprof/cmdline", http.HandlerFunc(km.PprofCmdline))
m.Handle("/debug/pprof/profile", http.HandlerFunc(km.PprofProfile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(km.PprofSymbol))
m.Handle("/debug/pprof/trace", http.HandlerFunc(km.PprofTrace))
// listening on the server
logrus.Fatal(http.ListenAndServe(*metricListenAddr, nil))
svr := &http.Server{
Handler: m,
Addr: *monitorListenAddr,
}
logrus.Fatal(svr.ListenAndServe())
}
// initLog setup logger

View File

@ -88,7 +88,7 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest) (*con
return nil, err
}
s.sandbox = sandbox
go s.startManagementServer(ctx)
go s.startManagementServer(ctx, ociSpec)
case vc.PodContainer:
if s.sandbox == nil {

View File

@ -7,14 +7,19 @@ package containerdshim
import (
"context"
"expvar"
"io"
"net/http"
"net/http/pprof"
"path/filepath"
"strconv"
"strings"
"github.com/containerd/containerd/namespaces"
cdshim "github.com/containerd/containerd/runtime/v2/shim"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
@ -30,6 +35,7 @@ var (
ifSupportAgentMetricsAPI = true
)
// serveMetrics handle /metrics requests
func (s *service) serveMetrics(w http.ResponseWriter, r *http.Request) {
// update metrics from sandbox
@ -104,7 +110,7 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily {
return list
}
func (s *service) startManagementServer(ctx context.Context) {
func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
// metrics socket will be under sandbox's bundle path
metricsAddress, err := socketAddress(ctx, s.id)
if err != nil {
@ -127,7 +133,9 @@ func (s *service) startManagementServer(ctx context.Context) {
logrus.Info("kata monitor inited")
// bind handler
http.HandleFunc("/metrics", s.serveMetrics)
m := http.NewServeMux()
m.Handle("/metrics", http.HandlerFunc(s.serveMetrics))
s.mountPprofHandle(m, ociSpec)
// register shim metrics
registerMetrics()
@ -136,10 +144,32 @@ func (s *service) startManagementServer(ctx context.Context) {
vc.RegisterMetrics()
// start serve
svr := &http.Server{Handler: http.DefaultServeMux}
svr := &http.Server{Handler: m}
svr.Serve(listener)
}
// mountPprofHandle registers the expvar and pprof debug handlers on m.
//
// The handlers are mounted when the runtime configuration sets EnablePprof,
// or, failing that, when the OCI spec carries the vcAnnotations.EnablePprof
// annotation with a value that strconv.ParseBool parses as true. In every
// other case m is left untouched.
func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) {
	if !s.config.EnablePprof {
		// Without a spec there is no annotation to consult, so pprof stays off.
		if ociSpec == nil {
			return
		}

		value, ok := ociSpec.Annotations[vcAnnotations.EnablePprof]
		if !ok {
			return
		}

		// An unparsable annotation value is treated the same as "false".
		enabled, err := strconv.ParseBool(value)
		if err != nil || !enabled {
			return
		}
	}

	m.Handle("/debug/vars", expvar.Handler())
	m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
	m.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline))
	m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
	m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
	m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
}
func socketAddress(ctx context.Context, id string) (string, error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {

View File

@ -73,11 +73,10 @@ func registerMetrics() {
prometheus.MustRegister(scrapeDurationsHistogram)
}
// getMetricsAddress get metrics address for a sandbox, the abstract unix socket address is saved
// getMonitorAddress gets the monitor address for a sandbox; the abstract unix socket address is saved
// in `monitor_address`, in the same place as `address`.
func (km *KataMonitor) getMetricsAddress(sandboxID, namespace string) (string, error) {
func (km *KataMonitor) getMonitorAddress(sandboxID, namespace string) (string, error) {
path := filepath.Join(km.containerdStatePath, types.ContainerdRuntimeTaskPath, namespace, sandboxID, "monitor_address")
data, err := ioutil.ReadFile(path)
if err != nil {
return "", err
@ -237,7 +236,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
// getSandboxMetrics will get sandbox's metrics from shim
func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.MetricFamily, error) {
socket, err := km.getMetricsAddress(sandboxID, namespace)
socket, err := km.getMonitorAddress(sandboxID, namespace)
if err != nil {
return nil, err
}

View File

@ -7,6 +7,7 @@ package katamonitor
import (
"fmt"
"net/http"
"os"
"sync"
@ -48,7 +49,7 @@ func NewKataMonitor(containerdAddr, containerdConfigFile string) (*KataMonitor,
return nil, err
}
ka := &KataMonitor{
km := &KataMonitor{
containerdAddr: containerdAddr,
containerdConfigFile: containerdConfigFile,
containerdStatePath: containerdConf.State,
@ -58,23 +59,47 @@ func NewKataMonitor(containerdAddr, containerdConfigFile string) (*KataMonitor,
},
}
if err := ka.initSandboxCache(); err != nil {
if err := km.initSandboxCache(); err != nil {
return nil, err
}
// register metrics
registerMetrics()
go ka.sandboxCache.startEventsListener(ka.containerdAddr)
go km.sandboxCache.startEventsListener(km.containerdAddr)
return ka, nil
return km, nil
}
func (ka *KataMonitor) initSandboxCache() error {
sandboxes, err := ka.getSandboxes()
func (km *KataMonitor) initSandboxCache() error {
sandboxes, err := km.getSandboxes()
if err != nil {
return err
}
ka.sandboxCache.init(sandboxes)
km.sandboxCache.init(sandboxes)
return nil
}
// ListSandboxes writes the ID of every sandbox returned by getSandboxList to
// the response, one ID per line.
func (km *KataMonitor) ListSandboxes(w http.ResponseWriter, r *http.Request) {
	for _, id := range km.getSandboxList() {
		fmt.Fprintf(w, "%s\n", id)
	}
}
// getSandboxList returns the keys of the sandbox cache map as a slice.
// The order follows map iteration and is therefore not stable.
func (km *KataMonitor) getSandboxList() []string {
	cached := km.sandboxCache.getAllSandboxes()

	ids := make([]string, 0, len(cached))
	for id := range cached {
		ids = append(ids, id)
	}
	return ids
}
// getSandboxNamespace looks up the given sandbox in the sandbox cache and
// returns whatever namespace and error that lookup yields.
func (km *KataMonitor) getSandboxNamespace(sandbox string) (string, error) {
return km.sandboxCache.getSandboxNamespace(sandbox)
}

View File

@ -0,0 +1,119 @@
// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
package katamonitor
import (
"fmt"
"io"
"net"
"net/http"
)
// getSandboxIdFromReq extracts the sandbox ID from the `sandbox` query
// parameter of r. It returns an error when the parameter is absent or empty.
func getSandboxIdFromReq(r *http.Request) (string, error) {
	query := r.URL.Query()
	if id := query.Get("sandbox"); id != "" {
		return id, nil
	}
	return "", fmt.Errorf("sandbox not found in %+v", query)
}
// serveError replies with a plain-text error: it sets the Content-Type and
// X-Go-Pprof headers, removes any Content-Disposition header set earlier,
// writes the given status code and then txt followed by a newline.
func serveError(w http.ResponseWriter, status int, txt string) {
	hdr := w.Header()
	hdr.Set("Content-Type", "text/plain; charset=utf-8")
	hdr.Set("X-Go-Pprof", "1")
	hdr.Del("Content-Disposition")

	w.WriteHeader(status)
	fmt.Fprintf(w, "%s\n", txt)
}
// composeSocketAddress resolves the shim monitor address for the sandbox named
// in r: it reads the sandbox ID from the request, looks up its namespace and
// then asks getMonitorAddress for the address. The first error encountered is
// returned unchanged together with an empty address.
func (km *KataMonitor) composeSocketAddress(r *http.Request) (string, error) {
	id, err := getSandboxIdFromReq(r)
	if err != nil {
		return "", err
	}

	ns, nsErr := km.getSandboxNamespace(id)
	if nsErr != nil {
		return "", nsErr
	}

	return km.getMonitorAddress(id, ns)
}
// proxyRequest forwards r to the shim of the sandbox named by the `sandbox`
// query parameter and streams the shim's answer back to w.
//
// The shim is reached over the abstract unix socket whose name is returned by
// composeSocketAddress. The request path and query string are passed through
// unchanged. The shim's status code, Content-Type and Content-Disposition are
// relayed to the caller.
//
// Failures are reported to the caller instead of producing an empty 200:
//   - 400 when the sandbox cannot be resolved to a socket address;
//   - 502 when the shim cannot be reached.
func (km *KataMonitor) proxyRequest(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("X-Content-Type-Options", "nosniff")

	socket, err := km.composeSocketAddress(r)
	if err != nil {
		monitorLog.WithError(err).Error("failed to get shim monitor address")
		serveError(w, http.StatusBadRequest, "sandbox may be stopped or deleted")
		return
	}

	// Every request goes to a (possibly different) shim socket, so the
	// connection is not kept alive and the dial target ignores proto/addr.
	transport := &http.Transport{
		DisableKeepAlives: true,
		Dial: func(proto, addr string) (conn net.Conn, err error) {
			return net.Dial("unix", "\x00"+socket)
		},
	}
	client := http.Client{
		Transport: transport,
	}

	// The host part is a placeholder; only the path and query matter to the shim.
	uri := fmt.Sprintf("http://shim%s", r.URL.String())
	resp, err := client.Get(uri)
	if err != nil {
		monitorLog.WithError(err).Error("failed to request shim monitor")
		serveError(w, http.StatusBadGateway, "failed to reach shim")
		return
	}
	defer resp.Body.Close()

	// Shim headers take precedence over defaults preset by the calling handler.
	if contentType := resp.Header.Get("Content-Type"); contentType != "" {
		w.Header().Set("Content-Type", contentType)
	}
	if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
		w.Header().Set("Content-Disposition", contentDisposition)
	}

	// Relay the shim's status so that e.g. a 404 from a shim with pprof
	// disabled is not turned into a 200.
	w.WriteHeader(resp.StatusCode)

	if _, err := io.Copy(w, resp.Body); err != nil {
		// Headers are already sent; all that can be done is to record the failure.
		monitorLog.WithError(err).Error("failed to copy shim response")
	}
}
// ExpvarHandler handles `/debug/vars` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) ExpvarHandler(w http.ResponseWriter, r *http.Request) {
km.proxyRequest(w, r)
}
// PprofIndex handles `/debug/pprof/` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) PprofIndex(w http.ResponseWriter, r *http.Request) {
km.proxyRequest(w, r)
}
// PprofCmdline handles `/debug/pprof/cmdline` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) PprofCmdline(w http.ResponseWriter, r *http.Request) {
km.proxyRequest(w, r)
}
// PprofProfile handles `/debug/pprof/profile` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) PprofProfile(w http.ResponseWriter, r *http.Request) {
km.proxyRequest(w, r)
}
// PprofSymbol handles `/debug/pprof/symbol` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) PprofSymbol(w http.ResponseWriter, r *http.Request) {
// Default content type; proxyRequest overwrites it when the shim response
// carries its own Content-Type header.
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
km.proxyRequest(w, r)
}
// PprofTrace handles `/debug/pprof/trace` requests by passing them to
// proxyRequest, which forwards them to the shim of the requested sandbox.
func (km *KataMonitor) PprofTrace(w http.ResponseWriter, r *http.Request) {
// Defaults that present the response as a downloadable file named "trace";
// proxyRequest overwrites either header when the shim response sets it.
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Content-Disposition", `attachment; filename="trace"`)
km.proxyRequest(w, r)
}

View File

@ -0,0 +1,83 @@
// Copyright (c) 2020 Ant Financial
//
// SPDX-License-Identifier: Apache-2.0
//
package katamonitor
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestComposeSocketAddress builds a throw-away containerd state tree holding a
// `monitor_address` file per sandbox and checks that composeSocketAddress
// returns that file's content for cached sandboxes and an error for a missing
// or unknown `sandbox` query parameter.
func TestComposeSocketAddress(t *testing.T) {
	assert := assert.New(t)

	root := fmt.Sprintf("/tmp/TestComposeSocketAddress-%d", time.Now().Nanosecond())
	defer os.RemoveAll(root)

	taskDir := filepath.Join(root, "io.containerd.runtime.v2.task")
	sandboxes := map[string]string{"foo": "ns-foo", "bar": "ns-bar"}

	// Each sandbox's monitor_address file contains the sandbox name itself,
	// so the expected address in the table below equals the sandbox name.
	for sandbox, ns := range sandboxes {
		dir := filepath.Join(taskDir, ns, sandbox)
		err := os.MkdirAll(dir, 0755)
		assert.Nil(err)

		err = ioutil.WriteFile(filepath.Join(dir, "monitor_address"), []byte(sandbox), 0644)
		assert.Nil(err)
	}

	km := &KataMonitor{
		containerdStatePath: root,
		sandboxCache: &sandboxCache{
			Mutex:     &sync.Mutex{},
			sandboxes: sandboxes,
		},
	}

	testCases := []struct {
		url  string
		err  bool
		addr string
	}{
		{url: "http://localhost:6060/debug/vars", err: true, addr: ""},
		{url: "http://localhost:6060/debug/vars?sandbox=abc", err: true, addr: ""},
		{url: "http://localhost:6060/debug/vars?sandbox=foo", err: false, addr: "foo"},
		{url: "http://localhost:6060/debug/vars?sandbox=bar", err: false, addr: "bar"},
	}

	for _, tc := range testCases {
		req, err := http.NewRequest("GET", tc.url, nil)
		assert.Nil(err)

		got, err := km.composeSocketAddress(req)
		assert.Equal(tc.err, err != nil)
		assert.Equal(tc.addr, got)
	}
}

View File

@ -7,6 +7,7 @@ package katamonitor
import (
"context"
"fmt"
"sync"
"github.com/containerd/containerd"
@ -34,6 +35,17 @@ func (sc *sandboxCache) getAllSandboxes() map[string]string {
return sc.sandboxes
}
// getSandboxNamespace returns the value stored for sandbox in the cache map,
// holding the cache lock for the duration of the lookup. It returns an error
// when the sandbox has no entry.
func (sc *sandboxCache) getSandboxNamespace(sandbox string) (string, error) {
	sc.Lock()
	defer sc.Unlock()

	ns, ok := sc.sandboxes[sandbox]
	if !ok {
		return "", fmt.Errorf("sandbox %s not in cache", sandbox)
	}
	return ns, nil
}
func (sc *sandboxCache) deleteIfExists(id string) (string, bool) {
sc.Lock()
defer sc.Unlock()

View File

@ -42,6 +42,7 @@ type RuntimeConfigOptions struct {
NetmonDebug bool
AgentDebug bool
AgentTrace bool
EnablePprof bool
}
func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
@ -89,5 +90,6 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
[runtime]
enable_debug = ` + strconv.FormatBool(config.RuntimeDebug) + `
enable_tracing = ` + strconv.FormatBool(config.RuntimeTrace) + `
disable_new_netns= ` + strconv.FormatBool(config.DisableNewNetNs)
disable_new_netns= ` + strconv.FormatBool(config.DisableNewNetNs) + `
enable_pprof= ` + strconv.FormatBool(config.EnablePprof)
}

View File

@ -139,6 +139,7 @@ type runtime struct {
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
Experimental []string `toml:"experimental"`
InterNetworkModel string `toml:"internetworking_model"`
EnablePprof bool `toml:"enable_pprof"`
}
type agent struct {
@ -1165,6 +1166,7 @@ func LoadConfiguration(configPath string, ignoreLogging, builtIn bool) (resolved
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
config.EnablePprof = tomlConf.Runtime.EnablePprof
for _, f := range tomlConf.Runtime.Experimental {
feature := exp.Get(f)
if feature == nil {

View File

@ -34,6 +34,7 @@ var (
netmonDebug = false
agentDebug = false
agentTrace = false
enablePprof = true
)
type testRuntimeConfig struct {
@ -115,6 +116,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
AgentTrace: agentTrace,
SharedFS: sharedFS,
VirtioFSDaemon: virtioFSdaemon,
EnablePprof: enablePprof,
}
runtimeConfigFileData := ktu.MakeRuntimeConfigFileData(configFileOptions)
@ -197,6 +199,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
NetmonConfig: netmonConfig,
DisableNewNetNs: disableNewNetNs,
EnablePprof: enablePprof,
FactoryConfig: factoryConfig,
}

View File

@ -221,6 +221,9 @@ const (
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
// EnablePprof is a sandbox annotation that determines if pprof enabled.
EnablePprof = kataAnnotRuntimePrefix + "enable_pprof"
// Experimental is a sandbox annotation that determines if experimental features enabled.
Experimental = kataAnnotRuntimePrefix + "experimental"

View File

@ -121,6 +121,9 @@ type RuntimeConfig struct {
//Experimental features enabled
Experimental []exp.Feature
// Determines if enable pprof
EnablePprof bool
}
// AddKernelParam allows the addition of new kernel parameters to an existing