diff --git a/ci/install_yq.sh b/ci/install_yq.sh index 502fe67a4a..b2923a6d2e 100755 --- a/ci/install_yq.sh +++ b/ci/install_yq.sh @@ -18,7 +18,9 @@ function install_yq() { GOPATH=${GOPATH:-${HOME}/go} local yq_path="${GOPATH}/bin/yq" local yq_pkg="github.com/mikefarah/yq" - [ -x "${GOPATH}/bin/yq" ] && return + local yq_version=3.4.1 + + [ -x "${GOPATH}/bin/yq" ] && [ "`${GOPATH}/bin/yq --version`"X == "yq version ${yq_version}"X ] && return read -r -a sysInfo <<< "$(uname -sm)" @@ -56,8 +58,6 @@ function install_yq() { die "Please install curl" fi - local yq_version=3.4.1 - ## NOTE: ${var,,} => gives lowercase value of var local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}" curl -o "${yq_path}" -LSsf "${yq_url}" diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index c2e518baa5..d530a0c5ed 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -26,6 +26,7 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.runtime.disable_new_netns` | `boolean` | determines if a new netns is created for the hypervisor process | | `io.katacontainers.config.runtime.internetworking_model` | string| determines how the VM should be connected to the container network interface. Valid values are `macvtap`, `tcfilter` and `none` | | `io.katacontainers.config.runtime.sandbox_cgroup_only`| `boolean` | determines if Kata processes are managed only in sandbox cgroup | +| `io.katacontainers.config.runtime.enable_pprof` | `boolean` | enables Golang `pprof` for `containerd-shim-kata-v2` process | ## Agent Options | Key | Value Type | Comments | diff --git a/src/agent/kata-agent.service.in b/src/agent/kata-agent.service.in index 91dc7f5c8d..2fea8f1b63 100644 --- a/src/agent/kata-agent.service.in +++ b/src/agent/kata-agent.service.in @@ -15,7 +15,7 @@ Wants=kata-containers.target StandardOutput=tty Type=simple ExecStart=@BINDIR@/@AGENT_NAME@ -LimitNOFILE=infinity +LimitNOFILE=1048576 # ExecStop is required for static agent tracing; in all other scenarios # the runtime handles shutting down the VM. ExecStop=/bin/sync ; /usr/bin/systemctl --force poweroff diff --git a/src/runtime/cli/kata-check.go b/src/runtime/cli/kata-check.go index 1a0b130d6b..66f6b2becb 100644 --- a/src/runtime/cli/kata-check.go +++ b/src/runtime/cli/kata-check.go @@ -389,13 +389,6 @@ EXAMPLES: if verbose { kataLog.Logger.SetLevel(logrus.InfoLevel) } - ctx, err := cliContextToContext(context) - if err != nil { - return err - } - - span, _ := katautils.Trace(ctx, "check") - defer span.End() if !context.Bool("no-network-checks") && os.Getenv(noNetworkEnvVar) == "" { cmd := RelCmdCheck @@ -407,8 +400,7 @@ EXAMPLES: if os.Geteuid() == 0 { kataLog.Warn("Not running network checks as super user") } else { - - err = HandleReleaseVersions(cmd, version, context.Bool("include-all-releases")) + err := HandleReleaseVersions(cmd, version, context.Bool("include-all-releases")) if err != nil { return err } @@ -424,7 +416,7 @@ EXAMPLES: return errors.New("check: cannot determine runtime config") } - err = setCPUtype(runtimeConfig.HypervisorType) + err := setCPUtype(runtimeConfig.HypervisorType) if err != nil { return err } @@ -437,7 +429,6 @@ EXAMPLES: } err = hostIsVMContainerCapable(details) - if err != nil { return err } diff --git a/src/runtime/cli/kata-env.go b/src/runtime/cli/kata-env.go index 1caac6ff19..2daf51d8d6 100644 --- a/src/runtime/cli/kata-env.go +++ b/src/runtime/cli/kata-env.go @@ -13,7 +13,6 @@ import ( "strings" "github.com/BurntSushi/toml" - "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" "github.com/kata-containers/kata-containers/src/runtime/pkg/utils" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" @@ -448,14 +447,6 @@ var kataEnvCLICommand = cli.Command{ }, }, Action: func(context *cli.Context) error { - ctx, err := cliContextToContext(context) - if err != nil { - return err - } - - span, _ := katautils.Trace(ctx, "kata-env") - defer span.End() - return handleSettings(defaultOutputFile, context) }, } diff --git a/src/runtime/cli/kata-exec.go b/src/runtime/cli/kata-exec.go index 6877071589..24c6e2f5b6 100644 --- a/src/runtime/cli/kata-exec.go +++ b/src/runtime/cli/kata-exec.go @@ -14,7 +14,6 @@ import ( "net/http" "net/url" "os" - "path/filepath" "strings" "sync" @@ -26,7 +25,6 @@ import ( clientUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client" "github.com/pkg/errors" "github.com/urfave/cli" - "go.opentelemetry.io/otel/label" ) const ( @@ -38,10 +36,8 @@ const ( subCommandName = "exec" // command-line parameters name - paramRuntimeNamespace = "runtime-namespace" paramDebugConsolePort = "kata-debug-port" defaultKernelParamDebugConsoleVPortValue = 1026 - defaultRuntimeNamespace = "k8s.io" ) var ( @@ -57,34 +53,16 @@ var kataExecCLICommand = cli.Command{ Name: subCommandName, Usage: "Enter into guest by debug console", Flags: []cli.Flag{ - cli.StringFlag{ - Name: paramRuntimeNamespace, - Usage: "Namespace that containerd or CRI-O are using for containers. (Default: k8s.io, only works for containerd)", - }, cli.Uint64Flag{ Name: paramDebugConsolePort, Usage: "Port that debug console is listening on. (Default: 1026)", }, }, Action: func(context *cli.Context) error { - ctx, err := cliContextToContext(context) - if err != nil { - return err - } - span, _ := katautils.Trace(ctx, subCommandName) - defer span.End() - - namespace := context.String(paramRuntimeNamespace) - if namespace == "" { - namespace = defaultRuntimeNamespace - } - span.SetAttributes(label.Key("namespace").String(namespace)) - port := context.Uint64(paramDebugConsolePort) if port == 0 { port = defaultKernelParamDebugConsoleVPortValue } - span.SetAttributes(label.Key("port").Uint64(port)) sandboxID := context.Args().Get(0) @@ -92,9 +70,8 @@ var kataExecCLICommand = cli.Command{ return err } - span.SetAttributes(label.Key("sandbox").String(sandboxID)) + conn, err := getConn(sandboxID, port) - conn, err := getConn(namespace, sandboxID, port) if err != nil { return err } @@ -177,9 +154,8 @@ func (s *iostream) Read(data []byte) (n int, err error) { return s.conn.Read(data) } -func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) { - socketAddr := filepath.Join(string(filepath.Separator), "containerd-shim", namespace, sandboxID, "shim-monitor.sock") - client, err := kataMonitor.BuildUnixSocketClient(socketAddr, defaultTimeout) +func getConn(sandboxID string, port uint64) (net.Conn, error) { + client, err := kataMonitor.BuildShimClient(sandboxID, defaultTimeout) if err != nil { return nil, err } @@ -190,7 +166,7 @@ func getConn(namespace, sandboxID string, port uint64) (net.Conn, error) { } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("Failed to get %s: %d", socketAddr, resp.StatusCode) + return nil, fmt.Errorf("Failure from %s shim-monitor: %d", sandboxID, resp.StatusCode) } defer resp.Body.Close() diff --git a/src/runtime/cli/main.go b/src/runtime/cli/main.go index 0fecca787d..78ddb0739f 100644 --- a/src/runtime/cli/main.go +++ b/src/runtime/cli/main.go @@ -27,9 +27,6 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/urfave/cli" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/label" - otelTrace "go.opentelemetry.io/otel/trace" ) // specConfig is the name of the file holding the containers configuration @@ -132,10 +129,6 @@ var runtimeCommands = []cli.Command{ // parsing occurs. var runtimeBeforeSubcommands = beforeSubcommands -// runtimeAfterSubcommands is the function to run after the command-line -// has been parsed. -var runtimeAfterSubcommands = afterSubcommands - // runtimeCommandNotFound is the function to handle an invalid sub-command. var runtimeCommandNotFound = commandNotFound @@ -168,10 +161,6 @@ func init() { // setupSignalHandler sets up signal handling, starting a go routine to deal // with signals as they arrive. -// -// Note that the specified context is NOT used to create a trace span (since the -// first (root) span must be created in beforeSubcommands()): it is simply -// used to pass to the crash handling functions to finalise tracing. func setupSignalHandler(ctx context.Context) { signals.SetLogger(kataLog) @@ -181,10 +170,6 @@ func setupSignalHandler(ctx context.Context) { signal.Notify(sigCh, sig) } - dieCb := func() { - katautils.StopTracing(ctx) - } - go func() { for { sig := <-sigCh @@ -198,7 +183,6 @@ func setupSignalHandler(ctx context.Context) { if signals.FatalSignal(nativeSignal) { kataLog.WithField("signal", sig).Error("received fatal signal") - signals.Die(dieCb) } else if debug && signals.NonFatalSignal(nativeSignal) { kataLog.WithField("signal", sig).Debug("handling signal") signals.Backtrace() @@ -210,18 +194,6 @@ func setupSignalHandler(ctx context.Context) { // setExternalLoggers registers the specified logger with the external // packages which accept a logger to handle their own logging. func setExternalLoggers(ctx context.Context, logger *logrus.Entry) { - var span otelTrace.Span - - // Only create a new span if a root span already exists. This is - // required to ensure that this function will not disrupt the root - // span logic by creating a span before the proper root span has been - // created. - - if otelTrace.SpanFromContext(ctx) != nil { - span, ctx = katautils.Trace(ctx, "setExternalLoggers") - defer span.End() - } - // Set virtcontainers logger. vci.SetLogger(ctx, logger) @@ -244,7 +216,6 @@ func beforeSubcommands(c *cli.Context) error { var configFile string var runtimeConfig oci.RuntimeConfig var err error - var traceFlushFunc func() katautils.SetConfigOptions(name, defaultRuntimeConfiguration, defaultSysConfRuntimeConfiguration) @@ -270,7 +241,6 @@ func beforeSubcommands(c *cli.Context) error { // Issue: https://github.com/kata-containers/runtime/issues/2428 ignoreConfigLogs := false - var traceRootSpan string subCmdIsCheckCmd := (c.NArg() >= 1 && ((c.Args()[0] == "kata-check") || (c.Args()[0] == "check"))) if subCmdIsCheckCmd { @@ -302,16 +272,13 @@ func beforeSubcommands(c *cli.Context) error { cmdName := c.Args().First() if c.App.Command(cmdName) != nil { kataLog = kataLog.WithField("command", cmdName) - - // Name for the root span (used for tracing) now the - // sub-command name is known. - traceRootSpan = name + " " + cmdName } - // Since a context is required, pass a new (throw-away) one - we - // cannot use the main context as tracing hasn't been enabled yet - // (meaning any spans created at this point will be silently ignored). - setExternalLoggers(context.Background(), kataLog) + ctx, err := cliContextToContext(c) + if err != nil { + return err + } + setExternalLoggers(ctx, kataLog) if c.NArg() == 1 && (c.Args()[0] == "kata-env" || c.Args()[0] == "env") { // simply report the logging setup @@ -325,20 +292,6 @@ func beforeSubcommands(c *cli.Context) error { } if !subCmdIsCheckCmd { debug = runtimeConfig.Debug - - if traceRootSpan != "" { - // Create the tracer. - // - // Note: no spans are created until the command-line has been parsed. - // This delays collection of trace data slightly but benefits the user by - // ensuring the first span is the name of the sub-command being - // invoked from the command-line. - traceFlushFunc, err = setupTracing(c, traceRootSpan, &runtimeConfig) - if err != nil { - return err - } - defer traceFlushFunc() - } } args := strings.Join(c.Args(), " ") @@ -377,36 +330,6 @@ func handleShowConfig(context *cli.Context) { } } -func setupTracing(context *cli.Context, rootSpanName string, config *oci.RuntimeConfig) (func(), error) { - flush, err := katautils.CreateTracer(name, config) - if err != nil { - return nil, err - } - - ctx, err := cliContextToContext(context) - if err != nil { - return nil, err - } - - // Create the root span now that the sub-command name is - // known. - // - // Note that this "Before" function is called (and returns) - // before the subcommand handler is called. As such, we cannot - // "Finish()" the span here - that is handled in the .After - // function. - tracer := otel.Tracer("kata") - newCtx, span := tracer.Start(ctx, rootSpanName) - - span.SetAttributes(label.Key("subsystem").String("runtime")) - - // Add tracer to metadata and update the context - context.App.Metadata["tracer"] = tracer - context.App.Metadata["context"] = newCtx - - return flush, nil -} - // add supported experimental features in context func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error { ctx, err := cliContextToContext(clictx) @@ -420,22 +343,11 @@ func addExpFeatures(clictx *cli.Context, runtimeConfig oci.RuntimeConfig) error } ctx = exp.ContextWithExp(ctx, exps) - // Add tracer to metadata and update the context + // Add experimental features to metadata and update the context clictx.App.Metadata["context"] = ctx return nil } -func afterSubcommands(c *cli.Context) error { - ctx, err := cliContextToContext(c) - if err != nil { - return err - } - - katautils.StopTracing(ctx) - - return nil -} - // function called when an invalid command is specified which causes the // runtime to error. func commandNotFound(c *cli.Context, command string) { @@ -502,7 +414,6 @@ func createRuntimeApp(ctx context.Context, args []string) error { app.Flags = runtimeFlags app.Commands = runtimeCommands app.Before = runtimeBeforeSubcommands - app.After = runtimeAfterSubcommands app.EnableBashCompletion = true // allow sub-commands to access context @@ -578,12 +489,5 @@ func cliContextToContext(c *cli.Context) (context.Context, error) { func main() { // create a new empty context ctx := context.Background() - - dieCb := func() { - katautils.StopTracing(ctx) - } - - defer signals.HandlePanic(dieCb) - createRuntime(ctx) } diff --git a/src/runtime/cli/version.go b/src/runtime/cli/version.go index 9acd0e80c5..d743ef6715 100644 --- a/src/runtime/cli/version.go +++ b/src/runtime/cli/version.go @@ -6,7 +6,6 @@ package main import ( - "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" "github.com/urfave/cli" ) @@ -14,14 +13,6 @@ var versionCLICommand = cli.Command{ Name: "version", Usage: "display version details", Action: func(context *cli.Context) error { - ctx, err := cliContextToContext(context) - if err != nil { - return err - } - - span, _ := katautils.Trace(ctx, "version") - defer span.End() - cli.VersionPrinter(context) return nil }, diff --git a/src/runtime/containerd-shim-v2/shim_management.go b/src/runtime/containerd-shim-v2/shim_management.go index e03224f072..ab7ebf2041 100644 --- a/src/runtime/containerd-shim-v2/shim_management.go +++ b/src/runtime/containerd-shim-v2/shim_management.go @@ -16,7 +16,6 @@ import ( "strconv" "strings" - "github.com/containerd/containerd/namespaces" cdshim "github.com/containerd/containerd/runtime/v2/shim" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" @@ -129,11 +128,7 @@ func decodeAgentMetrics(body string) []*dto.MetricFamily { func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) { // metrics socket will under sandbox's bundle path - metricsAddress, err := socketAddress(ctx, s.id) - if err != nil { - shimMgtLog.WithError(err).Error("failed to create socket address") - return - } + metricsAddress := SocketAddress(s.id) listener, err := cdshim.NewSocket(metricsAddress) if err != nil { @@ -166,7 +161,7 @@ func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec svr.Serve(listener) } -// mountServeDebug provides a debug endpoint +// mountPprofHandle provides a debug endpoint func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) { // return if not enabled @@ -188,10 +183,8 @@ func (s *service) mountPprofHandle(m *http.ServeMux, ociSpec *specs.Spec) { m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) } -func socketAddress(ctx context.Context, id string) (string, error) { - ns, err := namespaces.NamespaceRequired(ctx) - if err != nil { - return "", err - } - return filepath.Join(string(filepath.Separator), "containerd-shim", ns, id, "shim-monitor.sock"), nil +// SocketAddress returns the address of the abstract domain socket for communicating with the +// shim management endpoint +func SocketAddress(id string) string { + return filepath.Join(string(filepath.Separator), "run", "vc", id, "shim-monitor") } diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index b4fcd5830a..aeb9f72c5d 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -176,7 +176,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { for sandboxID, namespace := range sandboxes { wg.Add(1) go func(sandboxID, namespace string, results chan<- []*dto.MetricFamily) { - sandboxMetrics, err := km.getSandboxMetrics(sandboxID, namespace) + sandboxMetrics, err := getSandboxMetrics(sandboxID) if err != nil { monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox") } @@ -234,8 +234,8 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } // getSandboxMetrics will get sandbox's metrics from shim -func (km *KataMonitor) getSandboxMetrics(sandboxID, namespace string) ([]*dto.MetricFamily, error) { - body, err := km.doGet(sandboxID, namespace, defaultTimeout, "metrics") +func getSandboxMetrics(sandboxID string) ([]*dto.MetricFamily, error) { + body, err := doGet(sandboxID, defaultTimeout, "metrics") if err != nil { return nil, err } diff --git a/src/runtime/pkg/kata-monitor/monitor.go b/src/runtime/pkg/kata-monitor/monitor.go index 64266fdb7d..1c663a5fdd 100644 --- a/src/runtime/pkg/kata-monitor/monitor.go +++ b/src/runtime/pkg/kata-monitor/monitor.go @@ -87,13 +87,8 @@ func (km *KataMonitor) GetAgentURL(w http.ResponseWriter, r *http.Request) { commonServeError(w, http.StatusBadRequest, err) return } - namespace, err := km.getSandboxNamespace(sandboxID) - if err != nil { - commonServeError(w, http.StatusBadRequest, err) - return - } - data, err := km.doGet(sandboxID, namespace, defaultTimeout, "agent-url") + data, err := doGet(sandboxID, defaultTimeout, "agent-url") if err != nil { commonServeError(w, http.StatusBadRequest, err) return diff --git a/src/runtime/pkg/kata-monitor/shim_client.go b/src/runtime/pkg/kata-monitor/shim_client.go index b2c462e8fa..ef4dc8a330 100644 --- a/src/runtime/pkg/kata-monitor/shim_client.go +++ b/src/runtime/pkg/kata-monitor/shim_client.go @@ -11,6 +11,8 @@ import ( "net" "net/http" "time" + + shim "github.com/kata-containers/kata-containers/src/runtime/containerd-shim-v2" ) const ( @@ -33,16 +35,13 @@ func getSandboxIDFromReq(r *http.Request) (string, error) { return "", fmt.Errorf("sandbox not found in %+v", r.URL.Query()) } -func (km *KataMonitor) buildShimClient(sandboxID, namespace string, timeout time.Duration) (*http.Client, error) { - socketAddr, err := km.getMonitorAddress(sandboxID, namespace) - if err != nil { - return nil, err - } - return BuildUnixSocketClient(socketAddr, timeout) +// BuildShimClient builds and returns an http client for communicating with the provided sandbox +func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) { + return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout) } -// BuildUnixSocketClient build http client for Unix socket -func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) { +// buildUnixSocketClient build http client for Unix socket +func buildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Client, error) { transport := &http.Transport{ DisableKeepAlives: true, Dial: func(proto, addr string) (conn net.Conn, err error) { @@ -61,8 +60,8 @@ func BuildUnixSocketClient(socketAddr string, timeout time.Duration) (*http.Clie return client, nil } -func (km *KataMonitor) doGet(sandboxID, namespace string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) { - client, err := km.buildShimClient(sandboxID, namespace, timeoutInSeconds) +func doGet(sandboxID string, timeoutInSeconds time.Duration, urlPath string) ([]byte, error) { + client, err := BuildShimClient(sandboxID, timeoutInSeconds) if err != nil { return nil, err }