From c49c344c2a6abdbfdd2b0b693e34cf67e4eda30f Mon Sep 17 00:00:00 2001 From: RoyUP9 <87927115+RoyUP9@users.noreply.github.com> Date: Sun, 9 Jan 2022 10:50:58 +0200 Subject: [PATCH 1/2] Added kubernetes provider singleton (#599) --- agent/pkg/controllers/config_controller.go | 4 ++-- agent/pkg/providers/kubernetes_provider.go | 27 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 agent/pkg/providers/kubernetes_provider.go diff --git a/agent/pkg/controllers/config_controller.go b/agent/pkg/controllers/config_controller.go index 7adc5a761..cecc94e08 100644 --- a/agent/pkg/controllers/config_controller.go +++ b/agent/pkg/controllers/config_controller.go @@ -45,7 +45,7 @@ func PostTapConfig(c *gin.Context) { podRegex, _ := regexp.Compile(".*") - kubernetesProvider, err := kubernetes.NewProviderInCluster() + kubernetesProvider, err := providers.GetKubernetesProvider() if err != nil { c.JSON(http.StatusInternalServerError, err) return @@ -66,7 +66,7 @@ func PostTapConfig(c *gin.Context) { } func GetTapConfig(c *gin.Context) { - kubernetesProvider, err := kubernetes.NewProviderInCluster() + kubernetesProvider, err := providers.GetKubernetesProvider() if err != nil { c.JSON(http.StatusInternalServerError, err) return diff --git a/agent/pkg/providers/kubernetes_provider.go b/agent/pkg/providers/kubernetes_provider.go new file mode 100644 index 000000000..d485279d4 --- /dev/null +++ b/agent/pkg/providers/kubernetes_provider.go @@ -0,0 +1,27 @@ +package providers + +import ( + "github.com/up9inc/mizu/shared/kubernetes" + "sync" +) + +var lock = &sync.Mutex{} + +var kubernetesProvider *kubernetes.Provider + +func GetKubernetesProvider() (*kubernetes.Provider, error) { + if kubernetesProvider == nil { + lock.Lock() + defer lock.Unlock() + + if kubernetesProvider == nil { + var err error + kubernetesProvider, err = kubernetes.NewProviderInCluster() + if err != nil { + return nil, err + } + } + } + + return kubernetesProvider, nil +} From 
5a044875d3947cb360563e61919193eacddd5549 Mon Sep 17 00:00:00 2001 From: Nimrod Gilboa Markevich <59927337+nimrod-up9@users.noreply.github.com> Date: Sun, 9 Jan 2022 13:21:14 +0200 Subject: [PATCH 2/2] Rename Istio to service mesh (#605) - Rename --istio flag to the more general --service-mesh - Rename internal variables, consts and structures to reflect this conceptual change - Update the docs accordingly --- agent/pkg/controllers/config_controller.go | 4 +-- cli/cmd/tap.go | 2 +- cli/cmd/tapRunner.go | 2 +- cli/config/configStructs/tapConfig.go | 4 +-- docs/{ISTIO.md => SERVICE_MESH.md} | 29 ++++++++++++++-------- shared/kubernetes/mizuTapperSyncer.go | 4 +-- shared/kubernetes/provider.go | 8 +++--- tap/passive_tapper.go | 4 +-- 8 files changed, 32 insertions(+), 25 deletions(-) rename docs/{ISTIO.md => SERVICE_MESH.md} (82%) diff --git a/agent/pkg/controllers/config_controller.go b/agent/pkg/controllers/config_controller.go index cecc94e08..99a097d21 100644 --- a/agent/pkg/controllers/config_controller.go +++ b/agent/pkg/controllers/config_controller.go @@ -94,7 +94,7 @@ func GetTapConfig(c *gin.Context) { c.JSON(http.StatusOK, tapConfig) } -func startMizuTapperSyncer(ctx context.Context, provider *kubernetes.Provider, targetNamespaces []string, podFilterRegex regexp.Regexp, ignoredUserAgents []string, mizuApiFilteringOptions tapApi.TrafficFilteringOptions, istio bool) (*kubernetes.MizuTapperSyncer, error) { +func startMizuTapperSyncer(ctx context.Context, provider *kubernetes.Provider, targetNamespaces []string, podFilterRegex regexp.Regexp, ignoredUserAgents []string, mizuApiFilteringOptions tapApi.TrafficFilteringOptions, serviceMesh bool) (*kubernetes.MizuTapperSyncer, error) { tapperSyncer, err := kubernetes.CreateAndStartMizuTapperSyncer(ctx, provider, kubernetes.TapperSyncerConfig{ TargetNamespaces: targetNamespaces, PodFilterRegex: podFilterRegex, @@ -106,7 +106,7 @@ func startMizuTapperSyncer(ctx context.Context, provider *kubernetes.Provider, t 
IgnoredUserAgents: ignoredUserAgents, MizuApiFilteringOptions: mizuApiFilteringOptions, MizuServiceAccountExists: true, //assume service account exists since install mode will not function without it anyway - Istio: istio, + ServiceMesh: serviceMesh, }, time.Now()) if err != nil { diff --git a/cli/cmd/tap.go b/cli/cmd/tap.go index ebaa2befd..127acda89 100644 --- a/cli/cmd/tap.go +++ b/cli/cmd/tap.go @@ -119,5 +119,5 @@ func init() { tapCmd.Flags().StringP(configStructs.WorkspaceTapName, "w", defaultTapConfig.Workspace, "Uploads traffic to your UP9 workspace for further analysis (requires auth)") tapCmd.Flags().String(configStructs.EnforcePolicyFile, defaultTapConfig.EnforcePolicyFile, "Yaml file path with policy rules") tapCmd.Flags().String(configStructs.ContractFile, defaultTapConfig.ContractFile, "OAS/Swagger file to validate to monitor the contracts") - tapCmd.Flags().Bool(configStructs.IstioName, defaultTapConfig.Istio, "Record decrypted traffic if the cluster configured with istio and mtls") + tapCmd.Flags().Bool(configStructs.ServiceMeshName, defaultTapConfig.ServiceMesh, "Record decrypted traffic if the cluster is configured with a service mesh and with mtls") } diff --git a/cli/cmd/tapRunner.go b/cli/cmd/tapRunner.go index d23a775ea..6287403fd 100644 --- a/cli/cmd/tapRunner.go +++ b/cli/cmd/tapRunner.go @@ -192,7 +192,7 @@ func startTapperSyncer(ctx context.Context, cancel context.CancelFunc, provider IgnoredUserAgents: config.Config.Tap.IgnoredUserAgents, MizuApiFilteringOptions: mizuApiFilteringOptions, MizuServiceAccountExists: state.mizuServiceAccountExists, - Istio: config.Config.Tap.Istio, + ServiceMesh: config.Config.Tap.ServiceMesh, }, startTime) if err != nil { diff --git a/cli/config/configStructs/tapConfig.go b/cli/config/configStructs/tapConfig.go index bd90ca262..6ad3b6a21 100644 --- a/cli/config/configStructs/tapConfig.go +++ b/cli/config/configStructs/tapConfig.go @@ -22,7 +22,7 @@ const ( WorkspaceTapName = "workspace" EnforcePolicyFile = 
"traffic-validation-file" ContractFile = "contract" - IstioName = "istio" + ServiceMeshName = "service-mesh" ) type TapConfig struct { @@ -44,7 +44,7 @@ type TapConfig struct { AskUploadConfirmation bool `yaml:"ask-upload-confirmation" default:"true"` ApiServerResources shared.Resources `yaml:"api-server-resources"` TapperResources shared.Resources `yaml:"tapper-resources"` - Istio bool `yaml:"istio" default:"false"` + ServiceMesh bool `yaml:"service-mesh" default:"false"` } func (config *TapConfig) PodRegex() *regexp.Regexp { diff --git a/docs/ISTIO.md b/docs/SERVICE_MESH.md similarity index 82% rename from docs/ISTIO.md rename to docs/SERVICE_MESH.md index bda897b65..d3a60107d 100644 --- a/docs/ISTIO.md +++ b/docs/SERVICE_MESH.md @@ -1,37 +1,44 @@ ![Mizu: The API Traffic Viewer for Kubernetes](../assets/mizu-logo.svg) -# Istio mutual tls (mtls) with Mizu +# Service mesh mutual tls (mtls) with Mizu This document describe how Mizu tapper handles workloads configured with mtls, making the internal traffic between services in a cluster to be encrypted. -Besides Istio there are other service meshes that implement mtls. However, as of now Istio is the most used one, and this is why we are focusing on it. +The list of service meshes supported by Mizu includes: -In order to create an Istio setup for development, follow those steps: +- Istio +- Linkerd + +In order to create a service mesh setup for development, follow these steps: 1. Deploy a sample application to a Kubernetes cluster, the sample application needs to make internal service to service calls 2. SSH to one of the nodes, and run `tcpdump` 3. Make sure you see the internal service to service calls in a plain text -4. Deploy Istio to the cluster - make sure it is attached to all pods of the sample application, and that it is configured with mtls (default) +4. 
Deploy a service mesh (Istio, Linkerd) to the cluster - make sure it is attached to all pods of the sample application, and that it is configured with mtls (default) 5. Run `tcpdump` again, make sure you don't see the internal service to service calls in a plain text -## The connection between Istio and Envoy -In order to implement its service mesh capabilities, [Istio](https://istio.io) use an [Envoy](https://www.envoyproxy.io) sidecar in front of every pod in the cluster. The Envoy is responsible for the mtls communication, and that's why we are focusing on Envoy proxy. +## Implementation + +### Istio support + +#### The connection between Istio and Envoy +In order to implement its service mesh capabilities, [Istio](https://istio.io) uses an [Envoy](https://www.envoyproxy.io) sidecar in front of every pod in the cluster. The Envoy is responsible for the mtls communication, and that's why we are focusing on Envoy proxy. In the future we might see more players in that field, then we'll have to either add support for each of them or go with a unified eBPF solution. -## Network namespaces +#### Network namespaces A [linux network namespace](https://man7.org/linux/man-pages/man7/network_namespaces.7.html) is an isolation that limit the process view of the network. In the container world it used to isolate one container from another. In the Kubernetes world it used to isolate a pod from another. That means that two containers running on the same pod share the same network namespace. A container can reach a container in the same pod by accessing `localhost`. An Envoy proxy configured with mtls receives the inbound traffic directed to the pod, decrypts it and sends it via `localhost` to the target container. -## Tapping mtls traffic +#### Tapping mtls traffic In order for Mizu to be able to see the decrypted traffic it needs to listen on the same network namespace of the target pod. Multiple threads of the same process can have different network namespaces. 
[gopacket](https://github.com/google/gopacket) uses [libpacp](https://github.com/the-tcpdump-group/libpcap) by default for capturing the traffic. Libpacap doesn't support network namespaces and we can't ask it to listen to traffic on a different namespace. However, we can change the network namespace of the calling thread and then start libpcap to see the traffic on a different namespace. -## Finding the network namespace of a running process +#### Finding the network namespace of a running process The network namespace of a running process can be found in `/proc/PID/ns/net` link. Once we have this link, we can ask Linux to change the network namespace of a thread to this one. This mean that Mizu needs to have access to the `/proc` (procfs) of the running node. -## Finding the network namespace of a running pod +#### Finding the network namespace of a running pod In order for Mizu to be able to listen to mtls traffic, it needs to get the PIDs of the the running pods, filter them according to the user filters and then start listen to their internal network namespace traffic. There is no official way in Kubernetes to get from pod to PID. The CRI implementation purposefully doesn't force a pod to be a processes on the host. It can be a Virtual Machine as well like [Kata containers](https://katacontainers.io) @@ -42,5 +49,5 @@ Once Mizu detects an Envoy process, it need to check whether this specific Envoy Istio sends an `INSTANCE_IP` environment variable to every Envoy proxy process. By examining the Envoy process's environment variables we can see whether it's relevant or not. Examining a process environment variables is done by reading the `/proc/PID/envion` file. -## Edge cases +#### Edge cases The method we use to find Envoy processes and correlate them to the cluster IPs may be inaccurate in certain situations. 
If, for example, a user runs an Envoy process manually, and set its `INSTANCE_IP` environment variable to one of the `CLUSTER_IPS` the tapper gets, then Mizu will capture traffic for it. diff --git a/shared/kubernetes/mizuTapperSyncer.go b/shared/kubernetes/mizuTapperSyncer.go index 8b79540a9..3d7bfbe6e 100644 --- a/shared/kubernetes/mizuTapperSyncer.go +++ b/shared/kubernetes/mizuTapperSyncer.go @@ -44,7 +44,7 @@ type TapperSyncerConfig struct { IgnoredUserAgents []string MizuApiFilteringOptions api.TrafficFilteringOptions MizuServiceAccountExists bool - Istio bool + ServiceMesh bool } func CreateAndStartMizuTapperSyncer(ctx context.Context, kubernetesProvider *Provider, config TapperSyncerConfig, startTime time.Time) (*MizuTapperSyncer, error) { @@ -316,7 +316,7 @@ func (tapperSyncer *MizuTapperSyncer) updateMizuTappers() error { tapperSyncer.config.ImagePullPolicy, tapperSyncer.config.MizuApiFilteringOptions, tapperSyncer.config.LogLevel, - tapperSyncer.config.Istio, + tapperSyncer.config.ServiceMesh, ); err != nil { return err } diff --git a/shared/kubernetes/provider.go b/shared/kubernetes/provider.go index f5694f7c9..ffdcc6e4f 100644 --- a/shared/kubernetes/provider.go +++ b/shared/kubernetes/provider.go @@ -720,7 +720,7 @@ func (provider *Provider) CreateConfigMap(ctx context.Context, namespace string, return nil } -func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespace string, daemonSetName string, podImage string, tapperPodName string, apiServerPodIp string, nodeToTappedPodMap map[string][]core.Pod, serviceAccountName string, resources shared.Resources, imagePullPolicy core.PullPolicy, mizuApiFilteringOptions api.TrafficFilteringOptions, logLevel logging.Level, istio bool) error { +func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespace string, daemonSetName string, podImage string, tapperPodName string, apiServerPodIp string, nodeToTappedPodMap map[string][]core.Pod, serviceAccountName string, resources 
shared.Resources, imagePullPolicy core.PullPolicy, mizuApiFilteringOptions api.TrafficFilteringOptions, logLevel logging.Level, serviceMesh bool) error { logger.Log.Debugf("Applying %d tapper daemon sets, ns: %s, daemonSetName: %s, podImage: %s, tapperPodName: %s", len(nodeToTappedPodMap), namespace, daemonSetName, podImage, tapperPodName) if len(nodeToTappedPodMap) == 0 { @@ -745,8 +745,8 @@ func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespac "--nodefrag", } - if istio { - mizuCmd = append(mizuCmd, "--procfs", procfsMountPath, "--istio") + if serviceMesh { + mizuCmd = append(mizuCmd, "--procfs", procfsMountPath, "--servicemesh") } agentContainer := applyconfcore.Container() @@ -756,7 +756,7 @@ func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespac caps := applyconfcore.Capabilities().WithDrop("ALL").WithAdd("NET_RAW").WithAdd("NET_ADMIN") - if istio { + if serviceMesh { caps = caps.WithAdd("SYS_ADMIN") // for reading /proc/PID/net/ns caps = caps.WithAdd("SYS_PTRACE") // for setting netns to other process caps = caps.WithAdd("DAC_OVERRIDE") // for reading /proc/PID/environ diff --git a/tap/passive_tapper.go b/tap/passive_tapper.go index fcf0a9eda..2a0a142ce 100644 --- a/tap/passive_tapper.go +++ b/tap/passive_tapper.go @@ -52,7 +52,7 @@ var tstype = flag.String("timestamp_type", "", "Type of timestamps to use") var promisc = flag.Bool("promisc", true, "Set promiscuous mode") var staleTimeoutSeconds = flag.Int("staletimout", 120, "Max time in seconds to keep connections which don't transmit data") var pids = flag.String("pids", "", "A comma separated list of PIDs to capture their network namespaces") -var istio = flag.Bool("istio", false, "Record decrypted traffic if the cluster configured with istio and mtls") +var servicemesh = flag.Bool("servicemesh", false, "Record decrypted traffic if the cluster is configured with a service mesh and with mtls") var memprofile = flag.String("memprofile", "", "Write memory 
profile") @@ -179,7 +179,7 @@ func initializePacketSources() error { } var err error - if packetSourceManager, err = source.NewPacketSourceManager(*procfs, *pids, *fname, *iface, *istio, tapTargets, behaviour); err != nil { + if packetSourceManager, err = source.NewPacketSourceManager(*procfs, *pids, *fname, *iface, *servicemesh, tapTargets, behaviour); err != nil { return err } else { packetSourceManager.ReadPackets(!*nodefrag, mainPacketInputChan)