TRA-3257 Dynamic tappers (#57)

* Defer cleanup.

* Split createMizuResources into two functions.

* Re-create daemonset when changes to tapped pods occur.

* Reordered imports.

* Use Printf instead of Println.

* Workaround for variable scope.

* WIP Apply daemonset instead of create.

* Whitespaces.

* Fixed: Using the right types for Apply.

* Fixed missing pod IP by adding a delay.

* Debounce pod restart.

* Proper field manager name.
This commit is contained in:
nimrod-up9 2021-05-26 17:25:12 +03:00 committed by GitHub
parent 74e9d44b96
commit 620f046a26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 112 deletions

View File

@ -3,14 +3,24 @@ package cmd
import ( import (
"context" "context"
"fmt" "fmt"
"github.com/up9inc/mizu/cli/kubernetes"
core "k8s.io/api/core/v1"
"github.com/up9inc/mizu/cli/mizu"
"os" "os"
"os/signal" "os/signal"
"regexp" "regexp"
"syscall" "syscall"
"time" "time"
core "k8s.io/api/core/v1"
"github.com/up9inc/mizu/cli/debounce"
"github.com/up9inc/mizu/cli/kubernetes"
"github.com/up9inc/mizu/cli/mizu"
)
var mizuServiceAccountExists bool
var aggregatorService *core.Service
const (
updateTappersDelay = 5 * time.Second
) )
var currentlyTappedPods []core.Pod var currentlyTappedPods []core.Pod
@ -18,59 +28,87 @@ var currentlyTappedPods []core.Pod
func RunMizuTap(podRegexQuery *regexp.Regexp, tappingOptions *MizuTapOptions) { func RunMizuTap(podRegexQuery *regexp.Regexp, tappingOptions *MizuTapOptions) {
kubernetesProvider := kubernetes.NewProvider(tappingOptions.KubeConfigPath, tappingOptions.Namespace) kubernetesProvider := kubernetes.NewProvider(tappingOptions.KubeConfigPath, tappingOptions.Namespace)
defer cleanUpMizuResources(kubernetesProvider)
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() // cancel will be called when this function exits defer cancel() // cancel will be called when this function exits
matchingPods, err := kubernetesProvider.GetAllPodsMatchingRegex(ctx, podRegexQuery) if matchingPods, err := kubernetesProvider.GetAllPodsMatchingRegex(ctx, podRegexQuery); err != nil {
if err != nil {
fmt.Printf("Error getting pods to tap %v\n", err)
return return
} else {
currentlyTappedPods = matchingPods
} }
currentlyTappedPods = matchingPods
nodeToTappedPodIPMap, err := getNodeHostToTappedPodIpsMap(ctx, kubernetesProvider, matchingPods) nodeToTappedPodIPMap, err := getNodeHostToTappedPodIpsMap(ctx, kubernetesProvider, currentlyTappedPods)
if err != nil { if err != nil {
cleanUpMizuResources(kubernetesProvider)
return
}
err = createMizuResources(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions)
if err != nil {
cleanUpMizuResources(kubernetesProvider)
return return
} }
go portForwardApiPod(ctx, kubernetesProvider, cancel, tappingOptions) //TODO convert this to job for built in pod ttl or have the running app handle this if err := createMizuResources(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions); err != nil {
return
}
go portForwardApiPod(ctx, kubernetesProvider, cancel, tappingOptions) // TODO convert this to job for built in pod ttl or have the running app handle this
go watchPodsForTapping(ctx, kubernetesProvider, cancel, podRegexQuery, tappingOptions)
go syncApiStatus(ctx, cancel, tappingOptions) go syncApiStatus(ctx, cancel, tappingOptions)
waitForFinish(ctx, cancel) //block until exit signal or error
//block until exit signal or error
waitForFinish(ctx, cancel)
// TODO handle incoming traffic from tapper using a channel // TODO handle incoming traffic from tapper using a channel
//cleanup
fmt.Printf("\nRemoving mizu resources\n")
cleanUpMizuResources(kubernetesProvider)
} }
func createMizuResources(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions) error { func createMizuResources(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions) error {
mizuServiceAccountExists := createRBACIfNecessary(ctx, kubernetesProvider) if err := createMizuAggregator(ctx, kubernetesProvider, tappingOptions); err != nil {
_, err := kubernetesProvider.CreateMizuAggregatorPod(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, tappingOptions.MizuImage, mizuServiceAccountExists) return err
}
if err := createMizuTappers(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions); err != nil {
return err
}
return nil
}
func createMizuAggregator(ctx context.Context, kubernetesProvider *kubernetes.Provider, tappingOptions *MizuTapOptions) error {
var err error
mizuServiceAccountExists = createRBACIfNecessary(ctx, kubernetesProvider)
_, err = kubernetesProvider.CreateMizuAggregatorPod(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, tappingOptions.MizuImage, mizuServiceAccountExists)
if err != nil { if err != nil {
fmt.Printf("Error creating mizu collector pod: %v\n", err) fmt.Printf("Error creating mizu collector pod: %v\n", err)
return err return err
} }
aggregatorService, err := kubernetesProvider.CreateService(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, mizu.AggregatorPodName)
aggregatorService, err = kubernetesProvider.CreateService(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, mizu.AggregatorPodName)
if err != nil { if err != nil {
fmt.Printf("Error creating mizu collector service: %v\n", err) fmt.Printf("Error creating mizu collector service: %v\n", err)
return err return err
} }
err = kubernetesProvider.CreateMizuTapperDaemonSet(ctx, mizu.ResourcesNamespace, mizu.TapperDaemonSetName, tappingOptions.MizuImage, mizu.TapperPodName, fmt.Sprintf("%s.%s.svc.cluster.local", aggregatorService.Name, aggregatorService.Namespace), nodeToTappedPodIPMap, mizuServiceAccountExists)
if err != nil { return nil
}
func createMizuTappers(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions) error {
if err := kubernetesProvider.ApplyMizuTapperDaemonSet(
ctx,
mizu.ResourcesNamespace,
mizu.TapperDaemonSetName,
tappingOptions.MizuImage,
mizu.TapperPodName,
fmt.Sprintf("%s.%s.svc.cluster.local", aggregatorService.Name, aggregatorService.Namespace),
nodeToTappedPodIPMap,
mizuServiceAccountExists,
); err != nil {
fmt.Printf("Error creating mizu tapper daemonset: %v\n", err) fmt.Printf("Error creating mizu tapper daemonset: %v\n", err)
return err return err
} }
return nil return nil
} }
func cleanUpMizuResources(kubernetesProvider *kubernetes.Provider) { func cleanUpMizuResources(kubernetesProvider *kubernetes.Provider) {
fmt.Printf("\nRemoving mizu resources\n")
removalCtx, _ := context.WithTimeout(context.Background(), 5 * time.Second) removalCtx, _ := context.WithTimeout(context.Background(), 5 * time.Second)
if err := kubernetesProvider.RemovePod(removalCtx, mizu.ResourcesNamespace, mizu.AggregatorPodName); err != nil { if err := kubernetesProvider.RemovePod(removalCtx, mizu.ResourcesNamespace, mizu.AggregatorPodName); err != nil {
fmt.Printf("Error removing Pod %s in namespace %s: %s (%v,%+v)\n", mizu.AggregatorPodName, mizu.ResourcesNamespace, err, err, err); fmt.Printf("Error removing Pod %s in namespace %s: %s (%v,%+v)\n", mizu.AggregatorPodName, mizu.ResourcesNamespace, err, err, err);
@ -83,28 +121,59 @@ func cleanUpMizuResources(kubernetesProvider *kubernetes.Provider) {
} }
} }
// will be relevant in the future func watchPodsForTapping(ctx context.Context, kubernetesProvider *kubernetes.Provider, cancel context.CancelFunc, podRegex *regexp.Regexp, tappingOptions *MizuTapOptions) {
//func watchPodsForTapping(ctx context.Context, kubernetesProvider *kubernetes.Provider, cancel context.CancelFunc, podRegex *regexp.Regexp) { added, modified, removed, errorChan := kubernetes.FilteredWatch(ctx, kubernetesProvider.GetPodWatcher(ctx, kubernetesProvider.Namespace), podRegex)
// added, modified, removed, errorChan := kubernetes.FilteredWatch(ctx, kubernetesProvider.GetPodWatcher(ctx, kubernetesProvider.Namespace), podRegex)
// for { restartTappers := func() {
// select { if matchingPods, err := kubernetesProvider.GetAllPodsMatchingRegex(ctx, podRegex); err != nil {
// case newTarget := <- added: fmt.Printf("Error getting pods by regex: %s (%v,%+v)\n", err, err, err)
// fmt.Printf("+%s\n", newTarget.Name) cancel()
// } else {
// case removedTarget := <- removed: currentlyTappedPods = matchingPods
// fmt.Printf("-%s\n", removedTarget.Name) }
//
// case <- modified: nodeToTappedPodIPMap, err := getNodeHostToTappedPodIpsMap(ctx, kubernetesProvider, currentlyTappedPods)
// continue if err != nil {
// fmt.Printf("Error building node to ips map: %s (%v,%+v)\n", err, err, err)
// case <- errorChan: cancel()
// cancel() }
//
// case <- ctx.Done(): if err := createMizuTappers(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions); err != nil {
// return fmt.Printf("Error updating daemonset: %s (%v,%+v)\n", err, err, err)
// } cancel()
// } }
//} }
restartTappersDebouncer := debounce.NewDebouncer(updateTappersDelay, restartTappers)
for {
select {
case newTarget := <- added:
fmt.Printf("+%s\n", newTarget.Name)
case removedTarget := <- removed:
fmt.Printf("-%s\n", removedTarget.Name)
restartTappersDebouncer.SetOn()
case modifiedTarget := <- modified:
// Act only if the modified pod has already obtained an IP address.
// After filtering for IPs, on a normal pod restart this includes the following events:
// - Pod deletion
// - Pod reaches start state
// - Pod reaches ready state
// Ready/unready transitions might also trigger this event.
if modifiedTarget.Status.PodIP != "" {
restartTappersDebouncer.SetOn()
}
case <- errorChan:
// TODO: Does this also perform cleanup?
cancel()
case <- ctx.Done():
return
}
}
}
func portForwardApiPod(ctx context.Context, kubernetesProvider *kubernetes.Provider, cancel context.CancelFunc, tappingOptions *MizuTapOptions) { func portForwardApiPod(ctx context.Context, kubernetesProvider *kubernetes.Provider, cancel context.CancelFunc, tappingOptions *MizuTapOptions) {
podExactRegex := regexp.MustCompile(fmt.Sprintf("^%s$", mizu.AggregatorPodName)) podExactRegex := regexp.MustCompile(fmt.Sprintf("^%s$", mizu.AggregatorPodName))

42
cli/debounce/debounce.go Normal file
View File

@ -0,0 +1,42 @@
package debounce
import (
"time"
)
func NewDebouncer(timeout time.Duration, callback func()) *Debouncer {
var debouncer Debouncer
debouncer.setTimeout(timeout)
debouncer.setCallback(callback)
return &debouncer
}
type Debouncer struct {
callback func()
running bool
timeout time.Duration
timer *time.Timer
}
func (d *Debouncer) setTimeout(timeout time.Duration) {
// TODO: Return err if d.running
d.timeout = timeout
}
func (d *Debouncer) setCallback(callback func()) {
callbackWrapped := func() {
callback()
d.running = false
}
d.callback = callbackWrapped
}
func (d *Debouncer) SetOn() {
if d.running == true {
return
}
d.running = true
d.timer = time.AfterFunc(d.timeout, d.callback)
}

View File

@ -6,7 +6,13 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
apps "k8s.io/api/apps/v1"
"path/filepath"
"regexp"
applyconfapp "k8s.io/client-go/applyconfigurations/apps/v1"
applyconfmeta "k8s.io/client-go/applyconfigurations/meta/v1"
applyconfcore "k8s.io/client-go/applyconfigurations/core/v1"
core "k8s.io/api/core/v1" core "k8s.io/api/core/v1"
rbac "k8s.io/api/rbac/v1" rbac "k8s.io/api/rbac/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors" k8serrors "k8s.io/apimachinery/pkg/api/errors"
@ -22,8 +28,6 @@ import (
"k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/clientcmd"
_ "k8s.io/client-go/tools/portforward" _ "k8s.io/client-go/tools/portforward"
"k8s.io/client-go/util/homedir" "k8s.io/client-go/util/homedir"
"path/filepath"
"regexp"
) )
type Provider struct { type Provider struct {
@ -34,7 +38,8 @@ type Provider struct {
} }
const ( const (
serviceAccountName = "mizu-service-account" serviceAccountName = "mizu-service-account"
fieldManagerName = "mizu-manager"
) )
func NewProvider(kubeConfigPath string, overrideNamespace string) *Provider { func NewProvider(kubeConfigPath string, overrideNamespace string) *Provider {
@ -104,6 +109,7 @@ func (provider *Provider) CreateMizuAggregatorPod(ctx context.Context, namespace
}, },
DNSPolicy: "ClusterFirstWithHostNet", DNSPolicy: "ClusterFirstWithHostNet",
TerminationGracePeriodSeconds: new(int64), TerminationGracePeriodSeconds: new(int64),
// Affinity: TODO: define node selector for all relevant nodes for this mizu instance
}, },
} }
//define the service account only when it exists to prevent pod crash //define the service account only when it exists to prevent pod crash
@ -212,75 +218,50 @@ func (provider *Provider) RemoveDaemonSet(ctx context.Context, namespace string,
return provider.clientSet.AppsV1().DaemonSets(namespace).Delete(ctx, daemonSetName, metav1.DeleteOptions{}) return provider.clientSet.AppsV1().DaemonSets(namespace).Delete(ctx, daemonSetName, metav1.DeleteOptions{})
} }
func (provider *Provider) CreateMizuTapperDaemonSet(ctx context.Context, namespace string, daemonSetName string, podImage string, tapperPodName string, aggregatorPodIp string, nodeToTappedPodIPMap map[string][]string, linkServiceAccount bool) error { func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespace string, daemonSetName string, podImage string, tapperPodName string, aggregatorPodIp string, nodeToTappedPodIPMap map[string][]string, linkServiceAccount bool) error {
nodeToTappedPodIPMapJsonStr, err := json.Marshal(nodeToTappedPodIPMap) nodeToTappedPodIPMapJsonStr, err := json.Marshal(nodeToTappedPodIPMap)
if err != nil { if err != nil {
return err return err
} }
privileged := true privileged := true
podTemplate := core.PodTemplateSpec{ agentContainer := applyconfcore.Container()
ObjectMeta: metav1.ObjectMeta{ agentContainer.WithName(tapperPodName)
Labels: map[string]string{"app": tapperPodName}, agentContainer.WithImage(podImage)
}, agentContainer.WithImagePullPolicy(core.PullAlways)
Spec: core.PodSpec{ agentContainer.WithSecurityContext(applyconfcore.SecurityContext().WithPrivileged(privileged))
HostNetwork: true, // very important to make passive tapper see traffic agentContainer.WithCommand("./mizuagent", "-i", "any", "--tap", "--hardump", "--aggregator-address", fmt.Sprintf("ws://%s/wsTapper", aggregatorPodIp))
Containers: []core.Container{ agentContainer.WithEnv(
{ applyconfcore.EnvVar().WithName("HOST_MODE").WithValue("1"),
Name: tapperPodName, applyconfcore.EnvVar().WithName("AGGREGATOR_ADDRESS").WithValue(aggregatorPodIp),
Image: podImage, applyconfcore.EnvVar().WithName("TAPPED_ADDRESSES_PER_HOST").WithValue(string(nodeToTappedPodIPMapJsonStr)),
ImagePullPolicy: core.PullAlways, )
SecurityContext: &core.SecurityContext{ agentContainer.WithEnv(
Privileged: &privileged, // must be privileged to get node level traffic applyconfcore.EnvVar().WithName("NODE_NAME").WithValueFrom(
}, applyconfcore.EnvVarSource().WithFieldRef(
Command: []string {"./mizuagent", "-i", "any", "--tap", "--hardump", "--aggregator-address", fmt.Sprintf("ws://%s/wsTapper", aggregatorPodIp)}, applyconfcore.ObjectFieldSelector().WithAPIVersion("v1").WithFieldPath("spec.nodeName"),
Env: []core.EnvVar{ ),
{ ),
Name: "HOST_MODE", )
Value: "1",
}, podSpec := applyconfcore.PodSpec().WithHostNetwork(true).WithDNSPolicy("ClusterFirstWithHostNet").WithTerminationGracePeriodSeconds(0)
{
Name: "AGGREGATOR_ADDRESS",
Value: aggregatorPodIp,
},
{
Name: "TAPPED_ADDRESSES_PER_HOST",
Value: string(nodeToTappedPodIPMapJsonStr),
},
{
Name: "NODE_NAME",
ValueFrom: &core.EnvVarSource{
FieldRef: &core.ObjectFieldSelector {
APIVersion: "v1",
FieldPath: "spec.nodeName",
},
},
},
},
},
},
DNSPolicy: "ClusterFirstWithHostNet",
TerminationGracePeriodSeconds: new(int64),
// Affinity: TODO: define node selector for all relevant nodes for this mizu instance
},
}
if linkServiceAccount { if linkServiceAccount {
podTemplate.Spec.ServiceAccountName = serviceAccountName podSpec.WithServiceAccountName(serviceAccountName)
} }
labelSelector := metav1.LabelSelector{ podSpec.WithContainers(agentContainer)
MatchLabels: map[string]string{"app": tapperPodName},
}
daemonSet := apps.DaemonSet{ podTemplate := applyconfcore.PodTemplateSpec()
ObjectMeta: metav1.ObjectMeta{ podTemplate.WithLabels(map[string]string{"app": tapperPodName})
Name: daemonSetName, podTemplate.WithSpec(podSpec)
Namespace: namespace,
}, labelSelector := applyconfmeta.LabelSelector()
Spec: apps.DaemonSetSpec{ labelSelector.WithMatchLabels(map[string]string{"app": tapperPodName})
Selector: &labelSelector,
Template: podTemplate, daemonSet := applyconfapp.DaemonSet(daemonSetName, namespace)
}, daemonSet.WithSpec(applyconfapp.DaemonSetSpec().WithSelector(labelSelector).WithTemplate(podTemplate))
}
_, err = provider.clientSet.AppsV1().DaemonSets(namespace).Create(ctx, &daemonSet, metav1.CreateOptions{}) _, err = provider.clientSet.AppsV1().DaemonSets(namespace).Apply(ctx, daemonSet, metav1.ApplyOptions{FieldManager: fieldManagerName})
return err return err
} }