diff --git a/api/go.mod b/api/go.mod index 4f4c055c0..dcb1edf86 100644 --- a/api/go.mod +++ b/api/go.mod @@ -4,6 +4,7 @@ go 1.16 require ( github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a + github.com/beevik/etree v1.1.0 github.com/djherbis/atime v1.0.0 github.com/fasthttp/websocket v1.4.3-beta.1 // indirect github.com/go-playground/locales v0.13.0 diff --git a/api/go.sum b/api/go.sum index bc655181d..efa2d4285 100644 --- a/api/go.sum +++ b/api/go.sum @@ -48,6 +48,8 @@ github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a h1:76llBl github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a/go.mod h1:QvDfsDQDmGxUsvEeWabVZ5pp2FMXpOkwQV0L6SE6cp0= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs= +github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= diff --git a/api/main.go b/api/main.go index 187af6d80..b1bd17a6c 100644 --- a/api/main.go +++ b/api/main.go @@ -23,8 +23,6 @@ var aggregator = flag.Bool("aggregator", false, "Run in aggregator mode with API var standalone = flag.Bool("standalone", false, "Run in standalone tapper and API mode") var aggregatorAddress = flag.String("aggregator-address", "", "Address of mizu collector for tapping") -const nodeNameEnvVar = "NODE_NAME" -const tappedAddressesPerNodeDictEnvVar = "TAPPED_ADDRESSES_PER_HOST" func main() { flag.Parse() @@ -36,7 +34,7 @@ func main() { if *standalone { harOutputChannel := tap.StartPassiveTapper() filteredHarChannel := make(chan *tap.OutputChannelItem) - go filterHarHeaders(harOutputChannel, filteredHarChannel) + go filterHarHeaders(harOutputChannel, filteredHarChannel, getTrafficFilteringOptions()) go api.StartReadingEntries(filteredHarChannel, nil) hostApi(nil) } else if *shouldTap { @@ -55,12 +53,12 @@ func main() { if err != nil { panic(fmt.Sprintf("Error connecting to socket server at %s %v", *aggregatorAddress, err)) } - filteredHarChannel := make(chan *tap.OutputChannelItem) - go filterHarHeaders(harOutputChannel, filteredHarChannel) - go pipeChannelToSocket(socketConnection, filteredHarChannel) + go pipeChannelToSocket(socketConnection, harOutputChannel) } else if *aggregator { socketHarOutChannel := make(chan *tap.OutputChannelItem, 1000) - go api.StartReadingEntries(socketHarOutChannel, nil) + filteredHarChannel := make(chan *tap.OutputChannelItem) + go api.StartReadingEntries(filteredHarChannel, nil) + go filterHarHeaders(socketHarOutChannel, filteredHarChannel, getTrafficFilteringOptions()) hostApi(socketHarOutChannel) } @@ -94,18 +92,32 @@ func hostApi(socketHarOutputChannel chan<- *tap.OutputChannelItem) { func getTapTargets() []string { - nodeName := os.Getenv(nodeNameEnvVar) + nodeName := os.Getenv(shared.NodeNameEnvVar) var tappedAddressesPerNodeDict map[string][]string - err := json.Unmarshal([]byte(os.Getenv(tappedAddressesPerNodeDictEnvVar)), &tappedAddressesPerNodeDict) + err := json.Unmarshal([]byte(os.Getenv(shared.TappedAddressesPerNodeDictEnvVar)), &tappedAddressesPerNodeDict) if err != nil { - panic(fmt.Sprintf("env var value of %s is invalid! must be map[string][]string %v", tappedAddressesPerNodeDict, err)) + panic(fmt.Sprintf("env var %s's value of %s is invalid! must be map[string][]string %v", shared.TappedAddressesPerNodeDictEnvVar, tappedAddressesPerNodeDict, err)) } return tappedAddressesPerNodeDict[nodeName] } -func filterHarHeaders(inChannel <- chan *tap.OutputChannelItem, outChannel chan *tap.OutputChannelItem) { +func getTrafficFilteringOptions() *shared.TrafficFilteringOptions { + filteringOptionsJson := os.Getenv(shared.MizuFilteringOptionsEnvVar) + if filteringOptionsJson == "" { + return nil + } + var filteringOptions shared.TrafficFilteringOptions + err := json.Unmarshal([]byte(filteringOptionsJson), &filteringOptions) + if err != nil { + panic(fmt.Sprintf("env var %s's value of %s is invalid! json must match the shared.TrafficFilteringOptions struct %v", shared.MizuFilteringOptionsEnvVar, filteringOptionsJson, err)) + } + + return &filteringOptions +} + +func filterHarHeaders(inChannel <- chan *tap.OutputChannelItem, outChannel chan *tap.OutputChannelItem, filterOptions *shared.TrafficFilteringOptions) { for message := range inChannel { - sensitiveDataFiltering.FilterSensitiveInfoFromHarRequest(message) + sensitiveDataFiltering.FilterSensitiveInfoFromHarRequest(message, filterOptions) outChannel <- message } } diff --git a/api/pkg/sensitiveDataFiltering/consts.go b/api/pkg/sensitiveDataFiltering/consts.go index 09ce6628c..e5624de73 100644 --- a/api/pkg/sensitiveDataFiltering/consts.go +++ b/api/pkg/sensitiveDataFiltering/consts.go @@ -2,7 +2,7 @@ package sensitiveDataFiltering const maskedFieldPlaceholderValue = "[REDACTED]" -//these values MUST be all lower case +//these values MUST be all lower case and contain no `-` or `_` characters var personallyIdentifiableDataFields = []string{"token", "authorization", "authentication", "cookie", "userid", "password", "username", "user", "key", "passcode", "pass", "auth", "authtoken", "jwt", "bearer", "clientid", "clientsecret", "redirecturi", "phonenumber", diff --git a/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go b/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go index 468f01d6a..baa46e3e6 100644 --- a/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go +++ b/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go @@ -3,13 +3,16 @@ package sensitiveDataFiltering import ( "encoding/json" "fmt" - "github.com/google/martian/har" "mizuserver/pkg/tap" "net/url" "strings" + + "github.com/beevik/etree" + "github.com/google/martian/har" + "github.com/up9inc/mizu/shared" ) -func FilterSensitiveInfoFromHarRequest(harOutputItem *tap.OutputChannelItem) { +func FilterSensitiveInfoFromHarRequest(harOutputItem *tap.OutputChannelItem, options *shared.TrafficFilteringOptions) { filterHarHeaders(harOutputItem.HarEntry.Request.Headers) filterHarHeaders(harOutputItem.HarEntry.Response.Headers) @@ -24,13 +27,15 @@ func FilterSensitiveInfoFromHarRequest(harOutputItem *tap.OutputChannelItem) { } if harOutputItem.HarEntry.Request.PostData != nil { - filteredRequestBody, err := filterHttpBody([]byte(harOutputItem.HarEntry.Request.PostData.Text)) + requestContentType := getContentTypeHeaderValue(harOutputItem.HarEntry.Request.Headers) + filteredRequestBody, err := filterHttpBody([]byte(harOutputItem.HarEntry.Request.PostData.Text), requestContentType, options) if err == nil { harOutputItem.HarEntry.Request.PostData.Text = string(filteredRequestBody) } } if harOutputItem.HarEntry.Response.Content != nil { - filteredResponseBody, err := filterHttpBody(harOutputItem.HarEntry.Response.Content.Text) + responseContentType := getContentTypeHeaderValue(harOutputItem.HarEntry.Response.Headers) + filteredResponseBody, err := filterHttpBody(harOutputItem.HarEntry.Response.Content.Text, responseContentType, options) if err == nil { harOutputItem.HarEntry.Response.Content.Text = filteredResponseBody } @@ -45,6 +50,15 @@ func filterHarHeaders(headers []har.Header) { } } +func getContentTypeHeaderValue(headers []har.Header) string { + for _, header := range headers { + if strings.ToLower(header.Name) == "content-type" { + return header.Value + } + } + return "" +} + func isFieldNameSensitive(fieldName string) bool { name := strings.ToLower(fieldName) name = strings.ReplaceAll(name, "_", "") @@ -60,7 +74,63 @@ func isFieldNameSensitive(fieldName string) bool { return false } -func filterHttpBody(bytes []byte) ([]byte, error){ +func filterHttpBody(bytes []byte, contentType string, options *shared.TrafficFilteringOptions) ([]byte, error) { + mimeType := strings.Split(contentType, ";")[0] + switch strings.ToLower(mimeType) { + case "application/json": + return filterJsonBody(bytes) + case "text/html": + fallthrough + case "application/xhtml+xml": + fallthrough + case "text/xml": + fallthrough + case "application/xml": + return filterXmlEtree(bytes) + case "text/plain": + if options != nil && options.PlainTextMaskingRegexes != nil { + return filterPlainText(bytes, options), nil + } + } + return bytes, nil +} + +func filterPlainText(bytes []byte, options *shared.TrafficFilteringOptions) []byte { + for _, regex := range options.PlainTextMaskingRegexes { + bytes = regex.ReplaceAll(bytes, []byte(maskedFieldPlaceholderValue)) + } + return bytes +} + +func filterXmlEtree(bytes []byte) ([]byte, error) { + xmlDoc := etree.NewDocument() + err := xmlDoc.ReadFromBytes(bytes) + if err != nil { + return nil, err + } else { + filterXmlElement(xmlDoc.Root()) + } + return xmlDoc.WriteToBytes() +} + +func filterXmlElement(element *etree.Element) { + for i, attribute := range element.Attr { + if isFieldNameSensitive(attribute.Key) { + element.Attr[i].Value = maskedFieldPlaceholderValue + } + } + if element.ChildElements() == nil || len(element.ChildElements()) == 0 { + if isFieldNameSensitive(element.Tag) { + element.SetText(maskedFieldPlaceholderValue) + } + } else { + for _, element := range element.ChildElements() { + filterXmlElement(element) + } + } +} + +func filterJsonBody(bytes []byte) ([]byte, error) { var bodyJsonMap map[string] interface{} err := json.Unmarshal(bytes ,&bodyJsonMap) if err != nil { diff --git a/api/pkg/tap/passive_tapper.go b/api/pkg/tap/passive_tapper.go index b5cacc9e8..72f5c1294 100644 --- a/api/pkg/tap/passive_tapper.go +++ b/api/pkg/tap/passive_tapper.go @@ -13,6 +13,7 @@ import ( "encoding/json" "flag" "fmt" + "github.com/up9inc/mizu/shared" "log" "os" "os/signal" @@ -34,7 +35,6 @@ import ( const AppPortsEnvVar = "APP_PORTS" const OutPortEnvVar = "WEB_SOCKET_PORT" const maxHTTP2DataLenEnvVar = "HTTP2_DATA_SIZE_LIMIT" -const hostModeEnvVar = "HOST_MODE" // default is 1MB, more than the max size accepted by collector and traffic-dumper const maxHTTP2DataLenDefault = 1 * 1024 * 1024 const cleanPeriod = time.Second * 10 @@ -258,7 +258,7 @@ func startPassiveTapper(harWriter *HarWriter) { maxHTTP2DataLen = convertedInt } } - hostMode = os.Getenv(hostModeEnvVar) == "1" + hostMode = os.Getenv(shared.HostModeEnvVar) == "1" fmt.Printf("App Ports: %v\n", appPorts) fmt.Printf("Tap output websocket port: %s\n", tapOutputPort) diff --git a/cli/cmd/tap.go b/cli/cmd/tap.go index c392f99ba..5c795d935 100644 --- a/cli/cmd/tap.go +++ b/cli/cmd/tap.go @@ -10,11 +10,12 @@ import ( ) type MizuTapOptions struct { - GuiPort uint16 - Namespace string - KubeConfigPath string - MizuImage string - MizuPodPort uint16 + GuiPort uint16 + Namespace string + KubeConfigPath string + MizuImage string + MizuPodPort uint16 + PlainTextFilterRegexes []string } @@ -50,4 +51,5 @@ func init() { tapCmd.Flags().StringVarP(&mizuTapOptions.KubeConfigPath, "kube-config", "k", "", "Path to kube-config file") tapCmd.Flags().StringVarP(&mizuTapOptions.MizuImage, "mizu-image", "", fmt.Sprintf("gcr.io/up9-docker-hub/mizu/%s:latest", mizu.Branch), "Custom image for mizu collector") tapCmd.Flags().Uint16VarP(&mizuTapOptions.MizuPodPort, "mizu-port", "", 8899, "Port which mizu cli will attempt to forward from the mizu collector pod") + tapCmd.Flags().StringArrayVarP(&mizuTapOptions.PlainTextFilterRegexes, "regex-masking", "r", nil, "List of regex expressions that are used to filter matching values from text/plain http bodies") } diff --git a/cli/cmd/tapRunner.go b/cli/cmd/tapRunner.go index 040957ecb..1fb81edb4 100644 --- a/cli/cmd/tapRunner.go +++ b/cli/cmd/tapRunner.go @@ -3,6 +3,7 @@ package cmd import ( "context" "fmt" + "github.com/up9inc/mizu/shared" "os" "os/signal" "regexp" @@ -26,6 +27,10 @@ const ( var currentlyTappedPods []core.Pod func RunMizuTap(podRegexQuery *regexp.Regexp, tappingOptions *MizuTapOptions) { + mizuApiFilteringOptions, err := getMizuApiFilteringOptions(tappingOptions) + if err != nil { + return + } kubernetesProvider := kubernetes.NewProvider(tappingOptions.KubeConfigPath, tappingOptions.Namespace) defer cleanUpMizuResources(kubernetesProvider) @@ -43,7 +48,7 @@ func RunMizuTap(podRegexQuery *regexp.Regexp, tappingOptions *MizuTapOptions) { return } - if err := createMizuResources(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions); err != nil { + if err := createMizuResources(ctx, kubernetesProvider, nodeToTappedPodIPMap, tappingOptions, mizuApiFilteringOptions); err != nil { return } @@ -57,8 +62,8 @@ func RunMizuTap(podRegexQuery *regexp.Regexp, tappingOptions *MizuTapOptions) { // TODO handle incoming traffic from tapper using a channel } -func createMizuResources(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions) error { - if err := createMizuAggregator(ctx, kubernetesProvider, tappingOptions); err != nil { +func createMizuResources(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions, mizuApiFilteringOptions *shared.TrafficFilteringOptions) error { + if err := createMizuAggregator(ctx, kubernetesProvider, tappingOptions, mizuApiFilteringOptions); err != nil { return err } @@ -69,11 +74,11 @@ func createMizuResources(ctx context.Context, kubernetesProvider *kubernetes.Pro return nil } -func createMizuAggregator(ctx context.Context, kubernetesProvider *kubernetes.Provider, tappingOptions *MizuTapOptions) error { +func createMizuAggregator(ctx context.Context, kubernetesProvider *kubernetes.Provider, tappingOptions *MizuTapOptions, mizuApiFilteringOptions *shared.TrafficFilteringOptions) error { var err error mizuServiceAccountExists = createRBACIfNecessary(ctx, kubernetesProvider) - _, err = kubernetesProvider.CreateMizuAggregatorPod(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, tappingOptions.MizuImage, mizuServiceAccountExists) + _, err = kubernetesProvider.CreateMizuAggregatorPod(ctx, mizu.ResourcesNamespace, mizu.AggregatorPodName, tappingOptions.MizuImage, mizuServiceAccountExists, mizuApiFilteringOptions) if err != nil { fmt.Printf("Error creating mizu collector pod: %v\n", err) return err @@ -88,6 +93,24 @@ func createMizuAggregator(ctx context.Context, kubernetesProvider *kubernetes.Pr return nil } +func getMizuApiFilteringOptions(tappingOptions *MizuTapOptions) (*shared.TrafficFilteringOptions, error) { + if tappingOptions.PlainTextFilterRegexes == nil || len(tappingOptions.PlainTextFilterRegexes) == 0 { + return nil, nil + } + + compiledRegexSlice := make([]*shared.SerializableRegexp, 0) + for _, regexStr := range tappingOptions.PlainTextFilterRegexes { + compiledRegex, err := shared.CompileRegexToSerializableRegexp(regexStr) + if err != nil { + fmt.Printf("Regex %s is invalid: %v", regexStr, err) + return nil, err + } + compiledRegexSlice = append(compiledRegexSlice, compiledRegex) + } + + return &shared.TrafficFilteringOptions{PlainTextMaskingRegexes: compiledRegexSlice}, nil +} + func createMizuTappers(ctx context.Context, kubernetesProvider *kubernetes.Provider, nodeToTappedPodIPMap map[string][]string, tappingOptions *MizuTapOptions) error { if err := kubernetesProvider.ApplyMizuTapperDaemonSet( ctx, diff --git a/cli/kubernetes/provider.go b/cli/kubernetes/provider.go index 348068b4c..20acd52f7 100644 --- a/cli/kubernetes/provider.go +++ b/cli/kubernetes/provider.go @@ -10,15 +10,16 @@ import ( "path/filepath" "regexp" - applyconfapp "k8s.io/client-go/applyconfigurations/apps/v1" - applyconfmeta "k8s.io/client-go/applyconfigurations/meta/v1" - applyconfcore "k8s.io/client-go/applyconfigurations/core/v1" + "github.com/up9inc/mizu/shared" core "k8s.io/api/core/v1" rbac "k8s.io/api/rbac/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/watch" + applyconfapp "k8s.io/client-go/applyconfigurations/apps/v1" + applyconfcore "k8s.io/client-go/applyconfigurations/core/v1" + applyconfmeta "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/client-go/kubernetes" _ "k8s.io/client-go/plugin/pkg/client/auth/azure" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" @@ -85,7 +86,11 @@ func (provider *Provider) GetPods(ctx context.Context, namespace string) { fmt.Printf("There are %d pods in Namespace %s\n", len(pods.Items), namespace) } -func (provider *Provider) CreateMizuAggregatorPod(ctx context.Context, namespace string, podName string, podImage string, linkServiceAccount bool) (*core.Pod, error) { +func (provider *Provider) CreateMizuAggregatorPod(ctx context.Context, namespace string, podName string, podImage string, linkServiceAccount bool, mizuApiFilteringOptions *shared.TrafficFilteringOptions) (*core.Pod, error) { + marshaledFilteringOptions, err := json.Marshal(mizuApiFilteringOptions) + if err != nil { + return nil, err + } pod := &core.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, @@ -101,9 +106,13 @@ func (provider *Provider) CreateMizuAggregatorPod(ctx context.Context, namespace Command: []string {"./mizuagent", "--aggregator"}, Env: []core.EnvVar{ { - Name: "HOST_MODE", + Name: shared.HostModeEnvVar, Value: "1", }, + { + Name: shared.MizuFilteringOptionsEnvVar, + Value: string(marshaledFilteringOptions), + }, }, }, }, @@ -232,12 +241,11 @@ func (provider *Provider) ApplyMizuTapperDaemonSet(ctx context.Context, namespac agentContainer.WithSecurityContext(applyconfcore.SecurityContext().WithPrivileged(privileged)) agentContainer.WithCommand("./mizuagent", "-i", "any", "--tap", "--hardump", "--aggregator-address", fmt.Sprintf("ws://%s/wsTapper", aggregatorPodIp)) agentContainer.WithEnv( - applyconfcore.EnvVar().WithName("HOST_MODE").WithValue("1"), - applyconfcore.EnvVar().WithName("AGGREGATOR_ADDRESS").WithValue(aggregatorPodIp), - applyconfcore.EnvVar().WithName("TAPPED_ADDRESSES_PER_HOST").WithValue(string(nodeToTappedPodIPMapJsonStr)), + applyconfcore.EnvVar().WithName(shared.HostModeEnvVar).WithValue("1"), + applyconfcore.EnvVar().WithName(shared.TappedAddressesPerNodeDictEnvVar).WithValue(string(nodeToTappedPodIPMapJsonStr)), ) agentContainer.WithEnv( - applyconfcore.EnvVar().WithName("NODE_NAME").WithValueFrom( + applyconfcore.EnvVar().WithName(shared.NodeNameEnvVar).WithValueFrom( applyconfcore.EnvVarSource().WithFieldRef( applyconfcore.ObjectFieldSelector().WithAPIVersion("v1").WithFieldPath("spec.nodeName"), ), diff --git a/shared/consts.go b/shared/consts.go new file mode 100644 index 000000000..ccda67615 --- /dev/null +++ b/shared/consts.go @@ -0,0 +1,8 @@ +package shared + +const ( + MizuFilteringOptionsEnvVar = "SENSITIVE_DATA_FILTERING_OPTIONS" + HostModeEnvVar = "HOST_MODE" + NodeNameEnvVar = "NODE_NAME" + TappedAddressesPerNodeDictEnvVar = "TAPPED_ADDRESSES_PER_HOST" +) diff --git a/shared/models.go b/shared/models.go index dba9e1731..cbf73b594 100644 --- a/shared/models.go +++ b/shared/models.go @@ -33,3 +33,7 @@ func CreateWebSocketStatusMessage(tappingStatus TapStatus) WebSocketStatusMessag TappingStatus: tappingStatus, } } + +type TrafficFilteringOptions struct { + PlainTextMaskingRegexes []*SerializableRegexp +} diff --git a/shared/serializableRegexp.go b/shared/serializableRegexp.go new file mode 100644 index 000000000..e311fdeb5 --- /dev/null +++ b/shared/serializableRegexp.go @@ -0,0 +1,30 @@ +package shared + +import "regexp" + +type SerializableRegexp struct { + regexp.Regexp +} + +func CompileRegexToSerializableRegexp(expr string) (*SerializableRegexp, error) { + re, err := regexp.Compile(expr) + if err != nil { + return nil, err + } + return &SerializableRegexp{*re}, nil +} + +// UnmarshalText is by json.Unmarshal. +func (r *SerializableRegexp) UnmarshalText(text []byte) error { + rr, err := CompileRegexToSerializableRegexp(string(text)) + if err != nil { + return err + } + *r = *rr + return nil +} + +// MarshalText is used by json.Marshal. +func (r *SerializableRegexp) MarshalText() ([]byte, error) { + return []byte(r.String()), nil +}