diff --git a/api/go.mod b/api/go.mod index 4f4c055c0..dcb1edf86 100644 --- a/api/go.mod +++ b/api/go.mod @@ -4,6 +4,7 @@ go 1.16 require ( github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a + github.com/beevik/etree v1.1.0 github.com/djherbis/atime v1.0.0 github.com/fasthttp/websocket v1.4.3-beta.1 // indirect github.com/go-playground/locales v0.13.0 diff --git a/api/go.sum b/api/go.sum index bc655181d..efa2d4285 100644 --- a/api/go.sum +++ b/api/go.sum @@ -48,6 +48,8 @@ github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a h1:76llBl github.com/antoniodipinto/ikisocket v0.0.0-20210417133349-f1502512d69a/go.mod h1:QvDfsDQDmGxUsvEeWabVZ5pp2FMXpOkwQV0L6SE6cp0= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= github.com/aws/aws-sdk-go v1.34.28/go.mod h1:H7NKnBqNVzoTJpGfLrQkkD+ytBA93eiDYi/+8rV9s48= +github.com/beevik/etree v1.1.0 h1:T0xke/WvNtMoCqgzPhkX2r4rjY3GDZFi+FjpRZY2Jbs= +github.com/beevik/etree v1.1.0/go.mod h1:r8Aw8JqVegEf0w2fDnATrX9VpkMcyFeM0FhwO62wh+A= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= diff --git a/api/main.go b/api/main.go index 187af6d80..7134733d6 100644 --- a/api/main.go +++ b/api/main.go @@ -16,6 +16,7 @@ import ( "mizuserver/pkg/utils" "os" "os/signal" + "regexp" ) var shouldTap = flag.Bool("tap", false, "Run in tapper mode without API") @@ -25,6 +26,7 @@ var aggregatorAddress = flag.String("aggregator-address", "", "Address of mizu c const nodeNameEnvVar = "NODE_NAME" const tappedAddressesPerNodeDictEnvVar = "TAPPED_ADDRESSES_PER_HOST" +const plainTextRegexesEnvVar = "PLAINTEXT_REGEXES" func main() { flag.Parse() @@ -98,14 +100,36 @@ func getTapTargets() []string { 
var tappedAddressesPerNodeDict map[string][]string err := json.Unmarshal([]byte(os.Getenv(tappedAddressesPerNodeDictEnvVar)), &tappedAddressesPerNodeDict) if err != nil { - panic(fmt.Sprintf("env var value of %s is invalid! must be map[string][]string %v", tappedAddressesPerNodeDict, err)) + panic(fmt.Sprintf("env var %s's value of %s is invalid! must be map[string][]string %v", tappedAddressesPerNodeDictEnvVar, tappedAddressesPerNodeDict, err)) } return tappedAddressesPerNodeDict[nodeName] } +func getFilteringOptions() *sensitiveDataFiltering.FilteringOptions { + regexJsonArr := os.Getenv(plainTextRegexesEnvVar) + if regexJsonArr == "" { + return nil + } + var regexStrSlice []string + err := json.Unmarshal([]byte(regexJsonArr), &regexStrSlice) + if err != nil { + panic(fmt.Sprintf("env var %s's value of %s is invalid! must be []string %v", plainTextRegexesEnvVar, regexJsonArr, err)) + } + + parsedRegexSlice := make([]regexp.Regexp, 0) + for _, regexStr := range regexStrSlice { + regex, err := regexp.Compile(regexStr) + if err != nil { + panic(fmt.Sprintf("env var %s's value of %s is invalid! 
must be []string %v", plainTextRegexesEnvVar, regexJsonArr, err)) + } + parsedRegexSlice = append(parsedRegexSlice, *regex) + } + return &sensitiveDataFiltering.FilteringOptions{PlainTextFilterRegexes: parsedRegexSlice} +} + func filterHarHeaders(inChannel <- chan *tap.OutputChannelItem, outChannel chan *tap.OutputChannelItem) { for message := range inChannel { - sensitiveDataFiltering.FilterSensitiveInfoFromHarRequest(message) + sensitiveDataFiltering.FilterSensitiveInfoFromHarRequest(message, nil) outChannel <- message } } diff --git a/api/pkg/sensitiveDataFiltering/consts.go b/api/pkg/sensitiveDataFiltering/consts.go index 09ce6628c..8cd0cacf0 100644 --- a/api/pkg/sensitiveDataFiltering/consts.go +++ b/api/pkg/sensitiveDataFiltering/consts.go @@ -2,9 +2,9 @@ package sensitiveDataFiltering const maskedFieldPlaceholderValue = "[REDACTED]" -//these values MUST be all lower case +//these values MUST be all lower case and contain no `-` or `_` characters var personallyIdentifiableDataFields = []string{"token", "authorization", "authentication", "cookie", "userid", "password", "username", "user", "key", "passcode", "pass", "auth", "authtoken", "jwt", "bearer", "clientid", "clientsecret", "redirecturi", "phonenumber", "zip", "zipcode", "address", "country", "firstname", "lastname", - "middlename", "fname", "lname", "birthdate"} + "middlename", "fname", "lname", "birthdate", "title"} diff --git a/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go b/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go index 468f01d6a..fa247f196 100644 --- a/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go +++ b/api/pkg/sensitiveDataFiltering/messageSensitiveDataCleaner.go @@ -3,13 +3,20 @@ package sensitiveDataFiltering import ( "encoding/json" "fmt" - "github.com/google/martian/har" "mizuserver/pkg/tap" "net/url" + "regexp" "strings" + + "github.com/beevik/etree" + "github.com/google/martian/har" ) -func FilterSensitiveInfoFromHarRequest(harOutputItem 
*tap.OutputChannelItem) { +type FilteringOptions struct { + PlainTextFilterRegexes []regexp.Regexp +} + +func FilterSensitiveInfoFromHarRequest(harOutputItem *tap.OutputChannelItem, options *FilteringOptions) { filterHarHeaders(harOutputItem.HarEntry.Request.Headers) filterHarHeaders(harOutputItem.HarEntry.Response.Headers) @@ -24,13 +31,15 @@ func FilterSensitiveInfoFromHarRequest(harOutputItem *tap.OutputChannelItem) { } if harOutputItem.HarEntry.Request.PostData != nil { - filteredRequestBody, err := filterHttpBody([]byte(harOutputItem.HarEntry.Request.PostData.Text)) + requestContentType := getContentTypeHeaderValue(harOutputItem.HarEntry.Request.Headers) + filteredRequestBody, err := filterHttpBody([]byte(harOutputItem.HarEntry.Request.PostData.Text), requestContentType, options) if err == nil { harOutputItem.HarEntry.Request.PostData.Text = string(filteredRequestBody) } } if harOutputItem.HarEntry.Response.Content != nil { - filteredResponseBody, err := filterHttpBody(harOutputItem.HarEntry.Response.Content.Text) + responseContentType := getContentTypeHeaderValue(harOutputItem.HarEntry.Response.Headers) + filteredResponseBody, err := filterHttpBody(harOutputItem.HarEntry.Response.Content.Text, responseContentType, options) if err == nil { harOutputItem.HarEntry.Response.Content.Text = filteredResponseBody } @@ -45,6 +54,15 @@ func filterHarHeaders(headers []har.Header) { } } +func getContentTypeHeaderValue(headers []har.Header) string { + for _, header := range headers { + if strings.ToLower(header.Name) == "content-type" { + return header.Value + } + } + return "" +} + func isFieldNameSensitive(fieldName string) bool { name := strings.ToLower(fieldName) name = strings.ReplaceAll(name, "_", "") @@ -60,7 +78,63 @@ func isFieldNameSensitive(fieldName string) bool { return false } -func filterHttpBody(bytes []byte) ([]byte, error){ +func filterHttpBody(bytes []byte, contentType string, options *FilteringOptions) ([]byte, error) { + mimeType := 
strings.Split(contentType, ";")[0] + switch strings.ToLower(mimeType) { + case "application/json": + return filterJsonBody(bytes) + case "text/html": + fallthrough + case "application/xhtml+xml": + fallthrough + case "text/xml": + fallthrough + case "application/xml": + return filterXmlEtree(bytes) + case "text/plain": + if options != nil && options.PlainTextFilterRegexes != nil { + return filterPlainText(bytes, options), nil + } + } + return bytes, nil +} + +func filterPlainText(bytes []byte, options *FilteringOptions) []byte { + for _, regex := range options.PlainTextFilterRegexes { + bytes = regex.ReplaceAll(bytes, []byte(maskedFieldPlaceholderValue)) + } + return bytes +} + +func filterXmlEtree(bytes []byte) ([]byte, error) { + xmlDoc := etree.NewDocument() + err := xmlDoc.ReadFromBytes(bytes) + if err != nil { + return nil, err + } else { + filterXmlElement(xmlDoc.Root()) + } + return xmlDoc.WriteToBytes() +} + +func filterXmlElement(element *etree.Element) { + for i, attribute := range element.Attr { + if isFieldNameSensitive(attribute.Key) { + element.Attr[i].Value = maskedFieldPlaceholderValue + } + } + if element.ChildElements() == nil || len(element.ChildElements()) == 0 { + if isFieldNameSensitive(element.Tag) { + element.SetText(maskedFieldPlaceholderValue) + } + } else { + for _, element := range element.ChildElements() { + filterXmlElement(element) + } + } +} + +func filterJsonBody(bytes []byte) ([]byte, error) { var bodyJsonMap map[string] interface{} err := json.Unmarshal(bytes ,&bodyJsonMap) if err != nil { diff --git a/cli/cmd/tap.go b/cli/cmd/tap.go index c392f99ba..ba6bcc0d0 100644 --- a/cli/cmd/tap.go +++ b/cli/cmd/tap.go @@ -10,11 +10,12 @@ import ( ) type MizuTapOptions struct { - GuiPort uint16 - Namespace string - KubeConfigPath string - MizuImage string - MizuPodPort uint16 + GuiPort uint16 + Namespace string + KubeConfigPath string + MizuImage string + MizuPodPort uint16 + PlainTextFilterRegexes []string } @@ -50,4 +51,5 @@ func init() { 
tapCmd.Flags().StringVarP(&mizuTapOptions.KubeConfigPath, "kube-config", "k", "", "Path to kube-config file") tapCmd.Flags().StringVarP(&mizuTapOptions.MizuImage, "mizu-image", "", fmt.Sprintf("gcr.io/up9-docker-hub/mizu/%s:latest", mizu.Branch), "Custom image for mizu collector") tapCmd.Flags().Uint16VarP(&mizuTapOptions.MizuPodPort, "mizu-port", "", 8899, "Port which mizu cli will attempt to forward from the mizu collector pod") + tapCmd.Flags().StringArrayVarP(&mizuTapOptions.PlainTextFilterRegexes, "text-value-filter-regex", "", nil, "List of regex expressions that are used to filter matching values from text/plain http bodies") }