kubeshark/agent/pkg/oas/gibberish.go
2022-01-13 09:34:55 +02:00

109 lines
2.2 KiB
Go

package oas
import (
"regexp"
"strings"
"unicode"
)
var (
patBase64 = regexp.MustCompile(`^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$`)
patUuid4 = regexp.MustCompile(`(?i)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
patEmail = regexp.MustCompile(`^\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$`)
patHexLower = regexp.MustCompile(`(0x)?[0-9a-f]{6,}`)
patHexUpper = regexp.MustCompile(`(0x)?[0-9A-F]{6,}`)
)
func IsGibberish(str string) bool {
if patBase64.MatchString(str) && len(str) > 32 {
return true
}
if patUuid4.MatchString(str) {
return true
}
if patEmail.MatchString(str) {
return true
}
if patHexLower.MatchString(str) || patHexUpper.MatchString(str) {
return true
}
noise := noiseLevel(str)
if noise >= 0.2 {
return true
}
return false
}
func noiseLevel(str string) (score float64) {
// opinionated algo of certain char pairs marking the non-human strings
prev := *new(rune)
cnt := 0.0
for _, char := range str {
cnt += 1
if prev > 0 {
switch {
// continued class of upper/lower/digit adds no noise
case unicode.IsUpper(prev) && unicode.IsUpper(char):
case unicode.IsLower(prev) && unicode.IsLower(char):
case unicode.IsDigit(prev) && unicode.IsDigit(char):
// upper =>
case unicode.IsUpper(prev) && unicode.IsLower(char):
score += 0.25
case unicode.IsUpper(prev) && unicode.IsDigit(char):
score += 0.25
// lower =>
case unicode.IsLower(prev) && unicode.IsUpper(char):
score += 0.75
case unicode.IsLower(prev) && unicode.IsDigit(char):
score += 0.25
// digit =>
case unicode.IsDigit(prev) && unicode.IsUpper(char):
score += 0.75
case unicode.IsDigit(prev) && unicode.IsLower(char):
score += 0.75
// the rest is 100% noise
default:
score += 1
}
}
prev = char
}
score /= cnt // weigh it
return score
}
func IsVersionString(component string) bool {
if component == "" {
return false
}
hasV := false
if strings.HasPrefix(component, "v") {
component = component[1:]
hasV = true
}
for _, c := range component {
if string(c) != "." && !unicode.IsDigit(c) {
return false
}
}
if !hasV && strings.Contains(component, ".") {
return false
}
return true
}