mirror of
https://github.com/kubeshark/kubeshark.git
synced 2025-07-04 11:58:41 +00:00
OAS: Improve URL clustering (#711)
* Evidence of problems * Improving it * Progressing * Progress * Fixes * progress * log is not right * Fixing thresholds * Almost there * Imptovements * Move some false negatives into TODO * cosmetics * linter (I disagree) * linter (I disagree) * fix test
This commit is contained in:
parent
c37f6478f3
commit
a2118b869e
@ -1,22 +1,26 @@
|
||||
package oas
|
||||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
var (
|
||||
patBase64 = regexp.MustCompile(`^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$`)
|
||||
patUuid4 = regexp.MustCompile(`(?i)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
|
||||
patEmail = regexp.MustCompile(`^\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$`)
|
||||
patHexLower = regexp.MustCompile(`(0x)?[0-9a-f]{6,}`)
|
||||
patHexUpper = regexp.MustCompile(`(0x)?[0-9A-F]{6,}`)
|
||||
patLongNum = regexp.MustCompile(`\d{6,}`)
|
||||
patLongNum = regexp.MustCompile(`^\d{3,}$`)
|
||||
patLongNumB = regexp.MustCompile(`[^\d]\d{3,}`)
|
||||
patLongNumA = regexp.MustCompile(`\d{3,}[^\d]`)
|
||||
)
|
||||
|
||||
func IsGibberish(str string) bool {
|
||||
if patBase64.MatchString(str) && len(str) > 32 {
|
||||
if IsVersionString(str) {
|
||||
return false
|
||||
}
|
||||
|
||||
if patEmail.MatchString(str) {
|
||||
return true
|
||||
}
|
||||
|
||||
@ -24,16 +28,44 @@ func IsGibberish(str string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
if patEmail.MatchString(str) {
|
||||
if patLongNum.MatchString(str) || patLongNumB.MatchString(str) || patLongNumA.MatchString(str) {
|
||||
return true
|
||||
}
|
||||
|
||||
if patHexLower.MatchString(str) || patHexUpper.MatchString(str) || patLongNum.MatchString(str) {
|
||||
return true
|
||||
//alNum := cleanStr(str, isAlNumRune)
|
||||
//alpha := cleanStr(str, isAlphaRune)
|
||||
// noiseAll := isNoisy(alNum)
|
||||
//triAll := isTrigramBad(strings.ToLower(alpha))
|
||||
// _ = noiseAll
|
||||
|
||||
isNotAlNum := func(r rune) bool { return !isAlNumRune(r) }
|
||||
chunks := strings.FieldsFunc(str, isNotAlNum)
|
||||
noisyLen := 0
|
||||
alnumLen := 0
|
||||
for _, chunk := range chunks {
|
||||
alnumLen += len(chunk)
|
||||
noise := isNoisy(chunk)
|
||||
tri := isTrigramBad(strings.ToLower(chunk))
|
||||
if noise || tri {
|
||||
noisyLen += len(chunk)
|
||||
}
|
||||
}
|
||||
|
||||
noise := noiseLevel(str)
|
||||
return noise >= 0.2
|
||||
return float64(noisyLen) > 0
|
||||
|
||||
//if float64(noisyLen) > 0 {
|
||||
// return true
|
||||
//}
|
||||
|
||||
//if len(chunks) > 0 && float64(noisyLen) >= float64(alnumLen)/3.0 {
|
||||
// return true
|
||||
//}
|
||||
|
||||
//if triAll {
|
||||
//return true
|
||||
//}
|
||||
|
||||
// return false
|
||||
}
|
||||
|
||||
func noiseLevel(str string) (score float64) {
|
||||
@ -51,21 +83,21 @@ func noiseLevel(str string) (score float64) {
|
||||
|
||||
// upper =>
|
||||
case unicode.IsUpper(prev) && unicode.IsLower(char):
|
||||
score += 0.25
|
||||
score += 0.10
|
||||
case unicode.IsUpper(prev) && unicode.IsDigit(char):
|
||||
score += 0.25
|
||||
score += 0.5
|
||||
|
||||
// lower =>
|
||||
case unicode.IsLower(prev) && unicode.IsUpper(char):
|
||||
score += 0.75
|
||||
case unicode.IsLower(prev) && unicode.IsDigit(char):
|
||||
score += 0.25
|
||||
score += 0.5
|
||||
|
||||
// digit =>
|
||||
case unicode.IsDigit(prev) && unicode.IsUpper(char):
|
||||
score += 0.75
|
||||
case unicode.IsDigit(prev) && unicode.IsLower(char):
|
||||
score += 0.75
|
||||
score += 1.0
|
||||
|
||||
// the rest is 100% noise
|
||||
default:
|
||||
@ -75,8 +107,6 @@ func noiseLevel(str string) (score float64) {
|
||||
prev = char
|
||||
}
|
||||
|
||||
score /= cnt // weigh it
|
||||
|
||||
return score
|
||||
}
|
||||
|
||||
@ -97,9 +127,59 @@ func IsVersionString(component string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
if !hasV && strings.Contains(component, ".") {
|
||||
if !hasV && !strings.Contains(component, ".") {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func trigramScore(str string) (float64, int) {
|
||||
tgScore := 0.0
|
||||
trigrams := ngrams(str, 3)
|
||||
if len(trigrams) > 0 {
|
||||
for _, trigram := range trigrams {
|
||||
score, found := corpus_trigrams[trigram]
|
||||
if found {
|
||||
tgScore += score
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tgScore, len(trigrams)
|
||||
}
|
||||
|
||||
func isTrigramBad(s string) bool {
|
||||
tgScore, cnt := trigramScore(s)
|
||||
|
||||
if cnt > 0 {
|
||||
val := math.Sqrt(tgScore) / float64(cnt)
|
||||
val2 := tgScore / float64(cnt)
|
||||
threshold := 0.005
|
||||
bad := val < threshold
|
||||
threshold2 := math.Log(float64(cnt)-2) * 0.1
|
||||
bad2 := val2 < threshold2
|
||||
return bad && bad2 // TODO: improve this logic to be clearer
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isNoisy(s string) bool {
|
||||
noise := noiseLevel(s)
|
||||
|
||||
if len(s) > 0 {
|
||||
val := (noise * noise) / float64(len(s))
|
||||
threshold := 0.6
|
||||
bad := val > threshold
|
||||
return bad
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func ngrams(s string, n int) []string {
|
||||
result := make([]string, 0)
|
||||
for i := 0; i < len(s)-n+1; i++ {
|
||||
result = append(result, s[i:i+n])
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
@ -1,17 +1,95 @@
|
||||
package oas
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNegative(t *testing.T) {
|
||||
cases := []string{
|
||||
"",
|
||||
"{}",
|
||||
"0.0.29",
|
||||
"0.1",
|
||||
"1.0",
|
||||
"1.0.0",
|
||||
"2.1.73",
|
||||
"abTestV2",
|
||||
"actionText,setName,setAttribute,save,ignore,onEnd,getContext,end,get",
|
||||
"AddUserGroupLink",
|
||||
"advert-management.adBlockerMessage.html",
|
||||
"agents.author.1.json",
|
||||
"animated-gif",
|
||||
"b", // can be valid hexadecimal
|
||||
"GetUniversalVariableUser",
|
||||
"big-danger-coronavirus-panic-greater-crisis",
|
||||
"breakout-box",
|
||||
"callback",
|
||||
"runs",
|
||||
"tcfv2",
|
||||
"StartUpCheckout",
|
||||
"core.algorithm_execution.view",
|
||||
"core.devices.view",
|
||||
"data.json",
|
||||
"dialog.overlay.infinity.json",
|
||||
"domain-input",
|
||||
"embeddable", // lul, it's a valid HEX!
|
||||
"embeddable_blip",
|
||||
"E PLURIBUS UNUM",
|
||||
"etc",
|
||||
"eu-central-1a",
|
||||
"fcgi-bin",
|
||||
"footer.include.html",
|
||||
"fullHashes:find",
|
||||
"generate-feed",
|
||||
"GetAds",
|
||||
"GetCart",
|
||||
"GetUniversalVariableUser",
|
||||
"github-audit-exports",
|
||||
"g.js",
|
||||
"g.pixel",
|
||||
".html",
|
||||
"Hugo Michiels",
|
||||
"image.sbix",
|
||||
"index.html",
|
||||
"iPad",
|
||||
"Joanna Mazewski",
|
||||
"LibGit2Sharp",
|
||||
"Michael_Vaughan1.png",
|
||||
"New RSS feed has been generated",
|
||||
"nick-clegg",
|
||||
"opt-out",
|
||||
"pixel_details.html",
|
||||
"post.json",
|
||||
"profile-method-info",
|
||||
"project-id",
|
||||
"publisha.1.json",
|
||||
"publish_and_moderate",
|
||||
"Ronna McDaniel",
|
||||
"rtb-h",
|
||||
"runs",
|
||||
"sign-up",
|
||||
"some-uuid-maybe",
|
||||
"stable-4.0-version.json",
|
||||
"StartUpCheckout",
|
||||
"Steve Flunk",
|
||||
"sync_a9",
|
||||
"Ted Cruz",
|
||||
"test.png",
|
||||
"token",
|
||||
"ToList",
|
||||
"v2.1.3",
|
||||
"VersionCheck.php",
|
||||
"v Rusiji",
|
||||
"Walnut St",
|
||||
"web_widget",
|
||||
"zoom_in.cur",
|
||||
"xray",
|
||||
"web",
|
||||
"vipbets1",
|
||||
"trcc",
|
||||
"fbpixel",
|
||||
|
||||
// TODO below
|
||||
// "tcfv2",
|
||||
// "Matt-cartoon-255x206px-small.png",
|
||||
// "TheTelegraph_portal_white-320-small.png",
|
||||
// "testdata-10kB.js",
|
||||
}
|
||||
|
||||
for _, str := range cases {
|
||||
@ -23,44 +101,92 @@ func TestNegative(t *testing.T) {
|
||||
|
||||
func TestPositive(t *testing.T) {
|
||||
cases := []string{
|
||||
"e21f7112-3d3b-4632-9da3-a4af2e0e9166",
|
||||
"952bea17-3776-11ea-9341-42010a84012a",
|
||||
"456795af-b48f-4a8d-9b37-3e932622c2f0",
|
||||
"0a0d0174-b338-4520-a1c3-24f7e3d5ec50.html",
|
||||
"6120c057c7a97b03f6986f1b",
|
||||
"1024807212418223",
|
||||
"11ca096cbc224a67360493d44a9903",
|
||||
"1553183382779",
|
||||
"1554507871",
|
||||
"19180481",
|
||||
"203ef0f713abcebd8d62c35c0e3f12f87d71e5e4",
|
||||
"456795af-b48f-4a8d-9b37-3e932622c2f0",
|
||||
"601a2bdcc5b69137248ddbbf",
|
||||
"60fe9aaeaefe2400012df94f",
|
||||
"610bc3fd5a77a7fa25033fb0",
|
||||
"610bd0315a77a7fa25034368",
|
||||
"610bd0315a77a7fa25034368zh",
|
||||
"6120c057c7a97b03f6986f1b",
|
||||
"710a462e",
|
||||
"1554507871",
|
||||
"qwerqwerasdfqwer@protonmai.com",
|
||||
"john.dow.1981@protonmail.com",
|
||||
"ci12NC01YzkyNTEzYzllMDRhLTAtYy5tb25pdG9yaW5nLmpzb24=", // long base64
|
||||
"11ca096cbc224a67360493d44a9903",
|
||||
"c738338322370b47a79251f7510dd", // prefixed hex
|
||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0FgAFKkOhcHdFWzL1ZYggtwBgiB3LSoele9o3ZqFh7iCBhHbVLAnMuJ0HF8hEw7UKecE6wd-MBXgeRMdubGydhAMZSmuUjRpqplML40bmrb8VjJKNZswD1Cg",
|
||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0rG4sjLa_KVLlww5WEDJ__30J15en-K_6Y68jb_rU93e2TFY6fb0MYiQ1UrLNMQufqODHZUl39Lo6cXAOVOThjAMZSmuVH7n85JOYSCgzpvowMAVueGG0Xxg",
|
||||
"203ef0f713abcebd8d62c35c0e3f12f87d71e5e4",
|
||||
"MDEyOk9yZ2FuaXphdGlvbjU3MzI0Nzk1",
|
||||
"730970532670-compute@developer.gserviceaccount.com",
|
||||
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4", // ?
|
||||
"819db2242a648b305395537022523d65",
|
||||
"952bea17-3776-11ea-9341-42010a84012a",
|
||||
"a3226860758.html",
|
||||
"AAAA028295945",
|
||||
"sp_ANQXRpqH_urn$3Auri$3Abase64$3A6698b0a3-97ad-52ce-8fc3-17d99e37a726",
|
||||
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4",
|
||||
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4", // ?
|
||||
"bnjksfd897345nl098asd53412kl98",
|
||||
"c738338322370b47a79251f7510dd", // prefixed hex
|
||||
"ci12NC01YzkyNTEzYzllMDRhLTAtYy5tb25pdG9yaW5nLmpzb24=", // long base64
|
||||
"css/login.0f48c49a34eb53ea4623.min.css",
|
||||
"d_fLLxlhzDilixeBEimaZ5",
|
||||
"e21f7112-3d3b-4632-9da3-a4af2e0e9166",
|
||||
"e8782afc112720300c049ff124434b79",
|
||||
"fb6cjraf9cejut2a",
|
||||
"i-0236530c66ed02200",
|
||||
"JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
||||
"john.dow.1981@protonmail.com",
|
||||
"MDEyOk9yZ2FuaXphdGlvbjU3MzI0Nzk1",
|
||||
"MNUTGVFMGLEMFTBH0XSE5E02F6J2DS",
|
||||
"n63nd45qsj",
|
||||
"n9z9QGNiz",
|
||||
"NC4WTmcy",
|
||||
"proxy.3d2100fd7107262ecb55ce6847f01fa5.html",
|
||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0FgAFKkOhcHdFWzL1ZYggtwBgiB3LSoele9o3ZqFh7iCBhHbVLAnMuJ0HF8hEw7UKecE6wd-MBXgeRMdubGydhAMZSmuUjRpqplML40bmrb8VjJKNZswD1Cg",
|
||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0rG4sjLa_KVLlww5WEDJ__30J15en-K_6Y68jb_rU93e2TFY6fb0MYiQ1UrLNMQufqODHZUl39Lo6cXAOVOThjAMZSmuVH7n85JOYSCgzpvowMAVueGG0Xxg",
|
||||
"qwerqwerasdfqwer@protonmai.com",
|
||||
"r-ext-5579e00a95c90",
|
||||
"r-ext-5579e8b12f11e",
|
||||
"r-v4-5c92513c9e04a",
|
||||
"r-v4-5c92513c9e04a-0-c.monitoring.json",
|
||||
"segments-1563566437171.639994",
|
||||
"sp_ANQXRpqH_urn$3Auri$3Abase64$3A6698b0a3-97ad-52ce-8fc3-17d99e37a726",
|
||||
"sp_dxJTfx11_576742227280287872",
|
||||
"sp_NnUPB5wj_601a2bdcc5b69137248ddbbf",
|
||||
"sp_NxITuoE4_premiumchron-article-14302157_c_ryGQBs_r_yIWvwP",
|
||||
"t_52d94268-8810-4a7e-ba87-ffd657a6752f",
|
||||
"timeouts-1563566437171.639994",
|
||||
"u_YPF3GsGKMo02",
|
||||
|
||||
"0000000000 65535 f",
|
||||
"0000000178 00000 n",
|
||||
"0-10000",
|
||||
"01526123,",
|
||||
"0,18168,183955,3,4,1151616,5663,731,223,5104,207,3204,10,1051,175,364,1435,4,60,576,241,383,246,5,1102",
|
||||
"05/10/2020",
|
||||
"14336456724940333",
|
||||
"fb6cjraf9cejut2a",
|
||||
"JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
||||
|
||||
// TODO
|
||||
// "fb6cjraf9cejut2a",
|
||||
// "Fxvd1timk", // questionable
|
||||
// "JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
||||
/*
|
||||
"0,20",
|
||||
"0.001",
|
||||
"YISAtiX1",
|
||||
"Fxvd1timk", // questionable
|
||||
"B4GCSkORAJs",
|
||||
"D_4EDAqenHQ",
|
||||
"EICJp29EGOk",
|
||||
"Fxvd1timk",
|
||||
"GTqMZELYfQQ",
|
||||
"GZPTpLPEGmwHGWPC",
|
||||
"_HChnE9NDPY",
|
||||
"NwhjgIWHgGg",
|
||||
"production/tsbqksph4xswqjexfbec",
|
||||
"p/u/bguhrxupr23mw3nwxcrw",
|
||||
"nRSNapbJZnc",
|
||||
"zgfpbtolciznub5egzxk",
|
||||
"zufnu7aimadua9wrgwwo",
|
||||
"zznto1jzch9yjsbtbrul",
|
||||
*/
|
||||
}
|
||||
|
||||
for _, str := range cases {
|
||||
@ -69,3 +195,18 @@ func TestPositive(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVersionStrings(t *testing.T) {
|
||||
cases := []string{
|
||||
"1.0",
|
||||
"1.0.0",
|
||||
"v2.1.3",
|
||||
"2.1.73",
|
||||
}
|
||||
|
||||
for _, str := range cases {
|
||||
if !IsVersionString(str) {
|
||||
t.Errorf("Mistakenly false: %s", str)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12,9 +12,9 @@ var ignoredHeaders = []string{
|
||||
"authorization", "cache-control", "connection", "content-encoding", "content-length", "content-type", "cookie",
|
||||
"date", "dnt", "expect", "forwarded", "from", "front-end-https", "host", "http2-settings",
|
||||
"max-forwards", "origin", "pragma", "proxy-authorization", "proxy-connection", "range", "referer",
|
||||
"save-data", "te", "trailer", "transfer-encoding", "upgrade", "upgrade-insecure-requests",
|
||||
"server", "user-agent", "via", "warning", "strict-transport-security",
|
||||
"x-att-deviceid", "x-correlation-id", "correlation-id", "x-client-data",
|
||||
"save-data", "te", "trailer", "transfer-encoding", "upgrade", "upgrade-insecure-requests", "x-download-options",
|
||||
"server", "user-agent", "via", "warning", "strict-transport-security", "x-permitted-cross-domain-policies",
|
||||
"x-att-deviceid", "x-correlation-id", "correlation-id", "x-client-data", "x-dns-prefetch-control",
|
||||
"x-http-method-override", "x-real-ip", "x-request-id", "x-request-start", "x-requested-with", "x-uidh",
|
||||
"x-same-domain", "x-content-type-options", "x-frame-options", "x-xss-protection",
|
||||
"x-wap-profile", "x-scheme", "status", "x-cache", "x-application-context", "retry-after",
|
||||
@ -31,7 +31,7 @@ var ignoredHeaderPrefixes = []string{
|
||||
":", "accept-", "access-control-", "if-", "sec-", "grpc-",
|
||||
"x-forwarded-", "x-original-", "cf-",
|
||||
"x-up9-", "x-envoy-", "x-hasura-", "x-b3-", "x-datadog-", "x-envoy-", "x-amz-", "x-newrelic-", "x-prometheus-",
|
||||
"x-akamai-", "x-spotim-", "x-amzn-", "x-ratelimit-",
|
||||
"x-akamai-", "x-spotim-", "x-amzn-", "x-ratelimit-", "x-goog-",
|
||||
}
|
||||
|
||||
func isCtypeIgnored(ctype string) bool {
|
||||
|
@ -160,22 +160,6 @@ func suggestTags(oas *openapi.OpenAPI) {
|
||||
}
|
||||
}
|
||||
|
||||
func getSimilarPrefix(strs []string) string {
|
||||
chunked := make([][]string, 0)
|
||||
for _, item := range strs {
|
||||
chunked = append(chunked, strings.Split(item, "/"))
|
||||
}
|
||||
|
||||
cmn := longestCommonXfix(chunked, true)
|
||||
res := make([]string, 0)
|
||||
for _, chunk := range cmn {
|
||||
if chunk != "api" && !IsVersionString(chunk) && !strings.HasPrefix(chunk, "{") {
|
||||
res = append(res, chunk)
|
||||
}
|
||||
}
|
||||
return strings.Join(res[1:], ".")
|
||||
}
|
||||
|
||||
func deleteFromSlice(s []string, val string) []string {
|
||||
temp := s[:0]
|
||||
for _, x := range s {
|
||||
|
@ -62,7 +62,6 @@ func (n *Node) getOrSet(path NodePath, existingPathObj *openapi.PathObj) (node *
|
||||
if paramObj != nil {
|
||||
node.pathParam = paramObj
|
||||
} else if chunkIsGibberish {
|
||||
|
||||
newParam := n.createParam()
|
||||
node.pathParam = newParam
|
||||
} else {
|
||||
@ -100,11 +99,14 @@ func (n *Node) createParam() *openapi.ParameterObj {
|
||||
} else if strings.HasSuffix(*n.constant, "s") && len(*n.constant) > 3 {
|
||||
name = *n.constant
|
||||
name = name[:len(name)-1] + "Id"
|
||||
} else if isAlpha(*n.constant) {
|
||||
} else {
|
||||
name = *n.constant + "Id"
|
||||
}
|
||||
|
||||
name = cleanNonAlnum([]byte(name))
|
||||
name = cleanStr(name, isAlNumRune)
|
||||
if !isAlphaRune(rune(name[0])) {
|
||||
name = "_" + name
|
||||
}
|
||||
}
|
||||
|
||||
newParam := createSimpleParam(name, "path", "string")
|
||||
|
@ -15,7 +15,7 @@ func TestTree(t *testing.T) {
|
||||
{"/", 0, ""},
|
||||
{"/v1.0.0/config/launcher/sp_nKNHCzsN/f34efcae-6583-11eb-908a-00b0fcb9d4f6/vendor,init,conversation", 1, "vendor,init,conversation"},
|
||||
{"/v1.0.0/config/launcher/sp_nKNHCzsN/{f34efcae-6583-11eb-908a-00b0fcb9d4f6}/vendor,init,conversation", 0, "vendor,init,conversation"},
|
||||
{"/getSvgs/size/small/brand/SFLY/layoutId/170943/layoutVersion/1/sizeId/742/surface/0/isLandscape/true/childSkus/%7B%7D", 1, ""},
|
||||
{"/getSvgs/size/small/brand/SFLY/layoutId/170943/layoutVersion/1/sizeId/742/surface/0/isLandscape/true/childSkus/%7B%7D", 1, "{}"},
|
||||
}
|
||||
|
||||
tree := new(Node)
|
||||
|
7707
agent/pkg/oas/trigrams.go
Normal file
7707
agent/pkg/oas/trigrams.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -261,6 +261,22 @@ func longestCommonXfix(strs [][]string, pre bool) []string { // https://github.c
|
||||
return xfix
|
||||
}
|
||||
|
||||
func getSimilarPrefix(strs []string) string {
|
||||
chunked := make([][]string, 0)
|
||||
for _, item := range strs {
|
||||
chunked = append(chunked, strings.Split(item, "/"))
|
||||
}
|
||||
|
||||
cmn := longestCommonXfix(chunked, true)
|
||||
res := make([]string, 0)
|
||||
for _, chunk := range cmn {
|
||||
if chunk != "api" && !IsVersionString(chunk) && !strings.HasPrefix(chunk, "{") {
|
||||
res = append(res, chunk)
|
||||
}
|
||||
}
|
||||
return strings.Join(res[1:], ".")
|
||||
}
|
||||
|
||||
// returns all non-nil ops in PathObj
|
||||
func getOps(pathObj *openapi.PathObj) []*openapi.Operation {
|
||||
ops := []**openapi.Operation{&pathObj.Get, &pathObj.Patch, &pathObj.Put, &pathObj.Options, &pathObj.Post, &pathObj.Trace, &pathObj.Head, &pathObj.Delete}
|
||||
@ -324,13 +340,11 @@ func anyJSON(text string) (anyVal interface{}, isJSON bool) {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func cleanNonAlnum(s []byte) string {
|
||||
func cleanStr(str string, criterion func(r rune) bool) string {
|
||||
s := []byte(str)
|
||||
j := 0
|
||||
for _, b := range s {
|
||||
if ('a' <= b && b <= 'z') ||
|
||||
('A' <= b && b <= 'Z') ||
|
||||
('0' <= b && b <= '9') ||
|
||||
b == ' ' {
|
||||
if criterion(rune(b)) {
|
||||
s[j] = b
|
||||
j++
|
||||
}
|
||||
@ -338,11 +352,21 @@ func cleanNonAlnum(s []byte) string {
|
||||
return string(s[:j])
|
||||
}
|
||||
|
||||
/*
|
||||
func isAlpha(s string) bool {
|
||||
for _, r := range s {
|
||||
if (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') {
|
||||
if isAlphaRune(r) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
*/
|
||||
|
||||
func isAlphaRune(r rune) bool {
|
||||
return !((r < 'a' || r > 'z') && (r < 'A' || r > 'Z'))
|
||||
}
|
||||
|
||||
func isAlNumRune(b rune) bool {
|
||||
return isAlphaRune(b) || ('0' <= b && b <= '9')
|
||||
}
|
||||
|
@ -33,3 +33,27 @@ func TestAnyJSON(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestStrRunes(t *testing.T) {
|
||||
if isAlphaRune('5') {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
if !isAlphaRune('a') {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
|
||||
if !isAlNumRune('5') {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
if isAlNumRune(' ') {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
|
||||
if cleanStr("-abc_567", isAlphaRune) != "abc" {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
|
||||
if cleanStr("-abc_567", isAlNumRune) != "abc567" {
|
||||
t.Logf("Failed")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user