mirror of
https://github.com/kubeshark/kubeshark.git
synced 2025-07-04 03:48:58 +00:00
OAS: Improve URL clustering (#711)
* Evidence of problems * Improving it * Progressing * Progress * Fixes * progress * log is not right * Fixing thresholds * Almost there * Improvements * Move some false negatives into TODO * cosmetics * linter (I disagree) * linter (I disagree) * fix test
This commit is contained in:
parent
c37f6478f3
commit
a2118b869e
@ -1,22 +1,26 @@
|
|||||||
package oas
|
package oas
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"math"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
patBase64 = regexp.MustCompile(`^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$`)
|
|
||||||
patUuid4 = regexp.MustCompile(`(?i)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
|
patUuid4 = regexp.MustCompile(`(?i)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}`)
|
||||||
patEmail = regexp.MustCompile(`^\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$`)
|
patEmail = regexp.MustCompile(`^\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$`)
|
||||||
patHexLower = regexp.MustCompile(`(0x)?[0-9a-f]{6,}`)
|
patLongNum = regexp.MustCompile(`^\d{3,}$`)
|
||||||
patHexUpper = regexp.MustCompile(`(0x)?[0-9A-F]{6,}`)
|
patLongNumB = regexp.MustCompile(`[^\d]\d{3,}`)
|
||||||
patLongNum = regexp.MustCompile(`\d{6,}`)
|
patLongNumA = regexp.MustCompile(`\d{3,}[^\d]`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func IsGibberish(str string) bool {
|
func IsGibberish(str string) bool {
|
||||||
if patBase64.MatchString(str) && len(str) > 32 {
|
if IsVersionString(str) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if patEmail.MatchString(str) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -24,16 +28,44 @@ func IsGibberish(str string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if patEmail.MatchString(str) {
|
if patLongNum.MatchString(str) || patLongNumB.MatchString(str) || patLongNumA.MatchString(str) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if patHexLower.MatchString(str) || patHexUpper.MatchString(str) || patLongNum.MatchString(str) {
|
//alNum := cleanStr(str, isAlNumRune)
|
||||||
return true
|
//alpha := cleanStr(str, isAlphaRune)
|
||||||
|
// noiseAll := isNoisy(alNum)
|
||||||
|
//triAll := isTrigramBad(strings.ToLower(alpha))
|
||||||
|
// _ = noiseAll
|
||||||
|
|
||||||
|
isNotAlNum := func(r rune) bool { return !isAlNumRune(r) }
|
||||||
|
chunks := strings.FieldsFunc(str, isNotAlNum)
|
||||||
|
noisyLen := 0
|
||||||
|
alnumLen := 0
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
alnumLen += len(chunk)
|
||||||
|
noise := isNoisy(chunk)
|
||||||
|
tri := isTrigramBad(strings.ToLower(chunk))
|
||||||
|
if noise || tri {
|
||||||
|
noisyLen += len(chunk)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
noise := noiseLevel(str)
|
return float64(noisyLen) > 0
|
||||||
return noise >= 0.2
|
|
||||||
|
//if float64(noisyLen) > 0 {
|
||||||
|
// return true
|
||||||
|
//}
|
||||||
|
|
||||||
|
//if len(chunks) > 0 && float64(noisyLen) >= float64(alnumLen)/3.0 {
|
||||||
|
// return true
|
||||||
|
//}
|
||||||
|
|
||||||
|
//if triAll {
|
||||||
|
//return true
|
||||||
|
//}
|
||||||
|
|
||||||
|
// return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func noiseLevel(str string) (score float64) {
|
func noiseLevel(str string) (score float64) {
|
||||||
@ -51,21 +83,21 @@ func noiseLevel(str string) (score float64) {
|
|||||||
|
|
||||||
// upper =>
|
// upper =>
|
||||||
case unicode.IsUpper(prev) && unicode.IsLower(char):
|
case unicode.IsUpper(prev) && unicode.IsLower(char):
|
||||||
score += 0.25
|
score += 0.10
|
||||||
case unicode.IsUpper(prev) && unicode.IsDigit(char):
|
case unicode.IsUpper(prev) && unicode.IsDigit(char):
|
||||||
score += 0.25
|
score += 0.5
|
||||||
|
|
||||||
// lower =>
|
// lower =>
|
||||||
case unicode.IsLower(prev) && unicode.IsUpper(char):
|
case unicode.IsLower(prev) && unicode.IsUpper(char):
|
||||||
score += 0.75
|
score += 0.75
|
||||||
case unicode.IsLower(prev) && unicode.IsDigit(char):
|
case unicode.IsLower(prev) && unicode.IsDigit(char):
|
||||||
score += 0.25
|
score += 0.5
|
||||||
|
|
||||||
// digit =>
|
// digit =>
|
||||||
case unicode.IsDigit(prev) && unicode.IsUpper(char):
|
case unicode.IsDigit(prev) && unicode.IsUpper(char):
|
||||||
score += 0.75
|
score += 0.75
|
||||||
case unicode.IsDigit(prev) && unicode.IsLower(char):
|
case unicode.IsDigit(prev) && unicode.IsLower(char):
|
||||||
score += 0.75
|
score += 1.0
|
||||||
|
|
||||||
// the rest is 100% noise
|
// the rest is 100% noise
|
||||||
default:
|
default:
|
||||||
@ -75,8 +107,6 @@ func noiseLevel(str string) (score float64) {
|
|||||||
prev = char
|
prev = char
|
||||||
}
|
}
|
||||||
|
|
||||||
score /= cnt // weigh it
|
|
||||||
|
|
||||||
return score
|
return score
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,9 +127,59 @@ func IsVersionString(component string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !hasV && strings.Contains(component, ".") {
|
if !hasV && !strings.Contains(component, ".") {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func trigramScore(str string) (float64, int) {
|
||||||
|
tgScore := 0.0
|
||||||
|
trigrams := ngrams(str, 3)
|
||||||
|
if len(trigrams) > 0 {
|
||||||
|
for _, trigram := range trigrams {
|
||||||
|
score, found := corpus_trigrams[trigram]
|
||||||
|
if found {
|
||||||
|
tgScore += score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tgScore, len(trigrams)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isTrigramBad(s string) bool {
|
||||||
|
tgScore, cnt := trigramScore(s)
|
||||||
|
|
||||||
|
if cnt > 0 {
|
||||||
|
val := math.Sqrt(tgScore) / float64(cnt)
|
||||||
|
val2 := tgScore / float64(cnt)
|
||||||
|
threshold := 0.005
|
||||||
|
bad := val < threshold
|
||||||
|
threshold2 := math.Log(float64(cnt)-2) * 0.1
|
||||||
|
bad2 := val2 < threshold2
|
||||||
|
return bad && bad2 // TODO: improve this logic to be clearer
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func isNoisy(s string) bool {
|
||||||
|
noise := noiseLevel(s)
|
||||||
|
|
||||||
|
if len(s) > 0 {
|
||||||
|
val := (noise * noise) / float64(len(s))
|
||||||
|
threshold := 0.6
|
||||||
|
bad := val > threshold
|
||||||
|
return bad
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ngrams returns every contiguous n-byte window of s, in order of appearance.
//
// Windows are taken over bytes, not runes, so a multi-byte UTF-8 character
// may be split across windows. When n is not positive, or s is shorter than
// n bytes, there are no windows and an empty (non-nil) slice is returned.
func ngrams(s string, n int) []string {
	count := len(s) - n + 1
	// Guard n <= 0 explicitly: a negative width would otherwise slice out of
	// bounds and panic, and a zero width only produces empty strings.
	if n <= 0 || count <= 0 {
		return []string{}
	}

	// The number of windows is known up front, so allocate exactly once.
	result := make([]string, 0, count)
	for i := 0; i < count; i++ {
		result = append(result, s[i:i+n])
	}
	return result
}
|
||||||
|
@ -1,17 +1,95 @@
|
|||||||
package oas
|
package oas
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
func TestNegative(t *testing.T) {
|
func TestNegative(t *testing.T) {
|
||||||
cases := []string{
|
cases := []string{
|
||||||
"",
|
"",
|
||||||
|
"{}",
|
||||||
|
"0.0.29",
|
||||||
|
"0.1",
|
||||||
|
"1.0",
|
||||||
|
"1.0.0",
|
||||||
|
"2.1.73",
|
||||||
|
"abTestV2",
|
||||||
|
"actionText,setName,setAttribute,save,ignore,onEnd,getContext,end,get",
|
||||||
|
"AddUserGroupLink",
|
||||||
|
"advert-management.adBlockerMessage.html",
|
||||||
|
"agents.author.1.json",
|
||||||
|
"animated-gif",
|
||||||
"b", // can be valid hexadecimal
|
"b", // can be valid hexadecimal
|
||||||
"GetUniversalVariableUser",
|
"big-danger-coronavirus-panic-greater-crisis",
|
||||||
|
"breakout-box",
|
||||||
"callback",
|
"callback",
|
||||||
"runs",
|
"core.algorithm_execution.view",
|
||||||
"tcfv2",
|
"core.devices.view",
|
||||||
"StartUpCheckout",
|
"data.json",
|
||||||
|
"dialog.overlay.infinity.json",
|
||||||
|
"domain-input",
|
||||||
|
"embeddable", // lul, it's a valid HEX!
|
||||||
|
"embeddable_blip",
|
||||||
|
"E PLURIBUS UNUM",
|
||||||
|
"etc",
|
||||||
|
"eu-central-1a",
|
||||||
|
"fcgi-bin",
|
||||||
|
"footer.include.html",
|
||||||
|
"fullHashes:find",
|
||||||
|
"generate-feed",
|
||||||
|
"GetAds",
|
||||||
"GetCart",
|
"GetCart",
|
||||||
|
"GetUniversalVariableUser",
|
||||||
|
"github-audit-exports",
|
||||||
|
"g.js",
|
||||||
|
"g.pixel",
|
||||||
|
".html",
|
||||||
|
"Hugo Michiels",
|
||||||
|
"image.sbix",
|
||||||
|
"index.html",
|
||||||
|
"iPad",
|
||||||
|
"Joanna Mazewski",
|
||||||
|
"LibGit2Sharp",
|
||||||
|
"Michael_Vaughan1.png",
|
||||||
|
"New RSS feed has been generated",
|
||||||
|
"nick-clegg",
|
||||||
|
"opt-out",
|
||||||
|
"pixel_details.html",
|
||||||
|
"post.json",
|
||||||
|
"profile-method-info",
|
||||||
|
"project-id",
|
||||||
|
"publisha.1.json",
|
||||||
|
"publish_and_moderate",
|
||||||
|
"Ronna McDaniel",
|
||||||
|
"rtb-h",
|
||||||
|
"runs",
|
||||||
|
"sign-up",
|
||||||
|
"some-uuid-maybe",
|
||||||
|
"stable-4.0-version.json",
|
||||||
|
"StartUpCheckout",
|
||||||
|
"Steve Flunk",
|
||||||
|
"sync_a9",
|
||||||
|
"Ted Cruz",
|
||||||
|
"test.png",
|
||||||
|
"token",
|
||||||
|
"ToList",
|
||||||
|
"v2.1.3",
|
||||||
|
"VersionCheck.php",
|
||||||
|
"v Rusiji",
|
||||||
|
"Walnut St",
|
||||||
|
"web_widget",
|
||||||
|
"zoom_in.cur",
|
||||||
|
"xray",
|
||||||
|
"web",
|
||||||
|
"vipbets1",
|
||||||
|
"trcc",
|
||||||
|
"fbpixel",
|
||||||
|
|
||||||
|
// TODO below
|
||||||
|
// "tcfv2",
|
||||||
|
// "Matt-cartoon-255x206px-small.png",
|
||||||
|
// "TheTelegraph_portal_white-320-small.png",
|
||||||
|
// "testdata-10kB.js",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, str := range cases {
|
for _, str := range cases {
|
||||||
@ -23,44 +101,92 @@ func TestNegative(t *testing.T) {
|
|||||||
|
|
||||||
func TestPositive(t *testing.T) {
|
func TestPositive(t *testing.T) {
|
||||||
cases := []string{
|
cases := []string{
|
||||||
"e21f7112-3d3b-4632-9da3-a4af2e0e9166",
|
|
||||||
"952bea17-3776-11ea-9341-42010a84012a",
|
|
||||||
"456795af-b48f-4a8d-9b37-3e932622c2f0",
|
|
||||||
"0a0d0174-b338-4520-a1c3-24f7e3d5ec50.html",
|
"0a0d0174-b338-4520-a1c3-24f7e3d5ec50.html",
|
||||||
"6120c057c7a97b03f6986f1b",
|
"1024807212418223",
|
||||||
|
"11ca096cbc224a67360493d44a9903",
|
||||||
|
"1553183382779",
|
||||||
|
"1554507871",
|
||||||
|
"19180481",
|
||||||
|
"203ef0f713abcebd8d62c35c0e3f12f87d71e5e4",
|
||||||
|
"456795af-b48f-4a8d-9b37-3e932622c2f0",
|
||||||
|
"601a2bdcc5b69137248ddbbf",
|
||||||
|
"60fe9aaeaefe2400012df94f",
|
||||||
"610bc3fd5a77a7fa25033fb0",
|
"610bc3fd5a77a7fa25033fb0",
|
||||||
"610bd0315a77a7fa25034368",
|
"610bd0315a77a7fa25034368",
|
||||||
"610bd0315a77a7fa25034368zh",
|
"610bd0315a77a7fa25034368zh",
|
||||||
|
"6120c057c7a97b03f6986f1b",
|
||||||
"710a462e",
|
"710a462e",
|
||||||
"1554507871",
|
|
||||||
"qwerqwerasdfqwer@protonmai.com",
|
|
||||||
"john.dow.1981@protonmail.com",
|
|
||||||
"ci12NC01YzkyNTEzYzllMDRhLTAtYy5tb25pdG9yaW5nLmpzb24=", // long base64
|
|
||||||
"11ca096cbc224a67360493d44a9903",
|
|
||||||
"c738338322370b47a79251f7510dd", // prefixed hex
|
|
||||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0FgAFKkOhcHdFWzL1ZYggtwBgiB3LSoele9o3ZqFh7iCBhHbVLAnMuJ0HF8hEw7UKecE6wd-MBXgeRMdubGydhAMZSmuUjRpqplML40bmrb8VjJKNZswD1Cg",
|
|
||||||
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0rG4sjLa_KVLlww5WEDJ__30J15en-K_6Y68jb_rU93e2TFY6fb0MYiQ1UrLNMQufqODHZUl39Lo6cXAOVOThjAMZSmuVH7n85JOYSCgzpvowMAVueGG0Xxg",
|
|
||||||
"203ef0f713abcebd8d62c35c0e3f12f87d71e5e4",
|
|
||||||
"MDEyOk9yZ2FuaXphdGlvbjU3MzI0Nzk1",
|
|
||||||
"730970532670-compute@developer.gserviceaccount.com",
|
"730970532670-compute@developer.gserviceaccount.com",
|
||||||
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4", // ?
|
"819db2242a648b305395537022523d65",
|
||||||
|
"952bea17-3776-11ea-9341-42010a84012a",
|
||||||
|
"a3226860758.html",
|
||||||
"AAAA028295945",
|
"AAAA028295945",
|
||||||
"sp_ANQXRpqH_urn$3Auri$3Abase64$3A6698b0a3-97ad-52ce-8fc3-17d99e37a726",
|
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4",
|
||||||
|
"arn-aws-ecs-eu-west-2-396248696294-cluster-london-01-ECSCluster-27iuIYva8nO4", // ?
|
||||||
|
"bnjksfd897345nl098asd53412kl98",
|
||||||
|
"c738338322370b47a79251f7510dd", // prefixed hex
|
||||||
|
"ci12NC01YzkyNTEzYzllMDRhLTAtYy5tb25pdG9yaW5nLmpzb24=", // long base64
|
||||||
|
"css/login.0f48c49a34eb53ea4623.min.css",
|
||||||
|
"d_fLLxlhzDilixeBEimaZ5",
|
||||||
|
"e21f7112-3d3b-4632-9da3-a4af2e0e9166",
|
||||||
|
"e8782afc112720300c049ff124434b79",
|
||||||
|
"fb6cjraf9cejut2a",
|
||||||
|
"i-0236530c66ed02200",
|
||||||
|
"JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
||||||
|
"john.dow.1981@protonmail.com",
|
||||||
|
"MDEyOk9yZ2FuaXphdGlvbjU3MzI0Nzk1",
|
||||||
|
"MNUTGVFMGLEMFTBH0XSE5E02F6J2DS",
|
||||||
"n63nd45qsj",
|
"n63nd45qsj",
|
||||||
"n9z9QGNiz",
|
"n9z9QGNiz",
|
||||||
|
"NC4WTmcy",
|
||||||
"proxy.3d2100fd7107262ecb55ce6847f01fa5.html",
|
"proxy.3d2100fd7107262ecb55ce6847f01fa5.html",
|
||||||
|
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0FgAFKkOhcHdFWzL1ZYggtwBgiB3LSoele9o3ZqFh7iCBhHbVLAnMuJ0HF8hEw7UKecE6wd-MBXgeRMdubGydhAMZSmuUjRpqplML40bmrb8VjJKNZswD1Cg",
|
||||||
|
"QgAAAC6zw0qH2DJtnXe8Z7rUJP0rG4sjLa_KVLlww5WEDJ__30J15en-K_6Y68jb_rU93e2TFY6fb0MYiQ1UrLNMQufqODHZUl39Lo6cXAOVOThjAMZSmuVH7n85JOYSCgzpvowMAVueGG0Xxg",
|
||||||
|
"qwerqwerasdfqwer@protonmai.com",
|
||||||
"r-ext-5579e00a95c90",
|
"r-ext-5579e00a95c90",
|
||||||
"r-ext-5579e8b12f11e",
|
"r-ext-5579e8b12f11e",
|
||||||
"r-v4-5c92513c9e04a",
|
"r-v4-5c92513c9e04a",
|
||||||
"r-v4-5c92513c9e04a-0-c.monitoring.json",
|
"r-v4-5c92513c9e04a-0-c.monitoring.json",
|
||||||
"segments-1563566437171.639994",
|
"segments-1563566437171.639994",
|
||||||
|
"sp_ANQXRpqH_urn$3Auri$3Abase64$3A6698b0a3-97ad-52ce-8fc3-17d99e37a726",
|
||||||
|
"sp_dxJTfx11_576742227280287872",
|
||||||
|
"sp_NnUPB5wj_601a2bdcc5b69137248ddbbf",
|
||||||
|
"sp_NxITuoE4_premiumchron-article-14302157_c_ryGQBs_r_yIWvwP",
|
||||||
"t_52d94268-8810-4a7e-ba87-ffd657a6752f",
|
"t_52d94268-8810-4a7e-ba87-ffd657a6752f",
|
||||||
"timeouts-1563566437171.639994",
|
"timeouts-1563566437171.639994",
|
||||||
|
"u_YPF3GsGKMo02",
|
||||||
|
|
||||||
|
"0000000000 65535 f",
|
||||||
|
"0000000178 00000 n",
|
||||||
|
"0-10000",
|
||||||
|
"01526123,",
|
||||||
|
"0,18168,183955,3,4,1151616,5663,731,223,5104,207,3204,10,1051,175,364,1435,4,60,576,241,383,246,5,1102",
|
||||||
|
"05/10/2020",
|
||||||
|
"14336456724940333",
|
||||||
|
"fb6cjraf9cejut2a",
|
||||||
|
"JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
// "fb6cjraf9cejut2a",
|
/*
|
||||||
// "Fxvd1timk", // questionable
|
"0,20",
|
||||||
// "JEHJW4BKVFDRTMTUQLHKK5WVAU",
|
"0.001",
|
||||||
|
"YISAtiX1",
|
||||||
|
"Fxvd1timk", // questionable
|
||||||
|
"B4GCSkORAJs",
|
||||||
|
"D_4EDAqenHQ",
|
||||||
|
"EICJp29EGOk",
|
||||||
|
"Fxvd1timk",
|
||||||
|
"GTqMZELYfQQ",
|
||||||
|
"GZPTpLPEGmwHGWPC",
|
||||||
|
"_HChnE9NDPY",
|
||||||
|
"NwhjgIWHgGg",
|
||||||
|
"production/tsbqksph4xswqjexfbec",
|
||||||
|
"p/u/bguhrxupr23mw3nwxcrw",
|
||||||
|
"nRSNapbJZnc",
|
||||||
|
"zgfpbtolciznub5egzxk",
|
||||||
|
"zufnu7aimadua9wrgwwo",
|
||||||
|
"zznto1jzch9yjsbtbrul",
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, str := range cases {
|
for _, str := range cases {
|
||||||
@ -69,3 +195,18 @@ func TestPositive(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestVersionStrings(t *testing.T) {
|
||||||
|
cases := []string{
|
||||||
|
"1.0",
|
||||||
|
"1.0.0",
|
||||||
|
"v2.1.3",
|
||||||
|
"2.1.73",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, str := range cases {
|
||||||
|
if !IsVersionString(str) {
|
||||||
|
t.Errorf("Mistakenly false: %s", str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -12,9 +12,9 @@ var ignoredHeaders = []string{
|
|||||||
"authorization", "cache-control", "connection", "content-encoding", "content-length", "content-type", "cookie",
|
"authorization", "cache-control", "connection", "content-encoding", "content-length", "content-type", "cookie",
|
||||||
"date", "dnt", "expect", "forwarded", "from", "front-end-https", "host", "http2-settings",
|
"date", "dnt", "expect", "forwarded", "from", "front-end-https", "host", "http2-settings",
|
||||||
"max-forwards", "origin", "pragma", "proxy-authorization", "proxy-connection", "range", "referer",
|
"max-forwards", "origin", "pragma", "proxy-authorization", "proxy-connection", "range", "referer",
|
||||||
"save-data", "te", "trailer", "transfer-encoding", "upgrade", "upgrade-insecure-requests",
|
"save-data", "te", "trailer", "transfer-encoding", "upgrade", "upgrade-insecure-requests", "x-download-options",
|
||||||
"server", "user-agent", "via", "warning", "strict-transport-security",
|
"server", "user-agent", "via", "warning", "strict-transport-security", "x-permitted-cross-domain-policies",
|
||||||
"x-att-deviceid", "x-correlation-id", "correlation-id", "x-client-data",
|
"x-att-deviceid", "x-correlation-id", "correlation-id", "x-client-data", "x-dns-prefetch-control",
|
||||||
"x-http-method-override", "x-real-ip", "x-request-id", "x-request-start", "x-requested-with", "x-uidh",
|
"x-http-method-override", "x-real-ip", "x-request-id", "x-request-start", "x-requested-with", "x-uidh",
|
||||||
"x-same-domain", "x-content-type-options", "x-frame-options", "x-xss-protection",
|
"x-same-domain", "x-content-type-options", "x-frame-options", "x-xss-protection",
|
||||||
"x-wap-profile", "x-scheme", "status", "x-cache", "x-application-context", "retry-after",
|
"x-wap-profile", "x-scheme", "status", "x-cache", "x-application-context", "retry-after",
|
||||||
@ -31,7 +31,7 @@ var ignoredHeaderPrefixes = []string{
|
|||||||
":", "accept-", "access-control-", "if-", "sec-", "grpc-",
|
":", "accept-", "access-control-", "if-", "sec-", "grpc-",
|
||||||
"x-forwarded-", "x-original-", "cf-",
|
"x-forwarded-", "x-original-", "cf-",
|
||||||
"x-up9-", "x-envoy-", "x-hasura-", "x-b3-", "x-datadog-", "x-envoy-", "x-amz-", "x-newrelic-", "x-prometheus-",
|
"x-up9-", "x-envoy-", "x-hasura-", "x-b3-", "x-datadog-", "x-envoy-", "x-amz-", "x-newrelic-", "x-prometheus-",
|
||||||
"x-akamai-", "x-spotim-", "x-amzn-", "x-ratelimit-",
|
"x-akamai-", "x-spotim-", "x-amzn-", "x-ratelimit-", "x-goog-",
|
||||||
}
|
}
|
||||||
|
|
||||||
func isCtypeIgnored(ctype string) bool {
|
func isCtypeIgnored(ctype string) bool {
|
||||||
|
@ -160,22 +160,6 @@ func suggestTags(oas *openapi.OpenAPI) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSimilarPrefix(strs []string) string {
|
|
||||||
chunked := make([][]string, 0)
|
|
||||||
for _, item := range strs {
|
|
||||||
chunked = append(chunked, strings.Split(item, "/"))
|
|
||||||
}
|
|
||||||
|
|
||||||
cmn := longestCommonXfix(chunked, true)
|
|
||||||
res := make([]string, 0)
|
|
||||||
for _, chunk := range cmn {
|
|
||||||
if chunk != "api" && !IsVersionString(chunk) && !strings.HasPrefix(chunk, "{") {
|
|
||||||
res = append(res, chunk)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return strings.Join(res[1:], ".")
|
|
||||||
}
|
|
||||||
|
|
||||||
func deleteFromSlice(s []string, val string) []string {
|
func deleteFromSlice(s []string, val string) []string {
|
||||||
temp := s[:0]
|
temp := s[:0]
|
||||||
for _, x := range s {
|
for _, x := range s {
|
||||||
|
@ -62,7 +62,6 @@ func (n *Node) getOrSet(path NodePath, existingPathObj *openapi.PathObj) (node *
|
|||||||
if paramObj != nil {
|
if paramObj != nil {
|
||||||
node.pathParam = paramObj
|
node.pathParam = paramObj
|
||||||
} else if chunkIsGibberish {
|
} else if chunkIsGibberish {
|
||||||
|
|
||||||
newParam := n.createParam()
|
newParam := n.createParam()
|
||||||
node.pathParam = newParam
|
node.pathParam = newParam
|
||||||
} else {
|
} else {
|
||||||
@ -100,11 +99,14 @@ func (n *Node) createParam() *openapi.ParameterObj {
|
|||||||
} else if strings.HasSuffix(*n.constant, "s") && len(*n.constant) > 3 {
|
} else if strings.HasSuffix(*n.constant, "s") && len(*n.constant) > 3 {
|
||||||
name = *n.constant
|
name = *n.constant
|
||||||
name = name[:len(name)-1] + "Id"
|
name = name[:len(name)-1] + "Id"
|
||||||
} else if isAlpha(*n.constant) {
|
} else {
|
||||||
name = *n.constant + "Id"
|
name = *n.constant + "Id"
|
||||||
}
|
}
|
||||||
|
|
||||||
name = cleanNonAlnum([]byte(name))
|
name = cleanStr(name, isAlNumRune)
|
||||||
|
if !isAlphaRune(rune(name[0])) {
|
||||||
|
name = "_" + name
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
newParam := createSimpleParam(name, "path", "string")
|
newParam := createSimpleParam(name, "path", "string")
|
||||||
|
@ -15,7 +15,7 @@ func TestTree(t *testing.T) {
|
|||||||
{"/", 0, ""},
|
{"/", 0, ""},
|
||||||
{"/v1.0.0/config/launcher/sp_nKNHCzsN/f34efcae-6583-11eb-908a-00b0fcb9d4f6/vendor,init,conversation", 1, "vendor,init,conversation"},
|
{"/v1.0.0/config/launcher/sp_nKNHCzsN/f34efcae-6583-11eb-908a-00b0fcb9d4f6/vendor,init,conversation", 1, "vendor,init,conversation"},
|
||||||
{"/v1.0.0/config/launcher/sp_nKNHCzsN/{f34efcae-6583-11eb-908a-00b0fcb9d4f6}/vendor,init,conversation", 0, "vendor,init,conversation"},
|
{"/v1.0.0/config/launcher/sp_nKNHCzsN/{f34efcae-6583-11eb-908a-00b0fcb9d4f6}/vendor,init,conversation", 0, "vendor,init,conversation"},
|
||||||
{"/getSvgs/size/small/brand/SFLY/layoutId/170943/layoutVersion/1/sizeId/742/surface/0/isLandscape/true/childSkus/%7B%7D", 1, ""},
|
{"/getSvgs/size/small/brand/SFLY/layoutId/170943/layoutVersion/1/sizeId/742/surface/0/isLandscape/true/childSkus/%7B%7D", 1, "{}"},
|
||||||
}
|
}
|
||||||
|
|
||||||
tree := new(Node)
|
tree := new(Node)
|
||||||
|
7707
agent/pkg/oas/trigrams.go
Normal file
7707
agent/pkg/oas/trigrams.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -261,6 +261,22 @@ func longestCommonXfix(strs [][]string, pre bool) []string { // https://github.c
|
|||||||
return xfix
|
return xfix
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getSimilarPrefix(strs []string) string {
|
||||||
|
chunked := make([][]string, 0)
|
||||||
|
for _, item := range strs {
|
||||||
|
chunked = append(chunked, strings.Split(item, "/"))
|
||||||
|
}
|
||||||
|
|
||||||
|
cmn := longestCommonXfix(chunked, true)
|
||||||
|
res := make([]string, 0)
|
||||||
|
for _, chunk := range cmn {
|
||||||
|
if chunk != "api" && !IsVersionString(chunk) && !strings.HasPrefix(chunk, "{") {
|
||||||
|
res = append(res, chunk)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.Join(res[1:], ".")
|
||||||
|
}
|
||||||
|
|
||||||
// returns all non-nil ops in PathObj
|
// returns all non-nil ops in PathObj
|
||||||
func getOps(pathObj *openapi.PathObj) []*openapi.Operation {
|
func getOps(pathObj *openapi.PathObj) []*openapi.Operation {
|
||||||
ops := []**openapi.Operation{&pathObj.Get, &pathObj.Patch, &pathObj.Put, &pathObj.Options, &pathObj.Post, &pathObj.Trace, &pathObj.Head, &pathObj.Delete}
|
ops := []**openapi.Operation{&pathObj.Get, &pathObj.Patch, &pathObj.Put, &pathObj.Options, &pathObj.Post, &pathObj.Trace, &pathObj.Head, &pathObj.Delete}
|
||||||
@ -324,13 +340,11 @@ func anyJSON(text string) (anyVal interface{}, isJSON bool) {
|
|||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func cleanNonAlnum(s []byte) string {
|
func cleanStr(str string, criterion func(r rune) bool) string {
|
||||||
|
s := []byte(str)
|
||||||
j := 0
|
j := 0
|
||||||
for _, b := range s {
|
for _, b := range s {
|
||||||
if ('a' <= b && b <= 'z') ||
|
if criterion(rune(b)) {
|
||||||
('A' <= b && b <= 'Z') ||
|
|
||||||
('0' <= b && b <= '9') ||
|
|
||||||
b == ' ' {
|
|
||||||
s[j] = b
|
s[j] = b
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
@ -338,11 +352,21 @@ func cleanNonAlnum(s []byte) string {
|
|||||||
return string(s[:j])
|
return string(s[:j])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
func isAlpha(s string) bool {
|
func isAlpha(s string) bool {
|
||||||
for _, r := range s {
|
for _, r := range s {
|
||||||
if (r < 'a' || r > 'z') && (r < 'A' || r > 'Z') {
|
if isAlphaRune(r) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// isAlphaRune reports whether r is an ASCII letter, i.e. in a-z or A-Z.
// Letters outside ASCII are not considered alphabetic.
func isAlphaRune(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
func isAlNumRune(b rune) bool {
|
||||||
|
return isAlphaRune(b) || ('0' <= b && b <= '9')
|
||||||
|
}
|
||||||
|
@ -33,3 +33,27 @@ func TestAnyJSON(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStrRunes(t *testing.T) {
|
||||||
|
if isAlphaRune('5') {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
if !isAlphaRune('a') {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isAlNumRune('5') {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
if isAlNumRune(' ') {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cleanStr("-abc_567", isAlphaRune) != "abc" {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if cleanStr("-abc_567", isAlNumRune) != "abc567" {
|
||||||
|
t.Logf("Failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user