Trim the Domain Name from cached image references

This commit removes the Domain Name, if any, from the cached image
reference before computing the image fingerprint. This way the same
image, if stored in some oter mirror, is still seen as the same one.

Fixes #158
This commit is contained in:
David Cassany
2021-01-29 10:50:32 +01:00
parent 9f73a334b3
commit 18e9ce4557
59 changed files with 425 additions and 7802 deletions

View File

@@ -38,7 +38,7 @@ const (
URLPort string = `(:(\d{1,5}))`
URLIP string = `([1-9]\d?|1\d\d|2[01]\d|22[0-3]|24\d|25[0-5])(\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])){2}(?:\.([0-9]\d?|1\d\d|2[0-4]\d|25[0-5]))`
URLSubdomain string = `((www\.)|([a-zA-Z0-9]+([-_\.]?[a-zA-Z0-9])*[a-zA-Z0-9]\.[a-zA-Z0-9]+))`
URL string = `^` + URLSchema + `?` + URLUsername + `?` + `((` + URLIP + `|(\[` + IP + `\])|(([a-zA-Z0-9]([a-zA-Z0-9-_]+)?[a-zA-Z0-9]([-\.][a-zA-Z0-9]+)*)|(` + URLSubdomain + `?))?(([a-zA-Z\x{00a1}-\x{ffff}0-9]+-?-?)*[a-zA-Z\x{00a1}-\x{ffff}0-9]+)(?:\.([a-zA-Z\x{00a1}-\x{ffff}]{1,}))?))\.?` + URLPort + `?` + URLPath + `?$`
URL = `^` + URLSchema + `?` + URLUsername + `?` + `((` + URLIP + `|(\[` + IP + `\])|(([a-zA-Z0-9]([a-zA-Z0-9-_]+)?[a-zA-Z0-9]([-\.][a-zA-Z0-9]+)*)|(` + URLSubdomain + `?))?(([a-zA-Z\x{00a1}-\x{ffff}0-9]+-?-?)*[a-zA-Z\x{00a1}-\x{ffff}0-9]+)(?:\.([a-zA-Z\x{00a1}-\x{ffff}]{1,}))?))\.?` + URLPort + `?` + URLPath + `?$`
SSN string = `^\d{3}[- ]?\d{2}[- ]?\d{4}$`
WinPath string = `^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$`
UnixPath string = `^(/[^/\x00]*)+/?$`
@@ -49,6 +49,7 @@ const (
hasWhitespace string = ".*[[:space:]]"
hasWhitespaceOnly string = "^[[:space:]]+$"
IMEI string = "^[0-9a-f]{14}$|^\\d{15}$|^\\d{18}$"
IMSI string = "^\\d{14,15}$"
)
// Used by IsFilePath func
@@ -102,4 +103,5 @@ var (
rxHasWhitespace = regexp.MustCompile(hasWhitespace)
rxHasWhitespaceOnly = regexp.MustCompile(hasWhitespaceOnly)
rxIMEI = regexp.MustCompile(IMEI)
rxIMSI = regexp.MustCompile(IMSI)
)