diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 58ac1e16357..ec321004a85 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -762,6 +762,11 @@ "ImportPath": "github.com/mitchellh/mapstructure", "Rev": "740c764bc6149d3f1806231418adb9f52c11bcbf" }, + { + "ImportPath": "github.com/mvdan/xurls", + "Comment": "v0.8.0-14-g1b768d7", + "Rev": "1b768d7c393abd8e8dda1458385a57becd4b2d4e" + }, { "ImportPath": "github.com/mxk/go-flowrate/flowrate", "Rev": "cca7078d478f8520f85629ad7c68962d31ed7682" diff --git a/Godeps/LICENSES.md b/Godeps/LICENSES.md index 5c9b583f243..6877baf48e4 100644 --- a/Godeps/LICENSES.md +++ b/Godeps/LICENSES.md @@ -61,6 +61,7 @@ github.com/matttproud/golang_protobuf_extensions | Apache-2 github.com/mesos/mesos-go | Apache-2 github.com/miekg/dns | spdxBSD3 github.com/mitchellh/mapstructure | MITname +github.com/mvdan/xurls | spdxBSD3 github.com/mxk/go-flowrate | spdxBSD3 github.com/onsi/ginkgo | spdxMIT github.com/onsi/gomega | spdxMIT diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/.gitignore b/Godeps/_workspace/src/github.com/mvdan/xurls/.gitignore new file mode 100644 index 00000000000..663c8cb54ce --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/.gitignore @@ -0,0 +1,3 @@ +cmd/xurls/xurls +generate/tldsgen/tldsgen +generate/regexgen/regexgen diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/.travis.yml b/Godeps/_workspace/src/github.com/mvdan/xurls/.travis.yml new file mode 100644 index 00000000000..82d19524785 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/.travis.yml @@ -0,0 +1,5 @@ +language: go + +go: + - 1.4.3 + - 1.5.1 diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/LICENSE b/Godeps/_workspace/src/github.com/mvdan/xurls/LICENSE new file mode 100644 index 00000000000..5babf241d07 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2015, Daniel Martí. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/README.md b/Godeps/_workspace/src/github.com/mvdan/xurls/README.md new file mode 100644 index 00000000000..202a0f9511a --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/README.md @@ -0,0 +1,31 @@ +# xurls + +[![GoDoc](https://godoc.org/github.com/mvdan/xurls?status.svg)](https://godoc.org/github.com/mvdan/xurls) [![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls) + +Extract urls from text using regular expressions. 
+ + go get github.com/mvdan/xurls + +```go +import "github.com/mvdan/xurls" + +func main() { + xurls.Relaxed.FindString("Do gophers live in golang.org?") + // "golang.org" + xurls.Relaxed.FindAllString("foo.com is http://foo.com/.", -1) + // []string{"foo.com", "http://foo.com/"} + xurls.Strict.FindAllString("foo.com is http://foo.com/.", -1) + // []string{"http://foo.com/"} +} +``` + +#### cmd/xurls + +Reads text and prints one url per line. + + go get github.com/mvdan/xurls/cmd/xurls + +```shell +$ echo "Do gophers live in http://golang.org?" | xurls +http://golang.org +``` diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/cmd/xurls/main.go b/Godeps/_workspace/src/github.com/mvdan/xurls/cmd/xurls/main.go new file mode 100644 index 00000000000..7fa00f0c826 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/cmd/xurls/main.go @@ -0,0 +1,83 @@ +// Copyright (c) 2015, Daniel Martí +// See LICENSE for licensing information + +package main + +import ( + "bufio" + "flag" + "fmt" + "os" + "regexp" + + "github.com/mvdan/xurls" +) + +var ( + matching = flag.String("m", "", "") + relaxed = flag.Bool("r", false, "") +) + +func init() { + flag.Usage = func() { + p := func(format string, a ...interface{}) { + fmt.Fprintf(os.Stderr, format, a...) 
// scanPath prints every match of re found in the input named by path to
// standard output, one match per line. A path of "-" means standard input.
//
// The input is split into whitespace-separated words and each word is matched
// on its own, so a single match can never span whitespace.
//
// It returns the first error encountered while opening the file, writing a
// match to standard output, or scanning the input.
func scanPath(re *regexp.Regexp, path string) error {
	r := os.Stdin
	if path != "-" {
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		r = f
	}
	scanner := bufio.NewScanner(r)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		for _, match := range re.FindAllString(scanner.Text(), -1) {
			// Report a failed write instead of silently dropping output and
			// carrying on as if the match had been printed.
			if _, err := fmt.Println(match); err != nil {
				return err
			}
		}
	}
	return scanner.Err()
}
}} + {{$key}} = ` + "`" + `{{$value}}` + "`" + `{{end}} +) +`)) + +func writeRegex(tlds []string) error { + allTldsSet := make(map[string]struct{}) + add := func(tld string) { + if _, e := allTldsSet[tld]; e { + log.Fatalf("Duplicate TLD: %s", tld) + } + allTldsSet[tld] = struct{}{} + } + for _, tldlist := range [...][]string{tlds, xurls.PseudoTLDs} { + for _, tld := range tldlist { + add(tld) + asciiTld, err := idna.ToASCII(tld) + if err != nil { + return err + } + if asciiTld != tld { + add(asciiTld) + } + } + } + var allTlds []string + for tld := range allTldsSet { + allTlds = append(allTlds, tld) + } + sort.Strings(allTlds) + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + return regexTmpl.Execute(f, map[string]string{ + "gtld ": `(?i)(` + strings.Join(allTlds, `|`) + `)(?-i)`, + "otherScheme": `(?i)(` + strings.Join(xurls.SchemesNoAuthority, `|`) + `)(?-i):`, + }) +} + +func main() { + log.Printf("Generating %s...", path) + if err := writeRegex(xurls.TLDs); err != nil { + log.Fatalf("Could not write %s: %v", path, err) + } +} diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/generate/tldsgen/main.go b/Godeps/_workspace/src/github.com/mvdan/xurls/generate/tldsgen/main.go new file mode 100644 index 00000000000..4453a038069 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/generate/tldsgen/main.go @@ -0,0 +1,140 @@ +// Copyright (c) 2015, Daniel Martí +// See LICENSE for licensing information + +package main + +import ( + "bufio" + "errors" + "log" + "net/http" + "os" + "regexp" + "sort" + "strings" + "sync" + "text/template" +) + +const path = "tlds.go" + +var tldsTmpl = template.Must(template.New("tlds").Parse(`// Generated by tldsgen + +package xurls + +// TLDs is a sorted list of all public top-level domains. 
// cleanTld lowercases tld and returns "" for entries that start with "xn--",
// so callers can skip them. An empty input yields an empty result.
// NOTE(review): presumably punycode entries are dropped because regexgen
// derives the ASCII form from the Unicode one itself — confirm.
func cleanTld(tld string) string {
	tld = strings.ToLower(tld)
	if strings.HasPrefix(tld, "xn--") {
		return ""
	}
	return tld
}

// fetchFromURL downloads url and, for every line of the response, sends the
// cleaned match of the regular expression pat on tldChan. Lines without a
// usable match are skipped. Transport errors, HTTP statuses >= 400 and
// scanning errors are sent on errChan.
//
// It is meant to run in its own goroutine: the caller must wg.Add(1) before
// starting it, and something must be receiving from tldChan and errChan.
func fetchFromURL(url, pat string) {
	defer wg.Done()
	log.Printf("Fetching %s", url)
	resp, err := http.Get(url)
	if err != nil {
		errChan <- err
		return
	}
	// Registered before the status check so the body is also closed when the
	// server answers with an error status.
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		errChan <- errors.New(resp.Status)
		return
	}
	scanner := bufio.NewScanner(resp.Body)
	re := regexp.MustCompile(pat)
	for scanner.Scan() {
		tld := cleanTld(re.FindString(scanner.Text()))
		if tld == "" {
			continue
		}
		tldChan <- tld
	}
	if err := scanner.Err(); err != nil {
		errChan <- err
	}
}

var (
	// wg tracks the running fetchFromURL goroutines.
	wg sync.WaitGroup
	// tldChan carries every TLD found by the fetchers.
	tldChan = make(chan string)
	// errChan carries every error hit by the fetchers.
	errChan = make(chan error)
)

// collectTLDs receives from tldChan and errChan until done is closed. It
// returns the set of TLDs received and whether any error was received; each
// error is logged as it arrives.
//
// done must only be closed once every sender has finished. Both channels are
// unbuffered, so at that point every value sent has already been received.
func collectTLDs(done <-chan struct{}) (map[string]struct{}, bool) {
	tldSet := make(map[string]struct{})
	anyError := false
	for {
		select {
		case tld := <-tldChan:
			tldSet[tld] = struct{}{}
		case err := <-errChan:
			log.Printf("%v", err)
			anyError = true
		case <-done:
			return tldSet, anyError
		}
	}
}

// tldList fetches the TLD lists concurrently and returns the sorted,
// de-duplicated TLDs together with the source URLs that were queried. It
// returns an error if any fetch reported one.
func tldList() ([]string, []string, error) {
	var urls []string
	fromURL := func(url, pat string) {
		urls = append(urls, url)
		wg.Add(1)
		go fetchFromURL(url, pat)
	}
	fromURL("https://data.iana.org/TLD/tlds-alpha-by-domain.txt",
		`^[^#]+$`)
	fromURL("https://publicsuffix.org/list/effective_tld_names.dat",
		`^[^/.]+$`)

	type collected struct {
		tldSet   map[string]struct{}
		anyError bool
	}
	done := make(chan struct{})
	results := make(chan collected, 1)
	go func() {
		tldSet, anyError := collectTLDs(done)
		results <- collected{tldSet, anyError}
	}()
	wg.Wait()
	// All fetchers have returned. Stop the collector and wait for its result,
	// so the set is never read while the collector may still be recording the
	// last value it received.
	close(done)
	res := <-results

	if res.anyError {
		return nil, nil, errors.New("there were some errors while fetching the TLDs")
	}

	tlds := make([]string, 0, len(res.tldSet))
	for tld := range res.tldSet {
		tlds = append(tlds, tld)
	}

	sort.Strings(tlds)
	return tlds, urls, nil
}
+ URLs: urls, + }) +} + +func main() { + tlds, urls, err := tldList() + if err != nil { + log.Fatalf("Could not get TLD list: %v", err) + } + log.Printf("Generating %s...", path) + if err := writeTlds(tlds, urls); err != nil { + log.Fatalf("Could not write path: %v", err) + } +} diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/regex.go b/Godeps/_workspace/src/github.com/mvdan/xurls/regex.go new file mode 100644 index 00000000000..d39b907946b --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/regex.go @@ -0,0 +1,8 @@ +// Generated by regexgen + +package xurls + +const ( + gtld = `(?i)(aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|africamagic|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|bit|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|boots|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|cate
ring|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|chase|chat|cheap|chintai|chloe|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|dodge|dog|doha|domains|doosan|dot|download|drive|dstv|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dwg|dz|earth|eat|ec|edeka|edu|education|ee|eg|email|emerck|emerson|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|example|exchange|exit|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|flsmidth|fly|fm|fo|foo|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmo|gmx|gn|gnu|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gotv|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|group|gs|gt|gu|guardian|gucci|gug
e|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|host|hosting|hot|hoteles|hotmail|house|how|hr|hsbc|ht|htc|hu|hughes|hyatt|hyundai|i2p|ibm|icbc|ice|icu|id|ie|ieee|ifm|iinet|ikano|il|im|imamat|imdb|immo|immobilien|in|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|invalid|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|iwc|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyknet|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|loan|loans|local|localhost|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mcd|mcdonalds|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|meo|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mnet|mo|mobi|mobily|moda|moe|moi|mom|monash|money|monster|montblanc|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtpc|mtr|mu|multichoice|museum|mutual|mutuelle|mv|mw|mx|my|mz|mzansimagic|na|nab|nadex|nagoya|name|naspers|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|ne
xt|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onion|onl|online|onyourside|ooo|open|oracle|orange|org|organic|orientexpress|origins|osaka|otsuka|ott|ovh|pa|page|pamperedchef|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|payu|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|philips|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|studio|study|style|su|sucks|supersport|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|t
attoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telecity|telefonica|temasek|tennis|test|teva|tf|tg|th|thd|theater|theatre|theguardian|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tp|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|vermögensberater|vermögensberatung|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vista|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn--11b4c3d|xn--1ck2e1b|xn--1qqw23a|xn--30rr7y|xn--3bst00m|xn--3ds443g|xn--3e0b707e|xn--3oq18vl8pn36a|xn--3pxu8k|xn--42c2d9a|xn--45brj9c|xn--45q11c|xn--4gbrim|xn--4gq48lf9j|xn--54b7fta0cc|xn--55qw42g|xn--55qx5d|xn--5su34j936bgsg|xn--5tzm5g|xn--6frz82g|xn--6qq986b3xl|xn--80adxhks|xn--80ao21a|xn--80aqecdr1a|xn--80asehdb|xn--80aswg|xn--8y0a063a|xn--90a3ac|xn--90ais|xn--9dbq2a|xn--9et52u|xn--9krt00a|xn--b4w605ferd|xn--bck1b9a5dre4c|xn--c1avg|xn--c2br7g|xn--cck2b3b|xn--cg4bki|xn--clchc0ea0b2g2a9gcd|xn--czr694b|xn--czrs0t|xn--czru2d|xn--d1acj3b|xn--d1alf|xn--eckvdtc9d|xn--efvy88h|xn--estv75g|xn--fct429k|xn--fhbei|xn--fiq228c5hs|xn--fiq64b|xn--fiqs8s|xn--fiqz9s|xn--fjq720a|xn--flw351e|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--fzys8d69uvgm|xn--g2xx48c|xn--gckr3f0f|xn--gecrj9c|xn--gk3at1e|xn--h2brj9c|xn--hxt814e|xn--i1b6b1a6a2e|xn--imr513n|xn--io0a7i|xn--j1aef|xn--j1amh|xn--j6w193g|xn--jlq61u9w7b|xn--jvr189m|xn--kcrx77d1x4a|xn--kprw13d|xn--kpry57d|xn--kpu716f|xn--kput3i|xn--l1acc
|xn--lgbbat1ad8j|xn--mgb2ddes|xn--mgb9awbf|xn--mgba3a3ejt|xn--mgba3a4f16a|xn--mgba3a4fra|xn--mgba7c0bbn0a|xn--mgbaakc7dvf|xn--mgbaam7a8h|xn--mgbab2bd|xn--mgbai9a5eva00b|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbb9fbpob|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgbca7dzdo|xn--mgberp4a5d4a87g|xn--mgberp4a5d4ar|xn--mgbi4ecexp|xn--mgbpl2fh|xn--mgbqly7c0a67fbc|xn--mgbqly7cvafr|xn--mgbt3dhd|xn--mgbtf8fl|xn--mgbtx2b|xn--mgbx4cd0ab|xn--mix082f|xn--mix891f|xn--mk1bu44c|xn--mxtq1m|xn--ngbc5azd|xn--ngbe9e0a|xn--ngbrx|xn--nnx388a|xn--node|xn--nqv7f|xn--nqv7fs00ema|xn--nyqy26a|xn--o3cw4h|xn--ogbpf8fl|xn--p1acf|xn--p1ai|xn--pbt977c|xn--pgbs0dh|xn--pssy2u|xn--q9jyb4c|xn--qcka1pmc|xn--qxam|xn--rhqv96g|xn--rovu88b|xn--s9brj9c|xn--ses554g|xn--t60b56a|xn--tckwe|xn--tiq49xqyj|xn--unup4y|xn--vermgensberater-ctb|xn--vermgensberatung-pwb|xn--vhquv|xn--vuq861b|xn--w4r85el8fhu5dnra|xn--w4rs40l|xn--wgbh1c|xn--wgbl6a|xn--xhq521b|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--y9a3aq|xn--yfro4i67o|xn--ygbi2ammx|xn--zfr164b|xperia|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zkey|zm|zone|zuerich|zw|ελ|бел|дети|католик|ком|мкд|мон|москва|онлайн|орг|рус|рф|сайт|срб|укр|қаз|հայ|קום|ابوظبي|اتصالات|ارامكو|الاردن|الجزائر|السعودية|السعوديه|السعودیة|السعودیۃ|العليان|المغرب|اليمن|امارات|ايران|ایران|بازار|بيتك|بھارت|تونس|سودان|سوريا|سورية|شبكة|عراق|عرب|عمان|فلسطين|قطر|كاثوليك|كوم|مصر|مليسيا|موبايلي|موقع|همراه|پاكستان|پاکستان|कॉम|नेट|भारत|संगठन|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|คอม|ไทย|გე|みんな|クラウド|グーグル|コム|ストア|セール|ファッション|ポイント|一号店|世界|中信|中国|中國|中文网|企业|佛山|信息|健康|八卦|公司|公益|台湾|台灣|商城|商店|商标|嘉里|嘉里大酒店|在线|大众汽车|大拿|天主教|娱乐|家電|工行|广东|微博|慈善|我爱你|手机|手表|政务|政府|新加坡|新闻|时尚|書籍|机构|淡马锡|游戏|澳門|澳门|点看|珠宝|移动|组织机构|网址|网店|网站|网络|联通|臺灣|诺基亚|谷歌|购物|通販|集团|電訊盈科|飞利浦|食品|餐厅|香格里拉|香港|닷넷|닷컴|삼성|한국)(?-i)` + otherScheme = `(?i)(bitcoin|file|magnet|mailto|sms|tel|xmpp)(?-i):` +) diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/schemes.go 
// SchemesNoAuthority is a sorted list of some well-known url schemes that are
// followed by ":" instead of "://". Since these are more prone to false
// positives, we limit their matching.
//
// The regexgen generator joins these entries with "|" to build the
// case-insensitive otherScheme pattern, so each entry ends up verbatim inside
// a regular expression.
var SchemesNoAuthority = []string{
	`bitcoin`, // Bitcoin
	`file`,    // Files
	`magnet`,  // Torrent magnets
	`mailto`,  // Mail
	`sms`,     // SMS
	`tel`,     // Telephone
	`xmpp`,    // XMPP
}
+ `asda`, + `asia`, + `associates`, + `at`, + `athleta`, + `attorney`, + `au`, + `auction`, + `audi`, + `audible`, + `audio`, + `auspost`, + `author`, + `auto`, + `autos`, + `avianca`, + `aw`, + `aws`, + `ax`, + `axa`, + `az`, + `azure`, + `ba`, + `baby`, + `baidu`, + `banamex`, + `bananarepublic`, + `band`, + `bank`, + `bar`, + `barcelona`, + `barclaycard`, + `barclays`, + `barefoot`, + `bargains`, + `baseball`, + `basketball`, + `bauhaus`, + `bayern`, + `bb`, + `bbc`, + `bbt`, + `bbva`, + `bcg`, + `bcn`, + `bd`, + `be`, + `beats`, + `beauty`, + `beer`, + `bentley`, + `berlin`, + `best`, + `bestbuy`, + `bet`, + `bf`, + `bg`, + `bh`, + `bharti`, + `bi`, + `bible`, + `bid`, + `bike`, + `bing`, + `bingo`, + `bio`, + `biz`, + `bj`, + `black`, + `blackfriday`, + `blanco`, + `blockbuster`, + `blog`, + `bloomberg`, + `blue`, + `bm`, + `bms`, + `bmw`, + `bn`, + `bnl`, + `bnpparibas`, + `bo`, + `boats`, + `boehringer`, + `bofa`, + `bom`, + `bond`, + `boo`, + `book`, + `booking`, + `boots`, + `bosch`, + `bostik`, + `boston`, + `bot`, + `boutique`, + `box`, + `br`, + `bradesco`, + `bridgestone`, + `broadway`, + `broker`, + `brother`, + `brussels`, + `bs`, + `bt`, + `budapest`, + `bugatti`, + `build`, + `builders`, + `business`, + `buy`, + `buzz`, + `bv`, + `bw`, + `by`, + `bz`, + `bzh`, + `ca`, + `cab`, + `cafe`, + `cal`, + `call`, + `calvinklein`, + `camera`, + `camp`, + `cancerresearch`, + `canon`, + `capetown`, + `capital`, + `capitalone`, + `car`, + `caravan`, + `cards`, + `care`, + `career`, + `careers`, + `cars`, + `cartier`, + `casa`, + `case`, + `caseih`, + `cash`, + `casino`, + `cat`, + `catering`, + `catholic`, + `cba`, + `cbn`, + `cbre`, + `cbs`, + `cc`, + `cd`, + `ceb`, + `center`, + `ceo`, + `cern`, + `cf`, + `cfa`, + `cfd`, + `cg`, + `ch`, + `chanel`, + `channel`, + `chase`, + `chat`, + `cheap`, + `chintai`, + `chloe`, + `christmas`, + `chrome`, + `chrysler`, + `church`, + `ci`, + `cipriani`, + `circle`, + `cisco`, + `citadel`, + `citi`, + `citic`, + `city`, + 
`cityeats`, + `ck`, + `cl`, + `claims`, + `cleaning`, + `click`, + `clinic`, + `clinique`, + `clothing`, + `cloud`, + `club`, + `clubmed`, + `cm`, + `cn`, + `co`, + `coach`, + `codes`, + `coffee`, + `college`, + `cologne`, + `com`, + `comcast`, + `commbank`, + `community`, + `company`, + `compare`, + `computer`, + `comsec`, + `condos`, + `construction`, + `consulting`, + `contact`, + `contractors`, + `cooking`, + `cookingchannel`, + `cool`, + `coop`, + `corsica`, + `country`, + `coupon`, + `coupons`, + `courses`, + `cr`, + `credit`, + `creditcard`, + `creditunion`, + `cricket`, + `crown`, + `crs`, + `cruise`, + `cruises`, + `csc`, + `cu`, + `cuisinella`, + `cv`, + `cw`, + `cx`, + `cy`, + `cymru`, + `cyou`, + `cz`, + `dabur`, + `dad`, + `dance`, + `date`, + `dating`, + `datsun`, + `day`, + `dclk`, + `dds`, + `de`, + `deal`, + `dealer`, + `deals`, + `degree`, + `delivery`, + `dell`, + `deloitte`, + `delta`, + `democrat`, + `dental`, + `dentist`, + `desi`, + `design`, + `dev`, + `dhl`, + `diamonds`, + `diet`, + `digital`, + `direct`, + `directory`, + `discount`, + `discover`, + `dish`, + `diy`, + `dj`, + `dk`, + `dm`, + `dnp`, + `do`, + `docs`, + `dodge`, + `dog`, + `doha`, + `domains`, + `doosan`, + `dot`, + `download`, + `drive`, + `dstv`, + `dtv`, + `dubai`, + `duck`, + `dunlop`, + `duns`, + `dupont`, + `durban`, + `dvag`, + `dwg`, + `dz`, + `earth`, + `eat`, + `ec`, + `edeka`, + `edu`, + `education`, + `ee`, + `eg`, + `email`, + `emerck`, + `emerson`, + `energy`, + `engineer`, + `engineering`, + `enterprises`, + `epost`, + `epson`, + `equipment`, + `er`, + `ericsson`, + `erni`, + `es`, + `esq`, + `estate`, + `esurance`, + `et`, + `etisalat`, + `eu`, + `eurovision`, + `eus`, + `events`, + `everbank`, + `exchange`, + `expert`, + `exposed`, + `express`, + `extraspace`, + `fage`, + `fail`, + `fairwinds`, + `faith`, + `family`, + `fan`, + `fans`, + `farm`, + `farmers`, + `fashion`, + `fast`, + `fedex`, + `feedback`, + `ferrari`, + `ferrero`, + `fi`, + `fiat`, + 
`fidelity`, + `fido`, + `film`, + `final`, + `finance`, + `financial`, + `fire`, + `firestone`, + `firmdale`, + `fish`, + `fishing`, + `fit`, + `fitness`, + `fj`, + `fk`, + `flickr`, + `flights`, + `flir`, + `florist`, + `flowers`, + `flsmidth`, + `fly`, + `fm`, + `fo`, + `foo`, + `foodnetwork`, + `football`, + `ford`, + `forex`, + `forsale`, + `forum`, + `foundation`, + `fox`, + `fr`, + `free`, + `fresenius`, + `frl`, + `frogans`, + `frontdoor`, + `frontier`, + `ftr`, + `fujitsu`, + `fujixerox`, + `fund`, + `furniture`, + `futbol`, + `fyi`, + `ga`, + `gal`, + `gallery`, + `gallo`, + `gallup`, + `game`, + `games`, + `gap`, + `garden`, + `gb`, + `gbiz`, + `gd`, + `gdn`, + `ge`, + `gea`, + `gent`, + `genting`, + `george`, + `gf`, + `gg`, + `ggee`, + `gh`, + `gi`, + `gift`, + `gifts`, + `gives`, + `giving`, + `gl`, + `glade`, + `glass`, + `gle`, + `global`, + `globo`, + `gm`, + `gmail`, + `gmo`, + `gmx`, + `gn`, + `godaddy`, + `gold`, + `goldpoint`, + `golf`, + `goo`, + `goodhands`, + `goodyear`, + `goog`, + `google`, + `gop`, + `got`, + `gotv`, + `gov`, + `gp`, + `gq`, + `gr`, + `grainger`, + `graphics`, + `gratis`, + `green`, + `gripe`, + `group`, + `gs`, + `gt`, + `gu`, + `guardian`, + `gucci`, + `guge`, + `guide`, + `guitars`, + `guru`, + `gw`, + `gy`, + `hair`, + `hamburg`, + `hangout`, + `haus`, + `hbo`, + `hdfc`, + `hdfcbank`, + `health`, + `healthcare`, + `help`, + `helsinki`, + `here`, + `hermes`, + `hgtv`, + `hiphop`, + `hisamitsu`, + `hitachi`, + `hiv`, + `hk`, + `hkt`, + `hm`, + `hn`, + `hockey`, + `holdings`, + `holiday`, + `homedepot`, + `homegoods`, + `homes`, + `homesense`, + `honda`, + `honeywell`, + `horse`, + `host`, + `hosting`, + `hot`, + `hoteles`, + `hotmail`, + `house`, + `how`, + `hr`, + `hsbc`, + `ht`, + `htc`, + `hu`, + `hughes`, + `hyatt`, + `hyundai`, + `ibm`, + `icbc`, + `ice`, + `icu`, + `id`, + `ie`, + `ieee`, + `ifm`, + `iinet`, + `ikano`, + `il`, + `im`, + `imamat`, + `imdb`, + `immo`, + `immobilien`, + `in`, + `industries`, + 
`infiniti`, + `info`, + `ing`, + `ink`, + `institute`, + `insurance`, + `insure`, + `int`, + `intel`, + `international`, + `intuit`, + `investments`, + `io`, + `ipiranga`, + `iq`, + `ir`, + `irish`, + `is`, + `iselect`, + `ismaili`, + `ist`, + `istanbul`, + `it`, + `itau`, + `itv`, + `iveco`, + `iwc`, + `jaguar`, + `java`, + `jcb`, + `jcp`, + `je`, + `jeep`, + `jetzt`, + `jewelry`, + `jio`, + `jlc`, + `jll`, + `jm`, + `jmp`, + `jnj`, + `jo`, + `jobs`, + `joburg`, + `jot`, + `joy`, + `jp`, + `jpmorgan`, + `jprs`, + `juegos`, + `juniper`, + `kaufen`, + `kddi`, + `ke`, + `kerryhotels`, + `kerrylogistics`, + `kerryproperties`, + `kfh`, + `kg`, + `kh`, + `ki`, + `kia`, + `kim`, + `kinder`, + `kindle`, + `kitchen`, + `kiwi`, + `km`, + `kn`, + `koeln`, + `komatsu`, + `kosher`, + `kp`, + `kpmg`, + `kpn`, + `kr`, + `krd`, + `kred`, + `kuokgroup`, + `kw`, + `ky`, + `kyknet`, + `kyoto`, + `kz`, + `la`, + `lacaixa`, + `ladbrokes`, + `lamborghini`, + `lamer`, + `lancaster`, + `lancia`, + `lancome`, + `land`, + `landrover`, + `lanxess`, + `lasalle`, + `lat`, + `latino`, + `latrobe`, + `law`, + `lawyer`, + `lb`, + `lc`, + `lds`, + `lease`, + `leclerc`, + `lefrak`, + `legal`, + `lego`, + `lexus`, + `lgbt`, + `li`, + `liaison`, + `lidl`, + `life`, + `lifeinsurance`, + `lifestyle`, + `lighting`, + `like`, + `lilly`, + `limited`, + `limo`, + `lincoln`, + `linde`, + `link`, + `lipsy`, + `live`, + `living`, + `lixil`, + `lk`, + `loan`, + `loans`, + `locker`, + `locus`, + `loft`, + `lol`, + `london`, + `lotte`, + `lotto`, + `love`, + `lpl`, + `lplfinancial`, + `lr`, + `ls`, + `lt`, + `ltd`, + `ltda`, + `lu`, + `lundbeck`, + `lupin`, + `luxe`, + `luxury`, + `lv`, + `ly`, + `ma`, + `macys`, + `madrid`, + `maif`, + `maison`, + `makeup`, + `man`, + `management`, + `mango`, + `market`, + `marketing`, + `markets`, + `marriott`, + `marshalls`, + `maserati`, + `mattel`, + `mba`, + `mc`, + `mcd`, + `mcdonalds`, + `mckinsey`, + `md`, + `me`, + `med`, + `media`, + `meet`, + `melbourne`, + `meme`, 
+ `memorial`, + `men`, + `menu`, + `meo`, + `metlife`, + `mg`, + `mh`, + `miami`, + `microsoft`, + `mil`, + `mini`, + `mint`, + `mit`, + `mitsubishi`, + `mk`, + `ml`, + `mlb`, + `mls`, + `mm`, + `mma`, + `mn`, + `mnet`, + `mo`, + `mobi`, + `mobily`, + `moda`, + `moe`, + `moi`, + `mom`, + `monash`, + `money`, + `monster`, + `montblanc`, + `mopar`, + `mormon`, + `mortgage`, + `moscow`, + `moto`, + `motorcycles`, + `mov`, + `movie`, + `movistar`, + `mp`, + `mq`, + `mr`, + `ms`, + `msd`, + `mt`, + `mtn`, + `mtpc`, + `mtr`, + `mu`, + `multichoice`, + `museum`, + `mutual`, + `mutuelle`, + `mv`, + `mw`, + `mx`, + `my`, + `mz`, + `mzansimagic`, + `na`, + `nab`, + `nadex`, + `nagoya`, + `name`, + `naspers`, + `nationwide`, + `natura`, + `navy`, + `nba`, + `nc`, + `ne`, + `nec`, + `net`, + `netbank`, + `netflix`, + `network`, + `neustar`, + `new`, + `newholland`, + `news`, + `next`, + `nextdirect`, + `nexus`, + `nf`, + `nfl`, + `ng`, + `ngo`, + `nhk`, + `ni`, + `nico`, + `nike`, + `nikon`, + `ninja`, + `nissan`, + `nissay`, + `nl`, + `no`, + `nokia`, + `northwesternmutual`, + `norton`, + `now`, + `nowruz`, + `nowtv`, + `np`, + `nr`, + `nra`, + `nrw`, + `ntt`, + `nu`, + `nyc`, + `nz`, + `obi`, + `observer`, + `off`, + `office`, + `okinawa`, + `olayan`, + `olayangroup`, + `oldnavy`, + `ollo`, + `om`, + `omega`, + `one`, + `ong`, + `onl`, + `online`, + `onyourside`, + `ooo`, + `open`, + `oracle`, + `orange`, + `org`, + `organic`, + `orientexpress`, + `origins`, + `osaka`, + `otsuka`, + `ott`, + `ovh`, + `pa`, + `page`, + `pamperedchef`, + `panasonic`, + `panerai`, + `paris`, + `pars`, + `partners`, + `parts`, + `party`, + `passagens`, + `pay`, + `payu`, + `pccw`, + `pe`, + `pet`, + `pf`, + `pfizer`, + `pg`, + `ph`, + `pharmacy`, + `philips`, + `photo`, + `photography`, + `photos`, + `physio`, + `piaget`, + `pics`, + `pictet`, + `pictures`, + `pid`, + `pin`, + `ping`, + `pink`, + `pioneer`, + `pizza`, + `pk`, + `pl`, + `place`, + `play`, + `playstation`, + `plumbing`, + `plus`, 
+ `pm`, + `pn`, + `pnc`, + `pohl`, + `poker`, + `politie`, + `porn`, + `post`, + `pr`, + `pramerica`, + `praxi`, + `press`, + `prime`, + `pro`, + `prod`, + `productions`, + `prof`, + `progressive`, + `promo`, + `properties`, + `property`, + `protection`, + `pru`, + `prudential`, + `ps`, + `pt`, + `pub`, + `pw`, + `pwc`, + `py`, + `qa`, + `qpon`, + `quebec`, + `quest`, + `qvc`, + `racing`, + `raid`, + `re`, + `read`, + `realestate`, + `realtor`, + `realty`, + `recipes`, + `red`, + `redstone`, + `redumbrella`, + `rehab`, + `reise`, + `reisen`, + `reit`, + `reliance`, + `ren`, + `rent`, + `rentals`, + `repair`, + `report`, + `republican`, + `rest`, + `restaurant`, + `review`, + `reviews`, + `rexroth`, + `rich`, + `richardli`, + `ricoh`, + `rightathome`, + `ril`, + `rio`, + `rip`, + `rmit`, + `ro`, + `rocher`, + `rocks`, + `rodeo`, + `rogers`, + `room`, + `rs`, + `rsvp`, + `ru`, + `ruhr`, + `run`, + `rw`, + `rwe`, + `ryukyu`, + `sa`, + `saarland`, + `safe`, + `safety`, + `sakura`, + `sale`, + `salon`, + `samsclub`, + `samsung`, + `sandvik`, + `sandvikcoromant`, + `sanofi`, + `sap`, + `sapo`, + `sarl`, + `sas`, + `save`, + `saxo`, + `sb`, + `sbi`, + `sbs`, + `sc`, + `sca`, + `scb`, + `schaeffler`, + `schmidt`, + `scholarships`, + `school`, + `schule`, + `schwarz`, + `science`, + `scjohnson`, + `scor`, + `scot`, + `sd`, + `se`, + `seat`, + `secure`, + `security`, + `seek`, + `select`, + `sener`, + `services`, + `ses`, + `seven`, + `sew`, + `sex`, + `sexy`, + `sfr`, + `sg`, + `sh`, + `shangrila`, + `sharp`, + `shaw`, + `shell`, + `shia`, + `shiksha`, + `shoes`, + `shouji`, + `show`, + `showtime`, + `shriram`, + `si`, + `silk`, + `sina`, + `singles`, + `site`, + `sj`, + `sk`, + `ski`, + `skin`, + `sky`, + `skype`, + `sl`, + `sling`, + `sm`, + `smart`, + `smile`, + `sn`, + `sncf`, + `so`, + `soccer`, + `social`, + `softbank`, + `software`, + `sohu`, + `solar`, + `solutions`, + `song`, + `sony`, + `soy`, + `space`, + `spiegel`, + `spot`, + `spreadbetting`, + `sr`, + `srl`, + 
`srt`, + `st`, + `stada`, + `staples`, + `star`, + `starhub`, + `statebank`, + `statefarm`, + `statoil`, + `stc`, + `stcgroup`, + `stockholm`, + `storage`, + `store`, + `studio`, + `study`, + `style`, + `su`, + `sucks`, + `supersport`, + `supplies`, + `supply`, + `support`, + `surf`, + `surgery`, + `suzuki`, + `sv`, + `swatch`, + `swiftcover`, + `swiss`, + `sx`, + `sy`, + `sydney`, + `symantec`, + `systems`, + `sz`, + `tab`, + `taipei`, + `talk`, + `taobao`, + `target`, + `tatamotors`, + `tatar`, + `tattoo`, + `tax`, + `taxi`, + `tc`, + `tci`, + `td`, + `tdk`, + `team`, + `tech`, + `technology`, + `tel`, + `telecity`, + `telefonica`, + `temasek`, + `tennis`, + `teva`, + `tf`, + `tg`, + `th`, + `thd`, + `theater`, + `theatre`, + `theguardian`, + `tiaa`, + `tickets`, + `tienda`, + `tiffany`, + `tips`, + `tires`, + `tirol`, + `tj`, + `tjmaxx`, + `tjx`, + `tk`, + `tkmaxx`, + `tl`, + `tm`, + `tmall`, + `tn`, + `to`, + `today`, + `tokyo`, + `tools`, + `top`, + `toray`, + `toshiba`, + `total`, + `tours`, + `town`, + `toyota`, + `toys`, + `tp`, + `tr`, + `trade`, + `trading`, + `training`, + `travel`, + `travelchannel`, + `travelers`, + `travelersinsurance`, + `trust`, + `trv`, + `tt`, + `tube`, + `tui`, + `tunes`, + `tushu`, + `tv`, + `tvs`, + `tw`, + `tz`, + `ua`, + `ubank`, + `ubs`, + `uconnect`, + `ug`, + `uk`, + `unicom`, + `university`, + `uno`, + `uol`, + `ups`, + `us`, + `uy`, + `uz`, + `va`, + `vacations`, + `vana`, + `vanguard`, + `vc`, + `ve`, + `vegas`, + `ventures`, + `verisign`, + `vermögensberater`, + `vermögensberatung`, + `versicherung`, + `vet`, + `vg`, + `vi`, + `viajes`, + `video`, + `vig`, + `viking`, + `villas`, + `vin`, + `vip`, + `virgin`, + `visa`, + `vision`, + `vista`, + `vistaprint`, + `viva`, + `vivo`, + `vlaanderen`, + `vn`, + `vodka`, + `volkswagen`, + `volvo`, + `vote`, + `voting`, + `voto`, + `voyage`, + `vu`, + `vuelos`, + `wales`, + `walmart`, + `walter`, + `wang`, + `wanggou`, + `warman`, + `watch`, + `watches`, + `weather`, + 
`weatherchannel`, + `webcam`, + `weber`, + `website`, + `wed`, + `wedding`, + `weibo`, + `weir`, + `wf`, + `whoswho`, + `wien`, + `wiki`, + `williamhill`, + `win`, + `windows`, + `wine`, + `winners`, + `wme`, + `wolterskluwer`, + `woodside`, + `work`, + `works`, + `world`, + `wow`, + `ws`, + `wtc`, + `wtf`, + `xbox`, + `xerox`, + `xfinity`, + `xihuan`, + `xin`, + `xperia`, + `xxx`, + `xyz`, + `yachts`, + `yahoo`, + `yamaxun`, + `yandex`, + `ye`, + `yodobashi`, + `yoga`, + `yokohama`, + `you`, + `youtube`, + `yt`, + `yun`, + `za`, + `zappos`, + `zara`, + `zero`, + `zip`, + `zippo`, + `zm`, + `zone`, + `zuerich`, + `zw`, + `ελ`, + `бел`, + `дети`, + `католик`, + `ком`, + `мкд`, + `мон`, + `москва`, + `онлайн`, + `орг`, + `рус`, + `рф`, + `сайт`, + `срб`, + `укр`, + `қаз`, + `հայ`, + `קום`, + `ابوظبي`, + `اتصالات`, + `ارامكو`, + `الاردن`, + `الجزائر`, + `السعودية`, + `السعوديه`, + `السعودیة`, + `السعودیۃ`, + `العليان`, + `المغرب`, + `اليمن`, + `امارات`, + `ايران`, + `ایران`, + `بازار`, + `بيتك`, + `بھارت`, + `تونس`, + `سودان`, + `سوريا`, + `سورية`, + `شبكة`, + `عراق`, + `عرب`, + `عمان`, + `فلسطين`, + `قطر`, + `كاثوليك`, + `كوم`, + `مصر`, + `مليسيا`, + `موبايلي`, + `موقع`, + `همراه`, + `پاكستان`, + `پاکستان`, + `कॉम`, + `नेट`, + `भारत`, + `संगठन`, + `বাংলা`, + `ভারত`, + `ਭਾਰਤ`, + `ભારત`, + `இந்தியா`, + `இலங்கை`, + `சிங்கப்பூர்`, + `భారత్`, + `ලංකා`, + `คอม`, + `ไทย`, + `გე`, + `みんな`, + `クラウド`, + `グーグル`, + `コム`, + `ストア`, + `セール`, + `ファッション`, + `ポイント`, + `一号店`, + `世界`, + `中信`, + `中国`, + `中國`, + `中文网`, + `企业`, + `佛山`, + `信息`, + `健康`, + `八卦`, + `公司`, + `公益`, + `台湾`, + `台灣`, + `商城`, + `商店`, + `商标`, + `嘉里`, + `嘉里大酒店`, + `在线`, + `大众汽车`, + `大拿`, + `天主教`, + `娱乐`, + `家電`, + `工行`, + `广东`, + `微博`, + `慈善`, + `我爱你`, + `手机`, + `手表`, + `政务`, + `政府`, + `新加坡`, + `新闻`, + `时尚`, + `書籍`, + `机构`, + `淡马锡`, + `游戏`, + `澳門`, + `澳门`, + `点看`, + `珠宝`, + `移动`, + `组织机构`, + `网址`, + `网店`, + `网站`, + `网络`, + `联通`, + `臺灣`, + `诺基亚`, + `谷歌`, + `购物`, + `通販`, + `集团`, + `電訊盈科`, + `飞利浦`, + `食品`, + `餐厅`, + 
`香格里拉`, + `香港`, + `닷넷`, + `닷컴`, + `삼성`, + `한국`, +} diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/tlds_pseudo.go b/Godeps/_workspace/src/github.com/mvdan/xurls/tlds_pseudo.go new file mode 100644 index 00000000000..675c1872ad5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/tlds_pseudo.go @@ -0,0 +1,22 @@ +package xurls + +// PseudoTLDs is a sorted list of some widely used unofficial TLDs. +// +// Sources: +// * https://en.wikipedia.org/wiki/Pseudo-top-level_domain +// * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains +// * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00 +// * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml +var PseudoTLDs = []string{ + `bit`, // Namecoin + `example`, // Example domain + `exit`, // Tor exit node + `gnu`, // GNS by public key + `i2p`, // I2P network + `invalid`, // Invalid domain + `local`, // Local network + `localhost`, // Local network + `onion`, // Tor hidden services + `test`, // Test domain + `zkey`, // GNS domain name +} diff --git a/Godeps/_workspace/src/github.com/mvdan/xurls/xurls.go b/Godeps/_workspace/src/github.com/mvdan/xurls/xurls.go new file mode 100644 index 00000000000..ba1bf49ad1a --- /dev/null +++ b/Godeps/_workspace/src/github.com/mvdan/xurls/xurls.go @@ -0,0 +1,66 @@ +// Copyright (c) 2015, Daniel Martí +// See LICENSE for licensing information + +// Package xurls extracts urls from plain text using regular expressions. 
+package xurls + +import "regexp" + +//go:generate go run generate/tldsgen/main.go +//go:generate go run generate/regexgen/main.go + +const ( + letter = `\p{L}` + number = `\p{N}` + iriChar = letter + number + currency = `\p{Sc}` + otherSymb = `\p{So}` + endChar = iriChar + `/\-+_&~*%=#` + currency + midChar = endChar + `@.,:;'?!|` + otherSymb + wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` + wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` + wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` + wellAll = wellParen + `|` + wellBrack + `|` + wellBrace + pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+` + comScheme = `[a-zA-Z][a-zA-Z.\-+]*://` + scheme = `(` + comScheme + `|` + otherScheme + `)` + + iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?` + domain = `(` + iri + `\.)+` + octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])` + ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b` + ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:` + ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)` + site = domain + gtld + hostName = `(` + site + `|` + ipAddr + `)` + port = `(:[0-9]*)?` + path = `(/|/` + pathCont + `?|\b|$)` + webURL = hostName + port + path + + strict = `(\b` + scheme + pathCont + `)` + relaxed = `(` + strict + `|` + webURL + `)` +) + +var ( + // Relaxed matches all the urls it can find. + Relaxed = regexp.MustCompile(relaxed) + // Strict only matches urls with a scheme to avoid false positives. 
+ Strict = regexp.MustCompile(strict) +) + +func init() { + Relaxed.Longest() + Strict.Longest() +} + +// StrictMatchingScheme produces a regexp that matches urls like Strict but +// whose scheme matches the given regular expression. +func StrictMatchingScheme(exp string) (*regexp.Regexp, error) { + strictMatching := `(\b(?i)(` + exp + `)(?-i)` + pathCont + `)` + re, err := regexp.Compile(strictMatching) + if err != nil { + return nil, err + } + re.Longest() + return re, nil +} diff --git a/Makefile b/Makefile index 61e5039350a..361dc157155 100644 --- a/Makefile +++ b/Makefile @@ -49,7 +49,6 @@ verify: hack/verify-generated-deep-copies.sh hack/verify-generated-docs.sh hack/verify-swagger-spec.sh - hack/verify-linkcheck.sh hack/verify-flags-underscore.py hack/verify-godeps.sh $(BRANCH) hack/verify-godep-licenses.sh $(BRANCH) diff --git a/cmd/linkcheck/links.go b/cmd/linkcheck/links.go index 638a5938842..099277ca6d5 100644 --- a/cmd/linkcheck/links.go +++ b/cmd/linkcheck/links.go @@ -14,56 +14,155 @@ See the License for the specific language governing permissions and limitations under the License. */ -// This command checks if the hyperlinks in files are valid. It checks the files -// with 'fileSuffix' in 'rootDir' for URLs that match 'prefix'. It trims the -// 'prefix' from the URL, uses what's left as the relative path to repoRoot to -// verify if the link is valid. For example: -// $ linkcheck --root-dir=${TYPEROOT} --repo-root=${KUBE_ROOT} \ -// --file-suffix=types.go --prefix=http://releases.k8s.io/HEAD +// This tool extracts the links from types.go and .md files, visits the link and +// checks the status code of the response. 
+// Usage: +// $ linkcheck --root-dir=${ROOT} package main import ( "fmt" "io/ioutil" + "net/http" "os" - "path" "path/filepath" "regexp" + "strconv" "strings" + "time" + "github.com/mvdan/xurls" flag "github.com/spf13/pflag" ) var ( - httpRE *regexp.Regexp - rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.") - repoRoot = flag.String("repo-root", "", `Root directory of k8s repository.`) - fileSuffix = flag.String("file-suffix", "", "suffix of files to be checked") - prefix = flag.String("prefix", "", "Longest common prefix of the link URL, e.g., http://release.k8s.io/HEAD/ for links in pkg/api/types.go") + fileSuffix = flag.StringSlice("file-suffix", []string{"types.go", ".md"}, "suffix of files to be checked") + // URLs matching the patterns in the regWhiteList won't be checked. Patterns + // of dummy URLs should be added to the list to avoid false alerts. Also, + // patterns of URLs that we don't care about can be added here to improve + // efficiency. + regWhiteList = []*regexp.Regexp{ + regexp.MustCompile(`https://kubernetes-site\.appspot\.com`), + // skip url that doesn't start with an English alphabet, e.g., URLs with IP addresses. + regexp.MustCompile(`https?://[^A-Za-z].*`), + regexp.MustCompile(`https?://localhost.*`), + } + // URLs listed in the fullURLWhiteList won't be checked. This separated from + // the RegWhiteList to improve efficiency. This list includes dummy URLs that + // are hard to be generalized by a regex, and URLs that will cause false alerts. + fullURLWhiteList = map[string]struct{}{ + "http://github.com/some/repo.git": {}, + // This URL returns 404 when visited by this tool, but it works fine if visited by a browser. 
+ "http://stackoverflow.com/questions/ask?tags=kubernetes": {}, + "https://github.com/$YOUR_GITHUB_USERNAME/kubernetes.git": {}, + "https://github.com/$YOUR_GITHUB_USERNAME/kubernetes": {}, + "http://storage.googleapis.com/kubernetes-release/release/v${K8S_VERSION}/bin/darwin/amd64/kubectl": {}, + // It seems this server expects certain User-Agent value, it works fine with Chrome, but returns 404 if we issue a plain cURL to it. + "http://supervisord.org/": {}, + "http://kubernetes.io/vX.Y/docs": {}, + "http://kubernetes.io/vX.Y/docs/": {}, + "http://kubernetes.io/vX.Y/": {}, + } + + visitedURLs = map[string]struct{}{} + htmlpreviewReg = regexp.MustCompile(`https://htmlpreview\.github\.io/\?`) + // httpOrhttpsReg matches only URLs whose scheme is http or https; it is + // anchored so that other schemes merely containing "http" are skipped. + httpOrhttpsReg = regexp.MustCompile(`^https?://`) ) -func newWalkFunc(invalidLink *bool) filepath.WalkFunc { +func newWalkFunc(invalidLink *bool, client *http.Client) filepath.WalkFunc { return func(filePath string, info os.FileInfo, err error) error { - if !strings.HasSuffix(info.Name(), *fileSuffix) { + // Walk passes a nil info together with a non-nil err when the path + // could not be stat'ed; bail out instead of dereferencing info. + if err != nil { + return err + } + hasSuffix := false + for _, suffix := range *fileSuffix { + hasSuffix = hasSuffix || strings.HasSuffix(info.Name(), suffix) + } + if !hasSuffix { return nil } + fileBytes, err := ioutil.ReadFile(filePath) if err != nil { return err } foundInvalid := false - matches := httpRE.FindAllSubmatch(fileBytes, -1) - for _, match := range matches { - // match[1] should look like docs/devel/api-conventions.md - if _, err := os.Stat(path.Join(*repoRoot, string(match[1]))); err != nil { - fmt.Fprintf(os.Stderr, "Link is not valid: %s\n", string(match[0])) + allURLs := xurls.Strict.FindAll(fileBytes, -1) + fmt.Fprintf(os.Stdout, "\nChecking file %s\n", filePath) + URL: + for _, URL := range allURLs { + // Don't check non http/https URL + if !httpOrhttpsReg.Match(URL) { + continue + } + for _, whiteURL := range regWhiteList { + if whiteURL.Match(URL) { + continue URL + } + } + if _, found := fullURLWhiteList[string(URL)]; found { + continue + } + // remove the htmlpreview Prefix + 
processedURL := htmlpreviewReg.ReplaceAll(URL, []byte{}) + + // check if we have visited the URL. + if _, found := visitedURLs[string(processedURL)]; found { + continue + } + visitedURLs[string(processedURL)] = struct{}{} + + retry := 0 + const maxRetry int = 3 + backoff := 100 + for retry < maxRetry { + fmt.Fprintf(os.Stdout, "Visiting %s\n", string(processedURL)) + // Use verb HEAD to increase efficiency. However, some servers + // do not handle HEAD well, so we need to try a GET to avoid + // false alert. + resp, err := client.Head(string(processedURL)) + // URLs with mock host or mock port will cause error. If we report + // the error here, people need to add the mock URL to the white + // list every time they add a mock URL, which will be a maintenance + // nightmare. Hence, we decide to only report 404 to catch the + // cases where host and port are legit, but path is not, which + // is the most common mistake in our docs. + if err != nil { + break + } + // Only the status code and headers are used below; close the body + // right away so the connection is released. + resp.Body.Close() + if resp.StatusCode == 429 { + retryAfter := resp.Header.Get("Retry-After") + // Honor the server-provided delay (in seconds) when it parses; + // otherwise keep the current backoff value. + if seconds, err := strconv.Atoi(retryAfter); err == nil { + backoff = seconds + 10 + } + fmt.Fprintf(os.Stderr, "Got %d visiting %s, retry after %d seconds.\n", resp.StatusCode, string(URL), backoff) + time.Sleep(time.Duration(backoff) * time.Second) + backoff *= 2 + retry++ + } else if resp.StatusCode == 404 { + // We only check for 404 error for now. 401, 403 errors are hard to handle. + + // We need to try a GET to avoid false alert. 
+ resp, err = client.Get(string(processedURL)) + if err != nil { + break + } + // The body is never read; close it so the connection is released. + resp.Body.Close() + if resp.StatusCode != 404 { + continue URL + } + + foundInvalid = true + fmt.Fprintf(os.Stderr, "Failed: in file %s, Got %d visiting %s\n", filePath, resp.StatusCode, string(URL)) + break + } else { + break + } + } + if retry == maxRetry { foundInvalid = true + fmt.Fprintf(os.Stderr, "Failed: in file %s, still got 429 visiting %s after %d retries\n", filePath, string(URL), maxRetry) } } if foundInvalid { - fmt.Fprintf(os.Stderr, "Found invalid links in %s\n", filePath) *invalidLink = true } return nil @@ -72,14 +171,16 @@ func newWalkFunc(invalidLink *bool) filepath.WalkFunc { func main() { flag.Parse() - httpRE = regexp.MustCompile(*prefix + `(.*\.md)`) - if *rootDir == "" || *repoRoot == "" || *prefix == "" { + if *rootDir == "" { flag.Usage() os.Exit(2) } + client := http.Client{ + Timeout: time.Duration(5 * time.Second), + } invalidLink := false - if err := filepath.Walk(*rootDir, newWalkFunc(&invalidLink)); err != nil { + if err := filepath.Walk(*rootDir, newWalkFunc(&invalidLink, &client)); err != nil { fmt.Fprintf(os.Stderr, "Fail: %v.\n", err) os.Exit(2) } diff --git a/docs/devel/how-to-doc.md b/docs/devel/how-to-doc.md index 7f1d30bab75..2c50861159d 100644 --- a/docs/devel/how-to-doc.md +++ b/docs/devel/how-to-doc.md @@ -18,10 +18,6 @@ If you are using a released version of Kubernetes, you should refer to the docs that go with that version. - -The latest 1.1.x release of this document can be found -[here](http://releases.k8s.io/release-1.1/docs/devel/how-to-doc.md). - Documentation for other releases can be found at [releases.k8s.io](http://releases.k8s.io). diff --git a/docs/proposals/node-allocatable.md b/docs/proposals/node-allocatable.md index c915bb6a5be..a3520a41ad0 100644 --- a/docs/proposals/node-allocatable.md +++ b/docs/proposals/node-allocatable.md @@ -65,7 +65,7 @@ reservation grows), or running multiple Kubelets on a single node. ![image](node-allocatable.png) 1. 
**Node Capacity** - Already provided as - [`NodeStatus.Capacity`](https://htmlpreview.github.io/?https://github.com/kubernetes/kubernetes/HEAD/docs/api-reference/v1/definitions.html#_v1_nodestatus), + [`NodeStatus.Capacity`](https://htmlpreview.github.io/?https://github.com/kubernetes/kubernetes/blob/HEAD/docs/api-reference/v1/definitions.html#_v1_nodestatus), this is total capacity read from the node instance, and assumed to be constant. 2. **System-Reserved** (proposed) - Compute resources reserved for processes which are not managed by Kubernetes. Currently this covers all the processes lumped together in the `/system` raw @@ -81,7 +81,7 @@ reservation grows), or running multiple Kubelets on a single node. #### Allocatable Add `Allocatable` (4) to -[`NodeStatus`](https://htmlpreview.github.io/?https://github.com/kubernetes/kubernetes/HEAD/docs/api-reference/v1/definitions.html#_v1_nodestatus): +[`NodeStatus`](https://htmlpreview.github.io/?https://github.com/kubernetes/kubernetes/blob/HEAD/docs/api-reference/v1/definitions.html#_v1_nodestatus): ``` type NodeStatus struct { diff --git a/hack/after-build/verify-linkcheck.sh b/hack/after-build/verify-linkcheck.sh index c3865bcd740..64fc5de09a4 100755 --- a/hack/after-build/verify-linkcheck.sh +++ b/hack/after-build/verify-linkcheck.sh @@ -24,15 +24,36 @@ source "${KUBE_ROOT}/hack/lib/init.sh" kube::golang::setup_env linkcheck=$(kube::util::find-binary "linkcheck") -TYPEROOT="${KUBE_ROOT}/pkg/api/" -"${linkcheck}" "--root-dir=${TYPEROOT}" "--repo-root=${KUBE_ROOT}" "--file-suffix=types.go" "--prefix=http://releases.k8s.io/HEAD" && ret=0 || ret=$? -if [[ $ret -eq 1 ]]; then - echo "links in ${TYPEROOT} is out of date." 
- exit 1 -fi -if [[ $ret -gt 1 ]]; then - echo "Error running linkcheck" - exit 1 +kube::util::ensure-temp-dir +OUTPUT="${KUBE_TEMP}"/linkcheck-output +cleanup() { + rm -rf "${OUTPUT}" +} +trap "cleanup" EXIT SIGINT +mkdir -p "$OUTPUT" + +APIROOT="${KUBE_ROOT}/pkg/api/" +APISROOT="${KUBE_ROOT}/pkg/apis/" +DOCROOT="${KUBE_ROOT}/docs/" +ROOTS=($APIROOT $APISROOT $DOCROOT) +found_invalid=false +for root in "${ROOTS[@]}"; do + "${linkcheck}" "--root-dir=${root}" 2> >(tee -a "${OUTPUT}/error" >&2) && ret=0 || ret=$? + if [[ $ret -eq 1 ]]; then + echo "Failed: found invalid links in ${root}." + found_invalid=true + fi + if [[ $ret -gt 1 ]]; then + echo "Error running linkcheck" + exit 1 + fi +done + +if [ ${found_invalid} = true ]; then + echo "Summary of invalid links:" + cat "${OUTPUT}/error" + # Report failure to the caller; the EXIT trap still removes ${OUTPUT}. + exit 1 fi +trap "cleanup" EXIT SIGINT + # ex: ts=2 sw=2 et filetype=sh diff --git a/hack/verify-all.sh b/hack/verify-all.sh index 99f151311f7..6017f655020 100755 --- a/hack/verify-all.sh +++ b/hack/verify-all.sh @@ -60,7 +60,7 @@ if $SILENT ; then fi # remove protobuf until it is part of direct generation -EXCLUDE="verify-godeps.sh verify-godep-licenses.sh verify-generated-protobuf.sh" +EXCLUDE="verify-godeps.sh verify-godep-licenses.sh verify-generated-protobuf.sh verify-linkcheck.sh" ret=0 for t in `ls $KUBE_ROOT/hack/verify-*.sh` diff --git a/hooks/pre-commit b/hooks/pre-commit index 327c8954177..d4234f78f75 100755 --- a/hooks/pre-commit +++ b/hooks/pre-commit @@ -122,16 +122,6 @@ else fi echo "${reset}" -echo -ne "Checking for links in API descriptions... " -if ! hack/after-build/verify-linkcheck.sh > /dev/null; then - echo "${red}ERROR!" - echo "Some links in pkg/api/.*types.go are outdated. They require a manual fix." - exit_code=1 -else - echo "${green}OK" -fi -echo "${reset}" - echo -ne "Checking for docs that need updating... " if ! hack/after-build/verify-generated-docs.sh > /dev/null; then echo "${red}ERROR!" 
diff --git a/shippable.yml b/shippable.yml index 7f6d23cb5a8..0c43b9810c7 100644 --- a/shippable.yml +++ b/shippable.yml @@ -56,7 +56,6 @@ install: - ./hack/verify-generated-docs.sh - ./hack/verify-generated-swagger-docs.sh - ./hack/verify-swagger-spec.sh - - ./hack/verify-linkcheck.sh script: # Disable coverage collection on pull requests