updating github.com/mvdan/xurls to v1.1.0

This commit is contained in:
Davanum Srinivas
2019-06-14 11:17:34 -04:00
parent 746da40374
commit 49cae73b94
12 changed files with 60 additions and 36 deletions

View File

@@ -1,5 +1,4 @@
language: go
go:
- 1.4.3
- 1.5.1
- 1.7.x

View File

@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

View File

@@ -1,10 +1,11 @@
# xurls
[![GoDoc](https://godoc.org/github.com/mvdan/xurls?status.svg)](https://godoc.org/github.com/mvdan/xurls) [![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls)
[![GoDoc](https://godoc.org/github.com/mvdan/xurls?status.svg)](https://godoc.org/github.com/mvdan/xurls)
[![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls)
Extract URLs from text using regular expressions.
go get github.com/mvdan/xurls
go get -u github.com/mvdan/xurls
```go
import "github.com/mvdan/xurls"
@@ -12,18 +13,26 @@ import "github.com/mvdan/xurls"
func main() {
xurls.Relaxed.FindString("Do gophers live in golang.org?")
// "golang.org"
xurls.Relaxed.FindAllString("foo.com is http://foo.com/.", -1)
// []string{"foo.com", "http://foo.com/"}
xurls.Strict.FindAllString("foo.com is http://foo.com/.", -1)
// []string{"http://foo.com/"}
}
```
`Relaxed` is around five times slower than `Strict` since it does more
work to find the URLs without relying on the scheme:
```
BenchmarkStrictEmpty-4 1000000 1885 ns/op
BenchmarkStrictSingle-4 200000 8356 ns/op
BenchmarkStrictMany-4 100000 22547 ns/op
BenchmarkRelaxedEmpty-4 200000 7284 ns/op
BenchmarkRelaxedSingle-4 30000 58557 ns/op
BenchmarkRelaxedMany-4 10000 130251 ns/op
```
#### cmd/xurls
Reads text and prints one URL per line.
go get github.com/mvdan/xurls/cmd/xurls
go get -u github.com/mvdan/xurls/cmd/xurls
```shell
$ echo "Do gophers live in http://golang.org?" | xurls

File diff suppressed because one or more lines are too long

View File

@@ -1,3 +1,6 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package xurls
// SchemesNoAuthority is a sorted list of some well-known url schemes that are

View File

@@ -38,7 +38,6 @@ var TLDs = []string{
`afamilycompany`,
`afl`,
`africa`,
`africamagic`,
`ag`,
`agakhan`,
`agency`,
@@ -82,6 +81,7 @@ var TLDs = []string{
`archi`,
`army`,
`arpa`,
`art`,
`arte`,
`as`,
`asda`,
@@ -208,6 +208,7 @@ var TLDs = []string{
`cal`,
`call`,
`calvinklein`,
`cam`,
`camera`,
`camp`,
`cancerresearch`,
@@ -329,6 +330,7 @@ var TLDs = []string{
`dabur`,
`dad`,
`dance`,
`data`,
`date`,
`dating`,
`datsun`,
@@ -366,15 +368,14 @@ var TLDs = []string{
`dnp`,
`do`,
`docs`,
`doctor`,
`dodge`,
`dog`,
`doha`,
`domains`,
`doosan`,
`dot`,
`download`,
`drive`,
`dstv`,
`dtv`,
`dubai`,
`duck`,
@@ -383,11 +384,13 @@ var TLDs = []string{
`dupont`,
`durban`,
`dvag`,
`dvr`,
`dwg`,
`dz`,
`earth`,
`eat`,
`ec`,
`eco`,
`edeka`,
`edu`,
`education`,
@@ -395,7 +398,6 @@ var TLDs = []string{
`eg`,
`email`,
`emerck`,
`emerson`,
`energy`,
`engineer`,
`engineering`,
@@ -459,11 +461,11 @@ var TLDs = []string{
`flir`,
`florist`,
`flowers`,
`flsmidth`,
`fly`,
`fm`,
`fo`,
`foo`,
`food`,
`foodnetwork`,
`football`,
`ford`,
@@ -482,6 +484,7 @@ var TLDs = []string{
`ftr`,
`fujitsu`,
`fujixerox`,
`fun`,
`fund`,
`furniture`,
`futbol`,
@@ -521,6 +524,7 @@ var TLDs = []string{
`globo`,
`gm`,
`gmail`,
`gmbh`,
`gmo`,
`gmx`,
`gn`,
@@ -535,7 +539,6 @@ var TLDs = []string{
`google`,
`gop`,
`got`,
`gotv`,
`gov`,
`gp`,
`gq`,
@@ -545,6 +548,7 @@ var TLDs = []string{
`gratis`,
`green`,
`gripe`,
`grocery`,
`group`,
`gs`,
`gt`,
@@ -589,10 +593,12 @@ var TLDs = []string{
`honda`,
`honeywell`,
`horse`,
`hospital`,
`host`,
`hosting`,
`hot`,
`hoteles`,
`hotels`,
`hotmail`,
`house`,
`how`,
@@ -703,7 +709,6 @@ var TLDs = []string{
`kuokgroup`,
`kw`,
`ky`,
`kyknet`,
`kyoto`,
`kz`,
`la`,
@@ -785,6 +790,7 @@ var TLDs = []string{
`man`,
`management`,
`mango`,
`map`,
`market`,
`marketing`,
`markets`,
@@ -808,6 +814,7 @@ var TLDs = []string{
`men`,
`menu`,
`meo`,
`merckmsd`,
`metlife`,
`mg`,
`mh`,
@@ -825,9 +832,9 @@ var TLDs = []string{
`mm`,
`mma`,
`mn`,
`mnet`,
`mo`,
`mobi`,
`mobile`,
`mobily`,
`moda`,
`moe`,
@@ -856,7 +863,6 @@ var TLDs = []string{
`mtpc`,
`mtr`,
`mu`,
`multichoice`,
`museum`,
`mutual`,
`mutuelle`,
@@ -865,13 +871,11 @@ var TLDs = []string{
`mx`,
`my`,
`mz`,
`mzansimagic`,
`na`,
`nab`,
`nadex`,
`nagoya`,
`name`,
`naspers`,
`nationwide`,
`natura`,
`navy`,
@@ -931,6 +935,7 @@ var TLDs = []string{
`omega`,
`one`,
`ong`,
`onion`,
`onl`,
`online`,
`onyourside`,
@@ -958,7 +963,6 @@ var TLDs = []string{
`party`,
`passagens`,
`pay`,
`payu`,
`pccw`,
`pe`,
`pet`,
@@ -967,7 +971,9 @@ var TLDs = []string{
`pg`,
`ph`,
`pharmacy`,
`phd`,
`philips`,
`phone`,
`photo`,
`photography`,
`photos`,
@@ -1025,6 +1031,7 @@ var TLDs = []string{
`quest`,
`qvc`,
`racing`,
`radio`,
`raid`,
`re`,
`read`,
@@ -1109,6 +1116,7 @@ var TLDs = []string{
`scot`,
`sd`,
`se`,
`search`,
`seat`,
`secure`,
`security`,
@@ -1131,6 +1139,8 @@ var TLDs = []string{
`shia`,
`shiksha`,
`shoes`,
`shop`,
`shopping`,
`shouji`,
`show`,
`showtime`,
@@ -1184,12 +1194,12 @@ var TLDs = []string{
`stockholm`,
`storage`,
`store`,
`stream`,
`studio`,
`study`,
`style`,
`su`,
`sucks`,
`supersport`,
`supplies`,
`supply`,
`support`,
@@ -1264,7 +1274,6 @@ var TLDs = []string{
`town`,
`toyota`,
`toys`,
`tp`,
`tr`,
`trade`,
`trading`,
@@ -1407,6 +1416,7 @@ var TLDs = []string{
`ελ`,
`бел`,
`дети`,
`ею`,
`католик`,
`ком`,
`мкд`,

View File

@@ -1,3 +1,6 @@
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information
package xurls
// PseudoTLDs is a sorted list of some widely used unofficial TLDs.
@@ -16,7 +19,6 @@ var PseudoTLDs = []string{
`invalid`, // Invalid domain
`local`, // Local network
`localhost`, // Local network
`onion`, // Tor hidden services
`test`, // Test domain
`zkey`, // GNS domain name
}

View File

@@ -11,12 +11,13 @@ import "regexp"
const (
letter = `\p{L}`
mark = `\p{M}`
number = `\p{N}`
iriChar = letter + number
iriChar = letter + mark + number
currency = `\p{Sc}`
otherSymb = `\p{So}`
endChar = iriChar + `/\-+_&~*%=#` + currency
midChar = endChar + `@.,:;'?!|` + otherSymb
endChar = iriChar + `/\-+_&~*%=#` + currency + otherSymb
midChar = endChar + `@.,:;'?!|`
wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`