Drop link to moby fork

Ettore Di Giacinto
2021-10-24 17:16:36 +02:00
parent c897bffdfc
commit 67a07e7c5a
116 changed files with 8027 additions and 3184 deletions

View File

@@ -1,19 +1,22 @@
 language: go
 
 sudo: false
 
 os:
 - linux
 - osx
 
 go:
-- 1.9.x
-- 1.10.x
+- 1.13.x
+- 1.14.x
+- 1.15.x
 - master
 
-script:
-- go test -v -cpu=1,2,4 .
-- go test -v -cpu=2 -race -short .
+env:
+- GO111MODULE=off
+
+script:
+- diff <(gofmt -d .) <(printf "")
+- go test -v -cpu=1,2,4 .
+- go test -v -cpu=2 -race -short .
 
 matrix:
   allow_failures:

View File

@@ -1,4 +1,4 @@
-The MIT License (MIT)
+MIT License
 
 Copyright (c) 2014 Klaus Post
 
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
-

View File

@@ -39,7 +39,6 @@ You might need to get/update the dependencies:
 ```
 go get -u github.com/klauspost/compress
-go get -u github.com/klauspost/crc32
 ```
 
 Usage
@@ -65,7 +64,7 @@ Changes in [github.com/klauspost/compress](https://github.com/klauspost/compress
 ## Compression
 The simplest way to use this is to simply do the same as you would when using [compress/gzip](http://golang.org/pkg/compress/gzip).
-To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. Default values for this is SetConcurrency(250000, 16), meaning blocks are split at 250000 bytes and up to 16 blocks can be processing at once before the writer blocks.
+To change the block size, use the added (*pgzip.Writer).SetConcurrency(blockSize, blocks int) function. With this you can control the approximate size of your blocks, as well as how many you want to be processing in parallel. Default values for this is SetConcurrency(1MB, runtime.GOMAXPROCS(0)), meaning blocks are split at 1 MB and up to the number of CPU threads blocks can be processing at once before the writer blocks.
 
 Example:
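The paragraph above documents (*pgzip.Writer).SetConcurrency; the README's own example is not shown in this hunk. As a minimal, hedged sketch of the API it describes (my wording, relying only on the NewWriter and SetConcurrency signatures visible in this diff):

```go
package main

import (
	"bytes"
	"log"
	"runtime"

	gzip "github.com/klauspost/pgzip"
)

func main() {
	var buf bytes.Buffer
	w := gzip.NewWriter(&buf)
	// Mirror the new defaults explicitly: 1 MB blocks, with up to
	// GOMAXPROCS blocks compressing in parallel.
	if err := w.SetConcurrency(1<<20, runtime.GOMAXPROCS(0)); err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write([]byte("hello, world\n")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("compressed to %d bytes", buf.Len())
}
```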
@@ -99,19 +98,19 @@ See my blog post in [Benchmarks of Golang Gzip](https://blog.klauspost.com/go-gz
 Compression cost is usually about 0.2% with default settings with a block size of 250k.
 
-Example with GOMAXPROC set to 8 (quad core with 8 hyperthreads)
+Example with GOMAXPROC set to 32 (16 core CPU)
 
 Content is [Matt Mahoneys 10GB corpus](http://mattmahoney.net/dc/10gb.html). Compression level 6.
 
 Compressor | MB/sec | speedup | size | size overhead (lower=better)
 ------------|----------|---------|------|---------
-[gzip](http://golang.org/pkg/compress/gzip) (golang) | 7.21MB/s | 1.0x | 4786608902 | 0%
-[gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 10.98MB/s | 1.52x | 4781331645 | -0.11%
-[pgzip](https://github.com/klauspost/pgzip) (klauspost) | 50.76MB/s|7.04x | 4784121440 | -0.052%
-[bgzf](https://godoc.org/github.com/biogo/hts/bgzf) (biogo) | 38.65MB/s | 5.36x | 4924899484 | 2.889%
-[pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 32.00MB/s | 4.44x | 4791226567 | 0.096%
+[gzip](http://golang.org/pkg/compress/gzip) (golang) | 15.44MB/s (1 thread) | 1.0x | 4781329307 | 0%
+[gzip](http://github.com/klauspost/compress/gzip) (klauspost) | 135.04MB/s (1 thread) | 8.74x | 4894858258 | +2.37%
+[pgzip](https://github.com/klauspost/pgzip) (klauspost) | 1573.23MB/s | 101.9x | 4902285651 | +2.53%
+[bgzf](https://godoc.org/github.com/biogo/hts/bgzf) (biogo) | 361.40MB/s | 23.4x | 4869686090 | +1.85%
+[pargzip](https://godoc.org/github.com/golang/build/pargzip) (builder) | 306.01MB/s | 19.8x | 4786890417 | +0.12%
 
-pgzip also contains a [linear time compression](https://github.com/klauspost/compress#linear-time-compression) mode, that will allow compression at ~150MB per core per second, independent of the content.
+pgzip also contains a [linear time compression](https://github.com/klauspost/compress#linear-time-compression-huffman-only) mode, that will allow compression at ~250MB per core per second, independent of the content.
 
 See the [complete sheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing) for different content types and compression settings.
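The linear-time mode referenced above is selected via the compression level rather than a separate API. A hedged sketch, assuming pgzip re-exports flate's HuffmanOnly constant the way klauspost/compress/gzip does (check the vendored constants before relying on it):

```go
package main

import (
	"io"
	"log"
	"os"

	gzip "github.com/klauspost/pgzip"
)

func main() {
	// Huffman-only ("linear time") compression: throughput is roughly
	// independent of the content. gzip.HuffmanOnly is an assumption here.
	w, err := gzip.NewWriterLevel(os.Stdout, gzip.HuffmanOnly)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := io.Copy(w, os.Stdin); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
}
```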

View File

@@ -331,6 +331,16 @@ func (z *Reader) killReadAhead() error {
         // Wait for decompressor to be closed and return error, if any.
         e, ok := <-z.closeErr
         z.activeRA = false
+
+        for blk := range z.readAhead {
+            if blk.b != nil {
+                z.blockPool <- blk.b
+            }
+        }
+        if cap(z.current) > 0 {
+            z.blockPool <- z.current
+            z.current = nil
+        }
         if !ok {
             // Channel is closed, so if there was any error it has already been returned.
             return nil
@@ -418,6 +428,7 @@ func (z *Reader) doReadAhead() {
             case z.readAhead <- read{b: buf, err: err}:
             case <-closeReader:
                 // Sent on close, we don't care about the next results
+                z.blockPool <- buf
                 return
             }
             if err != nil {
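Both hunks in this file return buffers to z.blockPool, a channel used as a bounded free list, so blocks still in flight when the reader is torn down are recycled rather than leaked. Stripped of the reader plumbing, the pattern looks roughly like this (hypothetical names, a sketch only):

```go
package main

import "fmt"

func main() {
	// A buffered channel doubles as a bounded free list of blocks.
	blockPool := make(chan []byte, 4)
	for i := 0; i < cap(blockPool); i++ {
		blockPool <- make([]byte, 0, 1<<20)
	}

	buf := <-blockPool // borrow a block
	buf = append(buf[:0], "some data"...)
	fmt.Println(len(buf))

	// On shutdown, or when a pending result is abandoned, the block must
	// be handed back; otherwise the pool drains and allocations grow.
	blockPool <- buf
}
```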

View File

@@ -11,6 +11,7 @@ import (
     "hash"
     "hash/crc32"
     "io"
+    "runtime"
     "sync"
     "time"
@@ -18,9 +19,9 @@ import (
 )
 
 const (
-    defaultBlockSize = 256 << 10
+    defaultBlockSize = 1 << 20
     tailSize         = 16384
-    defaultBlocks    = 16
+    defaultBlocks    = 4
 )
 
 // These constants are copied from the flate package, so that code that imports
@@ -68,8 +69,8 @@ type result struct {
 // With this you can control the approximate size of your blocks,
 // as well as how many you want to be processing in parallel.
 //
-// Default values for this is SetConcurrency(250000, 16),
-// meaning blocks are split at 250000 bytes and up to 16 blocks
+// Default values for this is SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0)),
+// meaning blocks are split at 1 MB and up to the number of CPU threads
 // can be processing at once before the writer blocks.
 func (z *Writer) SetConcurrency(blockSize, blocks int) error {
     if blockSize <= tailSize {
@@ -84,7 +85,7 @@ func (z *Writer) SetConcurrency(blockSize, blocks int) error {
     z.blockSize = blockSize
     z.results = make(chan result, blocks)
     z.blocks = blocks
-    z.dstPool = sync.Pool{New: func() interface{} { return make([]byte, 0, blockSize+(blockSize)>>4) }}
+    z.dstPool.New = func() interface{} { return make([]byte, 0, blockSize+(blockSize)>>4) }
     return nil
 }
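The dstPool change above is subtle: assigning only the New field keeps buffers already cached in the pool, whereas the old code replaced the whole sync.Pool on every SetConcurrency call (and copying a sync.Pool is flagged by go vet's copylocks check). A minimal sketch of the retained-cache behavior:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	var pool sync.Pool
	pool.New = func() interface{} { return make([]byte, 0, 1024) }

	buf := pool.Get().([]byte)
	pool.Put(buf[:0]) // the cached buffer survives...

	// ...because only the New field is updated, not the Pool value:
	pool.New = func() interface{} { return make([]byte, 0, 2048) }
	// Likely prints 1024: the pooled buffer is reused, not reallocated.
	fmt.Println(cap(pool.Get().([]byte)))
}
```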
@@ -115,7 +116,7 @@ func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
         return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
     }
     z := new(Writer)
-    z.SetConcurrency(defaultBlockSize, defaultBlocks)
+    z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0))
     z.init(w, level)
     return z, nil
 }
@@ -174,7 +175,7 @@ func (z *Writer) Reset(w io.Writer) {
     if z.results != nil && !z.closed {
         close(z.results)
     }
-    z.SetConcurrency(defaultBlockSize, defaultBlocks)
+    z.SetConcurrency(defaultBlockSize, runtime.GOMAXPROCS(0))
     z.init(w, z.level)
 }
@@ -239,36 +240,36 @@ func (z *Writer) writeString(s string) (err error) {
 // compressCurrent will compress the data currently buffered
 // This should only be called from the main writer/flush/closer
 func (z *Writer) compressCurrent(flush bool) {
+    c := z.currentBuffer
+    if len(c) > z.blockSize {
+        // This can never happen through the public interface.
+        panic("len(z.currentBuffer) > z.blockSize (most likely due to concurrent Write race)")
+    }
+
     r := result{}
     r.result = make(chan []byte, 1)
     r.notifyWritten = make(chan struct{}, 0)
+    // Reserve a result slot
     select {
     case z.results <- r:
     case <-z.pushedErr:
         return
     }
 
-    // If block given is more than twice the block size, split it.
-    c := z.currentBuffer
-    if len(c) > z.blockSize*2 {
-        c = c[:z.blockSize]
-        z.wg.Add(1)
-        go z.compressBlock(c, z.prevTail, r, false)
-        z.prevTail = c[len(c)-tailSize:]
-        z.currentBuffer = z.currentBuffer[z.blockSize:]
-        z.compressCurrent(flush)
-        // Last one flushes if needed
-        return
-    }
     z.wg.Add(1)
-    go z.compressBlock(c, z.prevTail, r, z.closed)
+    tail := z.prevTail
     if len(c) > tailSize {
-        z.prevTail = c[len(c)-tailSize:]
+        buf := z.dstPool.Get().([]byte) // Put in .compressBlock
+        // Copy tail from current buffer before handing the buffer over to the
+        // compressBlock goroutine.
+        buf = append(buf[:0], c[len(c)-tailSize:]...)
+        z.prevTail = buf
     } else {
         z.prevTail = nil
     }
-    z.currentBuffer = z.dstPool.Get().([]byte)
+    go z.compressBlock(c, tail, r, z.closed)
+
+    z.currentBuffer = z.dstPool.Get().([]byte) // Put in .compressBlock
     z.currentBuffer = z.currentBuffer[:0]
 
     // Wait if flushing
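The reworked tail handling exists because z.prevTail previously aliased the outgoing buffer: once that buffer is handed to the compressBlock goroutine and later recycled through dstPool, any slice into it can be overwritten. A small illustration of the aliasing hazard (standalone sketch, not the vendored code):

```go
package main

import "fmt"

func main() {
	block := []byte("....last-16-bytes")

	alias := block[len(block)-4:]         // shares block's backing array
	safe := append([]byte(nil), alias...) // independent copy

	block[len(block)-1] = '!' // e.g. the buffer gets reused elsewhere

	fmt.Println(string(alias)) // "yte!": sees the mutation
	fmt.Println(string(safe))  // "ytes": unaffected
}
```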
@@ -358,29 +359,37 @@ func (z *Writer) Write(p []byte) (int, error) {
         // Start receiving data from compressors
         go func() {
             listen := z.results
+            var failed bool
             for {
                 r, ok := <-listen
                 // If closed, we are finished.
                 if !ok {
                     return
                 }
+                if failed {
+                    close(r.notifyWritten)
+                    continue
+                }
                 buf := <-r.result
                 n, err := z.w.Write(buf)
                 if err != nil {
                     z.pushError(err)
                     close(r.notifyWritten)
-                    return
+                    failed = true
+                    continue
                 }
                 if n != len(buf) {
                     z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf)))
+                    failed = true
                     close(r.notifyWritten)
-                    return
+                    continue
                 }
+                z.dstPool.Put(buf)
                 close(r.notifyWritten)
             }
         }()
-        z.currentBuffer = make([]byte, 0, z.blockSize)
+        z.currentBuffer = z.dstPool.Get().([]byte)
+        z.currentBuffer = z.currentBuffer[:0]
     }
     q := p
     for len(q) > 0 {
@@ -390,10 +399,13 @@ func (z *Writer) Write(p []byte) (int, error) {
         }
         z.digest.Write(q[:length])
         z.currentBuffer = append(z.currentBuffer, q[:length]...)
-        if len(z.currentBuffer) >= z.blockSize {
+        if len(z.currentBuffer) > z.blockSize {
+            panic("z.currentBuffer too large (most likely due to concurrent Write race)")
+        }
+        if len(z.currentBuffer) == z.blockSize {
             z.compressCurrent(false)
             if err := z.checkError(); err != nil {
-                return len(p) - len(q) - length, err
+                return len(p) - len(q), err
             }
         }
         z.size += length
@@ -410,12 +422,13 @@ func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) {
         close(r.result)
         z.wg.Done()
     }()
-    buf := z.dstPool.Get().([]byte)
+    buf := z.dstPool.Get().([]byte) // Corresponding Put in .Write's result writer
     dest := bytes.NewBuffer(buf[:0])
 
-    compressor := z.dictFlatePool.Get().(*flate.Writer)
+    compressor := z.dictFlatePool.Get().(*flate.Writer) // Put below
     compressor.ResetDict(dest, prevTail)
     compressor.Write(p)
+    z.dstPool.Put(p) // Corresponding Get in .Write and .compressCurrent
 
     err := compressor.Flush()
     if err != nil {
@@ -429,7 +442,12 @@ func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) {
             return
         }
     }
-    z.dictFlatePool.Put(compressor)
+    z.dictFlatePool.Put(compressor) // Get above
+
+    if prevTail != nil {
+        z.dstPool.Put(prevTail) // Get in .compressCurrent
+    }
+
     // Read back buffer
     buf = dest.Bytes()
     r.result <- buf
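The comments added in this file ("Put in ...", "Get in ...") spell out an ownership rule: every dstPool.Get is paired with exactly one Put, even when a buffer crosses a goroutine boundary. A condensed sketch of that hand-off (hypothetical names, not the vendored code):

```go
package main

import (
	"bytes"
	"fmt"
	"sync"
)

var dstPool = sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}

// compress takes ownership of p and is responsible for returning it.
func compress(p []byte, out chan<- []byte) {
	dst := dstPool.Get().([]byte) // matching Put: in the consumer of out
	buf := bytes.NewBuffer(dst[:0])
	buf.Write(p)   // stand-in for the real flate compression
	dstPool.Put(p) // matching Get: in the producer below
	out <- buf.Bytes()
}

func main() {
	out := make(chan []byte, 1)
	src := dstPool.Get().([]byte) // matching Put: inside compress
	src = append(src[:0], "hello"...)
	go compress(src, out)
	res := <-out
	fmt.Println(string(res))
	dstPool.Put(res[:0]) // matching Get: inside compress
}
```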