Merge pull request #701 from vrothberg/release

Release v0.1.39
2025-06-28 23:57:51 +00:00 · 2019-08-06 15:55:50 +02:00 · 2019-08-06 15:55:50 +02:00 · 44bc4a9eb7
commit 44bc4a9eb7
parent c040b28fb8 89d6d0c70f
550 changed files with 154298 additions and 23468 deletions
--- a/go.mod
+++ b/go.mod
@ -3,20 +3,15 @@ module github.com/containers/skopeo
 go 1.12

 require (
-	github.com/BurntSushi/toml v0.3.1 // indirect
-	github.com/Microsoft/go-winio v0.4.12 // indirect
-	github.com/Microsoft/hcsshim v0.8.6 // indirect
 	github.com/VividCortex/ewma v1.1.1 // indirect
 	github.com/containerd/continuity v0.0.0-20180216233310-d8fb8589b0e8 // indirect
 	github.com/containers/buildah v1.8.4
-	github.com/containers/image v3.0.0+incompatible
-	github.com/containers/storage v1.12.10
-	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/containers/image v3.0.1+incompatible
+	github.com/containers/storage v1.13.0
 	github.com/docker/distribution v0.0.0-20170817175659-5f6282db7d65 // indirect
 	github.com/docker/docker v0.0.0-20180522102801-da99009bbb11
 	github.com/docker/docker-credential-helpers v0.6.0 // indirect
 	github.com/docker/go-connections v0.0.0-20180212134524-7beb39f0b969 // indirect
-	github.com/docker/go-units v0.0.0-20161020213227-8a7beacffa30 // indirect
 	github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 // indirect
 	github.com/etcd-io/bbolt v1.3.2 // indirect
 	github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680 // indirect
@ -25,42 +20,27 @@ require (
 	github.com/gorilla/context v0.0.0-20140604161150-14f550f51af5 // indirect
 	github.com/gorilla/mux v0.0.0-20140926153814-e444e69cbd2e // indirect
 	github.com/imdario/mergo v0.0.0-20141206190957-6633656539c1 // indirect
-	github.com/klauspost/compress v1.4.1 // indirect
-	github.com/klauspost/cpuid v1.2.0 // indirect
-	github.com/klauspost/pgzip v1.2.1 // indirect
 	github.com/kr/pretty v0.1.0 // indirect
 	github.com/mattn/go-isatty v0.0.4 // indirect
-	github.com/mattn/go-shellwords v1.0.5 // indirect
-	github.com/mistifyio/go-zfs v0.0.0-20160425201758-22c9b32c84eb // indirect
 	github.com/mtrmac/gpgme v0.0.0-20170102180018-b2432428689c // indirect
-	github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2
+	github.com/opencontainers/go-digest v1.0.0-rc1
 	github.com/opencontainers/image-spec v0.0.0-20180918080442-7b1e489870ac
 	github.com/opencontainers/image-tools v0.0.0-20170926011501-6d941547fa1d
-	github.com/opencontainers/runc v1.0.0-rc6 // indirect
 	github.com/opencontainers/runtime-spec v1.0.0 // indirect
-	github.com/opencontainers/selinux v0.0.0-20190118194635-b707dfcb00a1 // indirect
-	github.com/ostreedev/ostree-go v0.0.0-20181204105935-56f3a639dbc0 // indirect
 	github.com/pborman/uuid v0.0.0-20160209185913-a97ce2ca70fa // indirect
 	github.com/pkg/errors v0.8.1
-	github.com/pmezard/go-difflib v0.0.0-20181226105442-5d4384ee4fb2 // indirect
-	github.com/pquerna/ffjson v0.0.0-20171002144729-d49c2bc1aa13 // indirect
-	github.com/sirupsen/logrus v1.0.0
-	github.com/stretchr/testify v1.1.3
+	github.com/sirupsen/logrus v1.4.2
+	github.com/stretchr/testify v1.3.0
 	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
-	github.com/tchap/go-patricia v2.2.6+incompatible // indirect
 	github.com/ulikunitz/xz v0.5.4 // indirect
 	github.com/urfave/cli v1.20.0
-	github.com/vbatts/tar-split v0.10.2 // indirect
 	github.com/vbauerster/mpb v3.4.0+incompatible // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
 	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
 	github.com/xeipuuv/gojsonschema v1.1.0 // indirect
 	go4.org v0.0.0-20190218023631-ce4c26f7be8e // indirect
-	golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2 // indirect
-	golang.org/x/net v0.0.0-20190107210223-45ffb0cd1ba0 // indirect
 	golang.org/x/sync v0.0.0-20181108010431-42b317875d0f // indirect
-	golang.org/x/sys v0.0.0-20170817234608-43e60d72a8e2 // indirect
-	golang.org/x/text v0.0.0-20181227161524-e6919f6577db // indirect
+	golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e // indirect
 	gopkg.in/yaml.v2 v2.0.0-20141029210843-d466437aa4ad // indirect
 	k8s.io/client-go v0.0.0-20181219152756-3dd551c0f083 // indirect
 )
--- a/go.sum
+++ b/go.sum
@ -1,5 +1,6 @@
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/DataDog/zstd v1.4.0/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
 github.com/Microsoft/go-winio v0.4.12 h1:xAfWHN1IrQ0NJ9TBC0KBZoqLjzDTr1ML+4MywiUOryc=
 github.com/Microsoft/go-winio v0.4.12/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
 github.com/Microsoft/hcsshim v0.8.6 h1:ZfF0+zZeYdzMIVMZHKtDKJvLHj76XCuVae/jNkjj0IA=
@ -20,12 +21,18 @@ github.com/containers/image v2.0.0+incompatible h1:FTr6Br7jlIKNCKMjSOMbAxKp2keQ0
 github.com/containers/image v2.0.0+incompatible/go.mod h1:8Vtij257IWSanUQKe1tAeNOm2sRVkSqQTVQ1IlwI3+M=
 github.com/containers/image v3.0.0+incompatible h1:pdUHY//H+3jYNnoTt+rqY8NsStX4ZBLKzPTlMC+XvnU=
 github.com/containers/image v3.0.0+incompatible/go.mod h1:8Vtij257IWSanUQKe1tAeNOm2sRVkSqQTVQ1IlwI3+M=
+github.com/containers/image v3.0.1+incompatible h1:VlNEQUI1JHa1SJfJ4jz/GBt7gpk+aRYGR6TUKsxXMkU=
+github.com/containers/image v3.0.1+incompatible/go.mod h1:8Vtij257IWSanUQKe1tAeNOm2sRVkSqQTVQ1IlwI3+M=
 github.com/containers/storage v1.12.10 h1:vw1aiLsZ1LvO09ELMxVBTe35tThRiMftI2cPeH+G5ow=
 github.com/containers/storage v1.12.10/go.mod h1:+RirK6VQAqskQlaTBrOG6ulDvn4si2QjFE1NZCn06MM=
+github.com/containers/storage v1.13.0 h1:jd0lLiGYzE3k1tMR4PB6/WDfenNd0Db0E/m9QSVaroI=
+github.com/containers/storage v1.13.0/go.mod h1:6D8nK2sU9V7nEmAraINRs88ZEscM5C5DK+8Npp27GeA=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/docker/distribution v0.0.0-20170817175659-5f6282db7d65 h1:4zlOyrJUbYnrvlzChJ+jP2J3i77Jbhm336NEuCv7kZo=
 github.com/docker/distribution v0.0.0-20170817175659-5f6282db7d65/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
+github.com/docker/docker v0.0.0-20171019062838-86f080cff091/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
 github.com/docker/docker v0.0.0-20180522102801-da99009bbb11 h1:p8hSDXZgVhyh/C9bPlG8QMY64VeXtVfjmjIlzaQok5Q=
 github.com/docker/docker v0.0.0-20180522102801-da99009bbb11/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
 github.com/docker/docker-credential-helpers v0.6.0 h1:5bhDRLn1roGiNjz8IezRngHxMfoeaXGyr0BeMHq4rD8=
@ -34,6 +41,8 @@ github.com/docker/go-connections v0.0.0-20180212134524-7beb39f0b969 h1:p2WzwcFof
 github.com/docker/go-connections v0.0.0-20180212134524-7beb39f0b969/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
 github.com/docker/go-units v0.0.0-20161020213227-8a7beacffa30 h1:dDGntbHn0CUgKCyVvmHcD+spha+/4+8hJv5nbZVS6R8=
 github.com/docker/go-units v0.0.0-20161020213227-8a7beacffa30/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
+github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 h1:UhxFibDNY/bfvqU5CAUmr9zpesgbU6SWc8/B4mflAE4=
 github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE=
 github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0=
@ -44,6 +53,7 @@ github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJY
 github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
 github.com/gogo/protobuf v0.0.0-20170815085658-fcdc5011193f h1:r/AdTzqktq9nQpFlFePWcp+scVi+oFRajfjRJ3UnETg=
 github.com/gogo/protobuf v0.0.0-20170815085658-fcdc5011193f/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/gorilla/context v0.0.0-20140604161150-14f550f51af5 h1:yCHB2BCyFu0V6ChUHb8sF2VodD5B0PAgPDoCxBE7ICQ=
 github.com/gorilla/context v0.0.0-20140604161150-14f550f51af5/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
 github.com/gorilla/mux v0.0.0-20140926153814-e444e69cbd2e h1:nH09qCdJVZxw0nRVfm14xjXkw2puLyLPN56n4u+vTC0=
@ -52,10 +62,16 @@ github.com/imdario/mergo v0.0.0-20141206190957-6633656539c1 h1:FeeCi0I2Fu8kA8IXr
 github.com/imdario/mergo v0.0.0-20141206190957-6633656539c1/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
 github.com/klauspost/compress v1.4.1 h1:8VMb5+0wMgdBykOV96DwNwKFQ+WTI4pzYURP99CcB9E=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.7.2 h1:liMOoeIvFpr9kEvalrZ7VVBA4wGf7zfOgwBjzz/5g2Y=
+github.com/klauspost/compress v1.7.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w=
+github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
 github.com/klauspost/pgzip v1.2.1/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@ -67,44 +83,71 @@ github.com/mattn/go-shellwords v1.0.5 h1:JhhFTIOslh5ZsPrpa3Wdg8bF0WI3b44EMblmU9w
 github.com/mattn/go-shellwords v1.0.5/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
 github.com/mistifyio/go-zfs v0.0.0-20160425201758-22c9b32c84eb h1:iTqJ2fjDnaldY7BXhfc15HkT769kWAstiz2bCmUrKAw=
 github.com/mistifyio/go-zfs v0.0.0-20160425201758-22c9b32c84eb/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
+github.com/mistifyio/go-zfs v2.1.1+incompatible h1:gAMO1HM9xBRONLHHYnu5iFsOJUiJdNZo6oqSENd4eW8=
+github.com/mistifyio/go-zfs v2.1.1+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
 github.com/mtrmac/gpgme v0.0.0-20170102180018-b2432428689c h1:xa+eQWKuJ9MbB9FBL/eoNvDFvveAkz2LQoz8PzX7Q/4=
 github.com/mtrmac/gpgme v0.0.0-20170102180018-b2432428689c/go.mod h1:GhAqVMEWnTcW2dxoD/SO3n2enrgWl3y6Dnx4m59GvcA=
 github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2 h1:QhPf3A2AZW3tTGvHPg0TA+CR3oHbVLlXUhlghqISp1I=
 github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
+github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ=
+github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
 github.com/opencontainers/image-spec v0.0.0-20180918080442-7b1e489870ac h1:Y0AqP4onEqgQST60GE172L61SAFMZMHQgXbwLMyj418=
 github.com/opencontainers/image-spec v0.0.0-20180918080442-7b1e489870ac/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
 github.com/opencontainers/image-tools v0.0.0-20170926011501-6d941547fa1d h1:X9WSFjjZNqYRqO2MenUgqE2nj/oydcfIzXJ0R/SVnnA=
 github.com/opencontainers/image-tools v0.0.0-20170926011501-6d941547fa1d/go.mod h1:A9btVpZLzttF4iFaKNychhPyrhfOjJ1OF5KrA8GcLj4=
 github.com/opencontainers/runc v1.0.0-rc6 h1:7AoN22rYxxkmsJS48wFaziH/n0OvrZVqL/TglgHKbKQ=
 github.com/opencontainers/runc v1.0.0-rc6/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
+github.com/opencontainers/runc v1.0.0-rc8 h1:dDCFes8Hj1r/i5qnypONo5jdOme/8HWZC/aNDyhECt0=
+github.com/opencontainers/runc v1.0.0-rc8/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
 github.com/opencontainers/runtime-spec v1.0.0 h1:O6L965K88AilqnxeYPks/75HLpp4IG+FjeSCI3cVdRg=
 github.com/opencontainers/runtime-spec v1.0.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/selinux v0.0.0-20190118194635-b707dfcb00a1 h1:V8Icxoi2vzXvXaH0wuUZR+oyDvyRISW/1fXiK69le8E=
 github.com/opencontainers/selinux v0.0.0-20190118194635-b707dfcb00a1/go.mod h1:+BLncwf63G4dgOzykXAxcmnFlUaOlkDdmw/CqsW6pjs=
+github.com/opencontainers/selinux v1.2.2 h1:Kx9J6eDG5/24A6DtUquGSpJQ+m2MUTahn4FtGEe8bFg=
+github.com/opencontainers/selinux v1.2.2/go.mod h1:+BLncwf63G4dgOzykXAxcmnFlUaOlkDdmw/CqsW6pjs=
 github.com/ostreedev/ostree-go v0.0.0-20181204105935-56f3a639dbc0 h1:l8oDb3Ln30sysfGafRZJ9zNnzYfNyWy+w4fGZjii5rQ=
 github.com/ostreedev/ostree-go v0.0.0-20181204105935-56f3a639dbc0/go.mod h1:J6OG6YJVEWopen4avK3VNQSnALmmjvniMmni/YFYAwc=
+github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913 h1:TnbXhKzrTOyuvWrjI8W6pcoI9XPbLHFXCdN2dtUw7Rw=
+github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913/go.mod h1:J6OG6YJVEWopen4avK3VNQSnALmmjvniMmni/YFYAwc=
 github.com/pborman/uuid v0.0.0-20160209185913-a97ce2ca70fa h1:l8VQbMdmwFH37kOOaWQ/cw24/u8AuBz5lUym13Wcu0Y=
 github.com/pborman/uuid v0.0.0-20160209185913-a97ce2ca70fa/go.mod h1:VyrYX9gd7irzKovcSS6BIIEwPRkP2Wm2m9ufcdFSJ34=
+github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v0.0.0-20181226105442-5d4384ee4fb2 h1:Dp6WLvjytJLgEEknBM9ie5JffieQzzdv2pNpwCJ6lQQ=
 github.com/pmezard/go-difflib v0.0.0-20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pquerna/ffjson v0.0.0-20171002144729-d49c2bc1aa13 h1:AUK/hm/tPsiNNASdb3J8fySVRZoI7fnK5mlOvdFD43o=
 github.com/pquerna/ffjson v0.0.0-20171002144729-d49c2bc1aa13/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M=
+github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7 h1:gGBSHPOU7g8YjTbhwn+lvFm2VDEhhA+PwDIlstkgSxE=
+github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M=
 github.com/sirupsen/logrus v1.0.0 h1:XM8X4m/9ACaclZMs946FQNEZBZafvToJLTR4007drwo=
 github.com/sirupsen/logrus v1.0.0/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
+github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
+github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
+github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.1.3 h1:76sIvNG1I8oBerx/MvuVHh5HBWBW7oxfsi3snKIsz5w=
 github.com/stretchr/testify v1.1.3/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 h1:b6uOv7YOFK0TYG7HtkIgExQo+2RdLuwRft63jn2HWj8=
 github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/tchap/go-patricia v2.2.6+incompatible h1:JvoDL7JSoIP2HDE8AbDH3zC8QBPxmzYe32HHy5yQ+Ck=
 github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I=
+github.com/tchap/go-patricia v2.3.0+incompatible h1:GkY4dP3cEfEASBPPkWd+AmjYxhmDkqO9/zg7R0lSQRs=
+github.com/tchap/go-patricia v2.3.0+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I=
 github.com/ulikunitz/xz v0.5.4 h1:zATC2OoZ8H1TZll3FpbX+ikwmadbO699PE06cIkm9oU=
 github.com/ulikunitz/xz v0.5.4/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
 github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
 github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
 github.com/vbatts/tar-split v0.10.2 h1:CXd7HEKGkTLjBMinpObcJZU5Hm8EKlor2a1JtX6msXQ=
 github.com/vbatts/tar-split v0.10.2/go.mod h1:LEuURwDEiWjRjwu46yU3KVGuUdVv/dcnpcEPSzR8z6g=
+github.com/vbatts/tar-split v0.11.1 h1:0Odu65rhcZ3JZaPHxl7tCI3V/C/Q9Zf82UFravl02dE=
+github.com/vbatts/tar-split v0.11.1/go.mod h1:LEuURwDEiWjRjwu46yU3KVGuUdVv/dcnpcEPSzR8z6g=
 github.com/vbauerster/mpb v3.3.4+incompatible h1:DDIhnwmgTQIDZo+SWlEr5d6mJBxkOLBwCXPzunhEfJ4=
 github.com/vbauerster/mpb v3.3.4+incompatible/go.mod h1:zAHG26FUhVKETRu+MWqYXcI70POlC6N8up9p1dID7SU=
 github.com/vbauerster/mpb v3.4.0+incompatible h1:mfiiYw87ARaeRW6x5gWwYRUawxaW1tLAD8IceomUCNw=
@ -123,16 +166,28 @@ go4.org v0.0.0-20190218023631-ce4c26f7be8e h1:m9LfARr2VIOW0vsV19kEKp/sWQvZnGobA8
 go4.org v0.0.0-20190218023631-ce4c26f7be8e/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE=
 golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2 h1:NwxKRvbkH5MsNkvOtPZi3/3kmI8CAzs3mtv+GLQMkNo=
 golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/net v0.0.0-20190107210223-45ffb0cd1ba0 h1:1DW40AJQ7AP4nY6ORUGUdkpXyEC9W2GAXcOPaMZK0K8=
 golang.org/x/net v0.0.0-20190107210223-45ffb0cd1ba0/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f h1:Bl/8QSvNqXvPGPGXa2z5xUTmV7VDcZyvRZ+QQXkXTZQ=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20170817234608-43e60d72a8e2 h1:90z1vgEVOG718nzy69KGhEtYepBetip3OSWJjMnI8Bw=
 golang.org/x/sys v0.0.0-20170817234608-43e60d72a8e2/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/text v0.0.0-20181227161524-e6919f6577db h1:ERgn/rMlavvbd/tNSkNoiKxiwdqWKnOfIB/X6qFxWsM=
 golang.org/x/text v0.0.0-20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20180810170437-e96c4e24768d/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 gopkg.in/yaml.v2 v2.0.0-20141029210843-d466437aa4ad h1:3SOi6w/NEma/Ir04qIGumn/RZwbXRhJSM7gN9YN8Ajc=
 gopkg.in/yaml.v2 v2.0.0-20141029210843-d466437aa4ad/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
+gotest.tools v0.0.0-20190624233834-05ebafbffc79/go.mod h1:R//lfYlUuTOTfblYI3lGoAAAebUdzjvbmQsuB7Ykd90=
 k8s.io/client-go v0.0.0-20181219152756-3dd551c0f083 h1:+Qf/nITucAbm09aIdxvoA+7X0BwaXmQGVoR8k7Ynk9o=
 k8s.io/client-go v0.0.0-20181219152756-3dd551c0f083/go.mod h1:7vJpHMYJwNQCWgzmNV+VYUl1zCObLyodBc8nIyt8L5s=
--- a/vendor/github.com/containers/image/pkg/keyctl/key.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/key.go
@ -3,12 +3,11 @@
 // license that can be found in the LICENSE file.

 // +build linux
-// +build 386 amd64

 package keyctl

 import (
-	"unsafe"
+	"golang.org/x/sys/unix"
 )

 // Key represents a single key linked to one or more kernel keyrings.
@ -41,7 +40,7 @@ func (k *Key) Get() ([]byte, error) {
 	b = make([]byte, int(size))
 	sizeRead = size + 1
 	for sizeRead > size {
-		r1, _, err := keyctl(keyctlRead, uintptr(k.id), uintptr(unsafe.Pointer(&b[0])), uintptr(size))
+		r1, err := unix.KeyctlBuffer(unix.KEYCTL_READ, int(k.id), b, size)
 		if err != nil {
 			return nil, err
 		}
@ -60,6 +59,6 @@ func (k *Key) Get() ([]byte, error) {
 // Unlink a key from the keyring it was loaded from (or added to). If the key
 // is not linked to any other keyrings, it is destroyed.
 func (k *Key) Unlink() error {
-	_, _, err := keyctl(keyctlUnlink, uintptr(k.id), uintptr(k.ring))
+	_, err := unix.KeyctlInt(unix.KEYCTL_UNLINK, int(k.id), int(k.ring), 0, 0)
 	return err
 }
--- a/vendor/github.com/containers/image/pkg/keyctl/keyring.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/keyring.go
@ -3,11 +3,17 @@
 // license that can be found in the LICENSE file.

 // +build linux
-// +build 386 amd64

 // Package keyctl is a Go interface to linux kernel keyrings (keyctl interface)
+//
+// Deprecated: Most callers should use either golang.org/x/sys/unix directly,
+// or the original (and more extensive) github.com/jsipprell/keyctl .
 package keyctl

+import (
+	"golang.org/x/sys/unix"
+)
+
 // Keyring is the basic interface to a linux keyctl keyring.
 type Keyring interface {
 	ID
@ -26,7 +32,7 @@ type ID interface {

 // Add a new key to a keyring. The key can be searched for later by name.
 func (kr *keyring) Add(name string, key []byte) (*Key, error) {
-	r, err := addkey("user", name, key, int32(kr.id))
+	r, err := unix.AddKey("user", name, key, int(kr.id))
 	if err == nil {
 		key := &Key{Name: name, id: keyID(r), ring: kr.id}
 		return key, nil
@ -38,9 +44,9 @@ func (kr *keyring) Add(name string, key []byte) (*Key, error) {
 // one. The key, if found, is linked to the top keyring that Search() was called
 // from.
 func (kr *keyring) Search(name string) (*Key, error) {
-	id, err := searchKeyring(kr.id, name, "user")
+	id, err := unix.KeyctlSearch(int(kr.id), "user", name, 0)
 	if err == nil {
-		return &Key{Name: name, id: id, ring: kr.id}, nil
+		return &Key{Name: name, id: keyID(id), ring: kr.id}, nil
 	}
 	return nil, err
 }
@ -52,22 +58,22 @@ func (kr *keyring) ID() int32 {

 // SessionKeyring returns the current login session keyring
 func SessionKeyring() (Keyring, error) {
-	return newKeyring(keySpecSessionKeyring)
+	return newKeyring(unix.KEY_SPEC_SESSION_KEYRING)
 }

 // UserKeyring  returns the keyring specific to the current user.
 func UserKeyring() (Keyring, error) {
-	return newKeyring(keySpecUserKeyring)
+	return newKeyring(unix.KEY_SPEC_USER_KEYRING)
 }

 // Unlink an object from a keyring
 func Unlink(parent Keyring, child ID) error {
-	_, _, err := keyctl(keyctlUnlink, uintptr(child.ID()), uintptr(parent.ID()))
+	_, err := unix.KeyctlInt(unix.KEYCTL_UNLINK, int(child.ID()), int(parent.ID()), 0, 0)
 	return err
 }

 // Link a key into a keyring
 func Link(parent Keyring, child ID) error {
-	_, _, err := keyctl(keyctlLink, uintptr(child.ID()), uintptr(parent.ID()))
+	_, err := unix.KeyctlInt(unix.KEYCTL_LINK, int(child.ID()), int(parent.ID()), 0, 0)
 	return err
 }
--- a/vendor/github.com/containers/image/pkg/keyctl/perm.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/perm.go
@ -6,6 +6,10 @@

 package keyctl

+import (
+	"golang.org/x/sys/unix"
+)
+
 // KeyPerm represents in-kernel access control permission to keys and keyrings
 // as a 32-bit integer broken up into four permission sets, one per byte.
 // In MSB order, the perms are: Processor, User, Group, Other.
@ -24,6 +28,6 @@ const (

 // SetPerm sets the permissions on a key or keyring.
 func SetPerm(k ID, p KeyPerm) error {
-	_, _, err := keyctl(keyctlSetPerm, uintptr(k.ID()), uintptr(p))
+	err := unix.KeyctlSetperm(int(k.ID()), uint32(p))
 	return err
 }
--- a/vendor/github.com/containers/image/pkg/keyctl/sys_linux.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/sys_linux.go
@ -3,119 +3,23 @@
 // license that can be found in the LICENSE file.

 // +build linux
-// +build 386 amd64

 package keyctl

 import (
-	"syscall"
-	"unsafe"
+	"golang.org/x/sys/unix"
 )

-type keyctlCommand int
-
 type keyID int32

-const (
-	keySpecSessionKeyring keyID = -3
-	keySpecUserKeyring    keyID = -4
-)
-
-const (
-	keyctlGetKeyringID keyctlCommand = 0
-	keyctlSetPerm      keyctlCommand = 5
-	keyctlLink         keyctlCommand = 8
-	keyctlUnlink       keyctlCommand = 9
-	keyctlSearch       keyctlCommand = 10
-	keyctlRead         keyctlCommand = 11
-)
-
-func (id keyID) ID() int32 {
-	return int32(id)
-}
-
-func keyctl(cmd keyctlCommand, args ...uintptr) (r1 int32, r2 int32, err error) {
-	a := make([]uintptr, 6)
-	l := len(args)
-	if l > 5 {
-		l = 5
-	}
-	a[0] = uintptr(cmd)
-	for idx, v := range args[:l] {
-		a[idx+1] = v
-	}
-	v1, v2, errno := syscall.Syscall6(syscallKeyctl, a[0], a[1], a[2], a[3], a[4], a[5])
-	if errno != 0 {
-		err = errno
-		return
-	}
-
-	r1 = int32(v1)
-	r2 = int32(v2)
-	return
-}
-
-func addkey(keyType, keyDesc string, payload []byte, id int32) (int32, error) {
-	var (
-		err    error
-		errno  syscall.Errno
-		b1, b2 *byte
-		r1     uintptr
-		pptr   unsafe.Pointer
-	)
-
-	if b1, err = syscall.BytePtrFromString(keyType); err != nil {
-		return 0, err
-	}
-
-	if b2, err = syscall.BytePtrFromString(keyDesc); err != nil {
-		return 0, err
-	}
-
-	if len(payload) > 0 {
-		pptr = unsafe.Pointer(&payload[0])
-	}
-	r1, _, errno = syscall.Syscall6(syscallAddKey,
-		uintptr(unsafe.Pointer(b1)),
-		uintptr(unsafe.Pointer(b2)),
-		uintptr(pptr),
-		uintptr(len(payload)),
-		uintptr(id),
-		0)
-
-	if errno != 0 {
-		err = errno
-		return 0, err
-	}
-	return int32(r1), nil
-}
-
 func newKeyring(id keyID) (*keyring, error) {
-	r1, _, err := keyctl(keyctlGetKeyringID, uintptr(id), uintptr(1))
+	r1, err := unix.KeyctlGetKeyringID(int(id), true)
 	if err != nil {
 		return nil, err
 	}

 	if id < 0 {
-		r1 = int32(id)
+		r1 = int(id)
 	}
 	return &keyring{id: keyID(r1)}, nil
 }
-
-func searchKeyring(id keyID, name, keyType string) (keyID, error) {
-	var (
-		r1     int32
-		b1, b2 *byte
-		err    error
-	)
-
-	if b1, err = syscall.BytePtrFromString(keyType); err != nil {
-		return 0, err
-	}
-	if b2, err = syscall.BytePtrFromString(name); err != nil {
-		return 0, err
-	}
-
-	r1, _, err = keyctl(keyctlSearch, uintptr(id), uintptr(unsafe.Pointer(b1)), uintptr(unsafe.Pointer(b2)))
-	return keyID(r1), err
-}
--- a/vendor/github.com/containers/image/pkg/keyctl/sys_linux_386.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/sys_linux_386.go
@ -1,12 +0,0 @@
-// Copyright 2015 Jesse Sipprell. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build linux
-
-package keyctl
-
-const (
-	syscallKeyctl uintptr = 288
-	syscallAddKey uintptr = 286
-)
--- a/vendor/github.com/containers/image/pkg/keyctl/sys_linux_amd64.go
+++ b/vendor/github.com/containers/image/pkg/keyctl/sys_linux_amd64.go
@ -1,12 +0,0 @@
-// Copyright 2015 Jesse Sipprell. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build linux
-
-package keyctl
-
-const (
-	syscallKeyctl uintptr = 250
-	syscallAddKey uintptr = 248
-)
--- a/vendor/github.com/containers/image/version/version.go
+++ b/vendor/github.com/containers/image/version/version.go
@ -8,7 +8,7 @@ const (
 	// VersionMinor is for functionality in a backwards-compatible manner
 	VersionMinor = 0
 	// VersionPatch is for backwards-compatible bug fixes
-	VersionPatch = 0
+	VersionPatch = 1

 	// VersionDev indicates development branch. Releases will be empty string.
 	VersionDev = ""
--- a/vendor/github.com/containers/storage/.cirrus.yml
+++ b/vendor/github.com/containers/storage/.cirrus.yml
@ -71,6 +71,19 @@ testing_task:
        failure_ubuntu_audit_log_script: '${_UAUDITCMD} || true'
        failure_journal_log_script: '${_JOURNALCMD} || true'

+lint_task:
+    env:
+        CIRRUS_WORKING_DIR: "/go/src/github.com/containers/storage"
+    container:
+        image: golang:1.12
+    modules_cache:
+        fingerprint_script: cat go.sum
+        folder: $GOPATH/pkg/mod
+    build_script: |
+      echo "deb http://deb.debian.org/debian stretch-backports main" > /etc/apt/sources.list.d/backports.list
+      apt-get update
+      apt-get install -y libbtrfs-dev libostree-dev libdevmapper-dev
+    test_script: make lint

 # Update metadata on VM images referenced by this repository state
 meta_task:
@ -94,3 +107,12 @@ meta_task:
        CIRRUS_CLONE_DEPTH: 1  # source not used

    script: '/usr/local/bin/entrypoint.sh |& ${_TIMESTAMP}'
+
+vendor_task:
+    container:
+        image: golang:1.12
+    modules_cache:
+        fingerprint_script: cat go.sum
+        folder: $GOPATH/pkg/mod
+    build_script: make vendor
+    test_script: hack/tree_status.sh
--- a/vendor/github.com/containers/storage/.gitignore
+++ b/vendor/github.com/containers/storage/.gitignore
@ -23,6 +23,7 @@ docs/changed-files
 man/man1
 man/man5
 man/man8
+tests/tools/build
 vendor/pkg/
 .vagrant
 /containers-storage
--- a/vendor/github.com/containers/storage/.golangci.yml
+++ b/vendor/github.com/containers/storage/.golangci.yml
@ -0,0 +1,41 @@
+---
+run:
+  build-tags:
+    - ostree
+  concurrency: 6
+  deadline: 5m
+linters:
+  disable-all: true
+  enable:
+    - bodyclose
+    - depguard
+    - gofmt
+    - interfacer
+    - typecheck
+    # - deadcode
+    # - dupl
+    # - errcheck
+    # - gochecknoglobals
+    # - gochecknoinits
+    # - goconst
+    # - gocritic
+    # - gocyclo
+    # - goimports
+    # - golint
+    # - gosec
+    # - gosimple
+    # - govet
+    # - ineffassign
+    # - lll
+    # - maligned
+    # - misspell
+    # - nakedret
+    # - prealloc
+    # - scopelint
+    # - staticcheck
+    # - structcheck
+    # - stylecheck
+    # - unconvert
+    # - unparam
+    # - unused
+    # - varcheck
--- a/vendor/github.com/containers/storage/Makefile
+++ b/vendor/github.com/containers/storage/Makefile
@ -1,4 +1,27 @@
-.PHONY: all binary clean cross default docs gccgo help install.tools local-binary local-cross local-gccgo local-test-integration local-test-unit local-validate test test-integration test-unit validate
+export GO111MODULE=off
+
+.PHONY: \
+	all \
+	binary \
+	clean \
+	cross \
+	default \
+	docs \
+	gccgo \
+	help \
+	install.tools \
+	local-binary \
+	local-cross \
+	local-gccgo \
+	local-test-integration \
+	local-test-unit \
+	local-validate \
+	lint \
+	test \
+	test-integration \
+	test-unit \
+	validate \
+	vendor

 PACKAGE := github.com/containers/storage
 GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
@ -7,9 +30,16 @@ EPOCH_TEST_COMMIT := 0418ebf59f9e1f564831c0ba9378b7f8e40a1c73
 NATIVETAGS := exclude_graphdriver_devicemapper exclude_graphdriver_btrfs exclude_graphdriver_overlay
 AUTOTAGS := $(shell ./hack/btrfs_tag.sh) $(shell ./hack/libdm_tag.sh) $(shell ./hack/ostree_tag.sh)
 BUILDFLAGS := -tags "$(AUTOTAGS) $(TAGS)" $(FLAGS)
-GO := go
+GO ?= go
+
+GO_BUILD=$(GO) build
+# Go module support: set `-mod=vendor` to use the vendored sources
+ifeq ($(shell $(GO) help mod >/dev/null 2>&1 && echo true), true)
+	GO_BUILD=GO111MODULE=on $(GO) build -mod=vendor
+endif

 RUNINVM := vagrant/runinvm.sh
+FFJSON := tests/tools/build/ffjson

 default all: local-binary docs local-validate local-cross local-gccgo test-unit test-integration ## validate all checks, build and cross-build\nbinaries and docs, run tests in a VM

@ -19,41 +49,42 @@ clean: ## remove all built files
 sources := $(wildcard *.go cmd/containers-storage/*.go drivers/*.go drivers/*/*.go pkg/*/*.go pkg/*/*/*.go) layers_ffjson.go images_ffjson.go containers_ffjson.go pkg/archive/archive_ffjson.go

 containers-storage: $(sources) ## build using gc on the host
-	$(GO) build -compiler gc $(BUILDFLAGS) ./cmd/containers-storage
+	$(GO_BUILD) -compiler gc $(BUILDFLAGS) ./cmd/containers-storage

 layers_ffjson.go: layers.go
 	$(RM) $@
-	ffjson layers.go
+	$(FFJSON) layers.go

 images_ffjson.go: images.go
 	$(RM) $@
-	ffjson images.go
+	$(FFJSON) images.go

 containers_ffjson.go: containers.go
 	$(RM) $@
-	ffjson containers.go
+	$(FFJSON) containers.go

 pkg/archive/archive_ffjson.go: pkg/archive/archive.go
 	$(RM) $@
-	ffjson pkg/archive/archive.go
+	$(FFJSON) pkg/archive/archive.go

 binary local-binary: containers-storage

 local-gccgo: ## build using gccgo on the host
-	GCCGO=$(PWD)/hack/gccgo-wrapper.sh $(GO) build -compiler gccgo $(BUILDFLAGS) -o containers-storage.gccgo ./cmd/containers-storage
+	GCCGO=$(PWD)/hack/gccgo-wrapper.sh $(GO_BUILD) -compiler gccgo $(BUILDFLAGS) -o containers-storage.gccgo ./cmd/containers-storage

 local-cross: ## cross build the binaries for arm, darwin, and\nfreebsd
-	@for target in linux/amd64 linux/386 linux/arm darwin/amd64 windows/amd64 ; do \
+	@for target in linux/amd64 linux/386 linux/arm linux/arm64 linux/ppc64 linux/ppc64le darwin/amd64 windows/amd64 ; do \
 		os=`echo $${target} | cut -f1 -d/` ; \
 		arch=`echo $${target} | cut -f2 -d/` ; \
 		suffix=$${os}.$${arch} ; \
-		$(MAKE) GOOS=$${os} GOARCH=$${arch} FLAGS="-o containers-storage.$${suffix}" AUTOTAGS="$(NATIVETAGS)" local-binary || exit 1; \
+		echo env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO_BUILD) -compiler gc -tags \"$(NATIVETAGS) $(TAGS)\" $(FLAGS) -o containers-storage.$${suffix} ./cmd/containers-storage ; \
+		env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO_BUILD) -compiler gc -tags "$(NATIVETAGS) $(TAGS)" $(FLAGS) -o containers-storage.$${suffix} ./cmd/containers-storage || exit 1 ; \
 	done

 cross: ## cross build the binaries for arm, darwin, and\nfreebsd using VMs
 	$(RUNINVM) make local-$@

-docs: ## build the docs on the host
+docs: install.tools ## build the docs on the host
 	$(MAKE) -C docs docs

 gccgo: ## build using gccgo using VMs
@ -82,11 +113,21 @@ validate: ## validate DCO, gofmt, ./pkg/ isolation, golint,\ngo vet and vendor u
 	$(RUNINVM) make local-$@

 install.tools:
-	go get -u $(BUILDFLAGS) github.com/cpuguy83/go-md2man
-	go get -u $(BUILDFLAGS) github.com/vbatts/git-validation
-	go get -u $(BUILDFLAGS) gopkg.in/alecthomas/gometalinter.v1
-	go get -u $(BUILDFLAGS) github.com/pquerna/ffjson
-	gometalinter.v1 -i
+	make -C tests/tools
+
+install.docs: docs
+	make -C docs install
+
+install: install.docs
+
+lint: install.tools
+	tests/tools/build/golangci-lint run

 help: ## this help
 	@awk 'BEGIN {FS = ":.*?## "} /^[a-z A-Z_-]+:.*?## / {gsub(" ",",",$$1);gsub("\\\\n",sprintf("\n%22c"," "), $$2);printf "\033[36m%-21s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+vendor:
+	export GO111MODULE=on \
+		$(GO) mod tidy && \
+		$(GO) mod vendor && \
+		$(GO) mod verify
--- a/vendor/github.com/containers/storage/VERSION
+++ b/vendor/github.com/containers/storage/VERSION
@ -1 +1 @@
-1.12.10
+1.13.0
--- a/vendor/github.com/containers/storage/containers.go
+++ b/vendor/github.com/containers/storage/containers.go
@ -579,6 +579,7 @@ func (r *containerStore) RecursiveLock() {
 func (r *containerStore) RLock() {
 	r.lockfile.RLock()
 }
+
 func (r *containerStore) Unlock() {
 	r.lockfile.Unlock()
 }
--- a/vendor/github.com/containers/storage/drivers/aufs/aufs.go
+++ b/vendor/github.com/containers/storage/drivers/aufs/aufs.go
@ -83,7 +83,7 @@ type Driver struct {

 // Init returns a new AUFS driver.
 // An error is returned if AUFS is not supported.
-func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
+func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {

 	// Try to load the aufs kernel module
 	if err := supportsAufs(); err != nil {
@ -91,7 +91,7 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap

 	}

-	fsMagic, err := graphdriver.GetFSMagic(root)
+	fsMagic, err := graphdriver.GetFSMagic(home)
 	if err != nil {
 		return nil, err
 	}
@ -106,7 +106,7 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	}

 	var mountOptions string
-	for _, option := range options {
+	for _, option := range options.DriverOptions {
 		key, val, err := parsers.ParseKeyValueOpt(option)
 		if err != nil {
 			return nil, err
@ -126,36 +126,36 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	}

 	a := &Driver{
-		root:         root,
-		uidMaps:      uidMaps,
-		gidMaps:      gidMaps,
+		root:         home,
+		uidMaps:      options.UIDMaps,
+		gidMaps:      options.GIDMaps,
 		pathCache:    make(map[string]string),
 		ctr:          graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicAufs)),
 		locker:       locker.New(),
 		mountOptions: mountOptions,
 	}

-	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
 	if err != nil {
 		return nil, err
 	}
 	// Create the root aufs driver dir and return
 	// if it already exists
 	// If not populate the dir structure
-	if err := idtools.MkdirAllAs(root, 0700, rootUID, rootGID); err != nil {
+	if err := idtools.MkdirAllAs(home, 0700, rootUID, rootGID); err != nil {
 		if os.IsExist(err) {
 			return a, nil
 		}
 		return nil, err
 	}

-	if err := mountpk.MakePrivate(root); err != nil {
+	if err := mountpk.MakePrivate(home); err != nil {
 		return nil, err
 	}

 	// Populate the dir structure
 	for _, p := range paths {
-		if err := idtools.MkdirAllAs(path.Join(root, p), 0700, rootUID, rootGID); err != nil {
+		if err := idtools.MkdirAllAs(path.Join(home, p), 0700, rootUID, rootGID); err != nil {
 			return nil, err
 		}
 	}
@ -165,7 +165,7 @@ func Init(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	})

 	for _, path := range []string{"mnt", "diff"} {
-		p := filepath.Join(root, path)
+		p := filepath.Join(home, path)
 		entries, err := ioutil.ReadDir(p)
 		if err != nil {
 			logger.WithError(err).WithField("dir", p).Error("error reading dir entries")
@ -550,13 +550,13 @@ func (a *Driver) DiffSize(id string, idMappings *idtools.IDMappings, parent stri
 // ApplyDiff extracts the changeset from the given diff into the
 // layer with the specified id and parent, returning the size of the
 // new layer in bytes.
-func (a *Driver) ApplyDiff(id string, idMappings *idtools.IDMappings, parent, mountLabel string, diff io.Reader) (size int64, err error) {
+func (a *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (size int64, err error) {
 	if !a.isParent(id, parent) {
-		return a.naiveDiff.ApplyDiff(id, idMappings, parent, mountLabel, diff)
+		return a.naiveDiff.ApplyDiff(id, parent, options)
 	}

 	// AUFS doesn't need the parent id to apply the diff if it is the direct parent.
-	if err = a.applyDiff(id, idMappings, diff); err != nil {
+	if err = a.applyDiff(id, options.Mappings, options.Diff); err != nil {
 		return
 	}

--- a/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go
+++ b/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go
@ -49,7 +49,7 @@ type btrfsOptions struct {

 // Init returns a new BTRFS driver.
 // An error is returned if BTRFS is not supported.
-func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
+func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {

 	fsMagic, err := graphdriver.GetFSMagic(home)
 	if err != nil {
@ -60,7 +60,7 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		return nil, errors.Wrapf(graphdriver.ErrPrerequisites, "%q is not on a btrfs filesystem", home)
 	}

-	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
 	if err != nil {
 		return nil, err
 	}
@ -72,15 +72,15 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		return nil, err
 	}

-	opt, userDiskQuota, err := parseOptions(options)
+	opt, userDiskQuota, err := parseOptions(options.DriverOptions)
 	if err != nil {
 		return nil, err
 	}

 	driver := &Driver{
 		home:    home,
-		uidMaps: uidMaps,
-		gidMaps: gidMaps,
+		uidMaps: options.UIDMaps,
+		gidMaps: options.GIDMaps,
 		options: opt,
 	}

--- a/vendor/github.com/containers/storage/drivers/devmapper/driver.go
+++ b/vendor/github.com/containers/storage/drivers/devmapper/driver.go
@ -34,8 +34,8 @@ type Driver struct {
 }

 // Init creates a driver with the given home and the set of options.
-func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
-	deviceSet, err := NewDeviceSet(home, true, options, uidMaps, gidMaps)
+func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
+	deviceSet, err := NewDeviceSet(home, true, options.DriverOptions, options.UIDMaps, options.GIDMaps)
 	if err != nil {
 		return nil, err
 	}
@ -47,8 +47,8 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	d := &Driver{
 		DeviceSet: deviceSet,
 		home:      home,
-		uidMaps:   uidMaps,
-		gidMaps:   gidMaps,
+		uidMaps:   options.UIDMaps,
+		gidMaps:   options.GIDMaps,
 		ctr:       graphdriver.NewRefCounter(graphdriver.NewDefaultChecker()),
 		locker:    locker.New(),
 	}
--- a/vendor/github.com/containers/storage/drivers/driver.go
+++ b/vendor/github.com/containers/storage/drivers/driver.go
@ -41,6 +41,7 @@ type CreateOpts struct {
 	MountLabel string
 	StorageOpt map[string]string
 	*idtools.IDMappings
+	ignoreChownErrors bool
 }

 // MountOpts contains optional arguments for LayerStope.Mount() methods.
@ -53,8 +54,16 @@ type MountOpts struct {
 	Options []string
 }

+// ApplyDiffOpts contains optional arguments for ApplyDiff methods.
+type ApplyDiffOpts struct {
+	Diff              io.Reader
+	Mappings          *idtools.IDMappings
+	MountLabel        string
+	IgnoreChownErrors bool
+}
+
 // InitFunc initializes the storage driver.
-type InitFunc func(root string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error)
+type InitFunc func(homedir string, options Options) (Driver, error)

 // ProtoDriver defines the basic capabilities of a driver.
 // This interface exists solely to be a minimum set of methods
@ -115,7 +124,7 @@ type DiffDriver interface {
 	// layer with the specified id and parent, returning the size of the
 	// new layer in bytes.
 	// The io.Reader must be an uncompressed stream.
-	ApplyDiff(id string, idMappings *idtools.IDMappings, parent string, mountLabel string, diff io.Reader) (size int64, err error)
+	ApplyDiff(id string, parent string, options ApplyDiffOpts) (size int64, err error)
 	// DiffSize calculates the changes between the specified id
 	// and its parent and returns the size in bytes of the changes
 	// relative to its base filesystem directory.
@ -203,7 +212,7 @@ func Register(name string, initFunc InitFunc) error {
 // GetDriver initializes and returns the registered driver
 func GetDriver(name string, config Options) (Driver, error) {
 	if initFunc, exists := drivers[name]; exists {
-		return initFunc(filepath.Join(config.Root, name), config.DriverOptions, config.UIDMaps, config.GIDMaps)
+		return initFunc(filepath.Join(config.Root, name), config)
 	}

 	logrus.Errorf("Failed to GetDriver graph %s %s", name, config.Root)
@ -211,9 +220,9 @@ func GetDriver(name string, config Options) (Driver, error) {
 }

 // getBuiltinDriver initializes and returns the registered driver, but does not try to load from plugins
-func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []idtools.IDMap) (Driver, error) {
+func getBuiltinDriver(name, home string, options Options) (Driver, error) {
 	if initFunc, exists := drivers[name]; exists {
-		return initFunc(filepath.Join(home, name), options, uidMaps, gidMaps)
+		return initFunc(filepath.Join(home, name), options)
 	}
 	logrus.Errorf("Failed to built-in GetDriver graph %s %s", name, home)
 	return nil, errors.Wrapf(ErrNotSupported, "failed to built-in GetDriver graph %s %s", name, home)
@ -222,6 +231,7 @@ func getBuiltinDriver(name, home string, options []string, uidMaps, gidMaps []id
 // Options is used to initialize a graphdriver
 type Options struct {
 	Root                string
+	RunRoot             string
 	DriverOptions       []string
 	UIDMaps             []idtools.IDMap
 	GIDMaps             []idtools.IDMap
@ -245,7 +255,7 @@ func New(name string, config Options) (Driver, error) {
 		if _, prior := driversMap[name]; prior {
 			// of the state found from prior drivers, check in order of our priority
 			// which we would prefer
-			driver, err := getBuiltinDriver(name, config.Root, config.DriverOptions, config.UIDMaps, config.GIDMaps)
+			driver, err := getBuiltinDriver(name, config.Root, config)
 			if err != nil {
 				// unlike below, we will return error here, because there is prior
 				// state, and now it is no longer supported/prereq/compatible, so
@ -273,7 +283,7 @@ func New(name string, config Options) (Driver, error) {

 	// Check for priority drivers first
 	for _, name := range priority {
-		driver, err := getBuiltinDriver(name, config.Root, config.DriverOptions, config.UIDMaps, config.GIDMaps)
+		driver, err := getBuiltinDriver(name, config.Root, config)
 		if err != nil {
 			if isDriverNotSupported(err) {
 				continue
@ -285,7 +295,7 @@ func New(name string, config Options) (Driver, error) {

 	// Check all registered drivers if no priority driver is found
 	for name, initFunc := range drivers {
-		driver, err := initFunc(filepath.Join(config.Root, name), config.DriverOptions, config.UIDMaps, config.GIDMaps)
+		driver, err := initFunc(filepath.Join(config.Root, name), config)
 		if err != nil {
 			if isDriverNotSupported(err) {
 				continue
--- a/vendor/github.com/containers/storage/drivers/fsdiff.go
+++ b/vendor/github.com/containers/storage/drivers/fsdiff.go
@ -33,7 +33,7 @@ type NaiveDiffDriver struct {
 // it may or may not support on its own:
 //     Diff(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) (io.ReadCloser, error)
 //     Changes(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) ([]archive.Change, error)
-//     ApplyDiff(id string, idMappings *idtools.IDMappings, parent, mountLabel string, diff io.Reader) (size int64, err error)
+//     ApplyDiff(id, parent string, options ApplyDiffOpts) (size int64, err error)
 //     DiffSize(id string, idMappings *idtools.IDMappings, parent, parentMappings *idtools.IDMappings, mountLabel string) (size int64, err error)
 func NewNaiveDiffDriver(driver ProtoDriver, updater LayerIDMapUpdater) Driver {
 	return &NaiveDiffDriver{ProtoDriver: driver, LayerIDMapUpdater: updater}
@ -151,16 +151,16 @@ func (gdw *NaiveDiffDriver) Changes(id string, idMappings *idtools.IDMappings, p
 // ApplyDiff extracts the changeset from the given diff into the
 // layer with the specified id and parent, returning the size of the
 // new layer in bytes.
-func (gdw *NaiveDiffDriver) ApplyDiff(id string, applyMappings *idtools.IDMappings, parent, mountLabel string, diff io.Reader) (size int64, err error) {
+func (gdw *NaiveDiffDriver) ApplyDiff(id, parent string, options ApplyDiffOpts) (size int64, err error) {
 	driver := gdw.ProtoDriver

-	if applyMappings == nil {
-		applyMappings = &idtools.IDMappings{}
+	if options.Mappings == nil {
+		options.Mappings = &idtools.IDMappings{}
 	}

 	// Mount the root filesystem so we can apply the diff/layer.
 	mountOpts := MountOpts{
-		MountLabel: mountLabel,
+		MountLabel: options.MountLabel,
 	}
 	layerFs, err := driver.Get(id, mountOpts)
 	if err != nil {
@ -168,16 +168,17 @@ func (gdw *NaiveDiffDriver) ApplyDiff(id string, applyMappings *idtools.IDMappin
 	}
 	defer driver.Put(id)

-	options := &archive.TarOptions{
-		InUserNS: rsystem.RunningInUserNS(),
+	tarOptions := &archive.TarOptions{
+		InUserNS:          rsystem.RunningInUserNS(),
+		IgnoreChownErrors: options.IgnoreChownErrors,
 	}
-	if applyMappings != nil {
-		options.UIDMaps = applyMappings.UIDs()
-		options.GIDMaps = applyMappings.GIDs()
+	if options.Mappings != nil {
+		tarOptions.UIDMaps = options.Mappings.UIDs()
+		tarOptions.GIDMaps = options.Mappings.GIDs()
 	}
 	start := time.Now().UTC()
 	logrus.Debug("Start untar layer")
-	if size, err = ApplyUncompressedLayer(layerFs, diff, options); err != nil {
+	if size, err = ApplyUncompressedLayer(layerFs, options.Diff, tarOptions); err != nil {
 		logrus.Errorf("Error while applying layer: %s", err)
 		return
 	}
--- a/vendor/github.com/containers/storage/drivers/overlay/overlay.go
+++ b/vendor/github.com/containers/storage/drivers/overlay/overlay.go
@ -85,18 +85,20 @@ const (
 )

 type overlayOptions struct {
-	imageStores   []string
-	quota         quota.Quota
-	mountProgram  string
-	ostreeRepo    string
-	skipMountHome bool
-	mountOptions  string
+	imageStores       []string
+	quota             quota.Quota
+	mountProgram      string
+	ostreeRepo        string
+	skipMountHome     bool
+	mountOptions      string
+	ignoreChownErrors bool
 }

 // Driver contains information about the home directory and the list of active mounts that are created using this driver.
 type Driver struct {
 	name          string
 	home          string
+	runhome       string
 	uidMaps       []idtools.IDMap
 	gidMaps       []idtools.IDMap
 	ctr           *graphdriver.RefCounter
@ -125,8 +127,8 @@ func init() {
 // Init returns the a native diff driver for overlay filesystem.
 // If overlay filesystem is not supported on the host, a wrapped graphdriver.ErrNotSupported is returned as error.
 // If an overlay filesystem is not supported over an existing filesystem then a wrapped graphdriver.ErrIncompatibleFS is returned.
-func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
-	opts, err := parseOptions(options)
+func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
+	opts, err := parseOptions(options.DriverOptions)
 	if err != nil {
 		return nil, err
 	}
@ -148,7 +150,7 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		}
 	}

-	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
 	if err != nil {
 		return nil, err
 	}
@ -157,32 +159,72 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	if err := idtools.MkdirAllAs(path.Join(home, linkDir), 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
 		return nil, err
 	}
+	runhome := filepath.Join(options.RunRoot, filepath.Base(home))
+	if err := idtools.MkdirAllAs(runhome, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
+		return nil, err
+	}

 	var usingMetacopy bool
 	var supportsDType bool
 	if opts.mountProgram != "" {
 		supportsDType = true
 	} else {
-		supportsDType, err = supportsOverlay(home, fsMagic, rootUID, rootGID)
-		if err != nil {
-			os.Remove(filepath.Join(home, linkDir))
-			os.Remove(home)
-			patherr, ok := err.(*os.PathError)
-			if ok && patherr.Err == syscall.ENOSPC {
-				return nil, err
-			}
-			return nil, errors.Wrap(err, "kernel does not support overlay fs")
-		}
-		usingMetacopy, err = doesMetacopy(home, opts.mountOptions)
+		feature := "overlay"
+		overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
 		if err == nil {
-			if usingMetacopy {
-				logrus.Debugf("overlay test mount indicated that metacopy is being used")
+			if overlayCacheResult {
+				logrus.Debugf("cached value indicated that overlay is supported")
 			} else {
-				logrus.Debugf("overlay test mount indicated that metacopy is not being used")
+				logrus.Debugf("cached value indicated that overlay is not supported")
+			}
+			supportsDType = overlayCacheResult
+			if !supportsDType {
+				return nil, errors.New(overlayCacheText)
 			}
 		} else {
-			logrus.Warnf("overlay test mount did not indicate whether or not metacopy is being used: %v", err)
-			return nil, err
+			supportsDType, err = supportsOverlay(home, fsMagic, rootUID, rootGID)
+			if err != nil {
+				os.Remove(filepath.Join(home, linkDir))
+				os.Remove(home)
+				patherr, ok := err.(*os.PathError)
+				if ok && patherr.Err == syscall.ENOSPC {
+					return nil, err
+				}
+				err = errors.Wrap(err, "kernel does not support overlay fs")
+				if err2 := cachedFeatureRecord(runhome, feature, false, err.Error()); err2 != nil {
+					return nil, errors.Wrapf(err2, "error recording overlay not being supported (%v)", err)
+				}
+				return nil, err
+			}
+			if err = cachedFeatureRecord(runhome, feature, supportsDType, ""); err != nil {
+				return nil, errors.Wrap(err, "error recording overlay support status")
+			}
+		}
+
+		feature = fmt.Sprintf("metacopy(%s)", opts.mountOptions)
+		metacopyCacheResult, _, err := cachedFeatureCheck(runhome, feature)
+		if err == nil {
+			if metacopyCacheResult {
+				logrus.Debugf("cached value indicated that metacopy is being used")
+			} else {
+				logrus.Debugf("cached value indicated that metacopy is not being used")
+			}
+			usingMetacopy = metacopyCacheResult
+		} else {
+			usingMetacopy, err = doesMetacopy(home, opts.mountOptions)
+			if err == nil {
+				if usingMetacopy {
+					logrus.Debugf("overlay test mount indicated that metacopy is being used")
+				} else {
+					logrus.Debugf("overlay test mount indicated that metacopy is not being used")
+				}
+				if err = cachedFeatureRecord(runhome, feature, usingMetacopy, ""); err != nil {
+					return nil, errors.Wrap(err, "error recording metacopy-being-used status")
+				}
+			} else {
+				logrus.Warnf("overlay test mount did not indicate whether or not metacopy is being used: %v", err)
+				return nil, err
+			}
 		}
 	}

@ -201,8 +243,9 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 	d := &Driver{
 		name:          "overlay",
 		home:          home,
-		uidMaps:       uidMaps,
-		gidMaps:       gidMaps,
+		runhome:       runhome,
+		uidMaps:       options.UIDMaps,
+		gidMaps:       options.GIDMaps,
 		ctr:           graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicOverlay)),
 		supportsDType: supportsDType,
 		usingMetacopy: usingMetacopy,
@ -211,8 +254,7 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 		convert:       make(map[string]bool),
 	}

-	d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, d)
-
+	d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, graphdriver.NewNaiveLayerIDMapUpdater(d))
 	if backingFs == "xfs" {
 		// Try to enable project quota support over xfs.
 		if d.quotaCtl, err = quota.NewControl(home); err == nil {
@ -280,6 +322,12 @@ func parseOptions(options []string) (*overlayOptions, error) {
 				return nil, fmt.Errorf("overlay: ostree_repo specified but support for ostree is missing")
 			}
 			o.ostreeRepo = val
+		case ".ignore_chown_errors", "overlay2.ignore_chown_errors", "overlay.ignore_chown_errors":
+			logrus.Debugf("overlay: ignore_chown_errors=%s", val)
+			o.ignoreChownErrors, err = strconv.ParseBool(val)
+			if err != nil {
+				return nil, err
+			}
 		case "overlay2.skip_mount_home", "overlay.skip_mount_home", ".skip_mount_home":
 			logrus.Debugf("overlay: skip_mount_home=%s", val)
 			o.skipMountHome, err = strconv.ParseBool(val)
@ -293,6 +341,36 @@ func parseOptions(options []string) (*overlayOptions, error) {
 	return o, nil
 }

+func cachedFeatureSet(feature string, set bool) string {
+	if set {
+		return fmt.Sprintf("%s-true", feature)
+	}
+	return fmt.Sprintf("%s-false", feature)
+}
+
+func cachedFeatureCheck(runhome, feature string) (supported bool, text string, err error) {
+	content, err := ioutil.ReadFile(filepath.Join(runhome, cachedFeatureSet(feature, true)))
+	if err == nil {
+		return true, string(content), nil
+	}
+	content, err = ioutil.ReadFile(filepath.Join(runhome, cachedFeatureSet(feature, false)))
+	if err == nil {
+		return false, string(content), nil
+	}
+	return false, "", err
+}
+
+func cachedFeatureRecord(runhome, feature string, supported bool, text string) (err error) {
+	f, err := os.Create(filepath.Join(runhome, cachedFeatureSet(feature, supported)))
+	if f != nil {
+		if text != "" {
+			fmt.Fprintf(f, "%s", text)
+		}
+		f.Close()
+	}
+	return err
+}
+
 func supportsOverlay(home string, homeMagic graphdriver.FsMagic, rootUID, rootGID int) (supportsDType bool, err error) {
 	// We can try to modprobe overlay first

@ -369,10 +447,24 @@ func (d *Driver) useNaiveDiff() bool {
 			useNaiveDiffOnly = true
 			return
 		}
+		feature := fmt.Sprintf("native-diff(%s)", d.options.mountOptions)
+		nativeDiffCacheResult, nativeDiffCacheText, err := cachedFeatureCheck(d.runhome, feature)
+		if err == nil {
+			if nativeDiffCacheResult {
+				logrus.Debugf("cached value indicated that native-diff is usable")
+			} else {
+				logrus.Debugf("cached value indicated that native-diff is not being used")
+				logrus.Warn(nativeDiffCacheText)
+			}
+			useNaiveDiffOnly = !nativeDiffCacheResult
+			return
+		}
 		if err := doesSupportNativeDiff(d.home, d.options.mountOptions); err != nil {
-			logrus.Warnf("Not using native diff for overlay, this may cause degraded performance for building images: %v", err)
+			nativeDiffCacheText = fmt.Sprintf("Not using native diff for overlay, this may cause degraded performance for building images: %v", err)
+			logrus.Warn(nativeDiffCacheText)
 			useNaiveDiffOnly = true
 		}
+		cachedFeatureRecord(d.runhome, feature, !useNaiveDiffOnly, nativeDiffCacheText)
 	})
 	return useNaiveDiffOnly
 }
@ -748,7 +840,6 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
 		return "", err
 	}

-	diffDir := path.Join(dir, "diff")
 	lowers, err := ioutil.ReadFile(path.Join(dir, lowerFile))
 	if err != nil && !os.IsNotExist(err) {
 		return "", err
@ -824,7 +915,22 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
 		relLowers = append(relLowers, path.Join(id, "empty"))
 	}

+	// user namespace requires this to move a directory from lower to upper.
+	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
+	if err != nil {
+		return "", err
+	}
+
+	diffDir := path.Join(dir, "diff")
+	if err := idtools.MkdirAs(diffDir, 0755, rootUID, rootGID); err != nil && !os.IsExist(err) {
+		return "", err
+	}
+
 	mergedDir := path.Join(dir, "merged")
+	// Create the driver merged dir
+	if err := idtools.MkdirAs(mergedDir, 0700, rootUID, rootGID); err != nil && !os.IsExist(err) {
+		return "", err
+	}
 	if count := d.ctr.Increment(mergedDir); count > 1 {
 		return mergedDir, nil
 	}
@ -893,17 +999,6 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
 		return "", fmt.Errorf("error creating overlay mount to %s: %v", mountTarget, err)
 	}

-	// chown "workdir/work" to the remapped root UID/GID. Overlay fs inside a
-	// user namespace requires this to move a directory from lower to upper.
-	rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
-	if err != nil {
-		return "", err
-	}
-
-	if err := os.Chown(path.Join(workDir, "work"), rootUID, rootGID); err != nil {
-		return "", err
-	}
-
 	return mergedDir, nil
 }

@ -915,7 +1010,7 @@ func (d *Driver) Put(id string) error {
 	if _, err := os.Stat(dir); err != nil {
 		return err
 	}
-	mountpoint := path.Join(d.dir(id), "merged")
+	mountpoint := path.Join(dir, "merged")
 	if count := d.ctr.Decrement(mountpoint); count > 0 {
 		return nil
 	}
@ -925,6 +1020,11 @@ func (d *Driver) Put(id string) error {
 	if err := unix.Unmount(mountpoint, unix.MNT_DETACH); err != nil {
 		logrus.Debugf("Failed to unmount %s overlay: %s - %v", id, mountpoint, err)
 	}
+
+	if err := unix.Rmdir(mountpoint); err != nil && !os.IsNotExist(err) {
+		logrus.Debugf("Failed to remove mountpoint %s overlay: %s - %v", id, mountpoint, err)
+	}
+
 	return nil
 }

@ -967,11 +1067,16 @@ func (d *Driver) getWhiteoutFormat() archive.WhiteoutFormat {
 }

 // ApplyDiff applies the new layer into a root
-func (d *Driver) ApplyDiff(id string, idMappings *idtools.IDMappings, parent string, mountLabel string, diff io.Reader) (size int64, err error) {
+func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (size int64, err error) {
+
 	if !d.isParent(id, parent) {
-		return d.naiveDiff.ApplyDiff(id, idMappings, parent, mountLabel, diff)
+		if d.options.ignoreChownErrors {
+			options.IgnoreChownErrors = d.options.ignoreChownErrors
+		}
+		return d.naiveDiff.ApplyDiff(id, parent, options)
 	}

+	idMappings := options.Mappings
 	if idMappings == nil {
 		idMappings = &idtools.IDMappings{}
 	}
@ -980,11 +1085,12 @@ func (d *Driver) ApplyDiff(id string, idMappings *idtools.IDMappings, parent str

 	logrus.Debugf("Applying tar in %s", applyDir)
 	// Overlay doesn't need the parent id to apply the diff
-	if err := untar(diff, applyDir, &archive.TarOptions{
-		UIDMaps:        idMappings.UIDs(),
-		GIDMaps:        idMappings.GIDs(),
-		WhiteoutFormat: d.getWhiteoutFormat(),
-		InUserNS:       rsystem.RunningInUserNS(),
+	if err := untar(options.Diff, applyDir, &archive.TarOptions{
+		UIDMaps:           idMappings.UIDs(),
+		GIDMaps:           idMappings.GIDs(),
+		IgnoreChownErrors: d.options.ignoreChownErrors,
+		WhiteoutFormat:    d.getWhiteoutFormat(),
+		InUserNS:          rsystem.RunningInUserNS(),
 	}); err != nil {
 		return 0, err
 	}
--- a/vendor/github.com/containers/storage/drivers/quota/projectquota.go
+++ b/vendor/github.com/containers/storage/drivers/quota/projectquota.go
@ -1,4 +1,4 @@
-// +build linux
+// +build linux,!exclude_disk_quota

 //
 // projectquota.go - implements XFS project quota controls
--- a/vendor/github.com/containers/storage/drivers/quota/projectquota_unsupported.go
+++ b/vendor/github.com/containers/storage/drivers/quota/projectquota_unsupported.go
@ -0,0 +1,32 @@
+// +build linux,exclude_disk_quota
+
+package quota
+
+import (
+	"github.com/pkg/errors"
+)
+
+// Quota limit params - currently we only control blocks hard limit
+type Quota struct {
+	Size uint64
+}
+
+// Control - Context to be used by storage driver (e.g. overlay)
+// who wants to apply project quotas to container dirs
+type Control struct {
+}
+
+func NewControl(basePath string) (*Control, error) {
+	return nil, errors.New("filesystem does not support, or has not enabled quotas")
+}
+
+// SetQuota - assign a unique project id to directory and set the quota limits
+// for that project id
+func (q *Control) SetQuota(targetPath string, quota Quota) error {
+	return errors.New("filesystem does not support, or has not enabled quotas")
+}
+
+// GetQuota - get the quota limits of a directory that was configured with SetQuota
+func (q *Control) GetQuota(targetPath string, quota *Quota) error {
+	return errors.New("filesystem does not support, or has not enabled quotas")
+}
--- a/vendor/github.com/containers/storage/drivers/template.go
+++ b/vendor/github.com/containers/storage/drivers/template.go
@ -35,7 +35,14 @@ func NaiveCreateFromTemplate(d TemplateDriver, id, template string, templateIDMa
 		}
 		return err
 	}
-	if _, err = d.ApplyDiff(id, templateIDMappings, parent, opts.MountLabel, diff); err != nil {
+
+	applyOptions := ApplyDiffOpts{
+		Diff:              diff,
+		Mappings:          templateIDMappings,
+		MountLabel:        opts.MountLabel,
+		IgnoreChownErrors: opts.ignoreChownErrors,
+	}
+	if _, err = d.ApplyDiff(id, parent, applyOptions); err != nil {
 		if err2 := d.Remove(id); err2 != nil {
 			logrus.Errorf("error removing layer %q: %v", id, err2)
 		}
--- a/vendor/github.com/containers/storage/drivers/vfs/driver.go
+++ b/vendor/github.com/containers/storage/drivers/vfs/driver.go
@ -2,15 +2,20 @@ package vfs

 import (
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
+	"strconv"
 	"strings"

 	"github.com/containers/storage/drivers"
+	"github.com/containers/storage/pkg/archive"
 	"github.com/containers/storage/pkg/idtools"
 	"github.com/containers/storage/pkg/ostree"
+	"github.com/containers/storage/pkg/parsers"
 	"github.com/containers/storage/pkg/system"
 	"github.com/opencontainers/selinux/go-selinux/label"
+	"github.com/sirupsen/logrus"
 )

 var (
@ -24,42 +29,48 @@ func init() {

 // Init returns a new VFS driver.
 // This sets the home directory for the driver and returns NaiveDiffDriver.
-func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
+func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
 	d := &Driver{
+		name:       "vfs",
 		homes:      []string{home},
-		idMappings: idtools.NewIDMappingsFromMaps(uidMaps, gidMaps),
+		idMappings: idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps),
 	}
+
 	rootIDs := d.idMappings.RootPair()
 	if err := idtools.MkdirAllAndChown(home, 0700, rootIDs); err != nil {
 		return nil, err
 	}
-	for _, option := range options {
-		if strings.HasPrefix(option, "vfs.imagestore=") {
-			d.homes = append(d.homes, strings.Split(option[15:], ",")...)
-			continue
+	for _, option := range options.DriverOptions {
+
+		key, val, err := parsers.ParseKeyValueOpt(option)
+		if err != nil {
+			return nil, err
 		}
-		if strings.HasPrefix(option, ".imagestore=") {
-			d.homes = append(d.homes, strings.Split(option[12:], ",")...)
+		key = strings.ToLower(key)
+		switch key {
+		case "vfs.imagestore", ".imagestore":
+			d.homes = append(d.homes, strings.Split(val, ",")...)
 			continue
-		}
-		if strings.HasPrefix(option, "vfs.ostree_repo=") {
+		case "vfs.ostree_repo", ".ostree_repo":
 			if !ostree.OstreeSupport() {
 				return nil, fmt.Errorf("vfs: ostree_repo specified but support for ostree is missing")
 			}
-			d.ostreeRepo = option[16:]
-		}
-		if strings.HasPrefix(option, ".ostree_repo=") {
-			if !ostree.OstreeSupport() {
-				return nil, fmt.Errorf("vfs: ostree_repo specified but support for ostree is missing")
-			}
-			d.ostreeRepo = option[13:]
-		}
-		if strings.HasPrefix(option, "vfs.mountopt=") {
+			d.ostreeRepo = val
+		case "vfs.mountopt":
 			return nil, fmt.Errorf("vfs driver does not support mount options")
+		case ".ignore_chown_errors", "vfs.ignore_chown_errors":
+			logrus.Debugf("vfs: ignore_chown_errors=%s", val)
+			var err error
+			d.ignoreChownErrors, err = strconv.ParseBool(val)
+			if err != nil {
+				return nil, err
+			}
+		default:
+			return nil, fmt.Errorf("vfs driver does not support %s options", key)
 		}
 	}
 	if d.ostreeRepo != "" {
-		rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+		rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
 		if err != nil {
 			return nil, err
 		}
@ -67,7 +78,10 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 			return nil, err
 		}
 	}
-	return graphdriver.NewNaiveDiffDriver(d, graphdriver.NewNaiveLayerIDMapUpdater(d)), nil
+	d.updater = graphdriver.NewNaiveLayerIDMapUpdater(d)
+	d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, d.updater)
+
+	return d, nil
 }

 // Driver holds information about the driver, home directory of the driver.
@ -75,9 +89,13 @@ func Init(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (grap
 // In order to support layering, files are copied from the parent layer into the new layer. There is no copy-on-write support.
 // Driver must be wrapped in NaiveDiffDriver to be used as a graphdriver.Driver
 type Driver struct {
-	homes      []string
-	idMappings *idtools.IDMappings
-	ostreeRepo string
+	name              string
+	homes             []string
+	idMappings        *idtools.IDMappings
+	ostreeRepo        string
+	ignoreChownErrors bool
+	naiveDiff         graphdriver.DiffDriver
+	updater           graphdriver.LayerIDMapUpdater
 }

 func (d *Driver) String() string {
@ -107,6 +125,14 @@ func (d *Driver) CreateFromTemplate(id, template string, templateIDMappings *idt
 	return d.Create(id, template, opts)
 }

+// ApplyDiff applies the new layer into a root
+func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (size int64, err error) {
+	if d.ignoreChownErrors {
+		options.IgnoreChownErrors = d.ignoreChownErrors
+	}
+	return d.naiveDiff.ApplyDiff(id, parent, options)
+}
+
 // CreateReadWrite creates a layer that is writable for use as a container
 // file system.
 func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts) error {
@ -118,7 +144,7 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
 	return d.create(id, parent, opts, true)
 }

-func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool) error {
+func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool) (retErr error) {
 	if opts != nil && len(opts.StorageOpt) != 0 {
 		return fmt.Errorf("--storage-opt is not supported for vfs")
 	}
@ -133,6 +159,13 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool
 	if err := idtools.MkdirAllAndChown(filepath.Dir(dir), 0700, rootIDs); err != nil {
 		return err
 	}
+
+	defer func() {
+		if retErr != nil {
+			os.RemoveAll(dir)
+		}
+	}()
+
 	if parent != "" {
 		st, err := system.Stat(d.dir(parent))
 		if err != nil {
@ -224,3 +257,33 @@ func (d *Driver) AdditionalImageStores() []string {
 	}
 	return nil
 }
+
+// SupportsShifting tells whether the driver support shifting of the UIDs/GIDs in an userNS
+func (d *Driver) SupportsShifting() bool {
+	return d.updater.SupportsShifting()
+}
+
+// UpdateLayerIDMap updates ID mappings in a from matching the ones specified
+// by toContainer to those specified by toHost.
+func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMappings, mountLabel string) error {
+	return d.updater.UpdateLayerIDMap(id, toContainer, toHost, mountLabel)
+}
+
+// Changes produces a list of changes between the specified layer
+// and its parent layer. If parent is "", then all changes will be ADD changes.
+func (d *Driver) Changes(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) ([]archive.Change, error) {
+	return d.naiveDiff.Changes(id, idMappings, parent, parentMappings, mountLabel)
+}
+
+// Diff produces an archive of the changes between the specified
+// layer and its parent layer which may be "".
+func (d *Driver) Diff(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) (io.ReadCloser, error) {
+	return d.naiveDiff.Diff(id, idMappings, parent, parentMappings, mountLabel)
+}
+
+// DiffSize calculates the changes between the specified id
+// and its parent and returns the size in bytes of the changes
+// relative to its base filesystem directory.
+func (d *Driver) DiffSize(id string, idMappings *idtools.IDMappings, parent string, parentMappings *idtools.IDMappings, mountLabel string) (size int64, err error) {
+	return d.naiveDiff.DiffSize(id, idMappings, parent, parentMappings, mountLabel)
+}
--- a/vendor/github.com/containers/storage/drivers/windows/windows.go
+++ b/vendor/github.com/containers/storage/drivers/windows/windows.go
@ -83,10 +83,10 @@ type Driver struct {
 }

 // InitFilter returns a new Windows storage filter driver.
-func InitFilter(home string, options []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
+func InitFilter(home string, options graphdriver.Options) (graphdriver.Driver, error) {
 	logrus.Debugf("WindowsGraphDriver InitFilter at %s", home)

-	for _, option := range options {
+	for _, option := range options.DriverOptions {
 		if strings.HasPrefix(option, "windows.mountopt=") {
 			return nil, fmt.Errorf("windows driver does not support mount options")
 		} else {
@ -581,7 +581,7 @@ func (d *Driver) Changes(id string, idMappings *idtools.IDMappings, parent strin
 // layer with the specified id and parent, returning the size of the
 // new layer in bytes.
 // The layer should not be mounted when calling this function
-func (d *Driver) ApplyDiff(id string, idMappings *idtools.IDMappings, parent, mountLabel string, diff io.Reader) (int64, error) {
+func (d *Driver) ApplyDiff(id, parent string, options graphdriver.ApplyDiffOpts) (int64, error) {
 	panicIfUsedByLcow()
 	var layerChain []string
 	if parent != "" {
@ -601,7 +601,7 @@ func (d *Driver) ApplyDiff(id string, idMappings *idtools.IDMappings, parent, mo
 		layerChain = append(layerChain, parentChain...)
 	}

-	size, err := d.importLayer(id, diff, layerChain)
+	size, err := d.importLayer(id, options.Diff, layerChain)
 	if err != nil {
 		return 0, err
 	}
--- a/vendor/github.com/containers/storage/drivers/zfs/zfs.go
+++ b/vendor/github.com/containers/storage/drivers/zfs/zfs.go
@ -44,7 +44,7 @@ func (*Logger) Log(cmd []string) {
 // Init returns a new ZFS driver.
 // It takes base mount path and an array of options which are represented as key value pairs.
 // Each option is in the for key=value. 'zfs.fsname' is expected to be a valid key in the options.
-func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdriver.Driver, error) {
+func Init(base string, opt graphdriver.Options) (graphdriver.Driver, error) {
 	var err error

 	logger := logrus.WithField("storage-driver", "zfs")
@ -61,7 +61,7 @@ func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdri
 	}
 	defer file.Close()

-	options, err := parseOptions(opt)
+	options, err := parseOptions(opt.DriverOptions)
 	if err != nil {
 		return nil, err
 	}
@ -103,7 +103,7 @@ func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdri
 		return nil, fmt.Errorf("BUG: zfs get all -t filesystem -rHp '%s' should contain '%s'", options.fsName, options.fsName)
 	}

-	rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
+	rootUID, rootGID, err := idtools.GetRootUIDGID(opt.UIDMaps, opt.GIDMaps)
 	if err != nil {
 		return nil, fmt.Errorf("Failed to get root uid/guid: %v", err)
 	}
@ -115,8 +115,8 @@ func Init(base string, opt []string, uidMaps, gidMaps []idtools.IDMap) (graphdri
 		dataset:          rootDataset,
 		options:          options,
 		filesystemsCache: filesystemsCache,
-		uidMaps:          uidMaps,
-		gidMaps:          gidMaps,
+		uidMaps:          opt.UIDMaps,
+		gidMaps:          opt.GIDMaps,
 		ctr:              graphdriver.NewRefCounter(graphdriver.NewDefaultChecker()),
 	}
 	return graphdriver.NewNaiveDiffDriver(d, graphdriver.NewNaiveLayerIDMapUpdater(d)), nil
--- a/vendor/github.com/containers/storage/ffjson_deps.go
+++ b/vendor/github.com/containers/storage/ffjson_deps.go
@ -0,0 +1,10 @@
+package storage
+
+// NOTE: this is a hack to trick go modules into vendoring the below
+//       dependencies.  Those are required during ffjson generation
+//       but do NOT end up in the final file.
+
+import (
+	_ "github.com/pquerna/ffjson/inception" // nolint:typecheck
+	_ "github.com/pquerna/ffjson/shared"    // nolint:typecheck
+)
--- a/vendor/github.com/containers/storage/go.mod
+++ b/vendor/github.com/containers/storage/go.mod
@ -0,0 +1,29 @@
+module github.com/containers/storage
+
+require (
+	github.com/BurntSushi/toml v0.3.1
+	github.com/DataDog/zstd v1.4.0 // indirect
+	github.com/Microsoft/go-winio v0.4.12
+	github.com/Microsoft/hcsshim v0.8.6
+	github.com/docker/docker v0.0.0-20171019062838-86f080cff091
+	github.com/docker/go-units v0.4.0
+	github.com/klauspost/compress v1.7.2
+	github.com/klauspost/cpuid v1.2.1 // indirect
+	github.com/klauspost/pgzip v1.2.1
+	github.com/mattn/go-shellwords v1.0.5
+	github.com/mistifyio/go-zfs v2.1.1+incompatible
+	github.com/opencontainers/go-digest v1.0.0-rc1
+	github.com/opencontainers/runc v1.0.0-rc8
+	github.com/opencontainers/selinux v1.2.2
+	github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913
+	github.com/pkg/errors v0.8.1
+	github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7
+	github.com/sirupsen/logrus v1.4.2
+	github.com/stretchr/testify v1.3.0
+	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
+	github.com/tchap/go-patricia v2.3.0+incompatible
+	github.com/vbatts/tar-split v0.11.1
+	golang.org/x/net v0.0.0-20190628185345-da137c7871d7
+	golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb
+	gotest.tools v0.0.0-20190624233834-05ebafbffc79
+)
--- a/vendor/github.com/containers/storage/go.sum
+++ b/vendor/github.com/containers/storage/go.sum
@ -0,0 +1,71 @@
+github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/DataDog/zstd v1.4.0 h1:vhoV+DUHnRZdKW1i5UMjAk2G4JY8wN4ayRfYDNdEhwo=
+github.com/DataDog/zstd v1.4.0/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo=
+github.com/Microsoft/go-winio v0.4.12 h1:xAfWHN1IrQ0NJ9TBC0KBZoqLjzDTr1ML+4MywiUOryc=
+github.com/Microsoft/go-winio v0.4.12/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
+github.com/Microsoft/hcsshim v0.8.6 h1:ZfF0+zZeYdzMIVMZHKtDKJvLHj76XCuVae/jNkjj0IA=
+github.com/Microsoft/hcsshim v0.8.6/go.mod h1:Op3hHsoHPAvb6lceZHDtd9OkTew38wNoXnJs8iY7rUg=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/docker/docker v0.0.0-20171019062838-86f080cff091 h1:QpxpTw4MJeOzbC7X00IFxnZhZx8oDOqXMrMAHiwNn54=
+github.com/docker/docker v0.0.0-20171019062838-86f080cff091/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
+github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/klauspost/compress v1.7.2 h1:liMOoeIvFpr9kEvalrZ7VVBA4wGf7zfOgwBjzz/5g2Y=
+github.com/klauspost/compress v1.7.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w=
+github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
+github.com/klauspost/pgzip v1.2.1/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/mattn/go-shellwords v1.0.5 h1:JhhFTIOslh5ZsPrpa3Wdg8bF0WI3b44EMblmU9wIsXc=
+github.com/mattn/go-shellwords v1.0.5/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
+github.com/mistifyio/go-zfs v2.1.1+incompatible h1:gAMO1HM9xBRONLHHYnu5iFsOJUiJdNZo6oqSENd4eW8=
+github.com/mistifyio/go-zfs v2.1.1+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
+github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ=
+github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s=
+github.com/opencontainers/runc v0.1.1 h1:GlxAyO6x8rfZYN9Tt0Kti5a/cP41iuiO2yYT0IJGY8Y=
+github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
+github.com/opencontainers/runc v1.0.0-rc8 h1:dDCFes8Hj1r/i5qnypONo5jdOme/8HWZC/aNDyhECt0=
+github.com/opencontainers/runc v1.0.0-rc8/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U=
+github.com/opencontainers/selinux v1.2.2 h1:Kx9J6eDG5/24A6DtUquGSpJQ+m2MUTahn4FtGEe8bFg=
+github.com/opencontainers/selinux v1.2.2/go.mod h1:+BLncwf63G4dgOzykXAxcmnFlUaOlkDdmw/CqsW6pjs=
+github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913 h1:TnbXhKzrTOyuvWrjI8W6pcoI9XPbLHFXCdN2dtUw7Rw=
+github.com/ostreedev/ostree-go v0.0.0-20190702140239-759a8c1ac913/go.mod h1:J6OG6YJVEWopen4avK3VNQSnALmmjvniMmni/YFYAwc=
+github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7 h1:gGBSHPOU7g8YjTbhwn+lvFm2VDEhhA+PwDIlstkgSxE=
+github.com/pquerna/ffjson v0.0.0-20181028064349-e517b90714f7/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M=
+github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
+github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
+github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 h1:b6uOv7YOFK0TYG7HtkIgExQo+2RdLuwRft63jn2HWj8=
+github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
+github.com/tchap/go-patricia v2.3.0+incompatible h1:GkY4dP3cEfEASBPPkWd+AmjYxhmDkqO9/zg7R0lSQRs=
+github.com/tchap/go-patricia v2.3.0+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I=
+github.com/vbatts/tar-split v0.11.1 h1:0Odu65rhcZ3JZaPHxl7tCI3V/C/Q9Zf82UFravl02dE=
+github.com/vbatts/tar-split v0.11.1/go.mod h1:LEuURwDEiWjRjwu46yU3KVGuUdVv/dcnpcEPSzR8z6g=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
+golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20180810170437-e96c4e24768d/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+gotest.tools v0.0.0-20190624233834-05ebafbffc79 h1:C+K4iPg1rIvmCf4JjelkbWv2jeWevEwp05Lz8XfTYgE=
+gotest.tools v0.0.0-20190624233834-05ebafbffc79/go.mod h1:R//lfYlUuTOTfblYI3lGoAAAebUdzjvbmQsuB7Ykd90=
--- a/vendor/github.com/containers/storage/images.go
+++ b/vendor/github.com/containers/storage/images.go
@ -338,7 +338,7 @@ func newROImageStore(dir string) (ROImageStore, error) {
 	if err != nil {
 		return nil, err
 	}
-	lockfile.Lock()
+	lockfile.RLock()
 	defer lockfile.Unlock()
 	istore := imageStore{
 		lockfile: lockfile,
--- a/vendor/github.com/containers/storage/layers.go
+++ b/vendor/github.com/containers/storage/layers.go
@ -7,9 +7,11 @@ import (
 	"io"
 	"io/ioutil"
 	"os"
+	"path"
 	"path/filepath"
 	"reflect"
 	"sort"
+	"strings"
 	"time"

 	drivers "github.com/containers/storage/drivers"
@ -18,11 +20,13 @@ import (
 	"github.com/containers/storage/pkg/ioutils"
 	"github.com/containers/storage/pkg/stringid"
 	"github.com/containers/storage/pkg/system"
+	"github.com/containers/storage/pkg/tarlog"
 	"github.com/containers/storage/pkg/truncindex"
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/opencontainers/selinux/go-selinux/label"
 	"github.com/pkg/errors"
+	"github.com/vbatts/tar-split/archive/tar"
 	"github.com/vbatts/tar-split/tar/asm"
 	"github.com/vbatts/tar-split/tar/storage"
 )
@ -96,6 +100,12 @@ type Layer struct {
 	// that was last passed to ApplyDiff() or Put().
 	CompressionType archive.Compression `json:"compression,omitempty"`

+	// UIDs and GIDs are lists of UIDs and GIDs used in the layer.  This
+	// field is only populated (i.e., will only contain one or more
+	// entries) if the layer was created using ApplyDiff() or Put().
+	UIDs []uint32 `json:"uidset,omitempty"`
+	GIDs []uint32 `json:"gidset,omitempty"`
+
 	// Flags is arbitrary data about the layer.
 	Flags map[string]interface{} `json:"flags,omitempty"`

@ -298,7 +308,7 @@ func (r *layerStore) Load() error {
 	names := make(map[string]*Layer)
 	compressedsums := make(map[digest.Digest][]string)
 	uncompressedsums := make(map[digest.Digest][]string)
-	if r.lockfile.IsReadWrite() {
+	if r.IsReadWrite() {
 		label.ClearLabels()
 	}
 	if err = json.Unmarshal(data, &layers); len(data) == 0 || err == nil {
@ -377,6 +387,19 @@ func (r *layerStore) loadMounts() error {
 	}
 	layerMounts := []layerMountPoint{}
 	if err = json.Unmarshal(data, &layerMounts); len(data) == 0 || err == nil {
+		// Clear all of our mount information.  If another process
+		// unmounted something, it (along with its zero count) won't
+		// have been encoded into the version of mountpoints.json that
+		// we're loading, so our count could fall out of sync with it
+		// if we don't, and if we subsequently change something else,
+		// we'd pass that error along to other process that reloaded
+		// the data after we saved it.
+		for _, layer := range r.layers {
+			layer.MountPoint = ""
+			layer.MountCount = 0
+		}
+		// All of the non-zero count values will have been encoded, so
+		// we reset the still-mounted ones based on the contents.
 		for _, mount := range layerMounts {
 			if mount.MountPoint != "" {
 				if layer, ok := r.lookup(mount.ID); ok {
@ -488,7 +511,7 @@ func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (ROL
 	if err != nil {
 		return nil, err
 	}
-	lockfile.Lock()
+	lockfile.RLock()
 	defer lockfile.Unlock()
 	rlstore := layerStore{
 		lockfile:       lockfile,
@ -1236,15 +1259,32 @@ func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error
 	}
 	uncompressedDigest := digest.Canonical.Digester()
 	uncompressedCounter := ioutils.NewWriteCounter(uncompressedDigest.Hash())
-	payload, err := asm.NewInputTarStream(io.TeeReader(uncompressed, uncompressedCounter), metadata, storage.NewDiscardFilePutter())
+	uidLog := make(map[uint32]struct{})
+	gidLog := make(map[uint32]struct{})
+	idLogger, err := tarlog.NewLogger(func(h *tar.Header) {
+		if !strings.HasPrefix(path.Base(h.Name), archive.WhiteoutPrefix) {
+			uidLog[uint32(h.Uid)] = struct{}{}
+			gidLog[uint32(h.Gid)] = struct{}{}
+		}
+	})
 	if err != nil {
 		return -1, err
 	}
-	size, err = r.driver.ApplyDiff(layer.ID, r.layerMappings(layer), layer.Parent, layer.MountLabel, payload)
+	payload, err := asm.NewInputTarStream(io.TeeReader(uncompressed, io.MultiWriter(uncompressedCounter, idLogger)), metadata, storage.NewDiscardFilePutter())
+	if err != nil {
+		return -1, err
+	}
+	options := drivers.ApplyDiffOpts{
+		Diff:       payload,
+		Mappings:   r.layerMappings(layer),
+		MountLabel: layer.MountLabel,
+	}
+	size, err = r.driver.ApplyDiff(layer.ID, layer.Parent, options)
 	if err != nil {
 		return -1, err
 	}
 	compressor.Close()
+	idLogger.Close()
 	if err == nil {
 		if err := os.MkdirAll(filepath.Dir(r.tspath(layer.ID)), 0700); err != nil {
 			return -1, err
@ -1279,6 +1319,20 @@ func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error
 	layer.UncompressedDigest = uncompressedDigest.Digest()
 	layer.UncompressedSize = uncompressedCounter.Count
 	layer.CompressionType = compression
+	layer.UIDs = make([]uint32, 0, len(uidLog))
+	for uid := range uidLog {
+		layer.UIDs = append(layer.UIDs, uid)
+	}
+	sort.Slice(layer.UIDs, func(i, j int) bool {
+		return layer.UIDs[i] < layer.UIDs[j]
+	})
+	layer.GIDs = make([]uint32, 0, len(gidLog))
+	for gid := range gidLog {
+		layer.GIDs = append(layer.GIDs, gid)
+	}
+	sort.Slice(layer.GIDs, func(i, j int) bool {
+		return layer.GIDs[i] < layer.GIDs[j]
+	})

 	err = r.Save()

--- a/vendor/github.com/containers/storage/layers_ffjson.go
+++ b/vendor/github.com/containers/storage/layers_ffjson.go
@ -315,6 +315,38 @@ func (j *Layer) MarshalJSONBuf(buf fflib.EncodingBuffer) error {
 		fflib.FormatBits2(buf, uint64(j.CompressionType), 10, j.CompressionType < 0)
 		buf.WriteByte(',')
 	}
+	if len(j.UIDs) != 0 {
+		buf.WriteString(`"uidset":`)
+		if j.UIDs != nil {
+			buf.WriteString(`[`)
+			for i, v := range j.UIDs {
+				if i != 0 {
+					buf.WriteString(`,`)
+				}
+				fflib.FormatBits2(buf, uint64(v), 10, false)
+			}
+			buf.WriteString(`]`)
+		} else {
+			buf.WriteString(`null`)
+		}
+		buf.WriteByte(',')
+	}
+	if len(j.GIDs) != 0 {
+		buf.WriteString(`"gidset":`)
+		if j.GIDs != nil {
+			buf.WriteString(`[`)
+			for i, v := range j.GIDs {
+				if i != 0 {
+					buf.WriteString(`,`)
+				}
+				fflib.FormatBits2(buf, uint64(v), 10, false)
+			}
+			buf.WriteString(`]`)
+		} else {
+			buf.WriteString(`null`)
+		}
+		buf.WriteByte(',')
+	}
 	if len(j.Flags) != 0 {
 		buf.WriteString(`"flags":`)
 		/* Falling back. type=map[string]interface {} kind=map */
@ -395,6 +427,10 @@ const (

 	ffjtLayerCompressionType

+	ffjtLayerUIDs
+
+	ffjtLayerGIDs
+
 	ffjtLayerFlags

 	ffjtLayerUIDMap
@ -424,6 +460,10 @@ var ffjKeyLayerUncompressedSize = []byte("diff-size")

 var ffjKeyLayerCompressionType = []byte("compression")

+var ffjKeyLayerUIDs = []byte("uidset")
+
+var ffjKeyLayerGIDs = []byte("gidset")
+
 var ffjKeyLayerFlags = []byte("flags")

 var ffjKeyLayerUIDMap = []byte("uidmap")
@ -537,7 +577,12 @@ mainparse:

 				case 'g':

-					if bytes.Equal(ffjKeyLayerGIDMap, kn) {
+					if bytes.Equal(ffjKeyLayerGIDs, kn) {
+						currentKey = ffjtLayerGIDs
+						state = fflib.FFParse_want_colon
+						goto mainparse
+
+					} else if bytes.Equal(ffjKeyLayerGIDMap, kn) {
 						currentKey = ffjtLayerGIDMap
 						state = fflib.FFParse_want_colon
 						goto mainparse
@ -582,7 +627,12 @@ mainparse:

 				case 'u':

-					if bytes.Equal(ffjKeyLayerUIDMap, kn) {
+					if bytes.Equal(ffjKeyLayerUIDs, kn) {
+						currentKey = ffjtLayerUIDs
+						state = fflib.FFParse_want_colon
+						goto mainparse
+
+					} else if bytes.Equal(ffjKeyLayerUIDMap, kn) {
 						currentKey = ffjtLayerUIDMap
 						state = fflib.FFParse_want_colon
 						goto mainparse
@ -608,6 +658,18 @@ mainparse:
 					goto mainparse
 				}

+				if fflib.EqualFoldRight(ffjKeyLayerGIDs, kn) {
+					currentKey = ffjtLayerGIDs
+					state = fflib.FFParse_want_colon
+					goto mainparse
+				}
+
+				if fflib.EqualFoldRight(ffjKeyLayerUIDs, kn) {
+					currentKey = ffjtLayerUIDs
+					state = fflib.FFParse_want_colon
+					goto mainparse
+				}
+
 				if fflib.EqualFoldRight(ffjKeyLayerCompressionType, kn) {
 					currentKey = ffjtLayerCompressionType
 					state = fflib.FFParse_want_colon
@ -724,6 +786,12 @@ mainparse:
 				case ffjtLayerCompressionType:
 					goto handle_CompressionType

+				case ffjtLayerUIDs:
+					goto handle_UIDs
+
+				case ffjtLayerGIDs:
+					goto handle_GIDs
+
 				case ffjtLayerFlags:
 					goto handle_Flags

@ -1092,6 +1160,162 @@ handle_CompressionType:
 	state = fflib.FFParse_after_value
 	goto mainparse

+handle_UIDs:
+
+	/* handler: j.UIDs type=[]uint32 kind=slice quoted=false*/
+
+	{
+
+		{
+			if tok != fflib.FFTok_left_brace && tok != fflib.FFTok_null {
+				return fs.WrapErr(fmt.Errorf("cannot unmarshal %s into Go value for ", tok))
+			}
+		}
+
+		if tok == fflib.FFTok_null {
+			j.UIDs = nil
+		} else {
+
+			j.UIDs = []uint32{}
+
+			wantVal := true
+
+			for {
+
+				var tmpJUIDs uint32
+
+				tok = fs.Scan()
+				if tok == fflib.FFTok_error {
+					goto tokerror
+				}
+				if tok == fflib.FFTok_right_brace {
+					break
+				}
+
+				if tok == fflib.FFTok_comma {
+					if wantVal == true {
+						// TODO(pquerna): this isn't an ideal error message, this handles
+						// things like [,,,] as an array value.
+						return fs.WrapErr(fmt.Errorf("wanted value token, but got token: %v", tok))
+					}
+					continue
+				} else {
+					wantVal = true
+				}
+
+				/* handler: tmpJUIDs type=uint32 kind=uint32 quoted=false*/
+
+				{
+					if tok != fflib.FFTok_integer && tok != fflib.FFTok_null {
+						return fs.WrapErr(fmt.Errorf("cannot unmarshal %s into Go value for uint32", tok))
+					}
+				}
+
+				{
+
+					if tok == fflib.FFTok_null {
+
+					} else {
+
+						tval, err := fflib.ParseUint(fs.Output.Bytes(), 10, 32)
+
+						if err != nil {
+							return fs.WrapErr(err)
+						}
+
+						tmpJUIDs = uint32(tval)
+
+					}
+				}
+
+				j.UIDs = append(j.UIDs, tmpJUIDs)
+
+				wantVal = false
+			}
+		}
+	}
+
+	state = fflib.FFParse_after_value
+	goto mainparse
+
+handle_GIDs:
+
+	/* handler: j.GIDs type=[]uint32 kind=slice quoted=false*/
+
+	{
+
+		{
+			if tok != fflib.FFTok_left_brace && tok != fflib.FFTok_null {
+				return fs.WrapErr(fmt.Errorf("cannot unmarshal %s into Go value for ", tok))
+			}
+		}
+
+		if tok == fflib.FFTok_null {
+			j.GIDs = nil
+		} else {
+
+			j.GIDs = []uint32{}
+
+			wantVal := true
+
+			for {
+
+				var tmpJGIDs uint32
+
+				tok = fs.Scan()
+				if tok == fflib.FFTok_error {
+					goto tokerror
+				}
+				if tok == fflib.FFTok_right_brace {
+					break
+				}
+
+				if tok == fflib.FFTok_comma {
+					if wantVal == true {
+						// TODO(pquerna): this isn't an ideal error message, this handles
+						// things like [,,,] as an array value.
+						return fs.WrapErr(fmt.Errorf("wanted value token, but got token: %v", tok))
+					}
+					continue
+				} else {
+					wantVal = true
+				}
+
+				/* handler: tmpJGIDs type=uint32 kind=uint32 quoted=false*/
+
+				{
+					if tok != fflib.FFTok_integer && tok != fflib.FFTok_null {
+						return fs.WrapErr(fmt.Errorf("cannot unmarshal %s into Go value for uint32", tok))
+					}
+				}
+
+				{
+
+					if tok == fflib.FFTok_null {
+
+					} else {
+
+						tval, err := fflib.ParseUint(fs.Output.Bytes(), 10, 32)
+
+						if err != nil {
+							return fs.WrapErr(err)
+						}
+
+						tmpJGIDs = uint32(tval)
+
+					}
+				}
+
+				j.GIDs = append(j.GIDs, tmpJGIDs)
+
+				wantVal = false
+			}
+		}
+	}
+
+	state = fflib.FFParse_after_value
+	goto mainparse
+
 handle_Flags:

 	/* handler: j.Flags type=map[string]interface {} kind=map quoted=false*/
--- a/vendor/github.com/containers/storage/lockfile_compat.go
+++ b/vendor/github.com/containers/storage/lockfile_compat.go
@ -0,0 +1,15 @@
+package storage
+
+import (
+	"github.com/containers/storage/pkg/lockfile"
+)
+
+type Locker = lockfile.Locker
+
+func GetLockfile(path string) (lockfile.Locker, error) {
+	return lockfile.GetLockfile(path)
+}
+
+func GetROLockfile(path string) (lockfile.Locker, error) {
+	return lockfile.GetROLockfile(path)
+}
--- a/vendor/github.com/containers/storage/pkg/archive/archive.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive.go
@ -36,14 +36,15 @@ type (

 	// TarOptions wraps the tar options.
 	TarOptions struct {
-		IncludeFiles     []string
-		ExcludePatterns  []string
-		Compression      Compression
-		NoLchown         bool
-		UIDMaps          []idtools.IDMap
-		GIDMaps          []idtools.IDMap
-		ChownOpts        *idtools.IDPair
-		IncludeSourceDir bool
+		IncludeFiles      []string
+		ExcludePatterns   []string
+		Compression       Compression
+		NoLchown          bool
+		UIDMaps           []idtools.IDMap
+		GIDMaps           []idtools.IDMap
+		IgnoreChownErrors bool
+		ChownOpts         *idtools.IDPair
+		IncludeSourceDir  bool
 		// WhiteoutFormat is the expected on disk format for whiteout files.
 		// This format will be converted to the standard format on pack
 		// and from the standard format on unpack.
@ -98,6 +99,8 @@ const (
 	Gzip
 	// Xz is xz compression algorithm.
 	Xz
+	// Zstd is zstd compression algorithm.
+	Zstd
 )

 const (
@ -141,6 +144,7 @@ func DetectCompression(source []byte) Compression {
 		Bzip2: {0x42, 0x5A, 0x68},
 		Gzip:  {0x1F, 0x8B, 0x08},
 		Xz:    {0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00},
+		Zstd:  {0x28, 0xb5, 0x2f, 0xfd},
 	} {
 		if len(source) < len(m) {
 			logrus.Debug("Len too short")
@ -200,6 +204,8 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
 			<-chdone
 			return readBufWrapper.Close()
 		}), nil
+	case Zstd:
+		return zstdReader(buf)
 	default:
 		return nil, fmt.Errorf("Unsupported compression format %s", (&compression).Extension())
 	}
@ -217,6 +223,8 @@ func CompressStream(dest io.Writer, compression Compression) (io.WriteCloser, er
 		gzWriter := gzip.NewWriter(dest)
 		writeBufWrapper := p.NewWriteCloserWrapper(buf, gzWriter)
 		return writeBufWrapper, nil
+	case Zstd:
+		return zstdWriter(dest)
 	case Bzip2, Xz:
 		// archive/bzip2 does not support writing, and there is no xz support at all
 		// However, this is not a problem as docker only currently generates gzipped tars
@ -324,6 +332,8 @@ func (compression *Compression) Extension() string {
 		return "tar.gz"
 	case Xz:
 		return "tar.xz"
+	case Zstd:
+		return "tar.zst"
 	}
 	return ""
 }
@ -554,7 +564,7 @@ func (ta *tarAppender) addTarFile(path, name string) error {
 	return nil
 }

-func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, Lchown bool, chownOpts *idtools.IDPair, inUserns bool) error {
+func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, Lchown bool, chownOpts *idtools.IDPair, inUserns, ignoreChownErrors bool) error {
 	// hdr.Mode is in linux format, which we can use for sycalls,
 	// but for os.Foo() calls we need the mode converted to os.FileMode,
 	// so use hdrInfo.Mode() (they differ for e.g. setuid bits)
@ -636,8 +646,13 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
 		if chownOpts == nil {
 			chownOpts = &idtools.IDPair{UID: hdr.Uid, GID: hdr.Gid}
 		}
-		if err := idtools.SafeLchown(path, chownOpts.UID, chownOpts.GID); err != nil {
-			return err
+		err := idtools.SafeLchown(path, chownOpts.UID, chownOpts.GID)
+		if err != nil {
+			if ignoreChownErrors {
+				fmt.Fprintf(os.Stderr, "Chown error detected. Ignoring due to ignoreChownErrors flag: %v\n", err)
+			} else {
+				return err
+			}
 		}
 	}

@ -984,7 +999,7 @@ loop:
 			chownOpts = &idtools.IDPair{UID: hdr.Uid, GID: hdr.Gid}
 		}

-		if err := createTarFile(path, dest, hdr, trBuf, !options.NoLchown, chownOpts, options.InUserNS); err != nil {
+		if err := createTarFile(path, dest, hdr, trBuf, !options.NoLchown, chownOpts, options.InUserNS, options.IgnoreChownErrors); err != nil {
 			return err
 		}

--- a/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive_ffjson.go
@ -445,6 +445,11 @@ func (j *TarOptions) MarshalJSONBuf(buf fflib.EncodingBuffer) error {
 	} else {
 		buf.WriteString(`null`)
 	}
+	if j.IgnoreChownErrors {
+		buf.WriteString(`,"IgnoreChownErrors":true`)
+	} else {
+		buf.WriteString(`,"IgnoreChownErrors":false`)
+	}
 	if j.ChownOpts != nil {
 		/* Struct fall back. type=idtools.IDPair kind=struct */
 		buf.WriteString(`,"ChownOpts":`)
@ -516,6 +521,8 @@ const (

 	ffjtTarOptionsGIDMaps

+	ffjtTarOptionsIgnoreChownErrors
+
 	ffjtTarOptionsChownOpts

 	ffjtTarOptionsIncludeSourceDir
@ -545,6 +552,8 @@ var ffjKeyTarOptionsUIDMaps = []byte("UIDMaps")

 var ffjKeyTarOptionsGIDMaps = []byte("GIDMaps")

+var ffjKeyTarOptionsIgnoreChownErrors = []byte("IgnoreChownErrors")
+
 var ffjKeyTarOptionsChownOpts = []byte("ChownOpts")

 var ffjKeyTarOptionsIncludeSourceDir = []byte("IncludeSourceDir")
@ -663,6 +672,11 @@ mainparse:
 						state = fflib.FFParse_want_colon
 						goto mainparse

+					} else if bytes.Equal(ffjKeyTarOptionsIgnoreChownErrors, kn) {
+						currentKey = ffjtTarOptionsIgnoreChownErrors
+						state = fflib.FFParse_want_colon
+						goto mainparse
+
 					} else if bytes.Equal(ffjKeyTarOptionsIncludeSourceDir, kn) {
 						currentKey = ffjtTarOptionsIncludeSourceDir
 						state = fflib.FFParse_want_colon
@ -766,6 +780,12 @@ mainparse:
 					goto mainparse
 				}

+				if fflib.EqualFoldRight(ffjKeyTarOptionsIgnoreChownErrors, kn) {
+					currentKey = ffjtTarOptionsIgnoreChownErrors
+					state = fflib.FFParse_want_colon
+					goto mainparse
+				}
+
 				if fflib.EqualFoldRight(ffjKeyTarOptionsGIDMaps, kn) {
 					currentKey = ffjtTarOptionsGIDMaps
 					state = fflib.FFParse_want_colon
@ -837,6 +857,9 @@ mainparse:
 				case ffjtTarOptionsGIDMaps:
 					goto handle_GIDMaps

+				case ffjtTarOptionsIgnoreChownErrors:
+					goto handle_IgnoreChownErrors
+
 				case ffjtTarOptionsChownOpts:
 					goto handle_ChownOpts

@ -1224,6 +1247,41 @@ handle_GIDMaps:
 	state = fflib.FFParse_after_value
 	goto mainparse

+handle_IgnoreChownErrors:
+
+	/* handler: j.IgnoreChownErrors type=bool kind=bool quoted=false*/
+
+	{
+		if tok != fflib.FFTok_bool && tok != fflib.FFTok_null {
+			return fs.WrapErr(fmt.Errorf("cannot unmarshal %s into Go value for bool", tok))
+		}
+	}
+
+	{
+		if tok == fflib.FFTok_null {
+
+		} else {
+			tmpb := fs.Output.Bytes()
+
+			if bytes.Compare([]byte{'t', 'r', 'u', 'e'}, tmpb) == 0 {
+
+				j.IgnoreChownErrors = true
+
+			} else if bytes.Compare([]byte{'f', 'a', 'l', 's', 'e'}, tmpb) == 0 {
+
+				j.IgnoreChownErrors = false
+
+			} else {
+				err = errors.New("unexpected bytes for true/false value")
+				return fs.WrapErr(err)
+			}
+
+		}
+	}
+
+	state = fflib.FFParse_after_value
+	goto mainparse
+
 handle_ChownOpts:

 	/* handler: j.ChownOpts type=idtools.IDPair kind=struct quoted=false*/
--- a/vendor/github.com/containers/storage/pkg/archive/archive_zstd.go
+++ b/vendor/github.com/containers/storage/pkg/archive/archive_zstd.go
@ -0,0 +1,41 @@
+package archive
+
+import (
+	"io"
+
+	"github.com/klauspost/compress/zstd"
+)
+
+type wrapperZstdDecoder struct {
+	decoder *zstd.Decoder
+}
+
+func (w *wrapperZstdDecoder) Close() error {
+	w.decoder.Close()
+	return nil
+}
+
+func (w *wrapperZstdDecoder) DecodeAll(input, dst []byte) ([]byte, error) {
+	return w.decoder.DecodeAll(input, dst)
+}
+
+func (w *wrapperZstdDecoder) Read(p []byte) (int, error) {
+	return w.decoder.Read(p)
+}
+
+func (w *wrapperZstdDecoder) Reset(r io.Reader) error {
+	return w.decoder.Reset(r)
+}
+
+func (w *wrapperZstdDecoder) WriteTo(wr io.Writer) (int64, error) {
+	return w.decoder.WriteTo(wr)
+}
+
+func zstdReader(buf io.Reader) (io.ReadCloser, error) {
+	decoder, err := zstd.NewReader(buf)
+	return &wrapperZstdDecoder{decoder: decoder}, err
+}
+
+func zstdWriter(dest io.Writer) (io.WriteCloser, error) {
+	return zstd.NewWriter(dest)
+}
--- a/vendor/github.com/containers/storage/pkg/archive/diff.go
+++ b/vendor/github.com/containers/storage/pkg/archive/diff.go
@ -105,7 +105,7 @@ func UnpackLayer(dest string, layer io.Reader, options *TarOptions) (size int64,
 					}
 					defer os.RemoveAll(aufsTempdir)
 				}
-				if err := createTarFile(filepath.Join(aufsTempdir, basename), dest, hdr, tr, true, nil, options.InUserNS); err != nil {
+				if err := createTarFile(filepath.Join(aufsTempdir, basename), dest, hdr, tr, true, nil, options.InUserNS, options.IgnoreChownErrors); err != nil {
 					return 0, err
 				}
 			}
@ -196,7 +196,7 @@ func UnpackLayer(dest string, layer io.Reader, options *TarOptions) (size int64,
 				return 0, err
 			}

-			if err := createTarFile(path, dest, srcHdr, srcData, true, nil, options.InUserNS); err != nil {
+			if err := createTarFile(path, dest, srcHdr, srcData, true, nil, options.InUserNS, options.IgnoreChownErrors); err != nil {
 				return 0, err
 			}

--- a/vendor/github.com/containers/storage/pkg/config/config.go
+++ b/vendor/github.com/containers/storage/pkg/config/config.go
@ -73,6 +73,9 @@ type OptionsConfig struct {
 	RemapUIDs string `toml:"remap-uids"`
 	// RemapGIDs is a list of default GID mappings to use for layers.
 	RemapGIDs string `toml:"remap-gids"`
+	// IgnoreChownErrors is a flag for whether chown errors should be
+	// ignored when building an image.
+	IgnoreChownErrors string `toml:"ignore_chown_errors"`

 	// RemapUser is the name of one or more entries in /etc/subuid which
 	// should be used to set up default UID mappings.
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
@ -4,6 +4,7 @@ import (
 	"bufio"
 	"fmt"
 	"os"
+	"os/user"
 	"sort"
 	"strconv"
 	"strings"
@ -244,7 +245,13 @@ func parseSubgid(username string) (ranges, error) {
 // and return all found ranges for a specified username. If the special value
 // "ALL" is supplied for username, then all ranges in the file will be returned
 func parseSubidFile(path, username string) (ranges, error) {
-	var rangeList ranges
+	var (
+		rangeList ranges
+		uidstr    string
+	)
+	if u, err := user.Lookup(username); err == nil {
+		uidstr = u.Uid
+	}

 	subidFile, err := os.Open(path)
 	if err != nil {
@ -266,7 +273,7 @@ func parseSubidFile(path, username string) (ranges, error) {
 		if len(parts) != 3 {
 			return rangeList, fmt.Errorf("Cannot parse subuid/gid information: Format not correct for %s file", path)
 		}
-		if parts[0] == username || username == "ALL" {
+		if parts[0] == username || username == "ALL" || (parts[0] == uidstr && parts[0] != "") {
 			startid, err := strconv.Atoi(parts[1])
 			if err != nil {
 				return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
--- a/vendor/github.com/containers/storage/pkg/lockfile/lockfile.go
+++ b/vendor/github.com/containers/storage/pkg/lockfile/lockfile.go
@ -1,4 +1,4 @@
-package storage
+package lockfile

 import (
 	"path/filepath"
@ -92,10 +92,10 @@ func getLockfile(path string, ro bool) (Locker, error) {
 		}
 		return locker, nil
 	}
-	locker, err := createLockerForPath(path, ro) // platform-dependent locker
+	locker, err := createLockerForPath(cleanPath, ro) // platform-dependent locker
 	if err != nil {
 		return nil, err
 	}
-	lockfiles[filepath.Clean(path)] = locker
+	lockfiles[cleanPath] = locker
 	return locker, nil
 }
--- a/vendor/github.com/containers/storage/pkg/lockfile/lockfile_unix.go
+++ b/vendor/github.com/containers/storage/pkg/lockfile/lockfile_unix.go
@ -1,6 +1,6 @@
 // +build linux solaris darwin freebsd

-package storage
+package lockfile

 import (
 	"fmt"
@ -122,11 +122,10 @@ func (l *lockfile) lock(l_type int16, recursive bool) {
 	l.counter++
 }

-// Lock locks the lockfile as a writer.  Note that RLock() will be called if
-// the lock is a read-only one.
+// Lock locks the lockfile as a writer.  Panic if the lock is a read-only one.
 func (l *lockfile) Lock() {
 	if l.ro {
-		l.RLock()
+		panic("can't take write lock on read-only lock file")
 	} else {
 		l.lock(unix.F_WRLCK, false)
 	}
@ -150,13 +149,6 @@ func (l *lockfile) RLock() {

 // Unlock unlocks the lockfile.
 func (l *lockfile) Unlock() {
-	lk := unix.Flock_t{
-		Type:   unix.F_UNLCK,
-		Whence: int16(os.SEEK_SET),
-		Start:  0,
-		Len:    0,
-		Pid:    int32(os.Getpid()),
-	}
 	l.stateMutex.Lock()
 	if l.locked == false {
 		// Panic when unlocking an unlocked lock.  That's a violation
@ -175,10 +167,8 @@ func (l *lockfile) Unlock() {
 		// avoid releasing read-locks too early; a given process may
 		// acquire a read lock multiple times.
 		l.locked = false
-		for unix.FcntlFlock(l.fd, unix.F_SETLKW, &lk) != nil {
-			time.Sleep(10 * time.Millisecond)
-		}
-		// Close the file descriptor on the last unlock.
+		// Close the file descriptor on the last unlock, releasing the
+		// file lock.
 		unix.Close(int(l.fd))
 	}
 	if l.locktype == unix.F_RDLCK || l.recursive {
--- a/vendor/github.com/containers/storage/pkg/lockfile/lockfile_windows.go
+++ b/vendor/github.com/containers/storage/pkg/lockfile/lockfile_windows.go
@ -1,6 +1,6 @@
 // +build windows

-package storage
+package lockfile

 import (
 	"os"
--- a/vendor/github.com/containers/storage/pkg/tarlog/tarlogger.go
+++ b/vendor/github.com/containers/storage/pkg/tarlog/tarlogger.go
@ -0,0 +1,69 @@
+package tarlog
+
+import (
+	"io"
+	"sync"
+
+	"github.com/sirupsen/logrus"
+	"github.com/vbatts/tar-split/archive/tar"
+)
+
+type tarLogger struct {
+	writer     *io.PipeWriter
+	closeMutex *sync.Mutex
+	stateMutex *sync.Mutex
+	closed     bool
+}
+
+// NewLogger returns a writer that, when a tar archive is written to it, calls
+// `logger` for each file header it encounters in the archive.
+func NewLogger(logger func(*tar.Header)) (io.WriteCloser, error) {
+	reader, writer := io.Pipe()
+	t := &tarLogger{
+		writer:     writer,
+		closeMutex: new(sync.Mutex),
+		stateMutex: new(sync.Mutex),
+		closed:     false,
+	}
+	tr := tar.NewReader(reader)
+	tr.RawAccounting = true
+	t.closeMutex.Lock()
+	go func() {
+		hdr, err := tr.Next()
+		for err == nil {
+			logger(hdr)
+			hdr, err = tr.Next()
+
+		}
+		// Make sure to avoid writes after the reader has been closed.
+		t.stateMutex.Lock()
+		t.closed = true
+		if err := reader.Close(); err != nil {
+			logrus.Errorf("error closing tarlogger reader: %v", err)
+		}
+		t.stateMutex.Unlock()
+		// Unblock the Close().
+		t.closeMutex.Unlock()
+	}()
+	return t, nil
+}
+
+func (t *tarLogger) Write(b []byte) (int, error) {
+	t.stateMutex.Lock()
+	if t.closed {
+		// We cannot use os.Pipe() as this alters the tar's digest. Using
+		// io.Pipe() requires this workaround as it does not allow for writes
+		// after close.
+		t.stateMutex.Unlock()
+		return len(b), nil
+	}
+	t.stateMutex.Unlock()
+	return t.writer.Write(b)
+}
+
+func (t *tarLogger) Close() error {
+	err := t.writer.Close()
+	// Wait for the reader to finish.
+	t.closeMutex.Lock()
+	return err
+}
--- a/vendor/github.com/containers/storage/storage.conf
+++ b/vendor/github.com/containers/storage/storage.conf
@ -32,6 +32,14 @@ size = ""
 # mountopt specifies comma separated list of extra mount options
 mountopt = "nodev"

+# ignore_chown_errors can be set to allow a non privileged user running with
+# a single UID within a user namespace to run containers. The user can pull
+# and use any image even those with multiple uids.  Note multiple UIDs will be
+# squasheddown to the default uid in the container.  These images will have no
+# separation between the users in the container. Only supported for the overlay
+# and vfs drivers.
+#ignore_chown_errors = false
+
 # Remap-UIDs/GIDs is the mapping from UIDs/GIDs as they should appear inside of
 # a container, to the UIDs/GIDs as they should appear outside of the container,
 # and the length of the range of UIDs/GIDs.  Additional mapped sets can be
--- a/vendor/github.com/containers/storage/store.go
+++ b/vendor/github.com/containers/storage/store.go
@ -722,6 +722,7 @@ func (s *store) getGraphDriver() (drivers.Driver, error) {
 	}
 	config := drivers.Options{
 		Root:          s.graphRoot,
+		RunRoot:       s.runRoot,
 		DriverOptions: s.graphOptions,
 		UIDMaps:       s.uidMap,
 		GIDMaps:       s.gidMap,
@ -900,7 +901,7 @@ func (s *store) PutLayer(id, parent string, names []string, mountLabel string, w
 		for _, l := range append([]ROLayerStore{rlstore}, rlstores...) {
 			lstore := l
 			if lstore != rlstore {
-				lstore.Lock()
+				lstore.RLock()
 				defer lstore.Unlock()
 				if modified, err := lstore.Modified(); modified || err != nil {
 					if err = lstore.Load(); err != nil {
@ -979,7 +980,11 @@ func (s *store) CreateImage(id string, names []string, layer, metadata string, o
 		var ilayer *Layer
 		for _, s := range append([]ROLayerStore{lstore}, lstores...) {
 			store := s
-			store.Lock()
+			if store == lstore {
+				store.Lock()
+			} else {
+				store.RLock()
+			}
 			defer store.Unlock()
 			if modified, err := store.Modified(); modified || err != nil {
 				if err = store.Load(); err != nil {
@ -1043,7 +1048,7 @@ func (s *store) imageTopLayerForMapping(image *Image, ristore ROImageStore, crea
 	for _, s := range allStores {
 		store := s
 		if store != rlstore {
-			store.Lock()
+			store.RLock()
 			defer store.Unlock()
 			if modified, err := store.Modified(); modified || err != nil {
 				if err = store.Load(); err != nil {
@ -1179,7 +1184,11 @@ func (s *store) CreateContainer(id string, names []string, image, layer, metadat
 		var cimage *Image
 		for _, s := range append([]ROImageStore{istore}, istores...) {
 			store := s
-			store.Lock()
+			if store == istore {
+				store.Lock()
+			} else {
+				store.RLock()
+			}
 			defer store.Unlock()
 			if modified, err := store.Modified(); modified || err != nil {
 				if err = store.Load(); err != nil {
@ -3348,6 +3357,9 @@ func ReloadConfigurationFile(configFile string, storeOptions *StoreOptions) {
 	if config.Storage.Options.MountProgram != "" {
 		storeOptions.GraphDriverOptions = append(storeOptions.GraphDriverOptions, fmt.Sprintf("%s.mount_program=%s", config.Storage.Driver, config.Storage.Options.MountProgram))
 	}
+	if config.Storage.Options.IgnoreChownErrors != "" {
+		storeOptions.GraphDriverOptions = append(storeOptions.GraphDriverOptions, fmt.Sprintf("%s.ignore_chown_errors=%s", config.Storage.Driver, config.Storage.Options.IgnoreChownErrors))
+	}
 	if config.Storage.Options.MountOpt != "" {
 		storeOptions.GraphDriverOptions = append(storeOptions.GraphDriverOptions, fmt.Sprintf("%s.mountopt=%s", config.Storage.Driver, config.Storage.Options.MountOpt))
 	}
--- a/vendor/github.com/containers/storage/utils.go
+++ b/vendor/github.com/containers/storage/utils.go
@ -195,17 +195,17 @@ func DefaultStoreOptions(rootless bool, rootlessUid int) (StoreOptions, error) {
 	if err != nil {
 		return storageOpts, err
 	}
-	if _, err = os.Stat(storageConf); err == nil {
+	_, err = os.Stat(storageConf)
+	if err != nil && !os.IsNotExist(err) {
+		return storageOpts, errors.Wrapf(err, "cannot stat %s", storageConf)
+	}
+	if err == nil {
 		defaultRootlessRunRoot = storageOpts.RunRoot
 		defaultRootlessGraphRoot = storageOpts.GraphRoot
 		storageOpts = StoreOptions{}
 		ReloadConfigurationFile(storageConf, &storageOpts)
 	}

-	if !os.IsNotExist(err) {
-		return storageOpts, errors.Wrapf(err, "cannot stat %s", storageConf)
-	}
-
 	if rootless && rootlessUid != 0 {
 		if err == nil {
 			// If the file did not specify a graphroot or runroot,
--- a/vendor/github.com/containers/storage/vendor.conf
+++ b/vendor/github.com/containers/storage/vendor.conf
@ -1,28 +0,0 @@
-github.com/BurntSushi/toml master
-github.com/Microsoft/go-winio 307e919c663683a9000576fdc855acaf9534c165
-github.com/Microsoft/hcsshim a8d9cc56cbce765a7eebdf4792e6ceceeff3edb8
-github.com/davecgh/go-spew 346938d642f2ec3594ed81d874461961cd0faa76
-github.com/docker/docker 86f080cff0914e9694068ed78d503701667c4c00
-github.com/docker/go-units 0dadbb0345b35ec7ef35e228dabb8de89a65bf52
-github.com/klauspost/compress v1.4.1
-github.com/klauspost/cpuid v1.2.0
-github.com/klauspost/pgzip v1.2.1
-github.com/mattn/go-shellwords 753a2322a99f87c0eff284980e77f53041555bc6
-github.com/mistifyio/go-zfs c0224de804d438efd11ea6e52ada8014537d6062
-github.com/opencontainers/go-digest master
-github.com/opencontainers/runc 6c22e77604689db8725fa866f0f2ec0b3e8c3a07
-github.com/opencontainers/selinux v1.1
-github.com/ostreedev/ostree-go master
-github.com/pborman/uuid 1b00554d822231195d1babd97ff4a781231955c9
-github.com/pkg/errors master
-github.com/pmezard/go-difflib v1.0.0
-github.com/pquerna/ffjson d49c2bc1aa135aad0c6f4fc2056623ec78f5d5ac
-github.com/sirupsen/logrus v1.0.0
-github.com/stretchr/testify 4d4bfba8f1d1027c4fdbe371823030df51419987
-github.com/syndtr/gocapability master
-github.com/tchap/go-patricia v2.2.6
-github.com/vbatts/tar-split v0.10.2
-golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
-golang.org/x/sys 07c182904dbd53199946ba614a412c61d3c548f5
-gotest.tools master
-github.com/google/go-cmp master
--- a/vendor/github.com/docker/go-units/MAINTAINERS
+++ b/vendor/github.com/docker/go-units/MAINTAINERS
@ -1,6 +1,6 @@
-# go-connections maintainers file
+# go-units maintainers file
 #
-# This file describes who runs the docker/go-connections project and how.
+# This file describes who runs the docker/go-units project and how.
 # This is a living document - if you see something out of date or missing, speak up!
 #
 # It is structured to be consumable by both humans and programs.
@ -11,7 +11,10 @@
 [Org]
 	[Org."Core maintainers"]
 		people = [
-			"calavera",
+			"akihirosuda",
+			"dnephin",
+			"thajeztah",
+			"vdemeester",
 		]

 [people]
@ -21,7 +24,23 @@
 # in the people section.

 	# ADD YOURSELF HERE IN ALPHABETICAL ORDER
-	[people.calavera]
-	Name = "David Calavera"
-	Email = "david.calavera@gmail.com"
-	GitHub = "calavera"
+
+	[people.akihirosuda]
+	Name = "Akihiro Suda"
+	Email = "akihiro.suda.cz@hco.ntt.co.jp"
+	GitHub = "AkihiroSuda"
+
+	[people.dnephin]
+	Name = "Daniel Nephin"
+	Email = "dnephin@gmail.com"
+	GitHub = "dnephin"
+	
+	[people.thajeztah]
+	Name = "Sebastiaan van Stijn"
+	Email = "github@gone.nl"
+	GitHub = "thaJeztah"
+
+	[people.vdemeester]
+	Name = "Vincent Demeester"
+	Email = "vincent@sbr.pm"
+	GitHub = "vdemeester"
--- a/vendor/github.com/docker/go-units/circle.yml
+++ b/vendor/github.com/docker/go-units/circle.yml
@ -1,7 +1,7 @@
 dependencies:
  post:
    # install golint
-    - go get github.com/golang/lint/golint
+    - go get golang.org/x/lint/golint

 test:
  pre:
--- a/vendor/github.com/docker/go-units/duration.go
+++ b/vendor/github.com/docker/go-units/duration.go
@ -20,7 +20,7 @@ func HumanDuration(d time.Duration) string {
 		return "About a minute"
 	} else if minutes < 60 {
 		return fmt.Sprintf("%d minutes", minutes)
-	} else if hours := int(d.Hours()); hours == 1 {
+	} else if hours := int(d.Hours() + 0.5); hours == 1 {
 		return "About an hour"
 	} else if hours < 48 {
 		return fmt.Sprintf("%d hours", hours)
--- a/vendor/github.com/docker/go-units/size.go
+++ b/vendor/github.com/docker/go-units/size.go
@ -31,7 +31,7 @@ type unitMap map[string]int64
 var (
 	decimalMap = unitMap{"k": KB, "m": MB, "g": GB, "t": TB, "p": PB}
 	binaryMap  = unitMap{"k": KiB, "m": MiB, "g": GiB, "t": TiB, "p": PiB}
-	sizeRegex  = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[bB]?$`)
+	sizeRegex  = regexp.MustCompile(`^(\d+(\.\d+)*) ?([kKmMgGtTpP])?[iI]?[bB]?$`)
 )

 var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
@ -58,7 +58,7 @@ func CustomSize(format string, size float64, base float64, _map []string) string
 // instead of 4 digit precision used in units.HumanSize.
 func HumanSizeWithPrecision(size float64, precision int) string {
 	size, unit := getSizeAndUnit(size, 1000.0, decimapAbbrs)
-	return fmt.Sprintf("%.*g %s", precision, size, unit)
+	return fmt.Sprintf("%.*g%s", precision, size, unit)
 }

 // HumanSize returns a human-readable approximation of a size
@ -70,7 +70,7 @@ func HumanSize(size float64) string {
 // BytesSize returns a human-readable size in bytes, kibibytes,
 // mebibytes, gibibytes, or tebibytes (eg. "44kiB", "17MiB").
 func BytesSize(size float64) string {
-	return CustomSize("%.4g %s", size, 1024.0, binaryAbbrs)
+	return CustomSize("%.4g%s", size, 1024.0, binaryAbbrs)
 }

 // FromHumanSize returns an integer from a human-readable specification of a
--- a/vendor/github.com/docker/go-units/ulimit.go
+++ b/vendor/github.com/docker/go-units/ulimit.go
@ -96,8 +96,13 @@ func ParseUlimit(val string) (*Ulimit, error) {
 		return nil, fmt.Errorf("too many limit value arguments - %s, can only have up to two, `soft[:hard]`", parts[1])
 	}

-	if soft > *hard {
-		return nil, fmt.Errorf("ulimit soft limit must be less than or equal to hard limit: %d > %d", soft, *hard)
+	if *hard != -1 {
+		if soft == -1 {
+			return nil, fmt.Errorf("ulimit soft limit must be less than or equal to hard limit: soft: -1 (unlimited), hard: %d", *hard)
+		}
+		if soft > *hard {
+			return nil, fmt.Errorf("ulimit soft limit must be less than or equal to hard limit: %d > %d", soft, *hard)
+		}
 	}

 	return &Ulimit{Name: parts[0], Soft: soft, Hard: *hard}, nil
--- a/vendor/github.com/klauspost/compress/flate/copy.go
+++ b/vendor/github.com/klauspost/compress/flate/copy.go
@ -1,32 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-// forwardCopy is like the built-in copy function except that it always goes
-// forward from the start, even if the dst and src overlap.
-// It is equivalent to:
-//   for i := 0; i < n; i++ {
-//     mem[dst+i] = mem[src+i]
-//   }
-func forwardCopy(mem []byte, dst, src, n int) {
-	if dst <= src {
-		copy(mem[dst:dst+n], mem[src:src+n])
-		return
-	}
-	for {
-		if dst >= src+n {
-			copy(mem[dst:dst+n], mem[src:src+n])
-			return
-		}
-		// There is some forward overlap.  The destination
-		// will be filled with a repeated pattern of mem[src:src+k].
-		// We copy one instance of the pattern here, then repeat.
-		// Each time around this loop k will double.
-		k := dst - src
-		copy(mem[dst:dst+k], mem[src:src+k])
-		n -= k
-		dst += k
-	}
-}
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@ -35,7 +35,7 @@ const (
 )

 // The number of extra bits needed by length code X - LENGTH_CODES_START.
-var lengthExtraBits = []int8{
+var lengthExtraBits = [32]int8{
 	/* 257 */ 0, 0, 0,
 	/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
 	/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
@ -43,14 +43,14 @@ var lengthExtraBits = []int8{
 }

 // The length indicated by length code X - LENGTH_CODES_START.
-var lengthBase = []uint32{
+var lengthBase = [32]uint8{
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
 	12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
 	64, 80, 96, 112, 128, 160, 192, 224, 255,
 }

 // offset code word extra bits.
-var offsetExtraBits = []int8{
+var offsetExtraBits = [64]int8{
 	0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
 	4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
 	9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
@ -58,7 +58,7 @@ var offsetExtraBits = []int8{
 	14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
 }

-var offsetBase = []uint32{
+var offsetBase = [64]uint32{
 	/* normal deflate */
 	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
 	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
@ -86,9 +86,9 @@ type huffmanBitWriter struct {
 	// and then the low nbits of bits.
 	bits            uint64
 	nbits           uint
-	bytes           [bufferSize]byte
+	bytes           [256]byte
 	codegenFreq     [codegenCodeCount]int32
-	nbytes          int
+	nbytes          uint8
 	literalFreq     []int32
 	offsetFreq      []int32
 	codegen         []uint8
@ -101,8 +101,8 @@ type huffmanBitWriter struct {
 func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 	return &huffmanBitWriter{
 		writer:          w,
-		literalFreq:     make([]int32, maxNumLit),
-		offsetFreq:      make([]int32, offsetCodeCount),
+		literalFreq:     make([]int32, lengthCodesStart+32),
+		offsetFreq:      make([]int32, 32),
 		codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
 		literalEncoding: newHuffmanEncoder(maxNumLit),
 		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
@ -113,7 +113,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 func (w *huffmanBitWriter) reset(writer io.Writer) {
 	w.writer = writer
 	w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil
-	w.bytes = [bufferSize]byte{}
+	w.bytes = [256]byte{}
 }

 func (w *huffmanBitWriter) flush() {
@ -145,9 +145,6 @@ func (w *huffmanBitWriter) write(b []byte) {
 }

 func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
-	if w.err != nil {
-		return
-	}
 	w.bits |= uint64(b) << w.nbits
 	w.nbits += nb
 	if w.nbits >= 48 {
@ -155,15 +152,18 @@ func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
 		w.bits >>= 48
 		w.nbits -= 48
 		n := w.nbytes
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
+			if w.err != nil {
+				n = 0
+				return
+			}
 			w.write(w.bytes[:n])
 			n = 0
 		}
@ -333,9 +333,6 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
 }

 func (w *huffmanBitWriter) writeCode(c hcode) {
-	if w.err != nil {
-		return
-	}
 	w.bits |= uint64(c.code) << w.nbits
 	w.nbits += uint(c.len)
 	if w.nbits >= 48 {
@ -343,15 +340,18 @@ func (w *huffmanBitWriter) writeCode(c hcode) {
 		w.bits >>= 48
 		w.nbits -= 48
 		n := w.nbytes
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n >= bufferFlushSize {
+			if w.err != nil {
+				n = 0
+				return
+			}
 			w.write(w.bytes[:n])
 			n = 0
 		}
@ -460,7 +460,7 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
 		}
 		for offsetCode := 4; offsetCode < numOffsets; offsetCode++ {
 			// First four offset codes have extra size = 0.
-			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode])
+			extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode&63])
 		}
 	}

@ -548,15 +548,30 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
 		w.offsetFreq[i] = 0
 	}

+	if len(tokens) == 0 {
+		return
+	}
+
+	// Only last token should be endBlockMarker.
+	if tokens[len(tokens)-1] == endBlockMarker {
+		w.literalFreq[endBlockMarker]++
+		tokens = tokens[:len(tokens)-1]
+	}
+
+	// Create slices up to the next power of two to avoid bounds checks.
+	lits := w.literalFreq[:256]
+	offs := w.offsetFreq[:32]
+	lengths := w.literalFreq[lengthCodesStart:]
+	lengths = lengths[:32]
 	for _, t := range tokens {
-		if t < matchType {
-			w.literalFreq[t.literal()]++
+		if t < endBlockMarker {
+			lits[t.literal()]++
 			continue
 		}
 		length := t.length()
 		offset := t.offset()
-		w.literalFreq[lengthCodesStart+lengthCode(length)]++
-		w.offsetFreq[offsetCode(offset)]++
+		lengths[lengthCode(length)&31]++
+		offs[offsetCode(offset)&31]++
 	}

 	// get the number of literals
@ -575,8 +590,8 @@ func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets
 		w.offsetFreq[0] = 1
 		numOffsets = 1
 	}
-	w.literalEncoding.generate(w.literalFreq, 15)
-	w.offsetEncoding.generate(w.offsetFreq, 15)
+	w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)
+	w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15)
 	return
 }

@ -586,30 +601,50 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 	if w.err != nil {
 		return
 	}
+	if len(tokens) == 0 {
+		return
+	}
+
+	// Only last token should be endBlockMarker.
+	var deferEOB bool
+	if tokens[len(tokens)-1] == endBlockMarker {
+		tokens = tokens[:len(tokens)-1]
+		deferEOB = true
+	}
+
+	// Create slices up to the next power of two to avoid bounds checks.
+	lits := leCodes[:256]
+	offs := oeCodes[:32]
+	lengths := leCodes[lengthCodesStart:]
+	lengths = lengths[:32]
 	for _, t := range tokens {
 		if t < matchType {
-			w.writeCode(leCodes[t.literal()])
+			w.writeCode(lits[t.literal()])
 			continue
 		}
+
 		// Write the length
 		length := t.length()
 		lengthCode := lengthCode(length)
-		w.writeCode(leCodes[lengthCode+lengthCodesStart])
-		extraLengthBits := uint(lengthExtraBits[lengthCode])
+		w.writeCode(lengths[lengthCode&31])
+		extraLengthBits := uint(lengthExtraBits[lengthCode&31])
 		if extraLengthBits > 0 {
-			extraLength := int32(length - lengthBase[lengthCode])
+			extraLength := int32(length - lengthBase[lengthCode&31])
 			w.writeBits(extraLength, extraLengthBits)
 		}
 		// Write the offset
 		offset := t.offset()
 		offsetCode := offsetCode(offset)
-		w.writeCode(oeCodes[offsetCode])
-		extraOffsetBits := uint(offsetExtraBits[offsetCode])
+		w.writeCode(offs[offsetCode&31])
+		extraOffsetBits := uint(offsetExtraBits[offsetCode&63])
 		if extraOffsetBits > 0 {
-			extraOffset := int32(offset - offsetBase[offsetCode])
+			extraOffset := int32(offset - offsetBase[offsetCode&63])
 			w.writeBits(extraOffset, extraOffsetBits)
 		}
 	}
+	if deferEOB {
+		w.writeCode(leCodes[endBlockMarker])
+	}
 }

 // huffOffset is a static offset encoder used for huffman only encoding.
@ -620,7 +655,7 @@ func init() {
 	w := newHuffmanBitWriter(nil)
 	w.offsetFreq[0] = 1
 	huffOffset = newHuffmanEncoder(offsetCodeCount)
-	huffOffset.generate(w.offsetFreq, 15)
+	huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15)
 }

 // writeBlockHuff encodes a block of bytes as either
@ -644,7 +679,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
 	const numLiterals = endBlockMarker + 1
 	const numOffsets = 1

-	w.literalEncoding.generate(w.literalFreq, 15)
+	w.literalEncoding.generate(w.literalFreq[:maxNumLit], 15)

 	// Figure out smallest code.
 	// Always use dynamic Huffman or Store
@ -679,13 +714,12 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
 		bits := w.bits
 		w.bits >>= 48
 		w.nbits -= 48
-		bytes := w.bytes[n : n+6]
-		bytes[0] = byte(bits)
-		bytes[1] = byte(bits >> 8)
-		bytes[2] = byte(bits >> 16)
-		bytes[3] = byte(bits >> 24)
-		bytes[4] = byte(bits >> 32)
-		bytes[5] = byte(bits >> 40)
+		w.bytes[n] = byte(bits)
+		w.bytes[n+1] = byte(bits >> 8)
+		w.bytes[n+2] = byte(bits >> 16)
+		w.bytes[n+3] = byte(bits >> 24)
+		w.bytes[n+4] = byte(bits >> 32)
+		w.bytes[n+5] = byte(bits >> 40)
 		n += 6
 		if n < bufferFlushSize {
 			continue
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@ -6,6 +6,7 @@ package flate

 import (
 	"math"
+	"math/bits"
 	"sort"
 )

@ -56,7 +57,9 @@ func (h *hcode) set(code uint16, length uint16) {
 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} }

 func newHuffmanEncoder(size int) *huffmanEncoder {
-	return &huffmanEncoder{codes: make([]hcode, size)}
+	// Make capacity to next power of two.
+	c := uint(bits.Len32(uint32(size - 1)))
+	return &huffmanEncoder{codes: make([]hcode, size, 1<<c)}
 }

 // Generates a HuffmanCode corresponding to the fixed literal table
--- a/vendor/github.com/klauspost/compress/flate/snappy.go
+++ b/vendor/github.com/klauspost/compress/flate/snappy.go
@ -20,12 +20,12 @@ func emitCopy(dst *tokens, offset, length int) {
 	dst.n++
 }

-type snappyEnc interface {
+type fastEnc interface {
 	Encode(dst *tokens, src []byte)
 	Reset()
 }

-func newSnappy(level int) snappyEnc {
+func newFastEnc(level int) fastEnc {
 	switch level {
 	case 1:
 		return &snappyL1{}
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@ -4,8 +4,6 @@

 package flate

-import "fmt"
-
 const (
 	// 2 bits:   type   0 = literal  1=EOF  2=Match   3=Unused
 	// 8 bits:   xlength = length - MIN_MATCH_LENGTH
@ -19,7 +17,7 @@ const (

 // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
 // is lengthCodes[length - MIN_MATCH_LENGTH]
-var lengthCodes = [...]uint32{
+var lengthCodes = [256]uint8{
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
 	9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
 	13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
@ -48,7 +46,7 @@ var lengthCodes = [...]uint32{
 	27, 27, 27, 27, 27, 28,
 }

-var offsetCodes = [...]uint32{
+var offsetCodes = [256]uint32{
 	0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
 	8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
 	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
@ -82,34 +80,27 @@ func matchToken(xlength uint32, xoffset uint32) token {
 	return token(matchType + xlength<<lengthShift + xoffset)
 }

-func matchTokend(xlength uint32, xoffset uint32) token {
-	if xlength > maxMatchLength || xoffset > maxMatchOffset {
-		panic(fmt.Sprintf("Invalid match: len: %d, offset: %d\n", xlength, xoffset))
-		return token(matchType)
-	}
-	return token(matchType + xlength<<lengthShift + xoffset)
-}
-
 // Returns the type of a token
 func (t token) typ() uint32 { return uint32(t) & typeMask }

 // Returns the literal of a literal token
-func (t token) literal() uint32 { return uint32(t - literalType) }
+func (t token) literal() uint8 { return uint8(t) }

 // Returns the extra offset of a match token
 func (t token) offset() uint32 { return uint32(t) & offsetMask }

-func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) }
+func (t token) length() uint8 { return uint8(t >> lengthShift) }

-func lengthCode(len uint32) uint32 { return lengthCodes[len] }
+// The code is never more than 8 bits, but is returned as uint32 for convenience.
+func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) }

 // Returns the offset code corresponding to a specific offset
 func offsetCode(off uint32) uint32 {
 	if off < uint32(len(offsetCodes)) {
-		return offsetCodes[off]
+		return offsetCodes[off&255]
 	} else if off>>7 < uint32(len(offsetCodes)) {
-		return offsetCodes[off>>7] + 14
+		return offsetCodes[(off>>7)&255] + 14
 	} else {
-		return offsetCodes[off>>14] + 28
+		return offsetCodes[(off>>14)&255] + 28
 	}
 }
--- a/vendor/github.com/klauspost/compress/fse/README.md
+++ b/vendor/github.com/klauspost/compress/fse/README.md
@ -0,0 +1,79 @@
+# Finite State Entropy
+
+This package provides Finite State Entropy encoding and decoding.
+            
+Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) 
+encoding provides a fast near-optimal symbol encoding/decoding
+for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd).
+
+This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
+This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders,
+but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. 
+
+* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse)
+
+## News
+
+ * Feb 2018: First implementation released. Consider this beta software for now.
+
+# Usage
+
+This package provides a low level interface that allows to compress single independent blocks. 
+
+Each block is separate, and there is no built in integrity checks. 
+This means that the caller should keep track of block sizes and also do checksums if needed.  
+
+Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function.
+You must provide input and will receive the output and maybe an error.
+
+These error values can be returned:
+
+| Error               | Description                                                                 |
+|---------------------|-----------------------------------------------------------------------------|
+| `<nil>`             | Everything ok, output is returned                                           |
+| `ErrIncompressible` | Returned when input is judged to be too hard to compress                    |
+| `ErrUseRLE`         | Returned from the compressor when the input is a single byte value repeated |
+| `(error)`           | An internal error occurred.                                                 |
+
+As can be seen above there are errors that will be returned even under normal operation so it is important to handle these.
+
+To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object 
+that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same 
+object can be used for both.   
+
+Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this
+you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
+
+Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function.
+You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
+your input was likely corrupted. 
+
+It is important to note that a successful decoding does *not* mean your output matches your original input. 
+There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
+
+For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples).
+
+# Performance
+
+A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors.  
+All compression functions are currently only running on the calling goroutine so only one core will be used per block.  
+
+The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input
+is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be 
+beneficial to transpose all your input values down by 64.   
+
+With moderate block sizes around 64k speed are typically 200MB/s per core for compression and 
+around 300MB/s decompression speed. 
+
+The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. 
+
+# Plans
+
+At one point, more internals will be exposed to facilitate more "expert" usage of the components. 
+
+A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261).  
+
+# Contributing
+
+Contributions are always welcome. Be aware that adding public functions will require good justification and breaking 
+changes will likely not be accepted. If in doubt open an issue before writing the PR.  
--- a/vendor/github.com/klauspost/compress/fse/bitreader.go
+++ b/vendor/github.com/klauspost/compress/fse/bitreader.go
@ -0,0 +1,107 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import (
+	"errors"
+	"io"
+)
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+	in       []byte
+	off      uint // next byte to read is at in[off - 1]
+	value    uint64
+	bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReader) init(in []byte) error {
+	if len(in) < 1 {
+		return errors.New("corrupt stream: too short")
+	}
+	b.in = in
+	b.off = uint(len(in))
+	// The highest bit of the last byte indicates where to start
+	v := in[len(in)-1]
+	if v == 0 {
+		return errors.New("corrupt stream, did not find end of stream")
+	}
+	b.bitsRead = 64
+	b.value = 0
+	b.fill()
+	b.fill()
+	b.bitsRead += 8 - uint8(highBits(uint32(v)))
+	return nil
+}
+
+// getBits will return n bits. n can be 0.
+func (b *bitReader) getBits(n uint8) uint16 {
+	if n == 0 || b.bitsRead >= 64 {
+		return 0
+	}
+	return b.getBitsFast(n)
+}
+
+// getBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) getBitsFast(n uint8) uint16 {
+	const regMask = 64 - 1
+	v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
+	b.bitsRead += n
+	return v
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReader) fillFast() {
+	if b.bitsRead < 32 {
+		return
+	}
+	// Do single re-slice to avoid bounds checks.
+	v := b.in[b.off-4 : b.off]
+	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+	b.value = (b.value << 32) | uint64(low)
+	b.bitsRead -= 32
+	b.off -= 4
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReader) fill() {
+	if b.bitsRead < 32 {
+		return
+	}
+	if b.off > 4 {
+		v := b.in[b.off-4 : b.off]
+		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+		b.value = (b.value << 32) | uint64(low)
+		b.bitsRead -= 32
+		b.off -= 4
+		return
+	}
+	for b.off > 0 {
+		b.value = (b.value << 8) | uint64(b.in[b.off-1])
+		b.bitsRead -= 8
+		b.off--
+	}
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReader) finished() bool {
+	return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReader) close() error {
+	// Release reference.
+	b.in = nil
+	if b.bitsRead > 64 {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
--- a/vendor/github.com/klauspost/compress/fse/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go
@ -0,0 +1,168 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+	bitContainer uint64
+	nBits        uint8
+	out          []byte
+}
+
+// bitMask16 is bitmasks. Has extra to avoid bounds check.
+var bitMask16 = [32]uint16{
+	0, 1, 3, 7, 0xF, 0x1F,
+	0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+	0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF} /* up to 16 bits */
+
+// addBits16NC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16ZeroNC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+// This is fastest if bits can be zero.
+func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
+	if bits == 0 {
+		return
+	}
+	value <<= (16 - bits) & 15
+	value >>= (16 - bits) & 15
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// flush will flush all pending full bytes.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing.
+func (b *bitWriter) flush() {
+	v := b.nBits >> 3
+	switch v {
+	case 0:
+	case 1:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+		)
+	case 2:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+		)
+	case 3:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+		)
+	case 4:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+		)
+	case 5:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+		)
+	case 6:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+		)
+	case 7:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+		)
+	case 8:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+			byte(b.bitContainer>>56),
+		)
+	default:
+		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+	}
+	b.bitContainer >>= v << 3
+	b.nBits &= 7
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing.
+func (b *bitWriter) flush32() {
+	if b.nBits < 32 {
+		return
+	}
+	b.out = append(b.out,
+		byte(b.bitContainer),
+		byte(b.bitContainer>>8),
+		byte(b.bitContainer>>16),
+		byte(b.bitContainer>>24))
+	b.nBits -= 32
+	b.bitContainer >>= 32
+}
+
+// flushAlign will flush remaining full bytes and align to next byte boundary.
+func (b *bitWriter) flushAlign() {
+	nbBytes := (b.nBits + 7) >> 3
+	for i := uint8(0); i < nbBytes; i++ {
+		b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+	}
+	b.nBits = 0
+	b.bitContainer = 0
+}
+
+// close will write the alignment bit and write the final byte(s)
+// to the output.
+func (b *bitWriter) close() error {
+	// End mark
+	b.addBits16Clean(1, 1)
+	// flush until next byte.
+	b.flushAlign()
+	return nil
+}
+
+// reset and continue writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+	b.bitContainer = 0
+	b.nBits = 0
+	b.out = out
+}
--- a/vendor/github.com/klauspost/compress/fse/bytereader.go
+++ b/vendor/github.com/klauspost/compress/fse/bytereader.go
@ -0,0 +1,56 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced.
+// The reader performs no bounds checks.
+type byteReader struct {
+	b   []byte
+	off int
+}
+
+// init will initialize the reader and set the input.
+func (b *byteReader) init(in []byte) {
+	b.b = in
+	b.off = 0
+}
+
+// advance the stream b n bytes.
+func (b *byteReader) advance(n uint) {
+	b.off += int(n)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+func (b byteReader) Int32() int32 {
+	b2 := b.b[b.off : b.off+4 : b.off+4]
+	v3 := int32(b2[3])
+	v2 := int32(b2[2])
+	v1 := int32(b2[1])
+	v0 := int32(b2[0])
+	return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+func (b byteReader) Uint32() uint32 {
+	b2 := b.b[b.off : b.off+4 : b.off+4]
+	v3 := uint32(b2[3])
+	v2 := uint32(b2[2])
+	v1 := uint32(b2[1])
+	v0 := uint32(b2[0])
+	return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// unread returns the unread portion of the input.
+func (b byteReader) unread() []byte {
+	return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+	return len(b.b) - b.off
+}
--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@ -0,0 +1,684 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import (
+	"errors"
+	"fmt"
+)
+
+// Compress the input bytes. Input must be < 2GB.
+// Provide a Scratch buffer to avoid memory allocations.
+// Note that the output is also kept in the scratch buffer.
+// If input is too hard to compress, ErrIncompressible is returned.
+// If input is a single byte value repeated ErrUseRLE is returned.
+func Compress(in []byte, s *Scratch) ([]byte, error) {
+	if len(in) <= 1 {
+		return nil, ErrIncompressible
+	}
+	if len(in) > (2<<30)-1 {
+		return nil, errors.New("input too big, must be < 2GB")
+	}
+	s, err := s.prepare(in)
+	if err != nil {
+		return nil, err
+	}
+
+	// Create histogram, if none was provided.
+	maxCount := s.maxCount
+	if maxCount == 0 {
+		maxCount = s.countSimple(in)
+	}
+	// Reset for next run.
+	s.clearCount = true
+	s.maxCount = 0
+	if maxCount == len(in) {
+		// One symbol, use RLE
+		return nil, ErrUseRLE
+	}
+	if maxCount == 1 || maxCount < (len(in)>>7) {
+		// Each symbol present maximum once or too well distributed.
+		return nil, ErrIncompressible
+	}
+	s.optimalTableLog()
+	err = s.normalizeCount()
+	if err != nil {
+		return nil, err
+	}
+	err = s.writeCount()
+	if err != nil {
+		return nil, err
+	}
+
+	if false {
+		err = s.validateNorm()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	err = s.buildCTable()
+	if err != nil {
+		return nil, err
+	}
+	err = s.compress(in)
+	if err != nil {
+		return nil, err
+	}
+	s.Out = s.bw.out
+	// Check if we compressed.
+	if len(s.Out) >= len(in) {
+		return nil, ErrIncompressible
+	}
+	return s.Out, nil
+}
+
+// cState contains the compression state of a stream.
+type cState struct {
+	bw         *bitWriter
+	stateTable []uint16
+	state      uint16
+}
+
+// init will initialize the compression state to the first symbol of the stream.
+func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
+	c.bw = bw
+	c.stateTable = ct.stateTable
+
+	nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
+	im := int32((nbBitsOut << 16) - first.deltaNbBits)
+	lu := (im >> nbBitsOut) + first.deltaFindState
+	c.state = c.stateTable[lu]
+	return
+}
+
+// encode the output symbol provided and write it to the bitstream.
+func (c *cState) encode(symbolTT symbolTransform) {
+	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
+	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
+	c.bw.addBits16NC(c.state, uint8(nbBitsOut))
+	c.state = c.stateTable[dstState]
+}
+
+// encode the output symbol provided and write it to the bitstream.
+func (c *cState) encodeZero(symbolTT symbolTransform) {
+	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
+	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
+	c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut))
+	c.state = c.stateTable[dstState]
+}
+
+// flush will write the tablelog to the output and flush the remaining full bytes.
+func (c *cState) flush(tableLog uint8) {
+	c.bw.flush32()
+	c.bw.addBits16NC(c.state, tableLog)
+	c.bw.flush()
+}
+
+// compress is the main compression loop that will encode the input from the last byte to the first.
+func (s *Scratch) compress(src []byte) error {
+	if len(src) <= 2 {
+		return errors.New("compress: src too small")
+	}
+	tt := s.ct.symbolTT[:256]
+	s.bw.reset(s.Out)
+
+	// Our two states each encodes every second byte.
+	// Last byte encoded (first byte decoded) will always be encoded by c1.
+	var c1, c2 cState
+
+	// Encode so remaining size is divisible by 4.
+	ip := len(src)
+	if ip&1 == 1 {
+		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
+		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
+		c1.encodeZero(tt[src[ip-3]])
+		ip -= 3
+	} else {
+		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
+		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
+		ip -= 2
+	}
+	if ip&2 != 0 {
+		c2.encodeZero(tt[src[ip-1]])
+		c1.encodeZero(tt[src[ip-2]])
+		ip -= 2
+	}
+
+	// Main compression loop.
+	switch {
+	case !s.zeroBits && s.actualTableLog <= 8:
+		// We can encode 4 symbols without requiring a flush.
+		// We do not need to check if any output is 0 bits.
+		for ip >= 4 {
+			s.bw.flush32()
+			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			c2.encode(tt[v0])
+			c1.encode(tt[v1])
+			c2.encode(tt[v2])
+			c1.encode(tt[v3])
+			ip -= 4
+		}
+	case !s.zeroBits:
+		// We do not need to check if any output is 0 bits.
+		for ip >= 4 {
+			s.bw.flush32()
+			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			c2.encode(tt[v0])
+			c1.encode(tt[v1])
+			s.bw.flush32()
+			c2.encode(tt[v2])
+			c1.encode(tt[v3])
+			ip -= 4
+		}
+	case s.actualTableLog <= 8:
+		// We can encode 4 symbols without requiring a flush
+		for ip >= 4 {
+			s.bw.flush32()
+			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			c2.encodeZero(tt[v0])
+			c1.encodeZero(tt[v1])
+			c2.encodeZero(tt[v2])
+			c1.encodeZero(tt[v3])
+			ip -= 4
+		}
+	default:
+		for ip >= 4 {
+			s.bw.flush32()
+			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+			c2.encodeZero(tt[v0])
+			c1.encodeZero(tt[v1])
+			s.bw.flush32()
+			c2.encodeZero(tt[v2])
+			c1.encodeZero(tt[v3])
+			ip -= 4
+		}
+	}
+
+	// Flush final state.
+	// Used to initialize state when decoding.
+	c2.flush(s.actualTableLog)
+	c1.flush(s.actualTableLog)
+
+	return s.bw.close()
+}
+
+// writeCount will write the normalized histogram count to header.
+// This is read back by readNCount.
+func (s *Scratch) writeCount() error {
+	var (
+		tableLog  = s.actualTableLog
+		tableSize = 1 << tableLog
+		previous0 bool
+		charnum   uint16
+
+		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+
+		// Write Table Size
+		bitStream = uint32(tableLog - minTablelog)
+		bitCount  = uint(4)
+		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
+		threshold = int16(tableSize)
+		nbBits    = uint(tableLog + 1)
+	)
+	if cap(s.Out) < maxHeaderSize {
+		s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
+	}
+	outP := uint(0)
+	out := s.Out[:maxHeaderSize]
+
+	// stops at 1
+	for remaining > 1 {
+		if previous0 {
+			start := charnum
+			for s.norm[charnum] == 0 {
+				charnum++
+			}
+			for charnum >= start+24 {
+				start += 24
+				bitStream += uint32(0xFFFF) << bitCount
+				out[outP] = byte(bitStream)
+				out[outP+1] = byte(bitStream >> 8)
+				outP += 2
+				bitStream >>= 16
+			}
+			for charnum >= start+3 {
+				start += 3
+				bitStream += 3 << bitCount
+				bitCount += 2
+			}
+			bitStream += uint32(charnum-start) << bitCount
+			bitCount += 2
+			if bitCount > 16 {
+				out[outP] = byte(bitStream)
+				out[outP+1] = byte(bitStream >> 8)
+				outP += 2
+				bitStream >>= 16
+				bitCount -= 16
+			}
+		}
+
+		count := s.norm[charnum]
+		charnum++
+		max := (2*threshold - 1) - remaining
+		if count < 0 {
+			remaining += count
+		} else {
+			remaining -= count
+		}
+		count++ // +1 for extra accuracy
+		if count >= threshold {
+			count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
+		}
+		bitStream += uint32(count) << bitCount
+		bitCount += nbBits
+		if count < max {
+			bitCount--
+		}
+
+		previous0 = count == 1
+		if remaining < 1 {
+			return errors.New("internal error: remaining<1")
+		}
+		for remaining < threshold {
+			nbBits--
+			threshold >>= 1
+		}
+
+		if bitCount > 16 {
+			out[outP] = byte(bitStream)
+			out[outP+1] = byte(bitStream >> 8)
+			outP += 2
+			bitStream >>= 16
+			bitCount -= 16
+		}
+	}
+
+	out[outP] = byte(bitStream)
+	out[outP+1] = byte(bitStream >> 8)
+	outP += (bitCount + 7) / 8
+
+	if uint16(charnum) > s.symbolLen {
+		return errors.New("internal error: charnum > s.symbolLen")
+	}
+	s.Out = out[:outP]
+	return nil
+}
+
+// symbolTransform contains the state transform for a symbol.
+type symbolTransform struct {
+	deltaFindState int32
+	deltaNbBits    uint32
+}
+
+// String prints values as a human readable string.
+func (s symbolTransform) String() string {
+	return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState)
+}
+
+// cTable contains tables used for compression.
+type cTable struct {
+	tableSymbol []byte
+	stateTable  []uint16
+	symbolTT    []symbolTransform
+}
+
+// allocCtable will allocate tables needed for compression.
+// If existing tables a re big enough, they are simply re-used.
+func (s *Scratch) allocCtable() {
+	tableSize := 1 << s.actualTableLog
+	// get tableSymbol that is big enough.
+	if cap(s.ct.tableSymbol) < int(tableSize) {
+		s.ct.tableSymbol = make([]byte, tableSize)
+	}
+	s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
+
+	ctSize := tableSize
+	if cap(s.ct.stateTable) < ctSize {
+		s.ct.stateTable = make([]uint16, ctSize)
+	}
+	s.ct.stateTable = s.ct.stateTable[:ctSize]
+
+	if cap(s.ct.symbolTT) < 256 {
+		s.ct.symbolTT = make([]symbolTransform, 256)
+	}
+	s.ct.symbolTT = s.ct.symbolTT[:256]
+}
+
+// buildCTable will populate the compression table so it is ready to be used.
+func (s *Scratch) buildCTable() error {
+	tableSize := uint32(1 << s.actualTableLog)
+	highThreshold := tableSize - 1
+	var cumul [maxSymbolValue + 2]int16
+
+	s.allocCtable()
+	tableSymbol := s.ct.tableSymbol[:tableSize]
+	// symbol start positions
+	{
+		cumul[0] = 0
+		for ui, v := range s.norm[:s.symbolLen-1] {
+			u := byte(ui) // one less than reference
+			if v == -1 {
+				// Low proba symbol
+				cumul[u+1] = cumul[u] + 1
+				tableSymbol[highThreshold] = u
+				highThreshold--
+			} else {
+				cumul[u+1] = cumul[u] + v
+			}
+		}
+		// Encode last symbol separately to avoid overflowing u
+		u := int(s.symbolLen - 1)
+		v := s.norm[s.symbolLen-1]
+		if v == -1 {
+			// Low proba symbol
+			cumul[u+1] = cumul[u] + 1
+			tableSymbol[highThreshold] = byte(u)
+			highThreshold--
+		} else {
+			cumul[u+1] = cumul[u] + v
+		}
+		if uint32(cumul[s.symbolLen]) != tableSize {
+			return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
+		}
+		cumul[s.symbolLen] = int16(tableSize) + 1
+	}
+	// Spread symbols
+	s.zeroBits = false
+	{
+		step := tableStep(tableSize)
+		tableMask := tableSize - 1
+		var position uint32
+		// if any symbol > largeLimit, we may have 0 bits output.
+		largeLimit := int16(1 << (s.actualTableLog - 1))
+		for ui, v := range s.norm[:s.symbolLen] {
+			symbol := byte(ui)
+			if v > largeLimit {
+				s.zeroBits = true
+			}
+			for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
+				tableSymbol[position] = symbol
+				position = (position + step) & tableMask
+				for position > highThreshold {
+					position = (position + step) & tableMask
+				} /* Low proba area */
+			}
+		}
+
+		// Check if we have gone through all positions
+		if position != 0 {
+			return errors.New("position!=0")
+		}
+	}
+
+	// Build table
+	table := s.ct.stateTable
+	{
+		tsi := int(tableSize)
+		for u, v := range tableSymbol {
+			// TableU16 : sorted by symbol order; gives next state value
+			table[cumul[v]] = uint16(tsi + u)
+			cumul[v]++
+		}
+	}
+
+	// Build Symbol Transformation Table
+	{
+		total := int16(0)
+		symbolTT := s.ct.symbolTT[:s.symbolLen]
+		tableLog := s.actualTableLog
+		tl := (uint32(tableLog) << 16) - (1 << tableLog)
+		for i, v := range s.norm[:s.symbolLen] {
+			switch v {
+			case 0:
+			case -1, 1:
+				symbolTT[i].deltaNbBits = tl
+				symbolTT[i].deltaFindState = int32(total - 1)
+				total++
+			default:
+				maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
+				minStatePlus := uint32(v) << maxBitsOut
+				symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
+				symbolTT[i].deltaFindState = int32(total - v)
+				total += v
+			}
+		}
+		if total != int16(tableSize) {
+			return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
+		}
+	}
+	return nil
+}
+
+// countSimple will create a simple histogram in s.count.
+// Returns the biggest count.
+// Does not update s.clearCount.
+func (s *Scratch) countSimple(in []byte) (max int) {
+	for _, v := range in {
+		s.count[v]++
+	}
+	m := uint32(0)
+	for i, v := range s.count[:] {
+		if v > m {
+			m = v
+		}
+		if v > 0 {
+			s.symbolLen = uint16(i) + 1
+		}
+	}
+	return int(m)
+}
+
+// minTableLog provides the minimum logSize to safely represent a distribution.
+func (s *Scratch) minTableLog() uint8 {
+	minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1
+	minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2
+	if minBitsSrc < minBitsSymbols {
+		return uint8(minBitsSrc)
+	}
+	return uint8(minBitsSymbols)
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
+func (s *Scratch) optimalTableLog() {
+	tableLog := s.TableLog
+	minBits := s.minTableLog()
+	maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
+	if maxBitsSrc < tableLog {
+		// Accuracy can be reduced
+		tableLog = maxBitsSrc
+	}
+	if minBits > tableLog {
+		tableLog = minBits
+	}
+	// Need a minimum to safely represent all symbol values
+	if tableLog < minTablelog {
+		tableLog = minTablelog
+	}
+	if tableLog > maxTableLog {
+		tableLog = maxTableLog
+	}
+	s.actualTableLog = tableLog
+}
+
+var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
+
+// normalizeCount will normalize the count of the symbols so
+// the total is equal to the table size.
+func (s *Scratch) normalizeCount() error {
+	var (
+		tableLog          = s.actualTableLog
+		scale             = 62 - uint64(tableLog)
+		step              = (1 << 62) / uint64(s.br.remain())
+		vStep             = uint64(1) << (scale - 20)
+		stillToDistribute = int16(1 << tableLog)
+		largest           int
+		largestP          int16
+		lowThreshold      = (uint32)(s.br.remain() >> tableLog)
+	)
+
+	for i, cnt := range s.count[:s.symbolLen] {
+		// already handled
+		// if (count[s] == s.length) return 0;   /* rle special case */
+
+		if cnt == 0 {
+			s.norm[i] = 0
+			continue
+		}
+		if cnt <= lowThreshold {
+			s.norm[i] = -1
+			stillToDistribute--
+		} else {
+			proba := (int16)((uint64(cnt) * step) >> scale)
+			if proba < 8 {
+				restToBeat := vStep * uint64(rtbTable[proba])
+				v := uint64(cnt)*step - (uint64(proba) << scale)
+				if v > restToBeat {
+					proba++
+				}
+			}
+			if proba > largestP {
+				largestP = proba
+				largest = i
+			}
+			s.norm[i] = proba
+			stillToDistribute -= proba
+		}
+	}
+
+	if -stillToDistribute >= (s.norm[largest] >> 1) {
+		// corner case, need another normalization method
+		return s.normalizeCount2()
+	}
+	s.norm[largest] += stillToDistribute
+	return nil
+}
+
+// Secondary normalization method.
+// To be used when primary method fails.
+func (s *Scratch) normalizeCount2() error {
+	const notYetAssigned = -2
+	var (
+		distributed  uint32
+		total        = uint32(s.br.remain())
+		tableLog     = s.actualTableLog
+		lowThreshold = uint32(total >> tableLog)
+		lowOne       = uint32((total * 3) >> (tableLog + 1))
+	)
+	for i, cnt := range s.count[:s.symbolLen] {
+		if cnt == 0 {
+			s.norm[i] = 0
+			continue
+		}
+		if cnt <= lowThreshold {
+			s.norm[i] = -1
+			distributed++
+			total -= cnt
+			continue
+		}
+		if cnt <= lowOne {
+			s.norm[i] = 1
+			distributed++
+			total -= cnt
+			continue
+		}
+		s.norm[i] = notYetAssigned
+	}
+	toDistribute := (1 << tableLog) - distributed
+
+	if (total / toDistribute) > lowOne {
+		// risk of rounding to zero
+		lowOne = uint32((total * 3) / (toDistribute * 2))
+		for i, cnt := range s.count[:s.symbolLen] {
+			if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
+				s.norm[i] = 1
+				distributed++
+				total -= cnt
+				continue
+			}
+		}
+		toDistribute = (1 << tableLog) - distributed
+	}
+	if distributed == uint32(s.symbolLen)+1 {
+		// all values are pretty poor;
+		//   probably incompressible data (should have already been detected);
+		//   find max, then give all remaining points to max
+		var maxV int
+		var maxC uint32
+		for i, cnt := range s.count[:s.symbolLen] {
+			if cnt > maxC {
+				maxV = i
+				maxC = cnt
+			}
+		}
+		s.norm[maxV] += int16(toDistribute)
+		return nil
+	}
+
+	if total == 0 {
+		// all of the symbols were low enough for the lowOne or lowThreshold
+		for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
+			if s.norm[i] > 0 {
+				toDistribute--
+				s.norm[i]++
+			}
+		}
+		return nil
+	}
+
+	var (
+		vStepLog = 62 - uint64(tableLog)
+		mid      = uint64((1 << (vStepLog - 1)) - 1)
+		rStep    = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
+		tmpTotal = mid
+	)
+	for i, cnt := range s.count[:s.symbolLen] {
+		if s.norm[i] == notYetAssigned {
+			var (
+				end    = tmpTotal + uint64(cnt)*rStep
+				sStart = uint32(tmpTotal >> vStepLog)
+				sEnd   = uint32(end >> vStepLog)
+				weight = sEnd - sStart
+			)
+			if weight < 1 {
+				return errors.New("weight < 1")
+			}
+			s.norm[i] = int16(weight)
+			tmpTotal = end
+		}
+	}
+	return nil
+}
+
+// validateNorm validates the normalized histogram table.
+func (s *Scratch) validateNorm() (err error) {
+	var total int
+	for _, v := range s.norm[:s.symbolLen] {
+		if v >= 0 {
+			total += int(v)
+		} else {
+			total -= int(v)
+		}
+	}
+	defer func() {
+		if err == nil {
+			return
+		}
+		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
+		for i, v := range s.norm[:s.symbolLen] {
+			fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
+		}
+	}()
+	if total != (1 << s.actualTableLog) {
+		return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
+	}
+	for i, v := range s.count[s.symbolLen:] {
+		if v != 0 {
+			return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@ -0,0 +1,370 @@
+package fse
+
+import (
+	"errors"
+	"fmt"
+)
+
+const (
+	tablelogAbsoluteMax = 15
+)
+
+// Decompress a block of data.
+// You can provide a scratch buffer to avoid allocations.
+// If nil is provided a temporary one will be allocated.
+// It is possible, but by no way guaranteed that corrupt data will
+// return an error.
+// It is up to the caller to verify integrity of the returned data.
+// Use a predefined Scrach to set maximum acceptable output size.
+func Decompress(b []byte, s *Scratch) ([]byte, error) {
+	s, err := s.prepare(b)
+	if err != nil {
+		return nil, err
+	}
+	s.Out = s.Out[:0]
+	err = s.readNCount()
+	if err != nil {
+		return nil, err
+	}
+	err = s.buildDtable()
+	if err != nil {
+		return nil, err
+	}
+	err = s.decompress()
+	if err != nil {
+		return nil, err
+	}
+
+	return s.Out, nil
+}
+
+// readNCount will read the symbol distribution so decoding tables can be constructed.
+func (s *Scratch) readNCount() error {
+	var (
+		charnum   uint16
+		previous0 bool
+		b         = &s.br
+	)
+	iend := b.remain()
+	if iend < 4 {
+		return errors.New("input too small")
+	}
+	bitStream := b.Uint32()
+	nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
+	if nbBits > tablelogAbsoluteMax {
+		return errors.New("tableLog too large")
+	}
+	bitStream >>= 4
+	bitCount := uint(4)
+
+	s.actualTableLog = uint8(nbBits)
+	remaining := int32((1 << nbBits) + 1)
+	threshold := int32(1 << nbBits)
+	gotTotal := int32(0)
+	nbBits++
+
+	for remaining > 1 {
+		if previous0 {
+			n0 := charnum
+			for (bitStream & 0xFFFF) == 0xFFFF {
+				n0 += 24
+				if b.off < iend-5 {
+					b.advance(2)
+					bitStream = b.Uint32() >> bitCount
+				} else {
+					bitStream >>= 16
+					bitCount += 16
+				}
+			}
+			for (bitStream & 3) == 3 {
+				n0 += 3
+				bitStream >>= 2
+				bitCount += 2
+			}
+			n0 += uint16(bitStream & 3)
+			bitCount += 2
+			if n0 > maxSymbolValue {
+				return errors.New("maxSymbolValue too small")
+			}
+			for charnum < n0 {
+				s.norm[charnum&0xff] = 0
+				charnum++
+			}
+
+			if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
+				b.advance(bitCount >> 3)
+				bitCount &= 7
+				bitStream = b.Uint32() >> bitCount
+			} else {
+				bitStream >>= 2
+			}
+		}
+
+		max := (2*(threshold) - 1) - (remaining)
+		var count int32
+
+		if (int32(bitStream) & (threshold - 1)) < max {
+			count = int32(bitStream) & (threshold - 1)
+			bitCount += nbBits - 1
+		} else {
+			count = int32(bitStream) & (2*threshold - 1)
+			if count >= threshold {
+				count -= max
+			}
+			bitCount += nbBits
+		}
+
+		count-- // extra accuracy
+		if count < 0 {
+			// -1 means +1
+			remaining += count
+			gotTotal -= count
+		} else {
+			remaining -= count
+			gotTotal += count
+		}
+		s.norm[charnum&0xff] = int16(count)
+		charnum++
+		previous0 = count == 0
+		for remaining < threshold {
+			nbBits--
+			threshold >>= 1
+		}
+		if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
+			b.advance(bitCount >> 3)
+			bitCount &= 7
+		} else {
+			bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
+			b.off = len(b.b) - 4
+		}
+		bitStream = b.Uint32() >> (bitCount & 31)
+	}
+	s.symbolLen = charnum
+
+	if s.symbolLen <= 1 {
+		return fmt.Errorf("symbolLen (%d) too small", s.symbolLen)
+	}
+	if s.symbolLen > maxSymbolValue+1 {
+		return fmt.Errorf("symbolLen (%d) too big", s.symbolLen)
+	}
+	if remaining != 1 {
+		return fmt.Errorf("corruption detected (remaining %d != 1)", remaining)
+	}
+	if bitCount > 32 {
+		return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount)
+	}
+	if gotTotal != 1<<s.actualTableLog {
+		return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
+	}
+	b.advance((bitCount + 7) >> 3)
+	return nil
+}
+
+// decSymbol contains information about a state entry,
+// Including the state offset base, the output symbol and
+// the number of bits to read for the low part of the destination state.
+type decSymbol struct {
+	newState uint16
+	symbol   uint8
+	nbBits   uint8
+}
+
+// allocDtable will allocate decoding tables if they are not big enough.
+func (s *Scratch) allocDtable() {
+	tableSize := 1 << s.actualTableLog
+	if cap(s.decTable) < int(tableSize) {
+		s.decTable = make([]decSymbol, tableSize)
+	}
+	s.decTable = s.decTable[:tableSize]
+
+	if cap(s.ct.tableSymbol) < 256 {
+		s.ct.tableSymbol = make([]byte, 256)
+	}
+	s.ct.tableSymbol = s.ct.tableSymbol[:256]
+
+	if cap(s.ct.stateTable) < 256 {
+		s.ct.stateTable = make([]uint16, 256)
+	}
+	s.ct.stateTable = s.ct.stateTable[:256]
+}
+
+// buildDtable will build the decoding table.
+func (s *Scratch) buildDtable() error {
+	tableSize := uint32(1 << s.actualTableLog)
+	highThreshold := tableSize - 1
+	s.allocDtable()
+	symbolNext := s.ct.stateTable[:256]
+
+	// Init, lay down lowprob symbols
+	s.zeroBits = false
+	{
+		largeLimit := int16(1 << (s.actualTableLog - 1))
+		for i, v := range s.norm[:s.symbolLen] {
+			if v == -1 {
+				s.decTable[highThreshold].symbol = uint8(i)
+				highThreshold--
+				symbolNext[i] = 1
+			} else {
+				if v >= largeLimit {
+					s.zeroBits = true
+				}
+				symbolNext[i] = uint16(v)
+			}
+		}
+	}
+	// Spread symbols
+	{
+		tableMask := tableSize - 1
+		step := tableStep(tableSize)
+		position := uint32(0)
+		for ss, v := range s.norm[:s.symbolLen] {
+			for i := 0; i < int(v); i++ {
+				s.decTable[position].symbol = uint8(ss)
+				position = (position + step) & tableMask
+				for position > highThreshold {
+					// lowprob area
+					position = (position + step) & tableMask
+				}
+			}
+		}
+		if position != 0 {
+			// position must reach all cells once, otherwise normalizedCounter is incorrect
+			return errors.New("corrupted input (position != 0)")
+		}
+	}
+
+	// Build Decoding table
+	{
+		tableSize := uint16(1 << s.actualTableLog)
+		for u, v := range s.decTable {
+			symbol := v.symbol
+			nextState := symbolNext[symbol]
+			symbolNext[symbol] = nextState + 1
+			nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
+			s.decTable[u].nbBits = nBits
+			newState := (nextState << nBits) - tableSize
+			if newState > tableSize {
+				return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
+			}
+			if newState == uint16(u) && nBits == 0 {
+				// Seems weird that this is possible with nbits > 0.
+				return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
+			}
+			s.decTable[u].newState = newState
+		}
+	}
+	return nil
+}
+
+// decompress will decompress the bitstream.
+// If the buffer is over-read an error is returned.
+func (s *Scratch) decompress() error {
+	br := &s.bits
+	br.init(s.br.unread())
+
+	var s1, s2 decoder
+	// Initialize and decode first state and symbol.
+	s1.init(br, s.decTable, s.actualTableLog)
+	s2.init(br, s.decTable, s.actualTableLog)
+
+	// Use temp table to avoid bound checks/append penalty.
+	var tmp = s.ct.tableSymbol[:256]
+	var off uint8
+
+	// Main part
+	if !s.zeroBits {
+		for br.off >= 8 {
+			br.fillFast()
+			tmp[off+0] = s1.nextFast()
+			tmp[off+1] = s2.nextFast()
+			br.fillFast()
+			tmp[off+2] = s1.nextFast()
+			tmp[off+3] = s2.nextFast()
+			off += 4
+			if off == 0 {
+				s.Out = append(s.Out, tmp...)
+			}
+		}
+	} else {
+		for br.off >= 8 {
+			br.fillFast()
+			tmp[off+0] = s1.next()
+			tmp[off+1] = s2.next()
+			br.fillFast()
+			tmp[off+2] = s1.next()
+			tmp[off+3] = s2.next()
+			off += 4
+			if off == 0 {
+				s.Out = append(s.Out, tmp...)
+				off = 0
+				if len(s.Out) >= s.DecompressLimit {
+					return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+				}
+			}
+		}
+	}
+	s.Out = append(s.Out, tmp[:off]...)
+
+	// Final bits, a bit more expensive check
+	for {
+		if s1.finished() {
+			s.Out = append(s.Out, s1.final(), s2.final())
+			break
+		}
+		br.fill()
+		s.Out = append(s.Out, s1.next())
+		if s2.finished() {
+			s.Out = append(s.Out, s2.final(), s1.final())
+			break
+		}
+		s.Out = append(s.Out, s2.next())
+		if len(s.Out) >= s.DecompressLimit {
+			return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+		}
+	}
+	return br.close()
+}
+
+// decoder keeps track of the current state and updates it from the bitstream.
+type decoder struct {
+	state uint16
+	br    *bitReader
+	dt    []decSymbol
+}
+
+// init will initialize the decoder and read the first state from the stream.
+func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
+	d.dt = dt
+	d.br = in
+	d.state = uint16(in.getBits(tableLog))
+}
+
+// next returns the next symbol and sets the next state.
+// At least tablelog bits must be available in the bit reader.
+func (d *decoder) next() uint8 {
+	n := &d.dt[d.state]
+	lowBits := d.br.getBits(n.nbBits)
+	d.state = n.newState + lowBits
+	return n.symbol
+}
+
+// finished returns true if all bits have been read from the bitstream
+// and the next state would require reading bits from the input.
+func (d *decoder) finished() bool {
+	return d.br.finished() && d.dt[d.state].nbBits > 0
+}
+
+// final returns the current state symbol without decoding the next.
+func (d *decoder) final() uint8 {
+	return d.dt[d.state].symbol
+}
+
+// nextFast returns the next symbol and sets the next state.
+// This can only be used if no symbols are 0 bits.
+// At least tablelog bits must be available in the bit reader.
+func (d *decoder) nextFast() uint8 {
+	n := d.dt[d.state]
+	lowBits := d.br.getBitsFast(n.nbBits)
+	d.state = n.newState + lowBits
+	return n.symbol
+}
--- a/vendor/github.com/klauspost/compress/fse/fse.go
+++ b/vendor/github.com/klauspost/compress/fse/fse.go
@ -0,0 +1,143 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+// Package fse provides Finite State Entropy encoding and decoding.
+//
+// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
+// for byte blocks as implemented in zstd.
+//
+// See https://github.com/klauspost/compress/tree/master/fse for more information.
+package fse
+
+import (
+	"errors"
+	"fmt"
+	"math/bits"
+)
+
+const (
+	/*!MEMORY_USAGE :
+	 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+	 *  Increasing memory usage improves compression ratio
+	 *  Reduced memory usage can improve speed, due to cache effect
+	 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+	maxMemoryUsage     = 14
+	defaultMemoryUsage = 13
+
+	maxTableLog     = maxMemoryUsage - 2
+	maxTablesize    = 1 << maxTableLog
+	defaultTablelog = defaultMemoryUsage - 2
+	minTablelog     = 5
+	maxSymbolValue  = 255
+)
+
+var (
+	// ErrIncompressible is returned when input is judged to be too hard to compress.
+	ErrIncompressible = errors.New("input is not compressible")
+
+	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+	ErrUseRLE = errors.New("input is single value repeated")
+)
+
+// Scratch provides temporary storage for compression and decompression.
+type Scratch struct {
+	// Private
+	count          [maxSymbolValue + 1]uint32
+	norm           [maxSymbolValue + 1]int16
+	symbolLen      uint16 // Length of active part of the symbol table.
+	actualTableLog uint8  // Selected tablelog.
+	br             byteReader
+	bits           bitReader
+	bw             bitWriter
+	ct             cTable      // Compression tables.
+	decTable       []decSymbol // Decompression table.
+	zeroBits       bool        // no bits has prob > 50%.
+	clearCount     bool        // clear count
+	maxCount       int         // count of the most probable symbol
+
+	// Per block parameters.
+	// These can be used to override compression parameters of the block.
+	// Do not touch, unless you know what you are doing.
+
+	// Out is output buffer.
+	// If the scratch is re-used before the caller is done processing the output,
+	// set this field to nil.
+	// Otherwise the output buffer will be re-used for next Compression/Decompression step
+	// and allocation will be avoided.
+	Out []byte
+
+	// MaxSymbolValue will override the maximum symbol value of the next block.
+	MaxSymbolValue uint8
+
+	// TableLog will attempt to override the tablelog for the next block.
+	TableLog uint8
+
+	// DecompressLimit limits the maximum decoded size acceptable.
+	// If > 0 decompression will stop when approximately this many bytes
+	// has been decoded.
+	// If 0, maximum size will be 2GB.
+	DecompressLimit int
+}
+
+// Histogram allows to populate the histogram and skip that step in the compression,
+// It otherwise allows to inspect the histogram when compression is done.
+// To indicate that you have populated the histogram call HistogramFinished
+// with the value of the highest populated symbol, as well as the number of entries
+// in the most populated entry. These are accepted at face value.
+// The returned slice will always be length 256.
+func (s *Scratch) Histogram() []uint32 {
+	return s.count[:]
+}
+
+// HistogramFinished can be called to indicate that the histogram has been populated.
+// maxSymbol is the index of the highest set symbol of the next data segment.
+// maxCount is the number of entries in the most populated entry.
+// These are accepted at face value.
+func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
+	s.maxCount = maxCount
+	s.symbolLen = uint16(maxSymbol) + 1
+	s.clearCount = maxCount != 0
+}
+
+// prepare will prepare and allocate scratch tables used for both compression and decompression.
+func (s *Scratch) prepare(in []byte) (*Scratch, error) {
+	if s == nil {
+		s = &Scratch{}
+	}
+	if s.MaxSymbolValue == 0 {
+		s.MaxSymbolValue = 255
+	}
+	if s.TableLog == 0 {
+		s.TableLog = defaultTablelog
+	}
+	if s.TableLog > maxTableLog {
+		return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
+	}
+	if cap(s.Out) == 0 {
+		s.Out = make([]byte, 0, len(in))
+	}
+	if s.clearCount && s.maxCount == 0 {
+		for i := range s.count {
+			s.count[i] = 0
+		}
+		s.clearCount = false
+	}
+	s.br.init(in)
+	if s.DecompressLimit == 0 {
+		// Max size 2GB.
+		s.DecompressLimit = (2 << 30) - 1
+	}
+
+	return s, nil
+}
+
+// tableStep returns the next table index.
+func tableStep(tableSize uint32) uint32 {
+	return (tableSize >> 1) + (tableSize >> 3) + 3
+}
+
+func highBits(val uint32) (n uint32) {
+	return uint32(bits.Len32(val) - 1)
+}
--- a/vendor/github.com/klauspost/compress/huff0/.gitignore
+++ b/vendor/github.com/klauspost/compress/huff0/.gitignore
@ -0,0 +1 @@
+/huff0-fuzz.zip
--- a/vendor/github.com/klauspost/compress/huff0/README.md
+++ b/vendor/github.com/klauspost/compress/huff0/README.md
@ -0,0 +1,87 @@
+# Huff0 entropy compression
+
+This package provides Huff0 encoding and decoding as used in zstd.
+            
+[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders), 
+a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU 
+(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds.
+
+This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
+This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders,
+but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. 
+
+* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
+
+THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE.
+
+## News
+
+ * Mar 2018: First implementation released. Consider this beta software for now.
+
+# Usage
+
+This package provides a low level interface that allows to compress single independent blocks. 
+
+Each block is separate, and there is no built in integrity checks. 
+This means that the caller should keep track of block sizes and also do checksums if needed.  
+
+Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and 
+[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions.
+You must provide input and will receive the output and maybe an error.
+
+These error values can be returned:
+
+| Error               | Description                                                                 |
+|---------------------|-----------------------------------------------------------------------------|
+| `<nil>`             | Everything ok, output is returned                                           |
+| `ErrIncompressible` | Returned when input is judged to be too hard to compress                    |
+| `ErrUseRLE`         | Returned from the compressor when the input is a single byte value repeated |
+| `ErrTooBig`         | Returned if the input block exceeds the maximum allowed size (128 Kib)      |
+| `(error)`           | An internal error occurred.                                                 |
+
+
+As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these.
+
+To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object 
+that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same 
+object can be used for both.   
+
+Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this
+you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
+
+The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding.  
+
+## Tables and re-use
+
+Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results. 
+
+The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy) 
+that controls this behaviour. See the documentation for details. This can be altered between each block.
+
+Do however note that this information is *not* stored in the output block and it is up to the users of the package to
+record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called,
+based on the boolean reported back from the CompressXX call. 
+
+If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the 
+[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object.
+
+## Decompressing
+
+The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable).
+This will initialize the decoding tables. 
+You can supply the complete block to `ReadTable` and it will return the data part of the block 
+which can be given to the decompressor. 
+
+Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) 
+or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+
+You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
+your input was likely corrupted. 
+
+It is important to note that a successful decoding does *not* mean your output matches your original input. 
+There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
+
+# Contributing
+
+Contributions are always welcome. Be aware that adding public functions will require good justification and breaking 
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@ -0,0 +1,115 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import (
+	"errors"
+	"io"
+)
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+	in       []byte
+	off      uint // next byte to read is at in[off - 1]
+	value    uint64
+	bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReader) init(in []byte) error {
+	if len(in) < 1 {
+		return errors.New("corrupt stream: too short")
+	}
+	b.in = in
+	b.off = uint(len(in))
+	// The highest bit of the last byte indicates where to start
+	v := in[len(in)-1]
+	if v == 0 {
+		return errors.New("corrupt stream, did not find end of stream")
+	}
+	b.bitsRead = 64
+	b.value = 0
+	b.fill()
+	b.fill()
+	b.bitsRead += 8 - uint8(highBit32(uint32(v)))
+	return nil
+}
+
+// getBits will return n bits. n can be 0.
+func (b *bitReader) getBits(n uint8) uint16 {
+	if n == 0 || b.bitsRead >= 64 {
+		return 0
+	}
+	return b.getBitsFast(n)
+}
+
+// getBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) getBitsFast(n uint8) uint16 {
+	const regMask = 64 - 1
+	v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
+	b.bitsRead += n
+	return v
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) peekBitsFast(n uint8) uint16 {
+	const regMask = 64 - 1
+	v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
+	return v
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReader) fillFast() {
+	if b.bitsRead < 32 {
+		return
+	}
+	// Do single re-slice to avoid bounds checks.
+	v := b.in[b.off-4 : b.off]
+	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+	b.value = (b.value << 32) | uint64(low)
+	b.bitsRead -= 32
+	b.off -= 4
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReader) fill() {
+	if b.bitsRead < 32 {
+		return
+	}
+	if b.off > 4 {
+		v := b.in[b.off-4 : b.off]
+		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+		b.value = (b.value << 32) | uint64(low)
+		b.bitsRead -= 32
+		b.off -= 4
+		return
+	}
+	for b.off > 0 {
+		b.value = (b.value << 8) | uint64(b.in[b.off-1])
+		b.bitsRead -= 8
+		b.off--
+	}
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReader) finished() bool {
+	return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReader) close() error {
+	// Release reference.
+	b.in = nil
+	if b.bitsRead > 64 {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
--- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@ -0,0 +1,186 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+	bitContainer uint64
+	nBits        uint8
+	out          []byte
+}
+
+// bitMask16 is bitmasks. Has extra to avoid bounds check.
+var bitMask16 = [32]uint16{
+	0, 1, 3, 7, 0xF, 0x1F,
+	0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+	0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF} /* up to 16 bits */
+
+// addBits16NC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
+	enc := ct[symbol]
+	b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
+	b.nBits += enc.nBits
+}
+
+// addBits16ZeroNC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+// This is fastest if bits can be zero.
+func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
+	if bits == 0 {
+		return
+	}
+	value <<= (16 - bits) & 15
+	value >>= (16 - bits) & 15
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// flush will flush all pending full bytes.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing.
+func (b *bitWriter) flush() {
+	v := b.nBits >> 3
+	switch v {
+	case 0:
+		return
+	case 1:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+		)
+		b.bitContainer >>= 1 << 3
+	case 2:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+		)
+		b.bitContainer >>= 2 << 3
+	case 3:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+		)
+		b.bitContainer >>= 3 << 3
+	case 4:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+		)
+		b.bitContainer >>= 4 << 3
+	case 5:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+		)
+		b.bitContainer >>= 5 << 3
+	case 6:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+		)
+		b.bitContainer >>= 6 << 3
+	case 7:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+		)
+		b.bitContainer >>= 7 << 3
+	case 8:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+			byte(b.bitContainer>>56),
+		)
+		b.bitContainer = 0
+		b.nBits = 0
+		return
+	default:
+		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+	}
+	b.nBits &= 7
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing.
+func (b *bitWriter) flush32() {
+	if b.nBits < 32 {
+		return
+	}
+	b.out = append(b.out,
+		byte(b.bitContainer),
+		byte(b.bitContainer>>8),
+		byte(b.bitContainer>>16),
+		byte(b.bitContainer>>24))
+	b.nBits -= 32
+	b.bitContainer >>= 32
+}
+
+// flushAlign will flush remaining full bytes and align to next byte boundary.
+func (b *bitWriter) flushAlign() {
+	nbBytes := (b.nBits + 7) >> 3
+	for i := uint8(0); i < nbBytes; i++ {
+		b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+	}
+	b.nBits = 0
+	b.bitContainer = 0
+}
+
+// close will write the alignment bit and write the final byte(s)
+// to the output.
+func (b *bitWriter) close() error {
+	// End mark
+	b.addBits16Clean(1, 1)
+	// flush until next byte.
+	b.flushAlign()
+	return nil
+}
+
+// reset and continue writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+	b.bitContainer = 0
+	b.nBits = 0
+	b.out = out
+}
--- a/vendor/github.com/klauspost/compress/huff0/bytereader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go
@ -0,0 +1,54 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced.
+// The reader performs no bounds checks.
+type byteReader struct {
+	b   []byte
+	off int
+}
+
+// init will initialize the reader and set the input.
+func (b *byteReader) init(in []byte) {
+	b.b = in
+	b.off = 0
+}
+
+// advance the stream b n bytes.
+func (b *byteReader) advance(n uint) {
+	b.off += int(n)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+func (b byteReader) Int32() int32 {
+	v3 := int32(b.b[b.off+3])
+	v2 := int32(b.b[b.off+2])
+	v1 := int32(b.b[b.off+1])
+	v0 := int32(b.b[b.off])
+	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+func (b byteReader) Uint32() uint32 {
+	v3 := uint32(b.b[b.off+3])
+	v2 := uint32(b.b[b.off+2])
+	v1 := uint32(b.b[b.off+1])
+	v0 := uint32(b.b[b.off])
+	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
+}
+
+// unread returns the unread portion of the input.
+func (b byteReader) unread() []byte {
+	return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+	return len(b.b) - b.off
+}
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@ -0,0 +1,618 @@
+package huff0
+
+import (
+	"fmt"
+	"runtime"
+	"sync"
+)
+
+// Compress1X will compress the input.
+// The output can be decoded using Decompress1X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+	s, err = s.prepare(in)
+	if err != nil {
+		return nil, false, err
+	}
+	return compress(in, s, s.compress1X)
+}
+
+// Compress4X will compress the input. The input is split into 4 independent blocks
+// and compressed similar to Compress1X.
+// The output can be decoded using Decompress4X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+	s, err = s.prepare(in)
+	if err != nil {
+		return nil, false, err
+	}
+	if false {
+		// TODO: compress4Xp only slightly faster.
+		const parallelThreshold = 8 << 10
+		if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 {
+			return compress(in, s, s.compress4X)
+		}
+		return compress(in, s, s.compress4Xp)
+	}
+	return compress(in, s, s.compress4X)
+}
+
+func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) {
+	// Nuke previous table if we cannot reuse anyway.
+	if s.Reuse == ReusePolicyNone {
+		s.prevTable = s.prevTable[:0]
+	}
+
+	// Create histogram, if none was provided.
+	maxCount := s.maxCount
+	var canReuse = false
+	if maxCount == 0 {
+		maxCount, canReuse = s.countSimple(in)
+	} else {
+		canReuse = s.canUseTable(s.prevTable)
+	}
+
+	// Reset for next run.
+	s.clearCount = true
+	s.maxCount = 0
+	if maxCount >= len(in) {
+		if maxCount > len(in) {
+			return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in))
+		}
+		if len(in) == 1 {
+			return nil, false, ErrIncompressible
+		}
+		// One symbol, use RLE
+		return nil, false, ErrUseRLE
+	}
+	if maxCount == 1 || maxCount < (len(in)>>7) {
+		// Each symbol present maximum once or too well distributed.
+		return nil, false, ErrIncompressible
+	}
+
+	if s.Reuse == ReusePolicyPrefer && canReuse {
+		keepTable := s.cTable
+		s.cTable = s.prevTable
+		s.Out, err = compressor(in)
+		s.cTable = keepTable
+		if err == nil && len(s.Out) < len(in) {
+			s.OutData = s.Out
+			return s.Out, true, nil
+		}
+		// Do not attempt to re-use later.
+		s.prevTable = s.prevTable[:0]
+	}
+
+	// Calculate new table.
+	s.optimalTableLog()
+	err = s.buildCTable()
+	if err != nil {
+		return nil, false, err
+	}
+
+	if false && !s.canUseTable(s.cTable) {
+		panic("invalid table generated")
+	}
+
+	if s.Reuse == ReusePolicyAllow && canReuse {
+		hSize := len(s.Out)
+		oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
+		newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
+		if oldSize <= hSize+newSize || hSize+12 >= len(in) {
+			// Retain cTable even if we re-use.
+			keepTable := s.cTable
+			s.cTable = s.prevTable
+			s.Out, err = compressor(in)
+			s.cTable = keepTable
+			if len(s.Out) >= len(in) {
+				return nil, false, ErrIncompressible
+			}
+			s.OutData = s.Out
+			return s.Out, true, nil
+		}
+	}
+
+	// Use new table
+	err = s.cTable.write(s)
+	if err != nil {
+		s.OutTable = nil
+		return nil, false, err
+	}
+	s.OutTable = s.Out
+
+	// Compress using new table
+	s.Out, err = compressor(in)
+	if err != nil {
+		s.OutTable = nil
+		return nil, false, err
+	}
+	if len(s.Out) >= len(in) {
+		s.OutTable = nil
+		return nil, false, ErrIncompressible
+	}
+	// Move current table into previous.
+	s.prevTable, s.cTable = s.cTable, s.prevTable[:0]
+	s.OutData = s.Out[len(s.OutTable):]
+	return s.Out, false, nil
+}
+
+func (s *Scratch) compress1X(src []byte) ([]byte, error) {
+	return s.compress1xDo(s.Out, src)
+}
+
+func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+	var bw = bitWriter{out: dst}
+
+	// N is length divisible by 4.
+	n := len(src)
+	n -= n & 3
+	cTable := s.cTable[:256]
+
+	// Encode last bytes.
+	for i := len(src) & 3; i > 0; i-- {
+		bw.encSymbol(cTable, src[n+i-1])
+	}
+	if s.actualTableLog <= 8 {
+		n -= 4
+		for ; n >= 0; n -= 4 {
+			tmp := src[n : n+4]
+			// tmp should be len 4
+			bw.flush32()
+			bw.encSymbol(cTable, tmp[3])
+			bw.encSymbol(cTable, tmp[2])
+			bw.encSymbol(cTable, tmp[1])
+			bw.encSymbol(cTable, tmp[0])
+		}
+	} else {
+		n -= 4
+		for ; n >= 0; n -= 4 {
+			tmp := src[n : n+4]
+			// tmp should be len 4
+			bw.flush32()
+			bw.encSymbol(cTable, tmp[3])
+			bw.encSymbol(cTable, tmp[2])
+			bw.flush32()
+			bw.encSymbol(cTable, tmp[1])
+			bw.encSymbol(cTable, tmp[0])
+		}
+	}
+	err := bw.close()
+	return bw.out, err
+}
+
+var sixZeros [6]byte
+
+func (s *Scratch) compress4X(src []byte) ([]byte, error) {
+	if len(src) < 12 {
+		return nil, ErrIncompressible
+	}
+	segmentSize := (len(src) + 3) / 4
+
+	// Add placeholder for output length
+	offsetIdx := len(s.Out)
+	s.Out = append(s.Out, sixZeros[:]...)
+
+	for i := 0; i < 4; i++ {
+		toDo := src
+		if len(toDo) > segmentSize {
+			toDo = toDo[:segmentSize]
+		}
+		src = src[len(toDo):]
+
+		var err error
+		idx := len(s.Out)
+		s.Out, err = s.compress1xDo(s.Out, toDo)
+		if err != nil {
+			return nil, err
+		}
+		// Write compressed length as little endian before block.
+		if i < 3 {
+			// Last length is not written.
+			length := len(s.Out) - idx
+			s.Out[i*2+offsetIdx] = byte(length)
+			s.Out[i*2+offsetIdx+1] = byte(length >> 8)
+		}
+	}
+
+	return s.Out, nil
+}
+
+// compress4Xp will compress 4 streams using separate goroutines.
+func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
+	if len(src) < 12 {
+		return nil, ErrIncompressible
+	}
+	// Add placeholder for output length
+	s.Out = s.Out[:6]
+
+	segmentSize := (len(src) + 3) / 4
+	var wg sync.WaitGroup
+	var errs [4]error
+	wg.Add(4)
+	for i := 0; i < 4; i++ {
+		toDo := src
+		if len(toDo) > segmentSize {
+			toDo = toDo[:segmentSize]
+		}
+		src = src[len(toDo):]
+
+		// Separate goroutine for each block.
+		go func(i int) {
+			s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+			wg.Done()
+		}(i)
+	}
+	wg.Wait()
+	for i := 0; i < 4; i++ {
+		if errs[i] != nil {
+			return nil, errs[i]
+		}
+		o := s.tmpOut[i]
+		// Write compressed length as little endian before block.
+		if i < 3 {
+			// Last length is not written.
+			s.Out[i*2] = byte(len(o))
+			s.Out[i*2+1] = byte(len(o) >> 8)
+		}
+
+		// Write output.
+		s.Out = append(s.Out, o...)
+	}
+	return s.Out, nil
+}
+
+// countSimple will create a simple histogram in s.count.
+// Returns the biggest count.
+// Does not update s.clearCount.
+func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
+	reuse = true
+	for _, v := range in {
+		s.count[v]++
+	}
+	m := uint32(0)
+	if len(s.prevTable) > 0 {
+		for i, v := range s.count[:] {
+			if v > m {
+				m = v
+			}
+			if v > 0 {
+				s.symbolLen = uint16(i) + 1
+				if i >= len(s.prevTable) {
+					reuse = false
+				} else {
+					if s.prevTable[i].nBits == 0 {
+						reuse = false
+					}
+				}
+			}
+		}
+		return int(m), reuse
+	}
+	for i, v := range s.count[:] {
+		if v > m {
+			m = v
+		}
+		if v > 0 {
+			s.symbolLen = uint16(i) + 1
+		}
+	}
+	return int(m), false
+}
+
+func (s *Scratch) canUseTable(c cTable) bool {
+	if len(c) < int(s.symbolLen) {
+		return false
+	}
+	for i, v := range s.count[:s.symbolLen] {
+		if v != 0 && c[i].nBits == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// minTableLog provides the minimum logSize to safely represent a distribution.
+func (s *Scratch) minTableLog() uint8 {
+	minBitsSrc := highBit32(uint32(s.br.remain()-1)) + 1
+	minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
+	if minBitsSrc < minBitsSymbols {
+		return uint8(minBitsSrc)
+	}
+	return uint8(minBitsSymbols)
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
+func (s *Scratch) optimalTableLog() {
+	tableLog := s.TableLog
+	minBits := s.minTableLog()
+	maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 2
+	if maxBitsSrc < tableLog {
+		// Accuracy can be reduced
+		tableLog = maxBitsSrc
+	}
+	if minBits > tableLog {
+		tableLog = minBits
+	}
+	// Need a minimum to safely represent all symbol values
+	if tableLog < minTablelog {
+		tableLog = minTablelog
+	}
+	if tableLog > tableLogMax {
+		tableLog = tableLogMax
+	}
+	s.actualTableLog = tableLog
+}
+
+type cTableEntry struct {
+	val   uint16
+	nBits uint8
+	// We have 8 bits extra
+}
+
+const huffNodesMask = huffNodesLen - 1
+
+func (s *Scratch) buildCTable() error {
+	s.huffSort()
+	if cap(s.cTable) < maxSymbolValue+1 {
+		s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1)
+	} else {
+		s.cTable = s.cTable[:s.symbolLen]
+		for i := range s.cTable {
+			s.cTable[i] = cTableEntry{}
+		}
+	}
+
+	var startNode = int16(s.symbolLen)
+	nonNullRank := s.symbolLen - 1
+
+	nodeNb := int16(startNode)
+	huffNode := s.nodes[1 : huffNodesLen+1]
+
+	// This overlays the slice above, but allows "-1" index lookups.
+	// Different from reference implementation.
+	huffNode0 := s.nodes[0 : huffNodesLen+1]
+
+	for huffNode[nonNullRank].count == 0 {
+		nonNullRank--
+	}
+
+	lowS := int16(nonNullRank)
+	nodeRoot := nodeNb + lowS - 1
+	lowN := nodeNb
+	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
+	huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+	nodeNb++
+	lowS -= 2
+	for n := nodeNb; n <= nodeRoot; n++ {
+		huffNode[n].count = 1 << 30
+	}
+	// fake entry, strong barrier
+	huffNode0[0].count = 1 << 31
+
+	// create parents
+	for nodeNb <= nodeRoot {
+		var n1, n2 int16
+		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+			n1 = lowS
+			lowS--
+		} else {
+			n1 = lowN
+			lowN++
+		}
+		if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+			n2 = lowS
+			lowS--
+		} else {
+			n2 = lowN
+			lowN++
+		}
+
+		huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
+		huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+		nodeNb++
+	}
+
+	// distribute weights (unlimited tree height)
+	huffNode[nodeRoot].nbBits = 0
+	for n := nodeRoot - 1; n >= startNode; n-- {
+		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+	}
+	for n := uint16(0); n <= nonNullRank; n++ {
+		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+	}
+	s.actualTableLog = s.setMaxHeight(int(nonNullRank))
+	maxNbBits := s.actualTableLog
+
+	// fill result into tree (val, nbBits)
+	if maxNbBits > tableLogMax {
+		return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
+	}
+	var nbPerRank [tableLogMax + 1]uint16
+	var valPerRank [tableLogMax + 1]uint16
+	for _, v := range huffNode[:nonNullRank+1] {
+		nbPerRank[v.nbBits]++
+	}
+	// determine stating value per rank
+	{
+		min := uint16(0)
+		for n := maxNbBits; n > 0; n-- {
+			// get starting value within each rank
+			valPerRank[n] = min
+			min += nbPerRank[n]
+			min >>= 1
+		}
+	}
+
+	// push nbBits per symbol, symbol order
+	// TODO: changed `s.symbolLen` -> `nonNullRank+1` (micro-opt)
+	for _, v := range huffNode[:nonNullRank+1] {
+		s.cTable[v.symbol].nBits = v.nbBits
+	}
+
+	// assign value within rank, symbol order
+	for n, val := range s.cTable[:s.symbolLen] {
+		v := valPerRank[val.nBits]
+		s.cTable[n].val = v
+		valPerRank[val.nBits] = v + 1
+	}
+
+	return nil
+}
+
+// huffSort will sort symbols, decreasing order.
+func (s *Scratch) huffSort() {
+	type rankPos struct {
+		base    uint32
+		current uint32
+	}
+
+	// Clear nodes
+	nodes := s.nodes[:huffNodesLen+1]
+	s.nodes = nodes
+	nodes = nodes[1 : huffNodesLen+1]
+
+	// Sort into buckets based on length of symbol count.
+	var rank [32]rankPos
+	for _, v := range s.count[:s.symbolLen] {
+		r := highBit32(v+1) & 31
+		rank[r].base++
+	}
+	for n := 30; n > 0; n-- {
+		rank[n-1].base += rank[n].base
+	}
+	for n := range rank[:] {
+		rank[n].current = rank[n].base
+	}
+	for n, c := range s.count[:s.symbolLen] {
+		r := (highBit32(c+1) + 1) & 31
+		pos := rank[r].current
+		rank[r].current++
+		prev := nodes[(pos-1)&huffNodesMask]
+		for pos > rank[r].base && c > prev.count {
+			nodes[pos&huffNodesMask] = prev
+			pos--
+			prev = nodes[(pos-1)&huffNodesMask]
+		}
+		nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+	}
+	return
+}
+
+func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
+	maxNbBits := s.TableLog
+	huffNode := s.nodes[1 : huffNodesLen+1]
+	//huffNode = huffNode[: huffNodesLen]
+
+	largestBits := huffNode[lastNonNull].nbBits
+
+	// early exit : no elt > maxNbBits
+	if largestBits <= maxNbBits {
+		return largestBits
+	}
+	totalCost := int(0)
+	baseCost := int(1) << (largestBits - maxNbBits)
+	n := uint32(lastNonNull)
+
+	for huffNode[n].nbBits > maxNbBits {
+		totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
+		huffNode[n].nbBits = maxNbBits
+		n--
+	}
+	// n stops at huffNode[n].nbBits <= maxNbBits
+
+	for huffNode[n].nbBits == maxNbBits {
+		n--
+	}
+	// n end at index of smallest symbol using < maxNbBits
+
+	// renorm totalCost
+	totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */
+
+	// repay normalized cost
+	{
+		const noSymbol = 0xF0F0F0F0
+		var rankLast [tableLogMax + 2]uint32
+
+		for i := range rankLast[:] {
+			rankLast[i] = noSymbol
+		}
+
+		// Get pos of last (smallest) symbol per rank
+		{
+			currentNbBits := uint8(maxNbBits)
+			for pos := int(n); pos >= 0; pos-- {
+				if huffNode[pos].nbBits >= currentNbBits {
+					continue
+				}
+				currentNbBits = huffNode[pos].nbBits // < maxNbBits
+				rankLast[maxNbBits-currentNbBits] = uint32(pos)
+			}
+		}
+
+		for totalCost > 0 {
+			nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1
+
+			for ; nBitsToDecrease > 1; nBitsToDecrease-- {
+				highPos := rankLast[nBitsToDecrease]
+				lowPos := rankLast[nBitsToDecrease-1]
+				if highPos == noSymbol {
+					continue
+				}
+				if lowPos == noSymbol {
+					break
+				}
+				highTotal := huffNode[highPos].count
+				lowTotal := 2 * huffNode[lowPos].count
+				if highTotal <= lowTotal {
+					break
+				}
+			}
+			// only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !)
+			// HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary
+			// FIXME: try to remove
+			for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) {
+				nBitsToDecrease++
+			}
+			totalCost -= 1 << (nBitsToDecrease - 1)
+			if rankLast[nBitsToDecrease-1] == noSymbol {
+				// this rank is no longer empty
+				rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
+			}
+			huffNode[rankLast[nBitsToDecrease]].nbBits++
+			if rankLast[nBitsToDecrease] == 0 {
+				/* special case, reached largest symbol */
+				rankLast[nBitsToDecrease] = noSymbol
+			} else {
+				rankLast[nBitsToDecrease]--
+				if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+					rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
+				}
+			}
+		}
+
+		for totalCost < 0 { /* Sometimes, cost correction overshoot */
+			if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+				for huffNode[n].nbBits == maxNbBits {
+					n--
+				}
+				huffNode[n+1].nbBits--
+				rankLast[1] = n + 1
+				totalCost++
+				continue
+			}
+			huffNode[rankLast[1]+1].nbBits--
+			rankLast[1]++
+			totalCost++
+		}
+	}
+	return maxNbBits
+}
+
+type nodeElt struct {
+	count  uint32
+	parent uint16
+	symbol byte
+	nbBits uint8
+}
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@ -0,0 +1,397 @@
+package huff0
+
+import (
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/klauspost/compress/fse"
+)
+
+type dTable struct {
+	single []dEntrySingle
+	double []dEntryDouble
+}
+
+// single-symbols decoding
+type dEntrySingle struct {
+	byte  uint8
+	nBits uint8
+}
+
+// double-symbols decoding
+type dEntryDouble struct {
+	seq   uint16
+	nBits uint8
+	len   uint8
+}
+
+// ReadTable will read a table from the input.
+// The size of the input may be larger than the table definition.
+// Any content remaining after the table definition will be returned.
+// If no Scratch is provided a new one is allocated.
+// The returned Scratch can be used for decoding input using this table.
+func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
+	s, err = s.prepare(in)
+	if err != nil {
+		return s, nil, err
+	}
+	if len(in) <= 1 {
+		return s, nil, errors.New("input too small for table")
+	}
+	iSize := in[0]
+	in = in[1:]
+	if iSize >= 128 {
+		// Uncompressed
+		oSize := iSize - 127
+		iSize = (oSize + 1) / 2
+		if int(iSize) > len(in) {
+			return s, nil, errors.New("input too small for table")
+		}
+		for n := uint8(0); n < oSize; n += 2 {
+			v := in[n/2]
+			s.huffWeight[n] = v >> 4
+			s.huffWeight[n+1] = v & 15
+		}
+		s.symbolLen = uint16(oSize)
+		in = in[iSize:]
+	} else {
+		if len(in) <= int(iSize) {
+			return s, nil, errors.New("input too small for table")
+		}
+		// FSE compressed weights
+		s.fse.DecompressLimit = 255
+		hw := s.huffWeight[:]
+		s.fse.Out = hw
+		b, err := fse.Decompress(in[:iSize], s.fse)
+		s.fse.Out = nil
+		if err != nil {
+			return s, nil, err
+		}
+		if len(b) > 255 {
+			return s, nil, errors.New("corrupt input: output table too large")
+		}
+		s.symbolLen = uint16(len(b))
+		in = in[iSize:]
+	}
+
+	// collect weight stats
+	var rankStats [tableLogMax + 1]uint32
+	weightTotal := uint32(0)
+	for _, v := range s.huffWeight[:s.symbolLen] {
+		if v > tableLogMax {
+			return s, nil, errors.New("corrupt input: weight too large")
+		}
+		rankStats[v]++
+		weightTotal += (1 << (v & 15)) >> 1
+	}
+	if weightTotal == 0 {
+		return s, nil, errors.New("corrupt input: weights zero")
+	}
+
+	// get last non-null symbol weight (implied, total must be 2^n)
+	{
+		tableLog := highBit32(weightTotal) + 1
+		if tableLog > tableLogMax {
+			return s, nil, errors.New("corrupt input: tableLog too big")
+		}
+		s.actualTableLog = uint8(tableLog)
+		// determine last weight
+		{
+			total := uint32(1) << tableLog
+			rest := total - weightTotal
+			verif := uint32(1) << highBit32(rest)
+			lastWeight := highBit32(rest) + 1
+			if verif != rest {
+				// last value must be a clean power of 2
+				return s, nil, errors.New("corrupt input: last value not power of two")
+			}
+			s.huffWeight[s.symbolLen] = uint8(lastWeight)
+			s.symbolLen++
+			rankStats[lastWeight]++
+		}
+	}
+
+	if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
+		// by construction : at least 2 elts of rank 1, must be even
+		return s, nil, errors.New("corrupt input: min elt size, even check failed ")
+	}
+
+	// TODO: Choose between single/double symbol decoding
+
+	// Calculate starting value for each rank
+	{
+		var nextRankStart uint32
+		for n := uint8(1); n < s.actualTableLog+1; n++ {
+			current := nextRankStart
+			nextRankStart += rankStats[n] << (n - 1)
+			rankStats[n] = current
+		}
+	}
+
+	// fill DTable (always full size)
+	tSize := 1 << tableLogMax
+	if len(s.dt.single) != tSize {
+		s.dt.single = make([]dEntrySingle, tSize)
+	}
+
+	for n, w := range s.huffWeight[:s.symbolLen] {
+		length := (uint32(1) << w) >> 1
+		d := dEntrySingle{
+			byte:  uint8(n),
+			nBits: s.actualTableLog + 1 - w,
+		}
+		for u := rankStats[w]; u < rankStats[w]+length; u++ {
+			s.dt.single[u] = d
+		}
+		rankStats[w] += length
+	}
+	return s, in, nil
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// Before this is called, the table must be initialized with ReadTable unless
+// the encoder re-used the table.
+func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
+	if len(s.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	var br bitReader
+	err = br.init(in)
+	if err != nil {
+		return nil, err
+	}
+	s.Out = s.Out[:0]
+
+	decode := func() byte {
+		val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
+		v := s.dt.single[val]
+		br.bitsRead += v.nBits
+		return v.byte
+	}
+	hasDec := func(v dEntrySingle) byte {
+		br.bitsRead += v.nBits
+		return v.byte
+	}
+
+	// Avoid bounds check by always having full sized table.
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	dt := s.dt.single[:tlSize]
+
+	// Use temp table to avoid bound checks/append penalty.
+	var tmp = s.huffWeight[:256]
+	var off uint8
+
+	for br.off >= 8 {
+		br.fillFast()
+		tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
+		tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
+		br.fillFast()
+		tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
+		tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
+		off += 4
+		if off == 0 {
+			s.Out = append(s.Out, tmp...)
+		}
+	}
+
+	s.Out = append(s.Out, tmp[:off]...)
+
+	for !br.finished() {
+		br.fill()
+		s.Out = append(s.Out, decode())
+	}
+	return s.Out, br.close()
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// Before this is called, the table must be initialized with ReadTable unless
+// the encoder re-used the table.
+// The length of the supplied input must match the end of a block exactly.
+// The destination size of the uncompressed data must be known and provided.
+func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
+	if len(s.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	if len(in) < 6+(4*1) {
+		return nil, errors.New("input too small")
+	}
+	// TODO: We do not detect when we overrun a buffer, except if the last one does.
+
+	var br [4]bitReader
+	start := 6
+	for i := 0; i < 3; i++ {
+		length := int(in[i*2]) | (int(in[i*2+1]) << 8)
+		if start+length >= len(in) {
+			return nil, errors.New("truncated input (or invalid offset)")
+		}
+		err = br[i].init(in[start : start+length])
+		if err != nil {
+			return nil, err
+		}
+		start += length
+	}
+	err = br[3].init(in[start:])
+	if err != nil {
+		return nil, err
+	}
+
+	// Prepare output
+	if cap(s.Out) < dstSize {
+		s.Out = make([]byte, 0, dstSize)
+	}
+	s.Out = s.Out[:dstSize]
+	// destination, offset to match first output
+	dstOut := s.Out
+	dstEvery := (dstSize + 3) / 4
+
+	decode := func(br *bitReader) byte {
+		val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
+		v := s.dt.single[val]
+		br.bitsRead += v.nBits
+		return v.byte
+	}
+
+	// Use temp table to avoid bound checks/append penalty.
+	var tmp = s.huffWeight[:256]
+	var off uint8
+
+	// Decode 2 values from each decoder/loop.
+	const bufoff = 256 / 4
+bigloop:
+	for {
+		for i := range br {
+			if br[i].off < 4 {
+				break bigloop
+			}
+			br[i].fillFast()
+		}
+		tmp[off] = decode(&br[0])
+		tmp[off+bufoff] = decode(&br[1])
+		tmp[off+bufoff*2] = decode(&br[2])
+		tmp[off+bufoff*3] = decode(&br[3])
+		tmp[off+1] = decode(&br[0])
+		tmp[off+1+bufoff] = decode(&br[1])
+		tmp[off+1+bufoff*2] = decode(&br[2])
+		tmp[off+1+bufoff*3] = decode(&br[3])
+		off += 2
+		if off == bufoff {
+			if bufoff > dstEvery {
+				return nil, errors.New("corruption detected: stream overrun")
+			}
+			copy(dstOut, tmp[:bufoff])
+			copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2])
+			copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3])
+			copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4])
+			off = 0
+			dstOut = dstOut[bufoff:]
+			// There must at least be 3 buffers left.
+			if len(dstOut) < dstEvery*3+3 {
+				return nil, errors.New("corruption detected: stream overrun")
+			}
+		}
+	}
+	if off > 0 {
+		ioff := int(off)
+		if len(dstOut) < dstEvery*3+ioff {
+			return nil, errors.New("corruption detected: stream overrun")
+		}
+		copy(dstOut, tmp[:off])
+		copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2])
+		copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3])
+		copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4])
+		dstOut = dstOut[off:]
+	}
+
+	for i := range br {
+		offset := dstEvery * i
+		br := &br[i]
+		for !br.finished() {
+			br.fill()
+			if offset >= len(dstOut) {
+				return nil, errors.New("corruption detected: stream overrun")
+			}
+			dstOut[offset] = decode(br)
+			offset++
+		}
+		err = br.close()
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return s.Out, nil
+}
+
+// matches will compare a decoding table to a coding table.
+// Errors are written to the writer.
+// Nothing will be written if table is ok.
+func (s *Scratch) matches(ct cTable, w io.Writer) {
+	if s == nil || len(s.dt.single) == 0 {
+		return
+	}
+	dt := s.dt.single[:1<<s.actualTableLog]
+	tablelog := s.actualTableLog
+	ok := 0
+	broken := 0
+	for sym, enc := range ct {
+		errs := 0
+		broken++
+		if enc.nBits == 0 {
+			for _, dec := range dt {
+				if dec.byte == byte(sym) {
+					fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
+					errs++
+					break
+				}
+			}
+			if errs == 0 {
+				broken--
+			}
+			continue
+		}
+		// Unused bits in input
+		ub := tablelog - enc.nBits
+		top := enc.val << ub
+		// decoder looks at top bits.
+		dec := dt[top]
+		if dec.nBits != enc.nBits {
+			fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, dec.nBits)
+			errs++
+		}
+		if dec.byte != uint8(sym) {
+			fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, dec.byte)
+			errs++
+		}
+		if errs > 0 {
+			fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+			continue
+		}
+		// Ensure that all combinations are covered.
+		for i := uint16(0); i < (1 << ub); i++ {
+			vval := top | i
+			dec := dt[vval]
+			if dec.nBits != enc.nBits {
+				fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, dec.nBits)
+				errs++
+			}
+			if dec.byte != uint8(sym) {
+				fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, dec.byte)
+				errs++
+			}
+			if errs > 20 {
+				fmt.Fprintf(w, "%d errros, stopping\n", errs)
+				break
+			}
+		}
+		if errs == 0 {
+			ok++
+			broken--
+		}
+	}
+	if broken > 0 {
+		fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
+	}
+}
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@ -0,0 +1,241 @@
+// Package huff0 provides fast huffman encoding as used in zstd.
+//
+// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
+package huff0
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"math/bits"
+
+	"github.com/klauspost/compress/fse"
+)
+
+const (
+	maxSymbolValue = 255
+
+	// zstandard limits tablelog to 11, see:
+	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
+	tableLogMax     = 11
+	tableLogDefault = 11
+	minTablelog     = 5
+	huffNodesLen    = 512
+
+	// BlockSizeMax is maximum input size for a single block uncompressed.
+	BlockSizeMax = 1<<18 - 1
+)
+
+var (
+	// ErrIncompressible is returned when input is judged to be too hard to compress.
+	ErrIncompressible = errors.New("input is not compressible")
+
+	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+	ErrUseRLE = errors.New("input is single value repeated")
+
+	// ErrTooBig is return if input is too large for a single block.
+	ErrTooBig = errors.New("input too big")
+)
+
+type ReusePolicy uint8
+
+const (
+	// ReusePolicyAllow will allow reuse if it produces smaller output.
+	ReusePolicyAllow ReusePolicy = iota
+
+	// ReusePolicyPrefer will re-use aggressively if possible.
+	// This will not check if a new table will produce smaller output,
+	// except if the current table is impossible to use or
+	// compressed output is bigger than input.
+	ReusePolicyPrefer
+
+	// ReusePolicyNone will disable re-use of tables.
+	// This is slightly faster than ReusePolicyAllow but may produce larger output.
+	ReusePolicyNone
+)
+
+type Scratch struct {
+	count [maxSymbolValue + 1]uint32
+
+	// Per block parameters.
+	// These can be used to override compression parameters of the block.
+	// Do not touch, unless you know what you are doing.
+
+	// Out is output buffer.
+	// If the scratch is re-used before the caller is done processing the output,
+	// set this field to nil.
+	// Otherwise the output buffer will be re-used for next Compression/Decompression step
+	// and allocation will be avoided.
+	Out []byte
+
+	// OutTable will contain the table data only, if a new table has been generated.
+	// Slice of the returned data.
+	OutTable []byte
+
+	// OutData will contain the compressed data.
+	// Slice of the returned data.
+	OutData []byte
+
+	// MaxSymbolValue will override the maximum symbol value of the next block.
+	MaxSymbolValue uint8
+
+	// TableLog will attempt to override the tablelog for the next block.
+	// Must be <= 11.
+	TableLog uint8
+
+	// Reuse will specify the reuse policy
+	Reuse ReusePolicy
+
+	br             byteReader
+	symbolLen      uint16 // Length of active part of the symbol table.
+	maxCount       int    // count of the most probable symbol
+	clearCount     bool   // clear count
+	actualTableLog uint8  // Selected tablelog.
+	prevTable      cTable // Table used for previous compression.
+	cTable         cTable // compression table
+	dt             dTable // decompression table
+	nodes          []nodeElt
+	tmpOut         [4][]byte
+	fse            *fse.Scratch
+	huffWeight     [maxSymbolValue + 1]byte
+}
+
+func (s *Scratch) prepare(in []byte) (*Scratch, error) {
+	if len(in) > BlockSizeMax {
+		return nil, ErrTooBig
+	}
+	if s == nil {
+		s = &Scratch{}
+	}
+	if s.MaxSymbolValue == 0 {
+		s.MaxSymbolValue = maxSymbolValue
+	}
+	if s.TableLog == 0 {
+		s.TableLog = tableLogDefault
+	}
+	if s.TableLog > tableLogMax {
+		return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, tableLogMax)
+	}
+	if s.clearCount && s.maxCount == 0 {
+		for i := range s.count {
+			s.count[i] = 0
+		}
+		s.clearCount = false
+	}
+	if cap(s.Out) == 0 {
+		s.Out = make([]byte, 0, len(in))
+	}
+	s.Out = s.Out[:0]
+
+	s.OutTable = nil
+	s.OutData = nil
+	if cap(s.nodes) < huffNodesLen+1 {
+		s.nodes = make([]nodeElt, 0, huffNodesLen+1)
+	}
+	s.nodes = s.nodes[:0]
+	if s.fse == nil {
+		s.fse = &fse.Scratch{}
+	}
+	s.br.init(in)
+
+	return s, nil
+}
+
+type cTable []cTableEntry
+
+func (c cTable) write(s *Scratch) error {
+	var (
+		// precomputed conversion table
+		bitsToWeight [tableLogMax + 1]byte
+		huffLog      = s.actualTableLog
+		// last weight is not saved.
+		maxSymbolValue = uint8(s.symbolLen - 1)
+		huffWeight     = s.huffWeight[:256]
+	)
+	const (
+		maxFSETableLog = 6
+	)
+	// convert to weight
+	bitsToWeight[0] = 0
+	for n := uint8(1); n < huffLog+1; n++ {
+		bitsToWeight[n] = huffLog + 1 - n
+	}
+
+	// Acquire histogram for FSE.
+	hist := s.fse.Histogram()
+	hist = hist[:256]
+	for i := range hist[:16] {
+		hist[i] = 0
+	}
+	for n := uint8(0); n < maxSymbolValue; n++ {
+		v := bitsToWeight[c[n].nBits] & 15
+		huffWeight[n] = v
+		hist[v]++
+	}
+
+	// FSE compress if feasible.
+	if maxSymbolValue >= 2 {
+		huffMaxCnt := uint32(0)
+		huffMax := uint8(0)
+		for i, v := range hist[:16] {
+			if v == 0 {
+				continue
+			}
+			huffMax = byte(i)
+			if v > huffMaxCnt {
+				huffMaxCnt = v
+			}
+		}
+		s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+		s.fse.TableLog = maxFSETableLog
+		b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+		if err == nil && len(b) < int(s.symbolLen>>1) {
+			s.Out = append(s.Out, uint8(len(b)))
+			s.Out = append(s.Out, b...)
+			return nil
+		}
+		// Unable to compress (RLE/uncompressible)
+	}
+	// write raw values as 4-bits (max : 15)
+	if maxSymbolValue > (256 - 128) {
+		// should not happen : likely means source cannot be compressed
+		return ErrIncompressible
+	}
+	op := s.Out
+	// special case, pack weights 4 bits/weight.
+	op = append(op, 128|(maxSymbolValue-1))
+	// be sure it doesn't cause msan issue in final combination
+	huffWeight[maxSymbolValue] = 0
+	for n := uint16(0); n < uint16(maxSymbolValue); n += 2 {
+		op = append(op, (huffWeight[n]<<4)|huffWeight[n+1])
+	}
+	s.Out = op
+	return nil
+}
+
+// estimateSize returns the estimated size in bytes of the input represented in the
+// histogram supplied.
+func (c cTable) estimateSize(hist []uint32) int {
+	nbBits := uint32(7)
+	for i, v := range c[:len(hist)] {
+		nbBits += uint32(v.nBits) * hist[i]
+	}
+	return int(nbBits >> 3)
+}
+
+// minSize returns the minimum possible size considering the shannon limit.
+func (s *Scratch) minSize(total int) int {
+	nbBits := float64(7)
+	fTotal := float64(total)
+	for _, v := range s.count[:s.symbolLen] {
+		n := float64(v)
+		if n > 0 {
+			nbBits += math.Log2(fTotal/n) * n
+		}
+	}
+	return int(nbBits) >> 3
+}
+
+func highBit32(val uint32) (n uint32) {
+	return uint32(bits.Len32(val) - 1)
+}
--- a/vendor/github.com/klauspost/compress/snappy/.gitignore
+++ b/vendor/github.com/klauspost/compress/snappy/.gitignore
@ -0,0 +1,16 @@
+cmd/snappytool/snappytool
+testdata/bench
+
+# These explicitly listed benchmark data files are for an obsolete version of
+# snappy_test.go.
+testdata/alice29.txt
+testdata/asyoulik.txt
+testdata/fireworks.jpeg
+testdata/geo.protodata
+testdata/html
+testdata/html_x_4
+testdata/kppkn.gtb
+testdata/lcet10.txt
+testdata/paper-100k.pdf
+testdata/plrabn12.txt
+testdata/urls.10K
--- a/vendor/github.com/klauspost/compress/snappy/AUTHORS
+++ b/vendor/github.com/klauspost/compress/snappy/AUTHORS
@ -0,0 +1,15 @@
+# This is the official list of Snappy-Go authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+#	Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Google Inc.
+Jan Mercl <0xjnml@gmail.com>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS
+++ b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS
@ -0,0 +1,37 @@
+# This is the official list of people who can contribute
+# (and typically have contributed) code to the Snappy-Go repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people.  For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+#     http://code.google.com/legal/individual-cla-v1.0.html
+#     http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+#     Name <email address>
+
+# Please keep the list sorted.
+
+Damian Gryski <dgryski@gmail.com>
+Jan Mercl <0xjnml@gmail.com>
+Kai Backman <kaib@golang.org>
+Marc-Antoine Ruel <maruel@chromium.org>
+Nigel Tao <nigeltao@golang.org>
+Rob Pike <r@golang.org>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Russ Cox <rsc@golang.org>
+Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/klauspost/compress/snappy/LICENSE
+++ b/vendor/github.com/klauspost/compress/snappy/LICENSE
@ -1,4 +1,4 @@
-Copyright (c) 2009,2014 Google Inc. All rights reserved.
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
--- a/vendor/github.com/klauspost/compress/snappy/README
+++ b/vendor/github.com/klauspost/compress/snappy/README
@ -0,0 +1,107 @@
+The Snappy compression format in the Go programming language.
+
+To download and install from source:
+$ go get github.com/golang/snappy
+
+Unless otherwise noted, the Snappy-Go source files are distributed
+under the BSD-style license found in the LICENSE file.
+
+
+
+Benchmarks.
+
+The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
+or so files, the same set used by the C++ Snappy code (github.com/google/snappy
+and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
+3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
+
+"go test -test.bench=."
+
+_UFlat0-8         2.19GB/s ± 0%  html
+_UFlat1-8         1.41GB/s ± 0%  urls
+_UFlat2-8         23.5GB/s ± 2%  jpg
+_UFlat3-8         1.91GB/s ± 0%  jpg_200
+_UFlat4-8         14.0GB/s ± 1%  pdf
+_UFlat5-8         1.97GB/s ± 0%  html4
+_UFlat6-8          814MB/s ± 0%  txt1
+_UFlat7-8          785MB/s ± 0%  txt2
+_UFlat8-8          857MB/s ± 0%  txt3
+_UFlat9-8          719MB/s ± 1%  txt4
+_UFlat10-8        2.84GB/s ± 0%  pb
+_UFlat11-8        1.05GB/s ± 0%  gaviota
+
+_ZFlat0-8         1.04GB/s ± 0%  html
+_ZFlat1-8          534MB/s ± 0%  urls
+_ZFlat2-8         15.7GB/s ± 1%  jpg
+_ZFlat3-8          740MB/s ± 3%  jpg_200
+_ZFlat4-8         9.20GB/s ± 1%  pdf
+_ZFlat5-8          991MB/s ± 0%  html4
+_ZFlat6-8          379MB/s ± 0%  txt1
+_ZFlat7-8          352MB/s ± 0%  txt2
+_ZFlat8-8          396MB/s ± 1%  txt3
+_ZFlat9-8          327MB/s ± 1%  txt4
+_ZFlat10-8        1.33GB/s ± 1%  pb
+_ZFlat11-8         605MB/s ± 1%  gaviota
+
+
+
+"go test -test.bench=. -tags=noasm"
+
+_UFlat0-8          621MB/s ± 2%  html
+_UFlat1-8          494MB/s ± 1%  urls
+_UFlat2-8         23.2GB/s ± 1%  jpg
+_UFlat3-8         1.12GB/s ± 1%  jpg_200
+_UFlat4-8         4.35GB/s ± 1%  pdf
+_UFlat5-8          609MB/s ± 0%  html4
+_UFlat6-8          296MB/s ± 0%  txt1
+_UFlat7-8          288MB/s ± 0%  txt2
+_UFlat8-8          309MB/s ± 1%  txt3
+_UFlat9-8          280MB/s ± 1%  txt4
+_UFlat10-8         753MB/s ± 0%  pb
+_UFlat11-8         400MB/s ± 0%  gaviota
+
+_ZFlat0-8          409MB/s ± 1%  html
+_ZFlat1-8          250MB/s ± 1%  urls
+_ZFlat2-8         12.3GB/s ± 1%  jpg
+_ZFlat3-8          132MB/s ± 0%  jpg_200
+_ZFlat4-8         2.92GB/s ± 0%  pdf
+_ZFlat5-8          405MB/s ± 1%  html4
+_ZFlat6-8          179MB/s ± 1%  txt1
+_ZFlat7-8          170MB/s ± 1%  txt2
+_ZFlat8-8          189MB/s ± 1%  txt3
+_ZFlat9-8          164MB/s ± 1%  txt4
+_ZFlat10-8         479MB/s ± 1%  pb
+_ZFlat11-8         270MB/s ± 1%  gaviota
+
+
+
+For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
+are the numbers from C++ Snappy's
+
+make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
+
+BM_UFlat/0     2.4GB/s  html
+BM_UFlat/1     1.4GB/s  urls
+BM_UFlat/2    21.8GB/s  jpg
+BM_UFlat/3     1.5GB/s  jpg_200
+BM_UFlat/4    13.3GB/s  pdf
+BM_UFlat/5     2.1GB/s  html4
+BM_UFlat/6     1.0GB/s  txt1
+BM_UFlat/7   959.4MB/s  txt2
+BM_UFlat/8     1.0GB/s  txt3
+BM_UFlat/9   864.5MB/s  txt4
+BM_UFlat/10    2.9GB/s  pb
+BM_UFlat/11    1.2GB/s  gaviota
+
+BM_ZFlat/0   944.3MB/s  html (22.31 %)
+BM_ZFlat/1   501.6MB/s  urls (47.78 %)
+BM_ZFlat/2    14.3GB/s  jpg (99.95 %)
+BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %)
+BM_ZFlat/4     8.3GB/s  pdf (83.30 %)
+BM_ZFlat/5   903.5MB/s  html4 (22.52 %)
+BM_ZFlat/6   336.0MB/s  txt1 (57.88 %)
+BM_ZFlat/7   312.3MB/s  txt2 (61.91 %)
+BM_ZFlat/8   353.1MB/s  txt3 (54.99 %)
+BM_ZFlat/9   289.9MB/s  txt4 (66.26 %)
+BM_ZFlat/10    1.2GB/s  pb (19.68 %)
+BM_ZFlat/11  527.4MB/s  gaviota (37.72 %)
--- a/vendor/github.com/klauspost/compress/snappy/decode.go
+++ b/vendor/github.com/klauspost/compress/snappy/decode.go
@ -0,0 +1,237 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+var (
+	// ErrCorrupt reports that the input is invalid.
+	ErrCorrupt = errors.New("snappy: corrupt input")
+	// ErrTooLarge reports that the uncompressed length is too large.
+	ErrTooLarge = errors.New("snappy: decoded block is too large")
+	// ErrUnsupported reports that the input isn't supported.
+	ErrUnsupported = errors.New("snappy: unsupported input")
+
+	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
+)
+
+// DecodedLen returns the length of the decoded block.
+func DecodedLen(src []byte) (int, error) {
+	v, _, err := decodedLen(src)
+	return v, err
+}
+
+// decodedLen returns the length of the decoded block and the number of bytes
+// that the length header occupied.
+func decodedLen(src []byte) (blockLen, headerLen int, err error) {
+	v, n := binary.Uvarint(src)
+	if n <= 0 || v > 0xffffffff {
+		return 0, 0, ErrCorrupt
+	}
+
+	const wordSize = 32 << (^uint(0) >> 32 & 1)
+	if wordSize == 32 && v > 0x7fffffff {
+		return 0, 0, ErrTooLarge
+	}
+	return int(v), n, nil
+}
+
+const (
+	decodeErrCodeCorrupt                  = 1
+	decodeErrCodeUnsupportedLiteralLength = 2
+)
+
+// Decode returns the decoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire decoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func Decode(dst, src []byte) ([]byte, error) {
+	dLen, s, err := decodedLen(src)
+	if err != nil {
+		return nil, err
+	}
+	if dLen <= len(dst) {
+		dst = dst[:dLen]
+	} else {
+		dst = make([]byte, dLen)
+	}
+	switch decode(dst, src[s:]) {
+	case 0:
+		return dst, nil
+	case decodeErrCodeUnsupportedLiteralLength:
+		return nil, errUnsupportedLiteralLength
+	}
+	return nil, ErrCorrupt
+}
+
+// NewReader returns a new Reader that decompresses from r, using the framing
+// format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func NewReader(r io.Reader) *Reader {
+	return &Reader{
+		r:       r,
+		decoded: make([]byte, maxBlockSize),
+		buf:     make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
+	}
+}
+
+// Reader is an io.Reader that can read Snappy-compressed bytes.
+type Reader struct {
+	r       io.Reader
+	err     error
+	decoded []byte
+	buf     []byte
+	// decoded[i:j] contains decoded bytes that have not yet been passed on.
+	i, j       int
+	readHeader bool
+}
+
+// Reset discards any buffered data, resets all state, and switches the Snappy
+// reader to read from r. This permits reusing a Reader rather than allocating
+// a new one.
+func (r *Reader) Reset(reader io.Reader) {
+	r.r = reader
+	r.err = nil
+	r.i = 0
+	r.j = 0
+	r.readHeader = false
+}
+
+func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
+	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
+		if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
+			r.err = ErrCorrupt
+		}
+		return false
+	}
+	return true
+}
+
+// Read satisfies the io.Reader interface.
+func (r *Reader) Read(p []byte) (int, error) {
+	if r.err != nil {
+		return 0, r.err
+	}
+	for {
+		if r.i < r.j {
+			n := copy(p, r.decoded[r.i:r.j])
+			r.i += n
+			return n, nil
+		}
+		if !r.readFull(r.buf[:4], true) {
+			return 0, r.err
+		}
+		chunkType := r.buf[0]
+		if !r.readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+		if chunkLen > len(r.buf) {
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+
+		// The chunk types are specified at
+		// https://github.com/google/snappy/blob/master/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:chunkLen]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[checksumSize:]
+
+			n, err := DecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if _, err := Decode(r.decoded, buf); err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeUncompressedData:
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			buf := r.buf[:checksumSize]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read directly into r.decoded instead of via r.buf.
+			n := chunkLen - checksumSize
+			if n > len(r.decoded) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.decoded[:n], false) {
+				return 0, r.err
+			}
+			if crc(r.decoded[:n]) != checksum {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.i, r.j = 0, n
+			continue
+
+		case chunkTypeStreamIdentifier:
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(magicBody) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.buf[:len(magicBody)], false) {
+				return 0, r.err
+			}
+			for i := 0; i < len(magicBody); i++ {
+				if r.buf[i] != magicBody[i] {
+					r.err = ErrCorrupt
+					return 0, r.err
+				}
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if !r.readFull(r.buf[:chunkLen], false) {
+			return 0, r.err
+		}
+	}
+}
--- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go
+++ b/vendor/github.com/klauspost/compress/snappy/decode_amd64.go
@ -0,0 +1,14 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package snappy
+
+// decode has the same semantics as in decode_other.go.
+//
+//go:noescape
+func decode(dst, src []byte) int
--- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s
+++ b/vendor/github.com/klauspost/compress/snappy/decode_amd64.s
@ -0,0 +1,490 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in decode_other.go, except
+// where marked with a "!!!".
+
+// func decode(dst, src []byte) int
+//
+// All local variables fit into registers. The non-zero stack size is only to
+// spill registers and push args when issuing a CALL. The register allocation:
+//	- AX	scratch
+//	- BX	scratch
+//	- CX	length or x
+//	- DX	offset
+//	- SI	&src[s]
+//	- DI	&dst[d]
+//	+ R8	dst_base
+//	+ R9	dst_len
+//	+ R10	dst_base + dst_len
+//	+ R11	src_base
+//	+ R12	src_len
+//	+ R13	src_base + src_len
+//	- R14	used by doCopy
+//	- R15	used by doCopy
+//
+// The registers R8-R13 (marked with a "+") are set at the start of the
+// function, and after a CALL returns, and are not otherwise modified.
+//
+// The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI.
+// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
+TEXT ·decode(SB), NOSPLIT, $48-56
+	// Initialize SI, DI and R8-R13.
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, DI
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, SI
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+loop:
+	// for s < len(src)
+	CMPQ SI, R13
+	JEQ  end
+
+	// CX = uint32(src[s])
+	//
+	// switch src[s] & 0x03
+	MOVBLZX (SI), CX
+	MOVL    CX, BX
+	ANDL    $3, BX
+	CMPL    BX, $1
+	JAE     tagCopy
+
+	// ----------------------------------------
+	// The code below handles literal tags.
+
+	// case tagLiteral:
+	// x := uint32(src[s] >> 2)
+	// switch
+	SHRL $2, CX
+	CMPL CX, $60
+	JAE  tagLit60Plus
+
+	// case x < 60:
+	// s++
+	INCQ SI
+
+doLit:
+	// This is the end of the inner "switch", when we have a literal tag.
+	//
+	// We assume that CX == x and x fits in a uint32, where x is the variable
+	// used in the pure Go decode_other.go code.
+
+	// length = int(x) + 1
+	//
+	// Unlike the pure Go code, we don't need to check if length <= 0 because
+	// CX can hold 64 bits, so the increment cannot overflow.
+	INCQ CX
+
+	// Prepare to check if copying length bytes will run past the end of dst or
+	// src.
+	//
+	// AX = len(dst) - d
+	// BX = len(src) - s
+	MOVQ R10, AX
+	SUBQ DI, AX
+	MOVQ R13, BX
+	SUBQ SI, BX
+
+	// !!! Try a faster technique for short (16 or fewer bytes) copies.
+	//
+	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
+	//   goto callMemmove // Fall back on calling runtime·memmove.
+	// }
+	//
+	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
+	// against 21 instead of 16, because it cannot assume that all of its input
+	// is contiguous in memory and so it needs to leave enough source bytes to
+	// read the next tag without refilling buffers, but Go's Decode assumes
+	// contiguousness (the src argument is a []byte).
+	CMPQ CX, $16
+	JGT  callMemmove
+	CMPQ AX, $16
+	JLT  callMemmove
+	CMPQ BX, $16
+	JLT  callMemmove
+
+	// !!! Implement the copy from src to dst as a 16-byte load and store.
+	// (Decode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only length bytes, but that's
+	// OK. If the input is a valid Snappy encoding then subsequent iterations
+	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
+	// non-nil error), so the overrun will be ignored.
+	//
+	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	MOVOU 0(SI), X0
+	MOVOU X0, 0(DI)
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+callMemmove:
+	// if length > len(dst)-d || length > len(src)-s { etc }
+	CMPQ CX, AX
+	JGT  errCorrupt
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// copy(dst[d:], src[s:s+length])
+	//
+	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
+	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
+	// three registers to the stack, to save local variables across the CALL.
+	MOVQ DI, 0(SP)
+	MOVQ SI, 8(SP)
+	MOVQ CX, 16(SP)
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP)
+	CALL runtime·memmove(SB)
+
+	// Restore local variables: unspill registers from the stack and
+	// re-calculate R8-R13.
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+	MOVQ dst_base+0(FP), R8
+	MOVQ dst_len+8(FP), R9
+	MOVQ R8, R10
+	ADDQ R9, R10
+	MOVQ src_base+24(FP), R11
+	MOVQ src_len+32(FP), R12
+	MOVQ R11, R13
+	ADDQ R12, R13
+
+	// d += length
+	// s += length
+	ADDQ CX, DI
+	ADDQ CX, SI
+	JMP  loop
+
+tagLit60Plus:
+	// !!! This fragment does the
+	//
+	// s += x - 58; if uint(s) > uint(len(src)) { etc }
+	//
+	// checks. In the asm version, we code it once instead of once per switch case.
+	ADDQ CX, SI
+	SUBQ $58, SI
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// case x == 60:
+	CMPL CX, $61
+	JEQ  tagLit61
+	JA   tagLit62Plus
+
+	// x = uint32(src[s-1])
+	MOVBLZX -1(SI), CX
+	JMP     doLit
+
+tagLit61:
+	// case x == 61:
+	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
+	MOVWLZX -2(SI), CX
+	JMP     doLit
+
+tagLit62Plus:
+	CMPL CX, $62
+	JA   tagLit63
+
+	// case x == 62:
+	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+	MOVWLZX -3(SI), CX
+	MOVBLZX -1(SI), BX
+	SHLL    $16, BX
+	ORL     BX, CX
+	JMP     doLit
+
+tagLit63:
+	// case x == 63:
+	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+	MOVL -4(SI), CX
+	JMP  doLit
+
+// The code above handles literal tags.
+// ----------------------------------------
+// The code below handles copy tags.
+
+tagCopy4:
+	// case tagCopy4:
+	// s += 5
+	ADDQ $5, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// length = 1 + int(src[s-5])>>2
+	SHRQ $2, CX
+	INCQ CX
+
+	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+	MOVLQZX -4(SI), DX
+	JMP     doCopy
+
+tagCopy2:
+	// case tagCopy2:
+	// s += 3
+	ADDQ $3, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// length = 1 + int(src[s-3])>>2
+	SHRQ $2, CX
+	INCQ CX
+
+	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+	MOVWQZX -2(SI), DX
+	JMP     doCopy
+
+tagCopy:
+	// We have a copy tag. We assume that:
+	//	- BX == src[s] & 0x03
+	//	- CX == src[s]
+	CMPQ BX, $2
+	JEQ  tagCopy2
+	JA   tagCopy4
+
+	// case tagCopy1:
+	// s += 2
+	ADDQ $2, SI
+
+	// if uint(s) > uint(len(src)) { etc }
+	MOVQ SI, BX
+	SUBQ R11, BX
+	CMPQ BX, R12
+	JA   errCorrupt
+
+	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+	MOVQ    CX, DX
+	ANDQ    $0xe0, DX
+	SHLQ    $3, DX
+	MOVBQZX -1(SI), BX
+	ORQ     BX, DX
+
+	// length = 4 + int(src[s-2])>>2&0x7
+	SHRQ $2, CX
+	ANDQ $7, CX
+	ADDQ $4, CX
+
+doCopy:
+	// This is the end of the outer "switch", when we have a copy tag.
+	//
+	// We assume that:
+	//	- CX == length && CX > 0
+	//	- DX == offset
+
+	// if offset <= 0 { etc }
+	CMPQ DX, $0
+	JLE  errCorrupt
+
+	// if d < offset { etc }
+	MOVQ DI, BX
+	SUBQ R8, BX
+	CMPQ BX, DX
+	JLT  errCorrupt
+
+	// if length > len(dst)-d { etc }
+	MOVQ R10, BX
+	SUBQ DI, BX
+	CMPQ CX, BX
+	JGT  errCorrupt
+
+	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
+	//
+	// Set:
+	//	- R14 = len(dst)-d
+	//	- R15 = &dst[d-offset]
+	MOVQ R10, R14
+	SUBQ DI, R14
+	MOVQ DI, R15
+	SUBQ DX, R15
+
+	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
+	//
+	// First, try using two 8-byte load/stores, similar to the doLit technique
+	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
+	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
+	// and not one 16-byte load/store, and the first store has to be before the
+	// second load, due to the overlap if offset is in the range [8, 16).
+	//
+	// if length > 16 || offset < 8 || len(dst)-d < 16 {
+	//   goto slowForwardCopy
+	// }
+	// copy 16 bytes
+	// d += length
+	CMPQ CX, $16
+	JGT  slowForwardCopy
+	CMPQ DX, $8
+	JLT  slowForwardCopy
+	CMPQ R14, $16
+	JLT  slowForwardCopy
+	MOVQ 0(R15), AX
+	MOVQ AX, 0(DI)
+	MOVQ 8(R15), BX
+	MOVQ BX, 8(DI)
+	ADDQ CX, DI
+	JMP  loop
+
+slowForwardCopy:
+	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
+	// can still try 8-byte load stores, provided we can overrun up to 10 extra
+	// bytes. As above, the overrun will be fixed up by subsequent iterations
+	// of the outermost loop.
+	//
+	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
+	// commentary says:
+	//
+	// ----
+	//
+	// The main part of this loop is a simple copy of eight bytes at a time
+	// until we've copied (at least) the requested amount of bytes.  However,
+	// if d and d-offset are less than eight bytes apart (indicating a
+	// repeating pattern of length < 8), we first need to expand the pattern in
+	// order to get the correct results. For instance, if the buffer looks like
+	// this, with the eight-byte <d-offset> and <d> patterns marked as
+	// intervals:
+	//
+	//    abxxxxxxxxxxxx
+	//    [------]           d-offset
+	//      [------]         d
+	//
+	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
+	// once, after which we can move <d> two bytes without moving <d-offset>:
+	//
+	//    ababxxxxxxxxxx
+	//    [------]           d-offset
+	//        [------]       d
+	//
+	// and repeat the exercise until the two no longer overlap.
+	//
+	// This allows us to do very well in the special case of one single byte
+	// repeated many times, without taking a big hit for more general cases.
+	//
+	// The worst case of extra writing past the end of the match occurs when
+	// offset == 1 and length == 1; the last copy will read from byte positions
+	// [0..7] and write to [4..11], whereas it was only supposed to write to
+	// position 1. Thus, ten excess bytes.
+	//
+	// ----
+	//
+	// That "10 byte overrun" worst case is confirmed by Go's
+	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
+	// and finishSlowForwardCopy algorithm.
+	//
+	// if length > len(dst)-d-10 {
+	//   goto verySlowForwardCopy
+	// }
+	SUBQ $10, R14
+	CMPQ CX, R14
+	JGT  verySlowForwardCopy
+
+makeOffsetAtLeast8:
+	// !!! As above, expand the pattern so that offset >= 8 and we can use
+	// 8-byte load/stores.
+	//
+	// for offset < 8 {
+	//   copy 8 bytes from dst[d-offset:] to dst[d:]
+	//   length -= offset
+	//   d      += offset
+	//   offset += offset
+	//   // The two previous lines together means that d-offset, and therefore
+	//   // R15, is unchanged.
+	// }
+	CMPQ DX, $8
+	JGE  fixUpSlowForwardCopy
+	MOVQ (R15), BX
+	MOVQ BX, (DI)
+	SUBQ DX, CX
+	ADDQ DX, DI
+	ADDQ DX, DX
+	JMP  makeOffsetAtLeast8
+
+fixUpSlowForwardCopy:
+	// !!! Add length (which might be negative now) to d (implied by DI being
+	// &dst[d]) so that d ends up at the right place when we jump back to the
+	// top of the loop. Before we do that, though, we save DI to AX so that, if
+	// length is positive, copying the remaining length bytes will write to the
+	// right place.
+	MOVQ DI, AX
+	ADDQ CX, DI
+
+finishSlowForwardCopy:
+	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
+	// length means that we overrun, but as above, that will be fixed up by
+	// subsequent iterations of the outermost loop.
+	CMPQ CX, $0
+	JLE  loop
+	MOVQ (R15), BX
+	MOVQ BX, (AX)
+	ADDQ $8, R15
+	ADDQ $8, AX
+	SUBQ $8, CX
+	JMP  finishSlowForwardCopy
+
+verySlowForwardCopy:
+	// verySlowForwardCopy is a simple implementation of forward copy. In C
+	// parlance, this is a do/while loop instead of a while loop, since we know
+	// that length > 0. In Go syntax:
+	//
+	// for {
+	//   dst[d] = dst[d - offset]
+	//   d++
+	//   length--
+	//   if length == 0 {
+	//     break
+	//   }
+	// }
+	MOVB (R15), BX
+	MOVB BX, (DI)
+	INCQ R15
+	INCQ DI
+	DECQ CX
+	JNZ  verySlowForwardCopy
+	JMP  loop
+
+// The code above handles copy tags.
+// ----------------------------------------
+
+end:
+	// This is the end of the "for s < len(src)".
+	//
+	// if d != len(dst) { etc }
+	CMPQ DI, R10
+	JNE  errCorrupt
+
+	// return 0
+	MOVQ $0, ret+48(FP)
+	RET
+
+errCorrupt:
+	// return decodeErrCodeCorrupt
+	MOVQ $1, ret+48(FP)
+	RET
--- a/vendor/github.com/klauspost/compress/snappy/decode_other.go
+++ b/vendor/github.com/klauspost/compress/snappy/decode_other.go
@ -0,0 +1,101 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine !gc noasm
+
+package snappy
+
+// decode writes the decoding of src to dst. It assumes that the varint-encoded
+// length of the decompressed bytes has already been read, and that len(dst)
+// equals that length.
+//
+// It returns 0 on success or a decodeErrCodeXxx error code on failure.
+func decode(dst, src []byte) int {
+	var d, s, offset, length int
+	for s < len(src) {
+		switch src[s] & 0x03 {
+		case tagLiteral:
+			x := uint32(src[s] >> 2)
+			switch {
+			case x < 60:
+				s++
+			case x == 60:
+				s += 2
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-1])
+			case x == 61:
+				s += 3
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-2]) | uint32(src[s-1])<<8
+			case x == 62:
+				s += 4
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+			case x == 63:
+				s += 5
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					return decodeErrCodeCorrupt
+				}
+				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+			}
+			length = int(x) + 1
+			if length <= 0 {
+				return decodeErrCodeUnsupportedLiteralLength
+			}
+			if length > len(dst)-d || length > len(src)-s {
+				return decodeErrCodeCorrupt
+			}
+			copy(dst[d:], src[s:s+length])
+			d += length
+			s += length
+			continue
+
+		case tagCopy1:
+			s += 2
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 4 + int(src[s-2])>>2&0x7
+			offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+
+		case tagCopy2:
+			s += 3
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 1 + int(src[s-3])>>2
+			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+
+		case tagCopy4:
+			s += 5
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				return decodeErrCodeCorrupt
+			}
+			length = 1 + int(src[s-5])>>2
+			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+		}
+
+		if offset <= 0 || d < offset || length > len(dst)-d {
+			return decodeErrCodeCorrupt
+		}
+		// Copy from an earlier sub-slice of dst to a later sub-slice. Unlike
+		// the built-in copy function, this byte-by-byte copy always runs
+		// forwards, even if the slices overlap. Conceptually, this is:
+		//
+		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
+		for end := d + length; d != end; d++ {
+			dst[d] = dst[d-offset]
+		}
+	}
+	if d != len(dst) {
+		return decodeErrCodeCorrupt
+	}
+	return 0
+}
--- a/vendor/github.com/klauspost/compress/snappy/encode.go
+++ b/vendor/github.com/klauspost/compress/snappy/encode.go
@ -0,0 +1,285 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package snappy
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+// Encode returns the encoded form of src. The returned slice may be a sub-
+// slice of dst if dst was large enough to hold the entire encoded block.
+// Otherwise, a newly allocated slice will be returned.
+//
+// The dst and src must not overlap. It is valid to pass a nil dst.
+func Encode(dst, src []byte) []byte {
+	if n := MaxEncodedLen(len(src)); n < 0 {
+		panic(ErrTooLarge)
+	} else if len(dst) < n {
+		dst = make([]byte, n)
+	}
+
+	// The block starts with the varint-encoded length of the decompressed bytes.
+	d := binary.PutUvarint(dst, uint64(len(src)))
+
+	for len(src) > 0 {
+		p := src
+		src = nil
+		if len(p) > maxBlockSize {
+			p, src = p[:maxBlockSize], p[maxBlockSize:]
+		}
+		if len(p) < minNonLiteralBlockSize {
+			d += emitLiteral(dst[d:], p)
+		} else {
+			d += encodeBlock(dst[d:], p)
+		}
+	}
+	return dst[:d]
+}
+
+// inputMargin is the minimum number of extra input bytes to keep, inside
+// encodeBlock's inner loop. On some architectures, this margin lets us
+// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
+// literals can be implemented as a single load to and store from a 16-byte
+// register. That literal's actual length can be as short as 1 byte, so this
+// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
+// the encoding loop will fix up the copy overrun, and this inputMargin ensures
+// that we don't overrun the dst and src buffers.
+const inputMargin = 16 - 1
+
+// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
+// could be encoded with a copy tag. This is the minimum with respect to the
+// algorithm used by encodeBlock, not a minimum enforced by the file format.
+//
+// The encoded output must start with at least a 1 byte literal, as there are
+// no previous bytes to copy. A minimal (1 byte) copy after that, generated
+// from an emitCopy call in encodeBlock's main loop, would require at least
+// another inputMargin bytes, for the reason above: we want any emitLiteral
+// calls inside encodeBlock's main loop to use the fast path if possible, which
+// requires being able to overrun by inputMargin bytes. Thus,
+// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
+//
+// The C++ code doesn't use this exact threshold, but it could, as discussed at
+// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
+// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
+// optimization. It should not affect the encoded form. This is tested by
+// TestSameEncodingAsCppShortCopies.
+const minNonLiteralBlockSize = 1 + 1 + inputMargin
+
+// MaxEncodedLen returns the maximum length of a snappy block, given its
+// uncompressed length.
+//
+// It will return a negative value if srcLen is too large to encode.
+func MaxEncodedLen(srcLen int) int {
+	n := uint64(srcLen)
+	if n > 0xffffffff {
+		return -1
+	}
+	// Compressed data can be defined as:
+	//    compressed := item* literal*
+	//    item       := literal* copy
+	//
+	// The trailing literal sequence has a space blowup of at most 62/60
+	// since a literal of length 60 needs one tag byte + one extra byte
+	// for length information.
+	//
+	// Item blowup is trickier to measure. Suppose the "copy" op copies
+	// 4 bytes of data. Because of a special check in the encoding code,
+	// we produce a 4-byte copy only if the offset is < 65536. Therefore
+	// the copy op takes 3 bytes to encode, and this type of item leads
+	// to at most the 62/60 blowup for representing literals.
+	//
+	// Suppose the "copy" op copies 5 bytes of data. If the offset is big
+	// enough, it will take 5 bytes to encode the copy op. Therefore the
+	// worst case here is a one-byte literal followed by a five-byte copy.
+	// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
+	//
+	// This last factor dominates the blowup, so the final estimate is:
+	n = 32 + n + n/6
+	if n > 0xffffffff {
+		return -1
+	}
+	return int(n)
+}
+
+var errClosed = errors.New("snappy: Writer is closed")
+
+// NewWriter returns a new Writer that compresses to w.
+//
+// The Writer returned does not buffer writes. There is no need to Flush or
+// Close such a Writer.
+//
+// Deprecated: the Writer returned is not suitable for many small writes, only
+// for few large writes. Use NewBufferedWriter instead, which is efficient
+// regardless of the frequency and shape of the writes, and remember to Close
+// that Writer when done.
+func NewWriter(w io.Writer) *Writer {
+	return &Writer{
+		w:    w,
+		obuf: make([]byte, obufLen),
+	}
+}
+
+// NewBufferedWriter returns a new Writer that compresses to w, using the
+// framing format described at
+// https://github.com/google/snappy/blob/master/framing_format.txt
+//
+// The Writer returned buffers writes. Users must call Close to guarantee all
+// data has been forwarded to the underlying io.Writer. They may also call
+// Flush zero or more times before calling Close.
+func NewBufferedWriter(w io.Writer) *Writer {
+	return &Writer{
+		w:    w,
+		ibuf: make([]byte, 0, maxBlockSize),
+		obuf: make([]byte, obufLen),
+	}
+}
+
+// Writer is an io.Writer that can write Snappy-compressed bytes.
+type Writer struct {
+	w   io.Writer
+	err error
+
+	// ibuf is a buffer for the incoming (uncompressed) bytes.
+	//
+	// Its use is optional. For backwards compatibility, Writers created by the
+	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
+	// therefore do not need to be Flush'ed or Close'd.
+	ibuf []byte
+
+	// obuf is a buffer for the outgoing (compressed) bytes.
+	obuf []byte
+
+	// wroteStreamHeader is whether we have written the stream header.
+	wroteStreamHeader bool
+}
+
+// Reset discards the writer's state and switches the Snappy writer to write to
+// w. This permits reusing a Writer rather than allocating a new one.
+func (w *Writer) Reset(writer io.Writer) {
+	w.w = writer
+	w.err = nil
+	if w.ibuf != nil {
+		w.ibuf = w.ibuf[:0]
+	}
+	w.wroteStreamHeader = false
+}
+
+// Write satisfies the io.Writer interface.
+func (w *Writer) Write(p []byte) (nRet int, errRet error) {
+	if w.ibuf == nil {
+		// Do not buffer incoming bytes. This does not perform or compress well
+		// if the caller of Writer.Write writes many small slices. This
+		// behavior is therefore deprecated, but still supported for backwards
+		// compatibility with code that doesn't explicitly Flush or Close.
+		return w.write(p)
+	}
+
+	// The remainder of this method is based on bufio.Writer.Write from the
+	// standard library.
+
+	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
+		var n int
+		if len(w.ibuf) == 0 {
+			// Large write, empty buffer.
+			// Write directly from p to avoid copy.
+			n, _ = w.write(p)
+		} else {
+			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+			w.ibuf = w.ibuf[:len(w.ibuf)+n]
+			w.Flush()
+		}
+		nRet += n
+		p = p[n:]
+	}
+	if w.err != nil {
+		return nRet, w.err
+	}
+	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
+	w.ibuf = w.ibuf[:len(w.ibuf)+n]
+	nRet += n
+	return nRet, nil
+}
+
+func (w *Writer) write(p []byte) (nRet int, errRet error) {
+	if w.err != nil {
+		return 0, w.err
+	}
+	for len(p) > 0 {
+		obufStart := len(magicChunk)
+		if !w.wroteStreamHeader {
+			w.wroteStreamHeader = true
+			copy(w.obuf, magicChunk)
+			obufStart = 0
+		}
+
+		var uncompressed []byte
+		if len(p) > maxBlockSize {
+			uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
+		} else {
+			uncompressed, p = p, nil
+		}
+		checksum := crc(uncompressed)
+
+		// Compress the buffer, discarding the result if the improvement
+		// isn't at least 12.5%.
+		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
+		chunkType := uint8(chunkTypeCompressedData)
+		chunkLen := 4 + len(compressed)
+		obufEnd := obufHeaderLen + len(compressed)
+		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
+			chunkType = chunkTypeUncompressedData
+			chunkLen = 4 + len(uncompressed)
+			obufEnd = obufHeaderLen
+		}
+
+		// Fill in the per-chunk header that comes before the body.
+		w.obuf[len(magicChunk)+0] = chunkType
+		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
+		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
+		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
+		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
+		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
+		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
+		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
+
+		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
+			w.err = err
+			return nRet, err
+		}
+		if chunkType == chunkTypeUncompressedData {
+			if _, err := w.w.Write(uncompressed); err != nil {
+				w.err = err
+				return nRet, err
+			}
+		}
+		nRet += len(uncompressed)
+	}
+	return nRet, nil
+}
+
+// Flush flushes the Writer to its underlying io.Writer.
+func (w *Writer) Flush() error {
+	if w.err != nil {
+		return w.err
+	}
+	if len(w.ibuf) == 0 {
+		return nil
+	}
+	w.write(w.ibuf)
+	w.ibuf = w.ibuf[:0]
+	return w.err
+}
+
+// Close calls Flush and then closes the Writer.
+func (w *Writer) Close() error {
+	w.Flush()
+	ret := w.err
+	if w.err == nil {
+		w.err = errClosed
+	}
+	return ret
+}
--- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go
+++ b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go
@ -0,0 +1,29 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package snappy
+
+// emitLiteral has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitLiteral(dst, lit []byte) int
+
+// emitCopy has the same semantics as in encode_other.go.
+//
+//go:noescape
+func emitCopy(dst []byte, offset, length int) int
+
+// extendMatch has the same semantics as in encode_other.go.
+//
+//go:noescape
+func extendMatch(src []byte, i, j int) int
+
+// encodeBlock has the same semantics as in encode_other.go.
+//
+//go:noescape
+func encodeBlock(dst, src []byte) (d int)
--- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s
+++ b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s
@ -0,0 +1,730 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
+// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
+// https://github.com/golang/snappy/issues/29
+//
+// As a workaround, the package was built with a known good assembler, and
+// those instructions were disassembled by "objdump -d" to yield the
+//	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+// style comments, in AT&T asm syntax. Note that rsp here is a physical
+// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
+// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
+// fine on Go 1.6.
+
+// The asm code generally follows the pure Go code in encode_other.go, except
+// where marked with a "!!!".
+
+// ----------------------------------------------------------------------------
+
+// func emitLiteral(dst, lit []byte) int
+//
+// All local variables fit into registers. The register allocation:
+//	- AX	len(lit)
+//	- BX	n
+//	- DX	return value
+//	- DI	&dst[i]
+//	- R10	&lit[0]
+//
+// The 24 bytes of stack space is to call runtime·memmove.
+//
+// The unusual register allocation of local variables, such as R10 for the
+// source pointer, matches the allocation used at the call site in encodeBlock,
+// which makes it easier to manually inline this function.
+TEXT ·emitLiteral(SB), NOSPLIT, $24-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ lit_base+24(FP), R10
+	MOVQ lit_len+32(FP), AX
+	MOVQ AX, DX
+	MOVL AX, BX
+	SUBL $1, BX
+
+	CMPL BX, $60
+	JLT  oneByte
+	CMPL BX, $256
+	JLT  twoBytes
+
+threeBytes:
+	MOVB $0xf4, 0(DI)
+	MOVW BX, 1(DI)
+	ADDQ $3, DI
+	ADDQ $3, DX
+	JMP  memmove
+
+twoBytes:
+	MOVB $0xf0, 0(DI)
+	MOVB BX, 1(DI)
+	ADDQ $2, DI
+	ADDQ $2, DX
+	JMP  memmove
+
+oneByte:
+	SHLB $2, BX
+	MOVB BX, 0(DI)
+	ADDQ $1, DI
+	ADDQ $1, DX
+
+memmove:
+	MOVQ DX, ret+48(FP)
+
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// DI, R10 and AX as arguments.
+	MOVQ DI, 0(SP)
+	MOVQ R10, 8(SP)
+	MOVQ AX, 16(SP)
+	CALL runtime·memmove(SB)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func emitCopy(dst []byte, offset, length int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- AX	length
+//	- SI	&dst[0]
+//	- DI	&dst[i]
+//	- R11	offset
+//
+// The unusual register allocation of local variables, such as R11 for the
+// offset, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·emitCopy(SB), NOSPLIT, $0-48
+	MOVQ dst_base+0(FP), DI
+	MOVQ DI, SI
+	MOVQ offset+24(FP), R11
+	MOVQ length+32(FP), AX
+
+loop0:
+	// for length >= 68 { etc }
+	CMPL AX, $68
+	JLT  step1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVB $0xfe, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $64, AX
+	JMP  loop0
+
+step1:
+	// if length > 64 { etc }
+	CMPL AX, $64
+	JLE  step2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVB $0xee, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $60, AX
+
+step2:
+	// if length >= 12 || offset >= 2048 { goto step3 }
+	CMPL AX, $12
+	JGE  step3
+	CMPL R11, $2048
+	JGE  step3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(DI)
+	SHRL $8, R11
+	SHLB $5, R11
+	SUBB $4, AX
+	SHLB $2, AX
+	ORB  AX, R11
+	ORB  $1, R11
+	MOVB R11, 0(DI)
+	ADDQ $2, DI
+
+	// Return the number of bytes written.
+	SUBQ SI, DI
+	MOVQ DI, ret+40(FP)
+	RET
+
+step3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUBL $1, AX
+	SHLB $2, AX
+	ORB  $2, AX
+	MOVB AX, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+
+	// Return the number of bytes written.
+	SUBQ SI, DI
+	MOVQ DI, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func extendMatch(src []byte, i, j int) int
+//
+// All local variables fit into registers. The register allocation:
+//	- DX	&src[0]
+//	- SI	&src[j]
+//	- R13	&src[len(src) - 8]
+//	- R14	&src[len(src)]
+//	- R15	&src[i]
+//
+// The unusual register allocation of local variables, such as R15 for a source
+// pointer, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·extendMatch(SB), NOSPLIT, $0-48
+	MOVQ src_base+0(FP), DX
+	MOVQ src_len+8(FP), R14
+	MOVQ i+24(FP), R15
+	MOVQ j+32(FP), SI
+	ADDQ DX, R14
+	ADDQ DX, R15
+	ADDQ DX, SI
+	MOVQ R14, R13
+	SUBQ $8, R13
+
+cmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMPQ SI, R13
+	JA   cmp1
+	MOVQ (R15), AX
+	MOVQ (SI), BX
+	CMPQ AX, BX
+	JNE  bsf
+	ADDQ $8, R15
+	ADDQ $8, SI
+	JMP  cmp8
+
+bsf:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs. The BSF instruction finds the
+	// least significant 1 bit, the amd64 architecture is little-endian, and
+	// the shift by 3 converts a bit index to a byte index.
+	XORQ AX, BX
+	BSFQ BX, BX
+	SHRQ $3, BX
+	ADDQ BX, SI
+
+	// Convert from &src[ret] to ret.
+	SUBQ DX, SI
+	MOVQ SI, ret+40(FP)
+	RET
+
+cmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMPQ SI, R14
+	JAE  extendMatchEnd
+	MOVB (R15), AX
+	MOVB (SI), BX
+	CMPB AX, BX
+	JNE  extendMatchEnd
+	ADDQ $1, R15
+	ADDQ $1, SI
+	JMP  cmp1
+
+extendMatchEnd:
+	// Convert from &src[ret] to ret.
+	SUBQ DX, SI
+	MOVQ SI, ret+40(FP)
+	RET
+
+// ----------------------------------------------------------------------------
+
+// func encodeBlock(dst, src []byte) (d int)
+//
+// All local variables fit into registers, other than "var table". The register
+// allocation:
+//	- AX	.	.
+//	- BX	.	.
+//	- CX	56	shift (note that amd64 shifts by non-immediates must use CX).
+//	- DX	64	&src[0], tableSize
+//	- SI	72	&src[s]
+//	- DI	80	&dst[d]
+//	- R9	88	sLimit
+//	- R10	.	&src[nextEmit]
+//	- R11	96	prevHash, currHash, nextHash, offset
+//	- R12	104	&src[base], skip
+//	- R13	.	&src[nextS], &src[len(src) - 8]
+//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
+//	- R15	112	candidate
+//
+// The second column (56, 64, etc) is the stack offset to spill the registers
+// when calling other functions. We could pack this slightly tighter, but it's
+// simpler to have a dedicated spill map independent of the function called.
+//
+// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
+// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
+// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
+TEXT ·encodeBlock(SB), 0, $32888-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ src_base+24(FP), SI
+	MOVQ src_len+32(FP), R14
+
+	// shift, tableSize := uint32(32-8), 1<<8
+	MOVQ $24, CX
+	MOVQ $256, DX
+
+calcShift:
+	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+	//	shift--
+	// }
+	CMPQ DX, $16384
+	JGE  varTable
+	CMPQ DX, R14
+	JGE  varTable
+	SUBQ $1, CX
+	SHLQ $1, DX
+	JMP  calcShift
+
+varTable:
+	// var table [maxTableSize]uint16
+	//
+	// In the asm code, unlike the Go code, we can zero-initialize only the
+	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
+	// writes 16 bytes, so we can do only tableSize/8 writes instead of the
+	// 2048 writes that would zero-initialize all of table's 32768 bytes.
+	SHRQ $3, DX
+	LEAQ table-32768(SP), BX
+	PXOR X0, X0
+
+memclr:
+	MOVOU X0, 0(BX)
+	ADDQ  $16, BX
+	SUBQ  $1, DX
+	JNZ   memclr
+
+	// !!! DX = &src[0]
+	MOVQ SI, DX
+
+	// sLimit := len(src) - inputMargin
+	MOVQ R14, R9
+	SUBQ $15, R9
+
+	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
+	// change for the rest of the function.
+	MOVQ CX, 56(SP)
+	MOVQ DX, 64(SP)
+	MOVQ R9, 88(SP)
+
+	// nextEmit := 0
+	MOVQ DX, R10
+
+	// s := 1
+	ADDQ $1, SI
+
+	// nextHash := hash(load32(src, s), shift)
+	MOVL  0(SI), R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+outer:
+	// for { etc }
+
+	// skip := 32
+	MOVQ $32, R12
+
+	// nextS := s
+	MOVQ SI, R13
+
+	// candidate := 0
+	MOVQ $0, R15
+
+inner0:
+	// for { etc }
+
+	// s := nextS
+	MOVQ R13, SI
+
+	// bytesBetweenHashLookups := skip >> 5
+	MOVQ R12, R14
+	SHRQ $5, R14
+
+	// nextS = s + bytesBetweenHashLookups
+	ADDQ R14, R13
+
+	// skip += bytesBetweenHashLookups
+	ADDQ R14, R12
+
+	// if nextS > sLimit { goto emitRemainder }
+	MOVQ R13, AX
+	SUBQ DX, AX
+	CMPQ AX, R9
+	JA   emitRemainder
+
+	// candidate = int(table[nextHash])
+	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
+	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+	BYTE $0x4e
+	BYTE $0x0f
+	BYTE $0xb7
+	BYTE $0x7c
+	BYTE $0x5c
+	BYTE $0x78
+
+	// table[nextHash] = uint16(s)
+	MOVQ SI, AX
+	SUBQ DX, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// nextHash = hash(load32(src, nextS), shift)
+	MOVL  0(R13), R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// if load32(src, s) != load32(src, candidate) { continue } break
+	MOVL 0(SI), AX
+	MOVL (DX)(R15*1), BX
+	CMPL AX, BX
+	JNE  inner0
+
+fourByteMatch:
+	// As per the encode_other.go code:
+	//
+	// A 4-byte match has been found. We'll later see etc.
+
+	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
+	// on inputMargin in encode.go.
+	MOVQ SI, AX
+	SUBQ R10, AX
+	CMPQ AX, $16
+	JLE  emitLiteralFastPath
+
+	// ----------------------------------------
+	// Begin inline of the emitLiteral call.
+	//
+	// d += emitLiteral(dst[d:], src[nextEmit:s])
+
+	MOVL AX, BX
+	SUBL $1, BX
+
+	CMPL BX, $60
+	JLT  inlineEmitLiteralOneByte
+	CMPL BX, $256
+	JLT  inlineEmitLiteralTwoBytes
+
+inlineEmitLiteralThreeBytes:
+	MOVB $0xf4, 0(DI)
+	MOVW BX, 1(DI)
+	ADDQ $3, DI
+	JMP  inlineEmitLiteralMemmove
+
+inlineEmitLiteralTwoBytes:
+	MOVB $0xf0, 0(DI)
+	MOVB BX, 1(DI)
+	ADDQ $2, DI
+	JMP  inlineEmitLiteralMemmove
+
+inlineEmitLiteralOneByte:
+	SHLB $2, BX
+	MOVB BX, 0(DI)
+	ADDQ $1, DI
+
+inlineEmitLiteralMemmove:
+	// Spill local variables (registers) onto the stack; call; unspill.
+	//
+	// copy(dst[i:], lit)
+	//
+	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+	// DI, R10 and AX as arguments.
+	MOVQ DI, 0(SP)
+	MOVQ R10, 8(SP)
+	MOVQ AX, 16(SP)
+	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)".
+	MOVQ SI, 72(SP)
+	MOVQ DI, 80(SP)
+	MOVQ R15, 112(SP)
+	CALL runtime·memmove(SB)
+	MOVQ 56(SP), CX
+	MOVQ 64(SP), DX
+	MOVQ 72(SP), SI
+	MOVQ 80(SP), DI
+	MOVQ 88(SP), R9
+	MOVQ 112(SP), R15
+	JMP  inner1
+
+inlineEmitLiteralEnd:
+	// End inline of the emitLiteral call.
+	// ----------------------------------------
+
+emitLiteralFastPath:
+	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
+	MOVB AX, BX
+	SUBB $1, BX
+	SHLB $2, BX
+	MOVB BX, (DI)
+	ADDQ $1, DI
+
+	// !!! Implement the copy from lit to dst as a 16-byte load and store.
+	// (Encode's documentation says that dst and src must not overlap.)
+	//
+	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
+	// OK. Subsequent iterations will fix up the overrun.
+	//
+	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
+	// 16-byte loads and stores. This technique probably wouldn't be as
+	// effective on architectures that are fussier about alignment.
+	MOVOU 0(R10), X0
+	MOVOU X0, 0(DI)
+	ADDQ  AX, DI
+
+inner1:
+	// for { etc }
+
+	// base := s
+	MOVQ SI, R12
+
+	// !!! offset := base - candidate
+	MOVQ R12, R11
+	SUBQ R15, R11
+	SUBQ DX, R11
+
+	// ----------------------------------------
+	// Begin inline of the extendMatch call.
+	//
+	// s = extendMatch(src, candidate+4, s+4)
+
+	// !!! R14 = &src[len(src)]
+	MOVQ src_len+32(FP), R14
+	ADDQ DX, R14
+
+	// !!! R13 = &src[len(src) - 8]
+	MOVQ R14, R13
+	SUBQ $8, R13
+
+	// !!! R15 = &src[candidate + 4]
+	ADDQ $4, R15
+	ADDQ DX, R15
+
+	// !!! s += 4
+	ADDQ $4, SI
+
+inlineExtendMatchCmp8:
+	// As long as we are 8 or more bytes before the end of src, we can load and
+	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+	CMPQ SI, R13
+	JA   inlineExtendMatchCmp1
+	MOVQ (R15), AX
+	MOVQ (SI), BX
+	CMPQ AX, BX
+	JNE  inlineExtendMatchBSF
+	ADDQ $8, R15
+	ADDQ $8, SI
+	JMP  inlineExtendMatchCmp8
+
+inlineExtendMatchBSF:
+	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
+	// the index of the first byte that differs. The BSF instruction finds the
+	// least significant 1 bit, the amd64 architecture is little-endian, and
+	// the shift by 3 converts a bit index to a byte index.
+	XORQ AX, BX
+	BSFQ BX, BX
+	SHRQ $3, BX
+	ADDQ BX, SI
+	JMP  inlineExtendMatchEnd
+
+inlineExtendMatchCmp1:
+	// In src's tail, compare 1 byte at a time.
+	CMPQ SI, R14
+	JAE  inlineExtendMatchEnd
+	MOVB (R15), AX
+	MOVB (SI), BX
+	CMPB AX, BX
+	JNE  inlineExtendMatchEnd
+	ADDQ $1, R15
+	ADDQ $1, SI
+	JMP  inlineExtendMatchCmp1
+
+inlineExtendMatchEnd:
+	// End inline of the extendMatch call.
+	// ----------------------------------------
+
+	// ----------------------------------------
+	// Begin inline of the emitCopy call.
+	//
+	// d += emitCopy(dst[d:], base-candidate, s-base)
+
+	// !!! length := s - base
+	MOVQ SI, AX
+	SUBQ R12, AX
+
+inlineEmitCopyLoop0:
+	// for length >= 68 { etc }
+	CMPL AX, $68
+	JLT  inlineEmitCopyStep1
+
+	// Emit a length 64 copy, encoded as 3 bytes.
+	MOVB $0xfe, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $64, AX
+	JMP  inlineEmitCopyLoop0
+
+inlineEmitCopyStep1:
+	// if length > 64 { etc }
+	CMPL AX, $64
+	JLE  inlineEmitCopyStep2
+
+	// Emit a length 60 copy, encoded as 3 bytes.
+	MOVB $0xee, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+	SUBL $60, AX
+
+inlineEmitCopyStep2:
+	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
+	CMPL AX, $12
+	JGE  inlineEmitCopyStep3
+	CMPL R11, $2048
+	JGE  inlineEmitCopyStep3
+
+	// Emit the remaining copy, encoded as 2 bytes.
+	MOVB R11, 1(DI)
+	SHRL $8, R11
+	SHLB $5, R11
+	SUBB $4, AX
+	SHLB $2, AX
+	ORB  AX, R11
+	ORB  $1, R11
+	MOVB R11, 0(DI)
+	ADDQ $2, DI
+	JMP  inlineEmitCopyEnd
+
+inlineEmitCopyStep3:
+	// Emit the remaining copy, encoded as 3 bytes.
+	SUBL $1, AX
+	SHLB $2, AX
+	ORB  $2, AX
+	MOVB AX, 0(DI)
+	MOVW R11, 1(DI)
+	ADDQ $3, DI
+
+inlineEmitCopyEnd:
+	// End inline of the emitCopy call.
+	// ----------------------------------------
+
+	// nextEmit = s
+	MOVQ SI, R10
+
+	// if s >= sLimit { goto emitRemainder }
+	MOVQ SI, AX
+	SUBQ DX, AX
+	CMPQ AX, R9
+	JAE  emitRemainder
+
+	// As per the encode_other.go code:
+	//
+	// We could immediately etc.
+
+	// x := load64(src, s-1)
+	MOVQ -1(SI), R14
+
+	// prevHash := hash(uint32(x>>0), shift)
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// table[prevHash] = uint16(s-1)
+	MOVQ SI, AX
+	SUBQ DX, AX
+	SUBQ $1, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// currHash := hash(uint32(x>>8), shift)
+	SHRQ  $8, R14
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// candidate = int(table[currHash])
+	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
+	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
+	BYTE $0x4e
+	BYTE $0x0f
+	BYTE $0xb7
+	BYTE $0x7c
+	BYTE $0x5c
+	BYTE $0x78
+
+	// table[currHash] = uint16(s)
+	ADDQ $1, AX
+
+	// XXX: MOVW AX, table-32768(SP)(R11*2)
+	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
+	BYTE $0x66
+	BYTE $0x42
+	BYTE $0x89
+	BYTE $0x44
+	BYTE $0x5c
+	BYTE $0x78
+
+	// if uint32(x>>8) == load32(src, candidate) { continue }
+	MOVL (DX)(R15*1), BX
+	CMPL R14, BX
+	JEQ  inner1
+
+	// nextHash = hash(uint32(x>>16), shift)
+	SHRQ  $8, R14
+	MOVL  R14, R11
+	IMULL $0x1e35a7bd, R11
+	SHRL  CX, R11
+
+	// s++
+	ADDQ $1, SI
+
+	// break out of the inner1 for loop, i.e. continue the outer loop.
+	JMP outer
+
+emitRemainder:
+	// if nextEmit < len(src) { etc }
+	MOVQ src_len+32(FP), AX
+	ADDQ DX, AX
+	CMPQ R10, AX
+	JEQ  encodeBlockEnd
+
+	// d += emitLiteral(dst[d:], src[nextEmit:])
+	//
+	// Push args.
+	MOVQ DI, 0(SP)
+	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative.
+	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative.
+	MOVQ R10, 24(SP)
+	SUBQ R10, AX
+	MOVQ AX, 32(SP)
+	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative.
+
+	// Spill local variables (registers) onto the stack; call; unspill.
+	MOVQ DI, 80(SP)
+	CALL ·emitLiteral(SB)
+	MOVQ 80(SP), DI
+
+	// Finish the "d +=" part of "d += emitLiteral(etc)".
+	ADDQ 48(SP), DI
+
+encodeBlockEnd:
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, DI
+	MOVQ DI, d+48(FP)
+	RET
--- a/vendor/github.com/klauspost/compress/snappy/encode_other.go
+++ b/vendor/github.com/klauspost/compress/snappy/encode_other.go
@ -0,0 +1,238 @@
+// Copyright 2016 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 appengine !gc noasm
+
+package snappy
+
+func load32(b []byte, i int) uint32 {
+	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func load64(b []byte, i int) uint64 {
+	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+// emitLiteral writes a literal chunk and returns the number of bytes written.
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= len(lit) && len(lit) <= 65536
+func emitLiteral(dst, lit []byte) int {
+	i, n := 0, uint(len(lit)-1)
+	switch {
+	case n < 60:
+		dst[0] = uint8(n)<<2 | tagLiteral
+		i = 1
+	case n < 1<<8:
+		dst[0] = 60<<2 | tagLiteral
+		dst[1] = uint8(n)
+		i = 2
+	default:
+		dst[0] = 61<<2 | tagLiteral
+		dst[1] = uint8(n)
+		dst[2] = uint8(n >> 8)
+		i = 3
+	}
+	return i + copy(dst[i:], lit)
+}
+
+// emitCopy writes a copy chunk and returns the number of bytes written.
+//
+// It assumes that:
+//	dst is long enough to hold the encoded bytes
+//	1 <= offset && offset <= 65535
+//	4 <= length && length <= 65535
+func emitCopy(dst []byte, offset, length int) int {
+	i := 0
+	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
+	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
+	// length emitted down below is is a little lower (at 60 = 64 - 4), because
+	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
+	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
+	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
+	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
+	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
+	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
+	for length >= 68 {
+		// Emit a length 64 copy, encoded as 3 bytes.
+		dst[i+0] = 63<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 64
+	}
+	if length > 64 {
+		// Emit a length 60 copy, encoded as 3 bytes.
+		dst[i+0] = 59<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		i += 3
+		length -= 60
+	}
+	if length >= 12 || offset >= 2048 {
+		// Emit the remaining copy, encoded as 3 bytes.
+		dst[i+0] = uint8(length-1)<<2 | tagCopy2
+		dst[i+1] = uint8(offset)
+		dst[i+2] = uint8(offset >> 8)
+		return i + 3
+	}
+	// Emit the remaining copy, encoded as 2 bytes.
+	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
+	dst[i+1] = uint8(offset)
+	return i + 2
+}
+
+// extendMatch returns the largest k such that k <= len(src) and that
+// src[i:i+k-j] and src[j:k] have the same contents.
+//
+// It assumes that:
+//	0 <= i && i < j && j <= len(src)
+func extendMatch(src []byte, i, j int) int {
+	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
+	}
+	return j
+}
+
+func hash(u, shift uint32) uint32 {
+	return (u * 0x1e35a7bd) >> shift
+}
+
+// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
+// assumes that the varint-encoded length of the decompressed bytes has already
+// been written.
+//
+// It also assumes that:
+//	len(dst) >= MaxEncodedLen(len(src)) &&
+// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
+func encodeBlock(dst, src []byte) (d int) {
+	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
+	// The table element type is uint16, as s < sLimit and sLimit < len(src)
+	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
+	const (
+		maxTableSize = 1 << 14
+		// tableMask is redundant, but helps the compiler eliminate bounds
+		// checks.
+		tableMask = maxTableSize - 1
+	)
+	shift := uint32(32 - 8)
+	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+		shift--
+	}
+	// In Go, all array elements are zero-initialized, so there is no advantage
+	// to a smaller tableSize per se. However, it matches the C++ algorithm,
+	// and in the asm versions of this code, we can get away with zeroing only
+	// the first tableSize elements.
+	var table [maxTableSize]uint16
+
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := hash(load32(src, s), shift)
+
+	for {
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned (or skipped), look at every third byte, etc.. When a match
+		// is found, immediately go back to looking at every byte. This is a
+		// small loss (~5% performance, ~0.1% density) for compressible data
+		// due to more bookkeeping, but for non-compressible data (such as
+		// JPEG) it's a huge win since the compressor quickly "realizes" the
+		// data is incompressible and doesn't bother looking for matches
+		// everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip := 32
+
+		nextS := s
+		candidate := 0
+		for {
+			s = nextS
+			bytesBetweenHashLookups := skip >> 5
+			nextS = s + bytesBetweenHashLookups
+			skip += bytesBetweenHashLookups
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = int(table[nextHash&tableMask])
+			table[nextHash&tableMask] = uint16(s)
+			nextHash = hash(load32(src, nextS), shift)
+			if load32(src, s) == load32(src, candidate) {
+				break
+			}
+		}
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		d += emitLiteral(dst[d:], src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+			base := s
+
+			// Extend the 4-byte match as long as possible.
+			//
+			// This is an inlined version of:
+			//	s = extendMatch(src, candidate+4, s+4)
+			s += 4
+			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
+			}
+
+			d += emitCopy(dst[d:], base-candidate, s-base)
+			nextEmit = s
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load64(src, s-1)
+			prevHash := hash(uint32(x>>0), shift)
+			table[prevHash&tableMask] = uint16(s - 1)
+			currHash := hash(uint32(x>>8), shift)
+			candidate = int(table[currHash&tableMask])
+			table[currHash&tableMask] = uint16(s)
+			if uint32(x>>8) != load32(src, candidate) {
+				nextHash = hash(uint32(x>>16), shift)
+				s++
+				break
+			}
+		}
+	}
+
+emitRemainder:
+	if nextEmit < len(src) {
+		d += emitLiteral(dst[d:], src[nextEmit:])
+	}
+	return d
+}
--- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd
+++ b/vendor/github.com/klauspost/compress/snappy/runbench.cmd
@ -0,0 +1,2 @@
+del old.txt
+go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt
--- a/vendor/github.com/klauspost/compress/snappy/snappy.go
+++ b/vendor/github.com/klauspost/compress/snappy/snappy.go
@ -0,0 +1,98 @@
+// Copyright 2011 The Snappy-Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package snappy implements the Snappy compression format. It aims for very
+// high speeds and reasonable compression.
+//
+// There are actually two Snappy formats: block and stream. They are related,
+// but different: trying to decompress block-compressed data as a Snappy stream
+// will fail, and vice versa. The block format is the Decode and Encode
+// functions and the stream format is the Reader and Writer types.
+//
+// The block format, the more common case, is used when the complete size (the
+// number of bytes) of the original data is known upfront, at the time
+// compression starts. The stream format, also known as the framing format, is
+// for when that isn't always true.
+//
+// The canonical, C++ implementation is at https://github.com/google/snappy and
+// it only implements the block format.
+package snappy
+
+import (
+	"hash/crc32"
+)
+
+/*
+Each encoded block begins with the varint-encoded length of the decoded data,
+followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
+first byte of each chunk is broken into its 2 least and 6 most significant bits
+called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
+Zero means a literal tag. All other values mean a copy tag.
+
+For literal tags:
+  - If m < 60, the next 1 + m bytes are literal bytes.
+  - Otherwise, let n be the little-endian unsigned integer denoted by the next
+    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
+
+For copy tags, length bytes are copied from offset bytes ago, in the style of
+Lempel-Ziv compression algorithms. In particular:
+  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
+    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
+    of the offset. The next byte is bits 0-7 of the offset.
+  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
+    The length is 1 + m. The offset is the little-endian unsigned integer
+    denoted by the next 2 bytes.
+  - For l == 3, this tag is a legacy format that is no longer issued by most
+    encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
+    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
+    integer denoted by the next 4 bytes.
+*/
+const (
+	tagLiteral = 0x00
+	tagCopy1   = 0x01
+	tagCopy2   = 0x02
+	tagCopy4   = 0x03
+)
+
+const (
+	checksumSize    = 4
+	chunkHeaderSize = 4
+	magicChunk      = "\xff\x06\x00\x00" + magicBody
+	magicBody       = "sNaPpY"
+
+	// maxBlockSize is the maximum size of the input to encodeBlock. It is not
+	// part of the wire format per se, but some parts of the encoder assume
+	// that an offset fits into a uint16.
+	//
+	// Also, for the framing format (Writer type instead of Encode function),
+	// https://github.com/google/snappy/blob/master/framing_format.txt says
+	// that "the uncompressed data in a chunk must be no longer than 65536
+	// bytes".
+	maxBlockSize = 65536
+
+	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
+	// hard coded to be a const instead of a variable, so that obufLen can also
+	// be a const. Their equivalence is confirmed by
+	// TestMaxEncodedLenOfMaxBlockSize.
+	maxEncodedLenOfMaxBlockSize = 76490
+
+	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
+	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
+)
+
+const (
+	chunkTypeCompressedData   = 0x00
+	chunkTypeUncompressedData = 0x01
+	chunkTypePadding          = 0xfe
+	chunkTypeStreamIdentifier = 0xff
+)
+
+var crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+// crc implements the checksum specified in section 3 of
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func crc(b []byte) uint32 {
+	c := crc32.Update(0, crcTable, b)
+	return uint32(c>>15|c<<17) + 0xa282ead8
+}
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -0,0 +1,383 @@
+# zstd 
+
+[Zstandard](https://facebook.github.io/zstd/) is a real-time compression algorithm, providing high compression ratios. 
+It offers a very wide range of compression / speed trade-off, while being backed by a very fast decoder.
+A high performance compression algorithm is implemented. For now focused on speed. 
+
+This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. 
+Note that custom dictionaries are not supported yet, so if your code relies on that, 
+you cannot use the package as-is.
+
+This package is pure Go and without use of "unsafe". 
+If a significant speedup can be achieved using "unsafe", it may be added as an option later.
+
+The `zstd` package is provided as open source software using a Go standard license.
+
+Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
+
+## Installation
+
+Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
+
+Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
+
+
+## Compressor
+
+### Status: 
+
+BETA - there may still be subtle bugs, but a wide variety of content has been tested. 
+There may still be implementation specific stuff in regards to error handling that could lead to edge cases. 
+
+For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. 
+
+The "Fastest" compression ratio is roughly equivalent to zstd level 1. 
+The "Default" compression ration is roughly equivalent to zstd level 3 (default).
+
+In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
+
+Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
+
+ 
+### Usage
+
+An Encoder can be used for either compressing a stream via the
+`io.WriteCloser` interface supported by the Encoder or as multiple independent
+tasks via the `EncodeAll` function.
+Smaller encodes are encouraged to use the EncodeAll function.
+Use `NewWriter` to create a new instance that can be used for both.
+
+To create a writer with default options, do like this:
+
+```Go
+// Compress input to output.
+func Compress(in io.Reader, out io.Writer) error {
+    w, err := NewWriter(output)
+    if err != nil {
+        return err
+    }
+    _, err := io.Copy(w, input)
+    if err != nil {
+        enc.Close()
+        return err
+    }
+    return enc.Close()
+}
+```
+
+Now you can encode by writing data to `enc`. The output will be finished writing when `Close()` is called.
+Even if your encode fails, you should still call `Close()` to release any resources that may be held up.  
+
+The above is fine for big encodes. However, whenever possible try to *reuse* the writer.
+
+To reuse the encoder, you can use the `Reset(io.Writer)` function to change to another output. 
+This will allow the encoder to reuse all resources and avoid wasteful allocations. 
+
+Currently stream encoding has 'light' concurrency, meaning up to 2 goroutines can be working on part 
+of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is likely to change 
+in the future. So if you want to limit concurrency for future updates, specify the concurrency
+you would like.
+
+You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined 
+compression settings can be specified.
+
+#### Future Compatibility Guarantees
+
+This will be an evolving project. When using this package it is important to note that both the compression efficiency and speed may change.
+
+The goal will be to keep the default efficiency at the default zstd (level 3). 
+However the encoding should never be assumed to remain the same, 
+and you should not use hashes of compressed output for similarity checks.
+
+The Encoder can be assumed to produce the same output from the exact same code version.
+However, the may be modes in the future that break this, 
+although they will not be enabled without an explicit option.   
+
+This encoder is not designed to (and will probably never) output the exact same bitstream as the reference encoder.
+
+Also note, that the cgo decompressor currently does not [report all errors on invalid input](https://github.com/DataDog/zstd/issues/59),
+[omits error checks](https://github.com/DataDog/zstd/issues/61), [ignores checksums](https://github.com/DataDog/zstd/issues/43) 
+and seems to ignore concatenated streams, even though [it is part of the spec](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames).
+
+#### Blocks
+
+For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`.
+
+`EncodeAll` will encode all input in src and append it to dst.
+This function can be called concurrently, but each call will only run on a single goroutine.
+
+Encoded blocks can be concatenated and the result will be the combined input stream.
+Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`.
+
+Especially when encoding blocks you should take special care to reuse the encoder. 
+This will effectively make it run without allocations after a warmup period. 
+To make it run completely without allocations, supply a destination buffer with space for all content.   
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+// Create a writer that caches compressors.
+// For this operation type we supply a nil Reader.
+var encoder, _ = zstd.NewWriter(nil)
+
+// Compress a buffer. 
+// If you have a destination buffer, the allocation in the call can also be eliminated.
+func Compress(src []byte) []byte {
+    return encoder.EncodeAll(src, make([]byte, 0, len(src)))
+} 
+```
+
+You can control the maximum number of concurrent encodes using the `WithEncoderConcurrency(n)` 
+option when creating the writer.
+
+Using the Encoder for both a stream and individual blocks concurrently is safe. 
+
+### Performance
+
+I have collected some speed examples to compare speed and compression against other compressors.
+
+* `file` is the input file.
+* `out` is the compressor used. `zskp` is this package. `gzstd` is gzip standard library. `zstd` is the Datadog cgo library.
+* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default".
+* `insize`/`outsize` is the input/output size.
+* `millis` is the number of milliseconds used for compression.
+* `mb/s` is megabytes (2^20 bytes) per second.
+
+```
+The test data for the Large Text Compression Benchmark is the first
+10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
+http://mattmahoney.net/dc/textdata.html
+
+file    out     level   insize  outsize     millis  mb/s
+enwik9  zskp    1   1000000000  343833033   5840    163.30
+enwik9  zskp    2   1000000000  317822183   8449    112.87
+enwik9  gzstd   1   1000000000  382578136   13627   69.98
+enwik9  gzstd   3   1000000000  349139651   22344   42.68
+enwik9  zstd    1   1000000000  357416379   4838    197.12
+enwik9  zstd    3   1000000000  313734522   7556    126.21
+
+GOB stream of binary data. Highly compressible.
+https://files.klauspost.com/compress/gob-stream.7z
+
+file        out level   insize      outsize     millis  mb/s
+gob-stream  zskp    1   1911399616  234981983   5100    357.42
+gob-stream  zskp    2   1911399616  208674003   6698    272.15
+gob-stream  gzstd   1   1911399616  357382641   14727   123.78
+gob-stream  gzstd   3   1911399616  327835097   17005   107.19
+gob-stream  zstd    1   1911399616  250787165   4075    447.22
+gob-stream  zstd    3   1911399616  208191888   5511    330.77
+
+Highly compressible JSON file. Similar to logs in a lot of ways.
+https://files.klauspost.com/compress/adresser.001.gz
+
+file            out level   insize      outsize     millis  mb/s
+adresser.001    zskp    1   1073741824  18510122    1477    692.83
+adresser.001    zskp    2   1073741824  19831697    1705    600.59
+adresser.001    gzstd   1   1073741824  47755503    3079    332.47
+adresser.001    gzstd   3   1073741824  40052381    3051    335.63
+adresser.001    zstd    1   1073741824  16135896    994     1030.18
+adresser.001    zstd    3   1073741824  17794465    905     1131.49
+
+VM Image, Linux mint with a few installed applications:
+https://files.klauspost.com/compress/rawstudio-mint14.7z
+
+file    out level   insize  outsize millis  mb/s
+rawstudio-mint14.tar    zskp    1   8558382592  3648168838  33398   244.38
+rawstudio-mint14.tar    zskp    2   8558382592  3376721436  50962   160.16
+rawstudio-mint14.tar    gzstd   1   8558382592  3926257486  84712   96.35
+rawstudio-mint14.tar    gzstd   3   8558382592  3740711978  176344  46.28
+rawstudio-mint14.tar    zstd    1   8558382592  3607859742  27903   292.51
+rawstudio-mint14.tar    zstd    3   8558382592  3341710879  46700   174.77
+
+
+The test data is designed to test archivers in realistic backup scenarios.
+http://mattmahoney.net/dc/10gb.html
+
+file    out level   insize  outsize millis  mb/s
+10gb.tar    zskp    1   10065157632 4883149814  45715   209.97
+10gb.tar    zskp    2   10065157632 4638110010  60970   157.44
+10gb.tar    gzstd   1   10065157632 5198296126  97769   98.18
+10gb.tar    gzstd   3   10065157632 4932665487  313427  30.63
+10gb.tar    zstd    1   10065157632 4940796535  40391   237.65
+10gb.tar    zstd    3   10065157632 4638618579  52911   181.42
+
+Silesia Corpus:
+http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
+
+file    out level   insize  outsize millis  mb/s
+silesia.tar zskp    1   211947520   73025800    1108    182.26
+silesia.tar zskp    2   211947520   67674684    1599    126.41
+silesia.tar gzstd   1   211947520   80007735    2515    80.37
+silesia.tar gzstd   3   211947520   73133380    4259    47.45
+silesia.tar zstd    1   211947520   73513991    933     216.64
+silesia.tar zstd    3   211947520   66793301    1377    146.79
+```
+
+### Converters
+
+As part of the development process a *Snappy* -> *Zstandard* converter was also built.
+
+This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. Note that a single block is not framed.
+
+Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
+Therefore the compression ratio is much less than what can be done by a full decompression
+and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without
+any errors being generated.
+No CRC value is being generated and not all CRC values of the Snappy stream are checked.
+However, it provides really fast re-compression of Snappy streams.
+
+
+```
+BenchmarkSnappy_ConvertSilesia-8           1  1156001600 ns/op   183.35 MB/s
+Snappy len 103008711 -> zstd len 82687318
+
+BenchmarkSnappy_Enwik9-8           1  6472998400 ns/op   154.49 MB/s
+Snappy len 508028601 -> zstd len 390921079
+```
+
+
+```Go
+    s := zstd.SnappyConverter{}
+    n, err = s.Convert(input, output)
+    if err != nil {
+        fmt.Println("Re-compressed stream to", n, "bytes")
+    }
+```
+
+The converter `s` can be reused to avoid allocations, even after errors.
+
+
+## Decompressor
+
+STATUS: Release Candidate - there may still be subtle bugs, but a wide variety of content has been tested.
+
+ 
+### Usage
+
+The package has been designed for two main usages, big streams of data and smaller in-memory buffers. 
+There are two main usages of the package for these. Both of them are accessed by creating a `Decoder`.
+
+For streaming use a simple setup could look like this:
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+func Decompress(in io.Reader, out io.Writer) error {
+    d, err := zstd.NewReader(input)
+    if err != nil {
+        return err
+    }
+    defer d.Close()
+    
+    // Copy content...
+    _, err := io.Copy(out, d)
+    return err
+}
+```
+
+It is important to use the "Close" function when you no longer need the Reader to stop running goroutines. 
+See "Allocation-less operation" below.
+
+For decoding buffers, it could look something like this:
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+// Create a reader that caches decompressors.
+// For this operation type we supply a nil Reader.
+var decoder, _ = zstd.NewReader(nil)
+
+// Decompress a buffer. We don't supply a destination buffer,
+// so it will be allocated by the decoder.
+func Decompress(src []byte) ([]byte, error) {
+    return decoder.DecodeAll(src, nil)
+} 
+```
+
+Both of these cases should provide the functionality needed. 
+The decoder can be used for *concurrent* decompression of multiple buffers. 
+It will only allow a certain number of concurrent operations to run. 
+To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.   
+
+### Allocation-less operation
+
+The decoder has been designed to operate without allocations after a warmup. 
+
+This means that you should *store* the decoder for best performance. 
+To re-use a stream decoder, use the `Reset(r io.Reader) error` to switch to another stream.
+A decoder can safely be re-used even if the previous stream failed.
+
+To release the resources, you must call the `Close()` function on a decoder.
+After this it can *no longer be reused*, but all running goroutines will be stopped.
+So you *must* use this if you will no longer need the Reader.
+
+For decompressing smaller buffers a single decoder can be used.
+When decoding buffers, you can supply a destination slice with length 0 and your expected capacity.
+In this case no unneeded allocations should be made. 
+
+### Concurrency
+
+The buffer decoder does everything on the same goroutine and does nothing concurrently.
+It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that.
+
+The stream decoder operates on
+
+* One goroutine reads input and splits the input to several block decoders.
+* A number of decoders will decode blocks.
+* A goroutine coordinates these blocks and sends history from one to the next.
+
+So effectively this also means the decoder will "read ahead" and prepare data to always be available for output.
+
+Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency.
+
+In practice this means that concurrency is often limited to utilizing about 2 cores effectively.
+ 
+ 
+### Benchmarks
+
+These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
+
+The first two are streaming decodes and the last are smaller inputs. 
+ 
+```
+BenchmarkDecoderSilesia-8             20       642550210 ns/op   329.85 MB/s      3101 B/op        8 allocs/op
+BenchmarkDecoderSilesiaCgo-8         100       384930000 ns/op   550.61 MB/s    451878 B/op     9713 allocs/op
+
+BenchmarkDecoderEnwik9-2              10        3146000080 ns/op         317.86 MB/s        2649 B/op          9 allocs/op
+BenchmarkDecoderEnwik9Cgo-2           20        1905900000 ns/op         524.69 MB/s     1125120 B/op      45785 allocs/op
+
+BenchmarkDecoder_DecodeAll/z000000.zst-8               200     7049994 ns/op   138.26 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000001.zst-8            100000       19560 ns/op    97.49 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000002.zst-8              5000      297599 ns/op   236.99 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000003.zst-8              2000      725502 ns/op   141.17 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000004.zst-8            200000        9314 ns/op    54.54 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000005.zst-8             10000      137500 ns/op   104.72 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000006.zst-8               500     2316009 ns/op   206.06 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000007.zst-8             20000       64499 ns/op   344.90 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000008.zst-8             50000       24900 ns/op   219.56 MB/s        40 B/op        2 allocs/op
+BenchmarkDecoder_DecodeAll/z000009.zst-8              1000     2348999 ns/op   154.01 MB/s        40 B/op        2 allocs/op
+
+BenchmarkDecoder_DecodeAllCgo/z000000.zst-8            500     4268005 ns/op   228.38 MB/s   1228849 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000001.zst-8         100000       15250 ns/op   125.05 MB/s      2096 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000002.zst-8          10000      147399 ns/op   478.49 MB/s     73776 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000003.zst-8           5000      320798 ns/op   319.27 MB/s    139312 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000004.zst-8         200000       10004 ns/op    50.77 MB/s       560 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000005.zst-8          20000       73599 ns/op   195.64 MB/s     19120 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000006.zst-8           1000     1119003 ns/op   426.48 MB/s    557104 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000007.zst-8          20000      103450 ns/op   215.04 MB/s     71296 B/op        9 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000008.zst-8         100000       20130 ns/op   271.58 MB/s      6192 B/op        3 allocs/op
+BenchmarkDecoder_DecodeAllCgo/z000009.zst-8           2000     1123500 ns/op   322.00 MB/s    368688 B/op        3 allocs/op
+```
+
+This reflects the performance around May 2019, but this may be out of date.
+
+# Contributions
+
+Contributions are always welcome. 
+For new features/fixes, remember to add tests and for performance enhancements include benchmarks.
+
+For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
+
+For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).
+
+This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@ -0,0 +1,121 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"errors"
+	"io"
+	"math/bits"
+)
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+	in       []byte
+	off      uint   // next byte to read is at in[off - 1]
+	value    uint64 // Maybe use [16]byte, but shifting is awkward.
+	bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReader) init(in []byte) error {
+	if len(in) < 1 {
+		return errors.New("corrupt stream: too short")
+	}
+	b.in = in
+	b.off = uint(len(in))
+	// The highest bit of the last byte indicates where to start
+	v := in[len(in)-1]
+	if v == 0 {
+		return errors.New("corrupt stream, did not find end of stream")
+	}
+	b.bitsRead = 64
+	b.value = 0
+	b.fill()
+	b.fill()
+	b.bitsRead += 8 - uint8(highBits(uint32(v)))
+	return nil
+}
+
+// getBits will return n bits. n can be 0.
+func (b *bitReader) getBits(n uint8) int {
+	if n == 0 /*|| b.bitsRead >= 64 */ {
+		return 0
+	}
+	return b.getBitsFast(n)
+}
+
+// getBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) getBitsFast(n uint8) int {
+	const regMask = 64 - 1
+	v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
+	b.bitsRead += n
+	return int(v)
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReader) fillFast() {
+	if b.bitsRead < 32 {
+		return
+	}
+	// Do single re-slice to avoid bounds checks.
+	v := b.in[b.off-4 : b.off]
+	low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+	b.value = (b.value << 32) | uint64(low)
+	b.bitsRead -= 32
+	b.off -= 4
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReader) fill() {
+	if b.bitsRead < 32 {
+		return
+	}
+	if b.off >= 4 {
+		v := b.in[b.off-4 : b.off]
+		low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+		b.value = (b.value << 32) | uint64(low)
+		b.bitsRead -= 32
+		b.off -= 4
+		return
+	}
+	for b.off > 0 {
+		b.value = (b.value << 8) | uint64(b.in[b.off-1])
+		b.bitsRead -= 8
+		b.off--
+	}
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReader) finished() bool {
+	return b.off == 0 && b.bitsRead >= 64
+}
+
+// overread returns true if more bits have been requested than is on the stream.
+func (b *bitReader) overread() bool {
+	return b.bitsRead > 64
+}
+
+// remain returns the number of bits remaining.
+func (b *bitReader) remain() uint {
+	return b.off*8 + 64 - uint(b.bitsRead)
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReader) close() error {
+	// Release reference.
+	b.in = nil
+	if b.bitsRead > 64 {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
+
+func highBits(val uint32) (n uint32) {
+	return uint32(bits.Len32(val) - 1)
+}
--- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
@ -0,0 +1,169 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package zstd
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+	bitContainer uint64
+	nBits        uint8
+	out          []byte
+}
+
+// bitMask16 is bitmasks. Has extra to avoid bounds check.
+var bitMask16 = [32]uint16{
+	0, 1, 3, 7, 0xF, 0x1F,
+	0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+	0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+	0xFFFF, 0xFFFF} /* up to 16 bits */
+
+var bitMask32 = [32]uint32{
+	0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+	0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+	0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 32 bits
+
+// addBits16NC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits32NC will add up to 32 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
+	b.bitContainer |= uint64(value&bitMask32[bits&31]) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// flush will flush all pending full bytes.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing.
+func (b *bitWriter) flush() {
+	v := b.nBits >> 3
+	switch v {
+	case 0:
+	case 1:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+		)
+	case 2:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+		)
+	case 3:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+		)
+	case 4:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+		)
+	case 5:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+		)
+	case 6:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+		)
+	case 7:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+		)
+	case 8:
+		b.out = append(b.out,
+			byte(b.bitContainer),
+			byte(b.bitContainer>>8),
+			byte(b.bitContainer>>16),
+			byte(b.bitContainer>>24),
+			byte(b.bitContainer>>32),
+			byte(b.bitContainer>>40),
+			byte(b.bitContainer>>48),
+			byte(b.bitContainer>>56),
+		)
+	default:
+		panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+	}
+	b.bitContainer >>= v << 3
+	b.nBits &= 7
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing.
+func (b *bitWriter) flush32() {
+	if b.nBits < 32 {
+		return
+	}
+	b.out = append(b.out,
+		byte(b.bitContainer),
+		byte(b.bitContainer>>8),
+		byte(b.bitContainer>>16),
+		byte(b.bitContainer>>24))
+	b.nBits -= 32
+	b.bitContainer >>= 32
+}
+
+// flushAlign will flush remaining full bytes and align to next byte boundary.
+func (b *bitWriter) flushAlign() {
+	nbBytes := (b.nBits + 7) >> 3
+	for i := uint8(0); i < nbBytes; i++ {
+		b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+	}
+	b.nBits = 0
+	b.bitContainer = 0
+}
+
+// close will write the alignment bit and write the final byte(s)
+// to the output.
+func (b *bitWriter) close() error {
+	// End mark
+	b.addBits16Clean(1, 1)
+	// flush until next byte.
+	b.flushAlign()
+	return nil
+}
+
+// reset and continue writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+	b.bitContainer = 0
+	b.nBits = 0
+	b.out = out
+}
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@ -0,0 +1,708 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"sync"
+
+	"github.com/klauspost/compress/huff0"
+)
+
+type blockType uint8
+
+//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
+
+const (
+	blockTypeRaw blockType = iota
+	blockTypeRLE
+	blockTypeCompressed
+	blockTypeReserved
+)
+
+type literalsBlockType uint8
+
+const (
+	literalsBlockRaw literalsBlockType = iota
+	literalsBlockRLE
+	literalsBlockCompressed
+	literalsBlockTreeless
+)
+
+const (
+	// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
+	maxCompressedBlockSize = 128 << 10
+
+	// Maximum possible block size (all Raw+Uncompressed).
+	maxBlockSize = (1 << 21) - 1
+
+	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
+	maxCompressedLiteralSize = 1 << 18
+	maxRLELiteralSize        = 1 << 20
+	maxMatchLen              = 131074
+	maxSequences             = 0x7f00 + 0xffff
+
+	// We support slightly less than the reference decoder to be able to
+	// use ints on 32 bit archs.
+	maxOffsetBits = 30
+)
+
+var (
+	huffDecoderPool = sync.Pool{New: func() interface{} {
+		return &huff0.Scratch{}
+	}}
+
+	fseDecoderPool = sync.Pool{New: func() interface{} {
+		return &fseDecoder{}
+	}}
+)
+
+type blockDec struct {
+	// Raw source data of the block.
+	data []byte
+
+	// Destination of the decoded data.
+	dst []byte
+
+	// Buffer for literals data.
+	literalBuf []byte
+
+	// Window size of the block.
+	WindowSize uint64
+	Type       blockType
+	RLESize    uint32
+
+	// Is this the last block of a frame?
+	Last bool
+
+	// Use less memory
+	lowMem      bool
+	history     chan *history
+	input       chan struct{}
+	result      chan decodeOutput
+	sequenceBuf []seq
+	tmp         [4]byte
+	err         error
+}
+
+func (b *blockDec) String() string {
+	if b == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
+}
+
+func newBlockDec(lowMem bool) *blockDec {
+	b := blockDec{
+		lowMem:  lowMem,
+		result:  make(chan decodeOutput, 1),
+		input:   make(chan struct{}, 1),
+		history: make(chan *history, 1),
+	}
+	go b.startDecoder()
+	return &b
+}
+
+// reset will reset the block.
+// Input must be a start of a block and will be at the end of the block when returned.
+func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
+	b.WindowSize = windowSize
+	tmp := br.readSmall(3)
+	if tmp == nil {
+		if debug {
+			println("Reading block header:", io.ErrUnexpectedEOF)
+		}
+		return io.ErrUnexpectedEOF
+	}
+	bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
+	b.Last = bh&1 != 0
+	b.Type = blockType((bh >> 1) & 3)
+	// find size.
+	cSize := int(bh >> 3)
+	switch b.Type {
+	case blockTypeReserved:
+		return ErrReservedBlockType
+	case blockTypeRLE:
+		b.RLESize = uint32(cSize)
+		cSize = 1
+	case blockTypeCompressed:
+		if debug {
+			println("Data size on stream:", cSize)
+		}
+		b.RLESize = 0
+		if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
+			if debug {
+				printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
+			}
+			return ErrCompressedSizeTooBig
+		}
+	default:
+		b.RLESize = 0
+	}
+
+	// Read block data.
+	if cap(b.data) < cSize {
+		if b.lowMem {
+			b.data = make([]byte, 0, cSize)
+		} else {
+			b.data = make([]byte, 0, maxBlockSize)
+		}
+	}
+	if cap(b.dst) <= maxBlockSize {
+		b.dst = make([]byte, 0, maxBlockSize+1)
+	}
+	var err error
+	b.data, err = br.readBig(cSize, b.data[:0])
+	if err != nil {
+		if debug {
+			println("Reading block:", err)
+		}
+		return err
+	}
+	return nil
+}
+
+// sendEOF will make the decoder send EOF on this frame.
+func (b *blockDec) sendErr(err error) {
+	b.Last = true
+	b.Type = blockTypeReserved
+	b.err = err
+	b.input <- struct{}{}
+}
+
+// Close will release resources.
+// Closed blockDec cannot be reset.
+func (b *blockDec) Close() {
+	close(b.input)
+	close(b.history)
+	close(b.result)
+}
+
+// decodeAsync will prepare decoding the block when it receives input.
+// This will separate output and history.
+func (b *blockDec) startDecoder() {
+	for range b.input {
+		//println("blockDec: Got block input")
+		switch b.Type {
+		case blockTypeRLE:
+			if cap(b.dst) < int(b.RLESize) {
+				if b.lowMem {
+					b.dst = make([]byte, b.RLESize)
+				} else {
+					b.dst = make([]byte, maxBlockSize)
+				}
+			}
+			o := decodeOutput{
+				d:   b,
+				b:   b.dst[:b.RLESize],
+				err: nil,
+			}
+			v := b.data[0]
+			for i := range o.b {
+				o.b[i] = v
+			}
+			hist := <-b.history
+			hist.append(o.b)
+			b.result <- o
+		case blockTypeRaw:
+			o := decodeOutput{
+				d:   b,
+				b:   b.data,
+				err: nil,
+			}
+			hist := <-b.history
+			hist.append(o.b)
+			b.result <- o
+		case blockTypeCompressed:
+			b.dst = b.dst[:0]
+			err := b.decodeCompressed(nil)
+			o := decodeOutput{
+				d:   b,
+				b:   b.dst,
+				err: err,
+			}
+			if debug {
+				println("Decompressed to", len(b.dst), "bytes, error:", err)
+			}
+			b.result <- o
+		case blockTypeReserved:
+			// Used for returning errors.
+			<-b.history
+			b.result <- decodeOutput{
+				d:   b,
+				b:   nil,
+				err: b.err,
+			}
+		default:
+			panic("Invalid block type")
+		}
+		if debug {
+			println("blockDec: Finished block")
+		}
+	}
+}
+
+// decodeAsync will prepare decoding the block when it receives the history.
+// If history is provided, it will not fetch it from the channel.
+func (b *blockDec) decodeBuf(hist *history) error {
+	switch b.Type {
+	case blockTypeRLE:
+		if cap(b.dst) < int(b.RLESize) {
+			if b.lowMem {
+				b.dst = make([]byte, b.RLESize)
+			} else {
+				b.dst = make([]byte, maxBlockSize)
+			}
+		}
+		b.dst = b.dst[:b.RLESize]
+		v := b.data[0]
+		for i := range b.dst {
+			b.dst[i] = v
+		}
+		hist.appendKeep(b.dst)
+		return nil
+	case blockTypeRaw:
+		hist.appendKeep(b.data)
+		return nil
+	case blockTypeCompressed:
+		saved := b.dst
+		b.dst = hist.b
+		hist.b = nil
+		err := b.decodeCompressed(hist)
+		if debug {
+			println("Decompressed to total", len(b.dst), "bytes, error:", err)
+		}
+		hist.b = b.dst
+		b.dst = saved
+		return err
+	case blockTypeReserved:
+		// Used for returning errors.
+		return b.err
+	default:
+		panic("Invalid block type")
+	}
+}
+
+// decodeCompressed will start decompressing a block.
+// If no history is supplied the decoder will decodeAsync as much as possible
+// before fetching from blockDec.history
+func (b *blockDec) decodeCompressed(hist *history) error {
+	in := b.data
+	delayedHistory := hist == nil
+
+	if delayedHistory {
+		// We must always grab history.
+		defer func() {
+			if hist == nil {
+				<-b.history
+			}
+		}()
+	}
+	// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
+	if len(in) < 2 {
+		return ErrBlockTooSmall
+	}
+	litType := literalsBlockType(in[0] & 3)
+	var litRegenSize int
+	var litCompSize int
+	sizeFormat := (in[0] >> 2) & 3
+	var fourStreams bool
+	switch litType {
+	case literalsBlockRaw, literalsBlockRLE:
+		switch sizeFormat {
+		case 0, 2:
+			// Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
+			litRegenSize = int(in[0] >> 3)
+			in = in[1:]
+		case 1:
+			// Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
+			litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
+			in = in[2:]
+		case 3:
+			//  Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
+			if len(in) < 3 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
+			in = in[3:]
+		}
+	case literalsBlockCompressed, literalsBlockTreeless:
+		switch sizeFormat {
+		case 0, 1:
+			// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
+			if len(in) < 3 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
+			litRegenSize = int(n & 1023)
+			litCompSize = int(n >> 10)
+			fourStreams = sizeFormat == 1
+			in = in[3:]
+		case 2:
+			fourStreams = true
+			if len(in) < 4 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
+			litRegenSize = int(n & 16383)
+			litCompSize = int(n >> 14)
+			in = in[4:]
+		case 3:
+			fourStreams = true
+			if len(in) < 5 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
+			litRegenSize = int(n & 262143)
+			litCompSize = int(n >> 18)
+			in = in[5:]
+		}
+	}
+	if debug {
+		println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize", litCompSize)
+	}
+	var literals []byte
+	var huff *huff0.Scratch
+	switch litType {
+	case literalsBlockRaw:
+		if len(in) < litRegenSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
+			return ErrBlockTooSmall
+		}
+		literals = in[:litRegenSize]
+		in = in[litRegenSize:]
+		//printf("Found %d uncompressed literals\n", litRegenSize)
+	case literalsBlockRLE:
+		if len(in) < 1 {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
+			return ErrBlockTooSmall
+		}
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, litRegenSize)
+			} else {
+				if litRegenSize > maxCompressedLiteralSize {
+					// Exceptional
+					b.literalBuf = make([]byte, litRegenSize)
+				} else {
+					b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
+
+				}
+			}
+		}
+		literals = b.literalBuf[:litRegenSize]
+		v := in[0]
+		for i := range literals {
+			literals[i] = v
+		}
+		in = in[1:]
+		if debug {
+			printf("Found %d RLE compressed literals\n", litRegenSize)
+		}
+	case literalsBlockTreeless:
+		if len(in) < litCompSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
+			return ErrBlockTooSmall
+		}
+		// Store compressed literals, so we defer decoding until we get history.
+		literals = in[:litCompSize]
+		in = in[litCompSize:]
+		if debug {
+			printf("Found %d compressed literals\n", litCompSize)
+		}
+	case literalsBlockCompressed:
+		if len(in) < litCompSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
+			return ErrBlockTooSmall
+		}
+		literals = in[:litCompSize]
+		in = in[litCompSize:]
+
+		huff = huffDecoderPool.Get().(*huff0.Scratch)
+		var err error
+		// Ensure we have space to store it.
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, 0, litRegenSize)
+			} else {
+				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+			}
+		}
+		if huff == nil {
+			huff = &huff0.Scratch{}
+		}
+		huff.Out = b.literalBuf[:0]
+		huff, literals, err = huff0.ReadTable(literals, huff)
+		if err != nil {
+			println("reading huffman table:", err)
+			return err
+		}
+		// Use our out buffer.
+		huff.Out = b.literalBuf[:0]
+		if fourStreams {
+			literals, err = huff.Decompress4X(literals, litRegenSize)
+		} else {
+			literals, err = huff.Decompress1X(literals)
+		}
+		if err != nil {
+			println("decoding compressed literals:", err)
+			return err
+		}
+		// Make sure we don't leak our literals buffer
+		huff.Out = nil
+		if len(literals) != litRegenSize {
+			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+		}
+		if debug {
+			printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
+		}
+	}
+
+	// Decode Sequences
+	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
+	if len(in) < 1 {
+		return ErrBlockTooSmall
+	}
+	seqHeader := in[0]
+	nSeqs := 0
+	switch {
+	case seqHeader == 0:
+		in = in[1:]
+	case seqHeader < 128:
+		nSeqs = int(seqHeader)
+		in = in[1:]
+	case seqHeader < 255:
+		if len(in) < 2 {
+			return ErrBlockTooSmall
+		}
+		nSeqs = int(seqHeader-128)<<8 | int(in[1])
+		in = in[2:]
+	case seqHeader == 255:
+		if len(in) < 3 {
+			return ErrBlockTooSmall
+		}
+		nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
+		in = in[3:]
+	}
+	// Allocate sequences
+	if cap(b.sequenceBuf) < nSeqs {
+		if b.lowMem {
+			b.sequenceBuf = make([]seq, nSeqs)
+		} else {
+			// Allocate max
+			b.sequenceBuf = make([]seq, nSeqs, maxSequences)
+		}
+	} else {
+		// Reuse buffer
+		b.sequenceBuf = b.sequenceBuf[:nSeqs]
+	}
+	var seqs = &sequenceDecs{}
+	if nSeqs > 0 {
+		if len(in) < 1 {
+			return ErrBlockTooSmall
+		}
+		br := byteReader{b: in, off: 0}
+		compMode := br.Uint8()
+		br.advance(1)
+		if debug {
+			printf("Compression modes: 0b%b", compMode)
+		}
+		for i := uint(0); i < 3; i++ {
+			mode := seqCompMode((compMode >> (6 - i*2)) & 3)
+			if debug {
+				println("Table", tableIndex(i), "is", mode)
+			}
+			var seq *sequenceDec
+			switch tableIndex(i) {
+			case tableLiteralLengths:
+				seq = &seqs.litLengths
+			case tableOffsets:
+				seq = &seqs.offsets
+			case tableMatchLengths:
+				seq = &seqs.matchLengths
+			default:
+				panic("unknown table")
+			}
+			switch mode {
+			case compModePredefined:
+				seq.fse = &fsePredef[i]
+			case compModeRLE:
+				if br.remain() < 1 {
+					return ErrBlockTooSmall
+				}
+				v := br.Uint8()
+				br.advance(1)
+				dec := fseDecoderPool.Get().(*fseDecoder)
+				symb, err := decSymbolValue(v, symbolTableX[i])
+				if err != nil {
+					printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
+					return err
+				}
+				dec.setRLE(symb)
+				seq.fse = dec
+				if debug {
+					printf("RLE set to %+v, code: %v", symb, v)
+				}
+			case compModeFSE:
+				println("Reading table for", tableIndex(i))
+				dec := fseDecoderPool.Get().(*fseDecoder)
+				err := dec.readNCount(&br, uint16(maxTableSymbol[i]))
+				if err != nil {
+					println("Read table error:", err)
+					return err
+				}
+				err = dec.transform(symbolTableX[i])
+				if err != nil {
+					println("Transform table error:", err)
+					return err
+				}
+				if debug {
+					println("Read table ok", "symbolLen:", dec.symbolLen)
+				}
+				seq.fse = dec
+			case compModeRepeat:
+				seq.repeat = true
+			}
+			if br.overread() {
+				return io.ErrUnexpectedEOF
+			}
+		}
+		in = br.unread()
+	}
+
+	// Wait for history.
+	// All time spent after this is critical since it is strictly sequential.
+	if hist == nil {
+		hist = <-b.history
+		if hist.error {
+			return ErrDecoderClosed
+		}
+	}
+
+	// Decode treeless literal block.
+	if litType == literalsBlockTreeless {
+		// TODO: We could send the history early WITHOUT the stream history.
+		//   This would allow decoding treeless literials before the byte history is available.
+		//   Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
+		//   So not much obvious gain here.
+
+		if hist.huffTree == nil {
+			return errors.New("literal block was treeless, but no history was defined")
+		}
+		// Ensure we have space to store it.
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, 0, litRegenSize)
+			} else {
+				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+			}
+		}
+		var err error
+		// Use our out buffer.
+		huff = hist.huffTree
+		huff.Out = b.literalBuf[:0]
+		if fourStreams {
+			literals, err = huff.Decompress4X(literals, litRegenSize)
+		} else {
+			literals, err = huff.Decompress1X(literals)
+		}
+		// Make sure we don't leak our literals buffer
+		huff.Out = nil
+		if err != nil {
+			println("decompressing literals:", err)
+			return err
+		}
+		if len(literals) != litRegenSize {
+			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+		}
+	} else {
+		if hist.huffTree != nil && huff != nil {
+			huffDecoderPool.Put(hist.huffTree)
+			hist.huffTree = nil
+		}
+	}
+	if huff != nil {
+		huff.Out = nil
+		hist.huffTree = huff
+	}
+	if debug {
+		println("Final literals:", len(literals), "and", nSeqs, "sequences.")
+	}
+
+	if nSeqs == 0 {
+		// Decompressed content is defined entirely as Literals Section content.
+		b.dst = append(b.dst, literals...)
+		if delayedHistory {
+			hist.append(literals)
+		}
+		return nil
+	}
+
+	seqs, err := seqs.mergeHistory(&hist.decoders)
+	if err != nil {
+		return err
+	}
+	if debug {
+		println("History merged ok")
+	}
+	br := &bitReader{}
+	if err := br.init(in); err != nil {
+		return err
+	}
+
+	// TODO: Investigate if sending history without decoders are faster.
+	//   This would allow the sequences to be decoded async and only have to construct stream history.
+	//   If only recent offsets were not transferred, this would be an obvious win.
+	// 	 Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
+
+	if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
+		println("initializing sequences:", err)
+		return err
+	}
+
+	err = seqs.decode(nSeqs, br, hist.b)
+	if err != nil {
+		return err
+	}
+	if !br.finished() {
+		return fmt.Errorf("%d extra bits on block, should be 0", br.remain())
+	}
+
+	err = br.close()
+	if err != nil {
+		printf("Closing sequences: %v, %+v\n", err, *br)
+	}
+	if len(b.data) > maxCompressedBlockSize {
+		return fmt.Errorf("compressed block size too large (%d)", len(b.data))
+	}
+	// Set output and release references.
+	b.dst = seqs.out
+	seqs.out, seqs.literals, seqs.hist = nil, nil, nil
+
+	if !delayedHistory {
+		// If we don't have delayed history, no need to update.
+		hist.recentOffsets = seqs.prevOffset
+		return nil
+	}
+	if b.Last {
+		// if last block we don't care about history.
+		println("Last block, no history returned")
+		hist.b = hist.b[:0]
+		return nil
+	}
+	hist.append(b.dst)
+	hist.recentOffsets = seqs.prevOffset
+	if debug {
+		println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
+	}
+
+	return nil
+}
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@ -0,0 +1,754 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"math/bits"
+
+	"github.com/klauspost/compress/huff0"
+)
+
+type blockEnc struct {
+	size      int
+	literals  []byte
+	sequences []seq
+	coders    seqCoders
+	litEnc    *huff0.Scratch
+	wr        bitWriter
+
+	extraLits int
+	last      bool
+
+	output            []byte
+	recentOffsets     [3]uint32
+	prevRecentOffsets [3]uint32
+}
+
+// init should be used once the block has been created.
+// If called more than once, the effect is the same as calling reset.
+func (b *blockEnc) init() {
+	if cap(b.literals) < maxCompressedLiteralSize {
+		b.literals = make([]byte, 0, maxCompressedLiteralSize)
+	}
+	const defSeqs = 200
+	b.literals = b.literals[:0]
+	if cap(b.sequences) < defSeqs {
+		b.sequences = make([]seq, 0, defSeqs)
+	}
+	if cap(b.output) < maxCompressedBlockSize {
+		b.output = make([]byte, 0, maxCompressedBlockSize)
+	}
+	if b.coders.mlEnc == nil {
+		b.coders.mlEnc = &fseEncoder{}
+		b.coders.mlPrev = &fseEncoder{}
+		b.coders.ofEnc = &fseEncoder{}
+		b.coders.ofPrev = &fseEncoder{}
+		b.coders.llEnc = &fseEncoder{}
+		b.coders.llPrev = &fseEncoder{}
+	}
+	b.litEnc = &huff0.Scratch{}
+	b.reset(nil)
+}
+
+// initNewEncode can be used to reset offsets and encoders to the initial state.
+func (b *blockEnc) initNewEncode() {
+	b.recentOffsets = [3]uint32{1, 4, 8}
+	b.litEnc.Reuse = huff0.ReusePolicyNone
+	b.coders.setPrev(nil, nil, nil)
+}
+
+// reset will reset the block for a new encode, but in the same stream,
+// meaning that state will be carried over, but the block content is reset.
+// If a previous block is provided, the recent offsets are carried over.
+func (b *blockEnc) reset(prev *blockEnc) {
+	b.extraLits = 0
+	b.literals = b.literals[:0]
+	b.size = 0
+	b.sequences = b.sequences[:0]
+	b.output = b.output[:0]
+	b.last = false
+	if prev != nil {
+		b.recentOffsets = prev.prevRecentOffsets
+	}
+}
+
+// reset will reset the block for a new encode, but in the same stream,
+// meaning that state will be carried over, but the block content is reset.
+// If a previous block is provided, the recent offsets are carried over.
+func (b *blockEnc) swapEncoders(prev *blockEnc) {
+	b.coders.swap(&prev.coders)
+	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
+}
+
+// blockHeader contains the information for a block header.
+type blockHeader uint32
+
+// setLast sets the 'last' indicator on a block.
+func (h *blockHeader) setLast(b bool) {
+	if b {
+		*h = *h | 1
+	} else {
+		const mask = (1 << 24) - 2
+		*h = *h & mask
+	}
+}
+
+// setSize will store the compressed size of a block.
+func (h *blockHeader) setSize(v uint32) {
+	const mask = 7
+	*h = (*h)&mask | blockHeader(v<<3)
+}
+
+// setType sets the block type.
+func (h *blockHeader) setType(t blockType) {
+	const mask = 1 | (((1 << 24) - 1) ^ 7)
+	*h = (*h & mask) | blockHeader(t<<1)
+}
+
+// appendTo will append the block header to a slice.
+func (h blockHeader) appendTo(b []byte) []byte {
+	return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
+}
+
+// String returns a string representation of the block.
+func (h blockHeader) String() string {
+	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
+}
+
+// literalsHeader contains literals header information.
+type literalsHeader uint64
+
+// setType can be used to set the type of literal block.
+func (h *literalsHeader) setType(t literalsBlockType) {
+	const mask = math.MaxUint64 - 3
+	*h = (*h & mask) | literalsHeader(t)
+}
+
+// setSize can be used to set a single size, for uncompressed and RLE content.
+func (h *literalsHeader) setSize(regenLen int) {
+	inBits := bits.Len32(uint32(regenLen))
+	// Only retain 2 bits
+	const mask = 3
+	lh := uint64(*h & mask)
+	switch {
+	case inBits < 5:
+		lh |= (uint64(regenLen) << 3) | (1 << 60)
+		if debug {
+			got := int(lh>>3) & 0xff
+			if got != regenLen {
+				panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
+			}
+		}
+	case inBits < 12:
+		lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
+	case inBits < 20:
+		lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
+	default:
+		panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
+	}
+	*h = literalsHeader(lh)
+}
+
+// setSizes will set the size of a compressed literals section and the input length.
+func (h *literalsHeader) setSizes(compLen, inLen int) {
+	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
+	// Only retain 2 bits
+	const mask = 3
+	lh := uint64(*h & mask)
+	switch {
+	case compBits <= 10 && inBits <= 10:
+		lh |= (1 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
+		if debug {
+			const mmask = (1 << 24) - 1
+			n := (lh >> 4) & mmask
+			if int(n&1023) != inLen {
+				panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
+			}
+			if int(n>>10) != compLen {
+				panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
+			}
+		}
+	case compBits <= 14 && inBits <= 14:
+		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
+	case compBits <= 18 && inBits <= 18:
+		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
+	default:
+		panic("internal error: block too big")
+	}
+	*h = literalsHeader(lh)
+}
+
+// appendTo will append the literals header to a byte slice.
+func (h literalsHeader) appendTo(b []byte) []byte {
+	size := uint8(h >> 60)
+	switch size {
+	case 1:
+		b = append(b, uint8(h))
+	case 2:
+		b = append(b, uint8(h), uint8(h>>8))
+	case 3:
+		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
+	case 4:
+		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
+	case 5:
+		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
+	default:
+		panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
+	}
+	return b
+}
+
+// size returns the output size with currently set values.
+func (h literalsHeader) size() int {
+	return int(h >> 60)
+}
+
+func (h literalsHeader) String() string {
+	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
+}
+
+// pushOffsets will push the recent offsets to the backup store.
+func (b *blockEnc) pushOffsets() {
+	b.prevRecentOffsets = b.recentOffsets
+}
+
+// pushOffsets will push the recent offsets to the backup store.
+func (b *blockEnc) popOffsets() {
+	b.recentOffsets = b.prevRecentOffsets
+}
+
+// matchOffset will adjust recent offsets and return the adjusted one,
+// if it matches a previous offset.
+func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
+	// Check if offset is one of the recent offsets.
+	// Adjusts the output offset accordingly.
+	// Gives a tiny bit of compression, typically around 1%.
+	if true {
+		if lits > 0 {
+			switch offset {
+			case b.recentOffsets[0]:
+				offset = 1
+			case b.recentOffsets[1]:
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset = 2
+			case b.recentOffsets[2]:
+				b.recentOffsets[2] = b.recentOffsets[1]
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset = 3
+			default:
+				b.recentOffsets[2] = b.recentOffsets[1]
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset += 3
+			}
+		} else {
+			switch offset {
+			case b.recentOffsets[1]:
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset = 1
+			case b.recentOffsets[2]:
+				b.recentOffsets[2] = b.recentOffsets[1]
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset = 2
+			case b.recentOffsets[0] - 1:
+				b.recentOffsets[2] = b.recentOffsets[1]
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset = 3
+			default:
+				b.recentOffsets[2] = b.recentOffsets[1]
+				b.recentOffsets[1] = b.recentOffsets[0]
+				b.recentOffsets[0] = offset
+				offset += 3
+			}
+		}
+	} else {
+		offset += 3
+	}
+	return offset
+}
+
+// encodeRaw can be used to set the output to a raw representation of supplied bytes.
+func (b *blockEnc) encodeRaw(a []byte) {
+	var bh blockHeader
+	bh.setLast(b.last)
+	bh.setSize(uint32(len(a)))
+	bh.setType(blockTypeRaw)
+	b.output = bh.appendTo(b.output[:0])
+	b.output = append(b.output, a...)
+	if debug {
+		println("Adding RAW block, length", len(a))
+	}
+}
+
+// encodeLits can be used if the block is only litLen.
+func (b *blockEnc) encodeLits() error {
+	var bh blockHeader
+	bh.setLast(b.last)
+	bh.setSize(uint32(len(b.literals)))
+
+	// Don't compress extremely small blocks
+	if len(b.literals) < 32 {
+		if debug {
+			println("Adding RAW block, length", len(b.literals))
+		}
+		bh.setType(blockTypeRaw)
+		b.output = bh.appendTo(b.output)
+		b.output = append(b.output, b.literals...)
+		return nil
+	}
+
+	// TODO: Switch to 1X when less than x bytes.
+	out, reUsed, err := huff0.Compress4X(b.literals, b.litEnc)
+	// Bail out of compression is too little.
+	if len(out) > (len(b.literals) - len(b.literals)>>4) {
+		err = huff0.ErrIncompressible
+	}
+	switch err {
+	case huff0.ErrIncompressible:
+		if debug {
+			println("Adding RAW block, length", len(b.literals))
+		}
+		bh.setType(blockTypeRaw)
+		b.output = bh.appendTo(b.output)
+		b.output = append(b.output, b.literals...)
+		return nil
+	case huff0.ErrUseRLE:
+		if debug {
+			println("Adding RLE block, length", len(b.literals))
+		}
+		bh.setType(blockTypeRLE)
+		b.output = bh.appendTo(b.output)
+		b.output = append(b.output, b.literals[0])
+		return nil
+	default:
+		return err
+	case nil:
+	}
+	// Compressed...
+	// Now, allow reuse
+	b.litEnc.Reuse = huff0.ReusePolicyAllow
+	bh.setType(blockTypeCompressed)
+	var lh literalsHeader
+	if reUsed {
+		if debug {
+			println("Reused tree, compressed to", len(out))
+		}
+		lh.setType(literalsBlockTreeless)
+	} else {
+		if debug {
+			println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
+		}
+		lh.setType(literalsBlockCompressed)
+	}
+	// Set sizes
+	lh.setSizes(len(out), len(b.literals))
+	bh.setSize(uint32(len(out) + lh.size() + 1))
+
+	// Write block headers.
+	b.output = bh.appendTo(b.output)
+	b.output = lh.appendTo(b.output)
+	// Add compressed data.
+	b.output = append(b.output, out...)
+	// No sequences.
+	b.output = append(b.output, 0)
+	return nil
+}
+
+// encode will encode the block and put the output in b.output.
+func (b *blockEnc) encode() error {
+	if len(b.sequences) == 0 {
+		return b.encodeLits()
+	}
+	// We want some difference
+	if len(b.literals) > (b.size - (b.size >> 5)) {
+		return errIncompressible
+	}
+
+	var bh blockHeader
+	var lh literalsHeader
+	bh.setLast(b.last)
+	bh.setType(blockTypeCompressed)
+	b.output = bh.appendTo(b.output)
+
+	var (
+		out    []byte
+		reUsed bool
+		err    error
+	)
+	if len(b.literals) > 32 {
+		// TODO: Switch to 1X on small blocks.
+		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
+		if len(out) > len(b.literals)-len(b.literals)>>4 {
+			err = huff0.ErrIncompressible
+		}
+	} else {
+		err = huff0.ErrIncompressible
+	}
+	switch err {
+	case huff0.ErrIncompressible:
+		lh.setType(literalsBlockRaw)
+		lh.setSize(len(b.literals))
+		b.output = lh.appendTo(b.output)
+		b.output = append(b.output, b.literals...)
+		if debug {
+			println("Adding literals RAW, length", len(b.literals))
+		}
+	case huff0.ErrUseRLE:
+		lh.setType(literalsBlockRLE)
+		lh.setSize(len(b.literals))
+		b.output = lh.appendTo(b.output)
+		b.output = append(b.output, b.literals[0])
+		if debug {
+			println("Adding literals RLE")
+		}
+	default:
+		if debug {
+			println("Adding literals ERROR:", err)
+		}
+		return err
+	case nil:
+		// Compressed litLen...
+		if reUsed {
+			if debug {
+				println("reused tree")
+			}
+			lh.setType(literalsBlockTreeless)
+		} else {
+			if debug {
+				println("new tree, size:", len(b.litEnc.OutTable))
+			}
+			lh.setType(literalsBlockCompressed)
+			if debug {
+				_, _, err := huff0.ReadTable(out, nil)
+				if err != nil {
+					panic(err)
+				}
+			}
+		}
+		lh.setSizes(len(out), len(b.literals))
+		if debug {
+			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
+			println("Adding literal header:", lh)
+		}
+		b.output = lh.appendTo(b.output)
+		b.output = append(b.output, out...)
+		b.litEnc.Reuse = huff0.ReusePolicyAllow
+		if debug {
+			println("Adding literals compressed")
+		}
+	}
+	// Sequence compression
+
+	// Write the number of sequences
+	switch {
+	case len(b.sequences) < 128:
+		b.output = append(b.output, uint8(len(b.sequences)))
+	case len(b.sequences) < 0x7f00: // TODO: this could be wrong
+		n := len(b.sequences)
+		b.output = append(b.output, 128+uint8(n>>8), uint8(n))
+	default:
+		n := len(b.sequences) - 0x7f00
+		b.output = append(b.output, 255, uint8(n), uint8(n>>8))
+	}
+	if debug {
+		println("Encoding", len(b.sequences), "sequences")
+	}
+	b.genCodes()
+	llEnc := b.coders.llEnc
+	ofEnc := b.coders.ofEnc
+	mlEnc := b.coders.mlEnc
+	err = llEnc.normalizeCount(len(b.sequences))
+	if err != nil {
+		return err
+	}
+	err = ofEnc.normalizeCount(len(b.sequences))
+	if err != nil {
+		return err
+	}
+	err = mlEnc.normalizeCount(len(b.sequences))
+	if err != nil {
+		return err
+	}
+
+	// Choose the best compression mode for each type.
+	// Will evaluate the new vs predefined and previous.
+	chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
+		// See if predefined/previous is better
+		hist := cur.count[:cur.symbolLen]
+		nSize := cur.approxSize(hist) + cur.maxHeaderSize()
+		predefSize := preDef.approxSize(hist)
+		prevSize := prev.approxSize(hist)
+
+		// Add a small penalty for new encoders.
+		// Don't bother with extremely small (<2 byte gains).
+		nSize = nSize + (nSize+2*8*16)>>4
+		switch {
+		case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
+			if debug {
+				println("Using predefined", predefSize>>3, "<=", nSize>>3)
+			}
+			return preDef, compModePredefined
+		case prevSize <= nSize:
+			if debug {
+				println("Using previous", prevSize>>3, "<=", nSize>>3)
+			}
+			return prev, compModeRepeat
+		default:
+			if debug {
+				println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
+				println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
+			}
+			return cur, compModeFSE
+		}
+	}
+
+	// Write compression mode
+	var mode uint8
+	if llEnc.useRLE {
+		mode |= uint8(compModeRLE) << 6
+		llEnc.setRLE(b.sequences[0].llCode)
+		if debug {
+			println("llEnc.useRLE")
+		}
+	} else {
+		var m seqCompMode
+		llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
+		mode |= uint8(m) << 6
+	}
+	if ofEnc.useRLE {
+		mode |= uint8(compModeRLE) << 4
+		ofEnc.setRLE(b.sequences[0].ofCode)
+		if debug {
+			println("ofEnc.useRLE")
+		}
+	} else {
+		var m seqCompMode
+		ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
+		mode |= uint8(m) << 4
+	}
+
+	if mlEnc.useRLE {
+		mode |= uint8(compModeRLE) << 2
+		mlEnc.setRLE(b.sequences[0].mlCode)
+		if debug {
+			println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
+		}
+	} else {
+		var m seqCompMode
+		mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
+		mode |= uint8(m) << 2
+	}
+	b.output = append(b.output, mode)
+	if debug {
+		printf("Compression modes: 0b%b", mode)
+	}
+	b.output, err = llEnc.writeCount(b.output)
+	if err != nil {
+		return err
+	}
+	start := len(b.output)
+	b.output, err = ofEnc.writeCount(b.output)
+	if err != nil {
+		return err
+	}
+	if false {
+		println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
+		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
+		for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
+			fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
+		}
+	}
+	b.output, err = mlEnc.writeCount(b.output)
+	if err != nil {
+		return err
+	}
+
+	// Maybe in block?
+	wr := &b.wr
+	wr.reset(b.output)
+
+	var ll, of, ml cState
+
+	// Current sequence
+	seq := len(b.sequences) - 1
+	s := b.sequences[seq]
+	llEnc.setBits(llBitsTable[:])
+	mlEnc.setBits(mlBitsTable[:])
+	ofEnc.setBits(nil)
+
+	llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
+
+	// We have 3 bounds checks here (and in the loop).
+	// Since we are iterating backwards it is kinda hard to avoid.
+	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+	ll.init(wr, &llEnc.ct, llB)
+	of.init(wr, &ofEnc.ct, ofB)
+	wr.flush32()
+	ml.init(wr, &mlEnc.ct, mlB)
+
+	// Each of these lookups also generates a bounds check.
+	wr.addBits32NC(s.litLen, llB.outBits)
+	wr.addBits32NC(s.matchLen, mlB.outBits)
+	wr.flush32()
+	wr.addBits32NC(s.offset, ofB.outBits)
+	if debugSequences {
+		println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
+	}
+	seq--
+	if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
+		// No need to flush (common)
+		for seq >= 0 {
+			s = b.sequences[seq]
+			wr.flush32()
+			llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+			// tabelog max is 8 for all.
+			of.encode(ofB)
+			ml.encode(mlB)
+			ll.encode(llB)
+			wr.flush32()
+
+			// We checked that all can stay within 32 bits
+			wr.addBits32NC(s.litLen, llB.outBits)
+			wr.addBits32NC(s.matchLen, mlB.outBits)
+			wr.addBits32NC(s.offset, ofB.outBits)
+
+			if debugSequences {
+				println("Encoded seq", seq, s)
+			}
+
+			seq--
+		}
+	} else {
+		for seq >= 0 {
+			s = b.sequences[seq]
+			wr.flush32()
+			llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+			// tabelog max is below 8 for each.
+			of.encode(ofB)
+			ml.encode(mlB)
+			ll.encode(llB)
+			wr.flush32()
+
+			// ml+ll = max 32 bits total
+			wr.addBits32NC(s.litLen, llB.outBits)
+			wr.addBits32NC(s.matchLen, mlB.outBits)
+			wr.flush32()
+			wr.addBits32NC(s.offset, ofB.outBits)
+
+			if debugSequences {
+				println("Encoded seq", seq, s)
+			}
+
+			seq--
+		}
+	}
+	ml.flush(mlEnc.actualTableLog)
+	of.flush(ofEnc.actualTableLog)
+	ll.flush(llEnc.actualTableLog)
+	err = wr.close()
+	if err != nil {
+		return err
+	}
+	b.output = wr.out
+
+	if len(b.output)-3 >= b.size {
+		// Maybe even add a bigger margin.
+		b.litEnc.Reuse = huff0.ReusePolicyNone
+		return errIncompressible
+	}
+
+	// Size is output minus block header.
+	bh.setSize(uint32(len(b.output)) - 3)
+	if debug {
+		println("Rewriting block header", bh)
+	}
+	_ = bh.appendTo(b.output[:0])
+	b.coders.setPrev(llEnc, mlEnc, ofEnc)
+	return nil
+}
+
+var errIncompressible = errors.New("uncompressible")
+
+func (b *blockEnc) genCodes() {
+	if len(b.sequences) == 0 {
+		// nothing to do
+		return
+	}
+
+	if len(b.sequences) > math.MaxUint16 {
+		panic("can only encode up to 64K sequences")
+	}
+	// No bounds checks after here:
+	llH := b.coders.llEnc.Histogram()[:256]
+	ofH := b.coders.ofEnc.Histogram()[:256]
+	mlH := b.coders.mlEnc.Histogram()[:256]
+	for i := range llH {
+		llH[i] = 0
+	}
+	for i := range ofH {
+		ofH[i] = 0
+	}
+	for i := range mlH {
+		mlH[i] = 0
+	}
+
+	var llMax, ofMax, mlMax uint8
+	for i, seq := range b.sequences {
+		v := llCode(seq.litLen)
+		seq.llCode = v
+		llH[v]++
+		if v > llMax {
+			llMax = v
+		}
+
+		v = ofCode(seq.offset)
+		seq.ofCode = v
+		ofH[v]++
+		if v > ofMax {
+			ofMax = v
+		}
+
+		v = mlCode(seq.matchLen)
+		seq.mlCode = v
+		mlH[v]++
+		if v > mlMax {
+			mlMax = v
+			if debug && mlMax > maxMatchLengthSymbol {
+				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
+			}
+		}
+		b.sequences[i] = seq
+	}
+	maxCount := func(a []uint32) int {
+		var max uint32
+		for _, v := range a {
+			if v > max {
+				max = v
+			}
+		}
+		return int(max)
+	}
+	if mlMax > maxMatchLengthSymbol {
+		panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
+	}
+	if ofMax > maxOffsetBits {
+		panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
+	}
+	if llMax > maxLiteralLengthSymbol {
+		panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
+	}
+
+	b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
+	b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
+	b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
+}
--- a/vendor/github.com/klauspost/compress/zstd/blocktype_string.go
+++ b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go
@ -0,0 +1,85 @@
+// Code generated by "stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex"; DO NOT EDIT.
+
+package zstd
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[blockTypeRaw-0]
+	_ = x[blockTypeRLE-1]
+	_ = x[blockTypeCompressed-2]
+	_ = x[blockTypeReserved-3]
+}
+
+const _blockType_name = "blockTypeRawblockTypeRLEblockTypeCompressedblockTypeReserved"
+
+var _blockType_index = [...]uint8{0, 12, 24, 43, 60}
+
+func (i blockType) String() string {
+	if i >= blockType(len(_blockType_index)-1) {
+		return "blockType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _blockType_name[_blockType_index[i]:_blockType_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[literalsBlockRaw-0]
+	_ = x[literalsBlockRLE-1]
+	_ = x[literalsBlockCompressed-2]
+	_ = x[literalsBlockTreeless-3]
+}
+
+const _literalsBlockType_name = "literalsBlockRawliteralsBlockRLEliteralsBlockCompressedliteralsBlockTreeless"
+
+var _literalsBlockType_index = [...]uint8{0, 16, 32, 55, 76}
+
+func (i literalsBlockType) String() string {
+	if i >= literalsBlockType(len(_literalsBlockType_index)-1) {
+		return "literalsBlockType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _literalsBlockType_name[_literalsBlockType_index[i]:_literalsBlockType_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[compModePredefined-0]
+	_ = x[compModeRLE-1]
+	_ = x[compModeFSE-2]
+	_ = x[compModeRepeat-3]
+}
+
+const _seqCompMode_name = "compModePredefinedcompModeRLEcompModeFSEcompModeRepeat"
+
+var _seqCompMode_index = [...]uint8{0, 18, 29, 40, 54}
+
+func (i seqCompMode) String() string {
+	if i >= seqCompMode(len(_seqCompMode_index)-1) {
+		return "seqCompMode(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _seqCompMode_name[_seqCompMode_index[i]:_seqCompMode_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[tableLiteralLengths-0]
+	_ = x[tableOffsets-1]
+	_ = x[tableMatchLengths-2]
+}
+
+const _tableIndex_name = "tableLiteralLengthstableOffsetstableMatchLengths"
+
+var _tableIndex_index = [...]uint8{0, 19, 31, 48}
+
+func (i tableIndex) String() string {
+	if i >= tableIndex(len(_tableIndex_index)-1) {
+		return "tableIndex(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _tableIndex_name[_tableIndex_index[i]:_tableIndex_index[i+1]]
+}
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@ -0,0 +1,121 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+)
+
+type byteBuffer interface {
+	// Read up to 8 bytes.
+	// Returns nil if no more input is available.
+	readSmall(n int) []byte
+
+	// Read >8 bytes.
+	// MAY use the destination slice.
+	readBig(n int, dst []byte) ([]byte, error)
+
+	// Read a single byte.
+	readByte() (byte, error)
+
+	// Skip n bytes.
+	skipN(n int) error
+}
+
+// in-memory buffer
+type byteBuf []byte
+
+func (b *byteBuf) readSmall(n int) []byte {
+	if debug && n > 8 {
+		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
+	}
+	bb := *b
+	if len(bb) < n {
+		return nil
+	}
+	r := bb[:n]
+	*b = bb[n:]
+	return r
+}
+
+func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
+	bb := *b
+	if len(bb) < n {
+		return nil, io.ErrUnexpectedEOF
+	}
+	r := bb[:n]
+	*b = bb[n:]
+	return r, nil
+}
+
+func (b *byteBuf) remain() []byte {
+	return *b
+}
+
+func (b *byteBuf) readByte() (byte, error) {
+	bb := *b
+	if len(bb) < 1 {
+		return 0, nil
+	}
+	r := bb[0]
+	*b = bb[1:]
+	return r, nil
+}
+
+func (b *byteBuf) skipN(n int) error {
+	bb := *b
+	if len(bb) < n {
+		return io.ErrUnexpectedEOF
+	}
+	*b = bb[n:]
+	return nil
+}
+
+// wrapper around a reader.
+type readerWrapper struct {
+	r   io.Reader
+	tmp [8]byte
+}
+
+func (r *readerWrapper) readSmall(n int) []byte {
+	if debug && n > 8 {
+		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
+	}
+	n2, err := io.ReadFull(r.r, r.tmp[:n])
+	// We only really care about the actual bytes read.
+	if n2 != n {
+		if debug {
+			println("readSmall: got", n2, "want", n, "err", err)
+		}
+		return nil
+	}
+	return r.tmp[:n]
+}
+
+func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
+	if cap(dst) < n {
+		dst = make([]byte, n)
+	}
+	n2, err := io.ReadFull(r.r, dst[:n])
+	return dst[:n2], err
+}
+
+func (r *readerWrapper) readByte() (byte, error) {
+	n2, err := r.r.Read(r.tmp[:1])
+	if err != nil {
+		return 0, err
+	}
+	if n2 != 1 {
+		return 0, io.ErrUnexpectedEOF
+	}
+	return r.tmp[0], nil
+}
+
+func (r *readerWrapper) skipN(n int) error {
+	_, err := io.CopyN(ioutil.Discard, r.r, int64(n))
+	return err
+}
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@ -0,0 +1,74 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced.
+// The reader performs no bounds checks.
+type byteReader struct {
+	b   []byte
+	off int
+}
+
+// init will initialize the reader and set the input.
+func (b *byteReader) init(in []byte) {
+	b.b = in
+	b.off = 0
+}
+
+// advance the stream b n bytes.
+func (b *byteReader) advance(n uint) {
+	b.off += int(n)
+}
+
+// overread returns whether we have advanced too far.
+func (b *byteReader) overread() bool {
+	return b.off > len(b.b)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+func (b byteReader) Int32() int32 {
+	b2 := b.b[b.off : b.off+4 : b.off+4]
+	v3 := int32(b2[3])
+	v2 := int32(b2[2])
+	v1 := int32(b2[1])
+	v0 := int32(b2[0])
+	return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint8 returns the next byte
+func (b *byteReader) Uint8() uint8 {
+	v := b.b[b.off]
+	return v
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+func (b byteReader) Uint32() uint32 {
+	if r := b.remain(); r < 4 {
+		// Very rare
+		v := uint32(0)
+		for i := 1; i <= r; i++ {
+			v = (v << 8) | uint32(b.b[len(b.b)-i])
+		}
+		return v
+	}
+	b2 := b.b[b.off : b.off+4 : b.off+4]
+	v3 := uint32(b2[3])
+	v2 := uint32(b2[2])
+	v1 := uint32(b2[1])
+	v0 := uint32(b2[0])
+	return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// unread returns the unread portion of the input.
+func (b byteReader) unread() []byte {
+	return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+	return len(b.b) - b.off
+}
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@ -0,0 +1,437 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"sync"
+)
+
+// Decoder provides decoding of zstandard streams.
+// The decoder has been designed to operate without allocations after a warmup.
+// This means that you should store the decoder for best performance.
+// To re-use a stream decoder, use the Reset(r io.Reader) error to switch to another stream.
+// A decoder can safely be re-used even if the previous stream failed.
+// To release the resources, you must call the Close() function on a decoder.
+type Decoder struct {
+	o decoderOptions
+
+	// Unreferenced decoders, ready for use.
+	decoders chan *blockDec
+
+	// Unreferenced decoders, ready for use.
+	frames chan *frameDec
+
+	// Streams ready to be decoded.
+	stream chan decodeStream
+
+	// Current read position used for Reader functionality.
+	current decoderState
+
+	// Custom dictionaries
+	dicts map[uint32]struct{}
+
+	// streamWg is the waitgroup for all streams
+	streamWg sync.WaitGroup
+}
+
+// decoderState is used for maintaining state when the decoder
+// is used for streaming.
+type decoderState struct {
+	// current block being written to stream.
+	decodeOutput
+
+	// output in order to be written to stream.
+	output chan decodeOutput
+
+	// cancel remaining output.
+	cancel chan struct{}
+
+	flushed bool
+}
+
+var (
+	// Check the interfaces we want to support.
+	_ = io.WriterTo(&Decoder{})
+	_ = io.Reader(&Decoder{})
+)
+
+// NewReader creates a new decoder.
+// A nil Reader can be provided in which case Reset can be used to start a decode.
+//
+// A Decoder can be used in two modes:
+//
+// 1) As a stream, or
+// 2) For stateless decoding using DecodeAll or DecodeBuffer.
+//
+// Only a single stream can be decoded concurrently, but the same decoder
+// can run multiple concurrent stateless decodes. It is even possible to
+// use stateless decodes while a stream is being decoded.
+//
+// The Reset function can be used to initiate a new stream, which is will considerably
+// reduce the allocations normally caused by NewReader.
+func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
+	var d Decoder
+	d.o.setDefault()
+	for _, o := range opts {
+		err := o(&d.o)
+		if err != nil {
+			return nil, err
+		}
+	}
+	d.current.output = make(chan decodeOutput, d.o.concurrent)
+	d.current.flushed = true
+
+	// Create decoders
+	d.decoders = make(chan *blockDec, d.o.concurrent)
+	d.frames = make(chan *frameDec, d.o.concurrent)
+	for i := 0; i < d.o.concurrent; i++ {
+		d.frames <- newFrameDec(d.o)
+		d.decoders <- newBlockDec(d.o.lowMem)
+	}
+
+	if r == nil {
+		return &d, nil
+	}
+	return &d, d.Reset(r)
+}
+
+// Read bytes from the decompressed stream into p.
+// Returns the number of bytes written and any error that occurred.
+// When the stream is done, io.EOF will be returned.
+func (d *Decoder) Read(p []byte) (int, error) {
+	if d.stream == nil {
+		return 0, errors.New("no input has been initialized")
+	}
+	var n int
+	for {
+		if len(d.current.b) > 0 {
+			filled := copy(p, d.current.b)
+			p = p[filled:]
+			d.current.b = d.current.b[filled:]
+			n += filled
+		}
+		if len(p) == 0 {
+			break
+		}
+		if len(d.current.b) == 0 {
+			// We have an error and no more data
+			if d.current.err != nil {
+				break
+			}
+			d.nextBlock()
+		}
+	}
+	if len(d.current.b) > 0 {
+		// Only return error at end of block
+		return n, nil
+	}
+	if d.current.err != nil {
+		d.drainOutput()
+	}
+	if debug {
+		println("returning", n, d.current.err, len(d.decoders))
+	}
+	return n, d.current.err
+}
+
+// Reset will reset the decoder the supplied stream after the current has finished processing.
+// Note that this functionality cannot be used after Close has been called.
+func (d *Decoder) Reset(r io.Reader) error {
+	if d.current.err == ErrDecoderClosed {
+		return d.current.err
+	}
+	if r == nil {
+		return errors.New("nil Reader sent as input")
+	}
+
+	if d.stream == nil {
+		d.stream = make(chan decodeStream, 1)
+		d.streamWg.Add(1)
+		go d.startStreamDecoder(d.stream)
+	}
+
+	d.drainOutput()
+
+	// If bytes buffer and < 1MB, do sync decoding anyway.
+	if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 {
+		b := bb.Bytes()
+		dst, err := d.DecodeAll(b, nil)
+		if err == nil {
+			err = io.EOF
+		}
+		d.current.b = dst
+		d.current.err = err
+		d.current.flushed = true
+		return nil
+	}
+
+	// Remove current block.
+	d.current.decodeOutput = decodeOutput{}
+	d.current.err = nil
+	d.current.cancel = make(chan struct{})
+	d.current.flushed = false
+	d.current.d = nil
+
+	d.stream <- decodeStream{
+		r:      r,
+		output: d.current.output,
+		cancel: d.current.cancel,
+	}
+	return nil
+}
+
+// drainOutput will drain the output until errEndOfStream is sent.
+func (d *Decoder) drainOutput() {
+	if d.current.cancel != nil {
+		println("cancelling current")
+		close(d.current.cancel)
+		d.current.cancel = nil
+	}
+	if d.current.d != nil {
+		println("re-adding current decoder", d.current.d, len(d.decoders))
+		d.decoders <- d.current.d
+		d.current.d = nil
+		d.current.b = nil
+	}
+	if d.current.output == nil || d.current.flushed {
+		println("current already flushed")
+		return
+	}
+	for {
+		select {
+		case v := <-d.current.output:
+			if v.d != nil {
+				println("got decoder", v.d)
+				d.decoders <- v.d
+			}
+			if v.err == errEndOfStream {
+				println("current flushed")
+				d.current.flushed = true
+				return
+			}
+		}
+	}
+}
+
+// WriteTo writes data to w until there's no more data to write or when an error occurs.
+// The return value n is the number of bytes written.
+// Any error encountered during the write is also returned.
+func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
+	if d.stream == nil {
+		return 0, errors.New("no input has been initialized")
+	}
+	var n int64
+	for {
+		if len(d.current.b) > 0 {
+			n2, err2 := w.Write(d.current.b)
+			n += int64(n2)
+			if err2 != nil && d.current.err == nil {
+				d.current.err = err2
+				break
+			}
+		}
+		if d.current.err != nil {
+			break
+		}
+		d.nextBlock()
+	}
+	err := d.current.err
+	if err != nil {
+		d.drainOutput()
+	}
+	if err == io.EOF {
+		err = nil
+	}
+	return n, err
+}
+
+// DecodeAll allows stateless decoding of a blob of bytes.
+// Output will be appended to dst, so if the destination size is known
+// you can pre-allocate the destination slice to avoid allocations.
+// DecodeAll can be used concurrently.
+// The Decoder concurrency limits will be respected.
+func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
+	if d.current.err == ErrDecoderClosed {
+		return dst, ErrDecoderClosed
+	}
+	//println(len(d.frames), len(d.decoders), d.current)
+	block, frame := <-d.decoders, <-d.frames
+	defer func() {
+		d.decoders <- block
+		frame.rawInput = nil
+		d.frames <- frame
+	}()
+	if cap(dst) == 0 {
+		// Allocate 1MB by default.
+		dst = make([]byte, 0, 1<<20)
+	}
+	br := byteBuf(input)
+	for {
+		err := frame.reset(&br)
+		if err == io.EOF {
+			return dst, nil
+		}
+		if err != nil {
+			return dst, err
+		}
+		if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+			return dst, ErrDecoderSizeExceeded
+		}
+		if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
+			// Never preallocate moe than 1 GB up front.
+			if uint64(cap(dst)) < frame.FrameContentSize {
+				dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
+				copy(dst2, dst)
+				dst = dst2
+			}
+		}
+		dst, err = frame.runDecoder(dst, block)
+		if err != nil {
+			return dst, err
+		}
+		if len(br) == 0 {
+			break
+		}
+	}
+	return dst, nil
+}
+
+// nextBlock returns the next block.
+// If an error occurs d.err will be set.
+func (d *Decoder) nextBlock() {
+	if d.current.d != nil {
+		d.decoders <- d.current.d
+		d.current.d = nil
+	}
+	if d.current.err != nil {
+		// Keep error state.
+		return
+	}
+	d.current.decodeOutput = <-d.current.output
+	if debug {
+		println("got", len(d.current.b), "bytes, error:", d.current.err)
+	}
+}
+
+// Close will release all resources.
+// It is NOT possible to reuse the decoder after this.
+func (d *Decoder) Close() {
+	if d.current.err == ErrDecoderClosed {
+		return
+	}
+	d.drainOutput()
+	if d.stream != nil {
+		close(d.stream)
+		d.streamWg.Wait()
+		d.stream = nil
+	}
+	if d.decoders != nil {
+		close(d.decoders)
+		for dec := range d.decoders {
+			dec.Close()
+		}
+		d.decoders = nil
+	}
+	if d.current.d != nil {
+		d.current.d.Close()
+		d.current.d = nil
+	}
+	d.current.err = ErrDecoderClosed
+}
+
+type decodeOutput struct {
+	d   *blockDec
+	b   []byte
+	err error
+}
+
+type decodeStream struct {
+	r io.Reader
+
+	// Blocks ready to be written to output.
+	output chan decodeOutput
+
+	// cancel reading from the input
+	cancel chan struct{}
+}
+
+// errEndOfStream indicates that everything from the stream was read.
+var errEndOfStream = errors.New("end-of-stream")
+
+// Create Decoder:
+// Spawn n block decoders. These accept tasks to decode a block.
+// Create goroutine that handles stream processing, this will send history to decoders as they are available.
+// Decoders update the history as they decode.
+// When a block is returned:
+// 		a) history is sent to the next decoder,
+// 		b) content written to CRC.
+// 		c) return data to WRITER.
+// 		d) wait for next block to return data.
+// Once WRITTEN, the decoders reused by the writer frame decoder for re-use.
+func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
+	defer d.streamWg.Done()
+	frame := newFrameDec(d.o)
+	for stream := range inStream {
+		br := readerWrapper{r: stream.r}
+	decodeStream:
+		for {
+			err := frame.reset(&br)
+			if debug && err != nil {
+				println("Frame decoder returned", err)
+			}
+			if err != nil {
+				stream.output <- decodeOutput{
+					err: err,
+				}
+				break
+			}
+			if debug {
+				println("starting frame decoder")
+			}
+
+			// This goroutine will forward history between frames.
+			frame.frameDone.Add(1)
+			frame.initAsync()
+
+			go frame.startDecoder(stream.output)
+		decodeFrame:
+			// Go through all blocks of the frame.
+			for {
+				dec := <-d.decoders
+				select {
+				case <-stream.cancel:
+					if !frame.sendErr(dec, io.EOF) {
+						// To not let the decoder dangle, send it back.
+						stream.output <- decodeOutput{d: dec}
+					}
+					break decodeStream
+				default:
+				}
+				err := frame.next(dec)
+				switch err {
+				case io.EOF:
+					// End of current frame, no error
+					println("EOF on next block")
+					break decodeFrame
+				case nil:
+					continue
+				default:
+					println("block decoder returned", err)
+					break decodeStream
+				}
+			}
+			// All blocks have started decoding, check if there are more frames.
+			println("waiting for done")
+			frame.frameDone.Wait()
+			println("done waiting...")
+		}
+		frame.frameDone.Wait()
+		println("Sending EOS")
+		stream.output <- decodeOutput{err: errEndOfStream}
+	}
+}
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@ -0,0 +1,66 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+	"errors"
+	"fmt"
+	"runtime"
+)
+
+// DOption is an option for creating a decoder.
+type DOption func(*decoderOptions) error
+
+// options retains accumulated state of multiple options.
+type decoderOptions struct {
+	lowMem         bool
+	concurrent     int
+	maxDecodedSize uint64
+}
+
+func (o *decoderOptions) setDefault() {
+	*o = decoderOptions{
+		// use less ram: true for now, but may change.
+		lowMem:     true,
+		concurrent: runtime.GOMAXPROCS(0),
+	}
+	o.maxDecodedSize = 1 << 63
+}
+
+// WithDecoderLowmem will set whether to use a lower amount of memory,
+// but possibly have to allocate more while running.
+func WithDecoderLowmem(b bool) DOption {
+	return func(o *decoderOptions) error { o.lowMem = b; return nil }
+}
+
+// WithDecoderConcurrency will set the concurrency,
+// meaning the maximum number of decoders to run concurrently.
+// The value supplied must be at least 1.
+// By default this will be set to GOMAXPROCS.
+func WithDecoderConcurrency(n int) DOption {
+	return func(o *decoderOptions) error {
+		if n <= 0 {
+			return fmt.Errorf("Concurrency must be at least 1")
+		}
+		o.concurrent = n
+		return nil
+	}
+}
+
+// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
+// (non-streaming) operations.
+// Maxmimum and default is 1 << 63 bytes.
+func WithDecoderMaxMemory(n uint64) DOption {
+	return func(o *decoderOptions) error {
+		if n == 0 {
+			return errors.New("WithDecoderMaxmemory must be at least 1")
+		}
+		if n > 1<<63 {
+			return fmt.Errorf("WithDecoderMaxmemorymust be less than 1 << 63")
+		}
+		o.maxDecodedSize = n
+		return nil
+	}
+}
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@ -0,0 +1,410 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+const (
+	dFastLongTableBits = 17                      // Bits used in the long match table
+	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
+	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+
+	dFastShortTableBits = tableBits                // Bits used in the short match table
+	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
+	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+)
+
+type doubleFastEncoder struct {
+	fastEncoder
+	longTable [dFastLongTableSize]tableEntry
+}
+
+// Encode mimmics functionality in zstd_dfast.c
+func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
+	const (
+		// Input margin is the number of bytes we read (8)
+		// and the maximum we will read ahead (2)
+		inputMargin            = 8 + 2
+		minNonLiteralBlockSize = 16
+	)
+
+	// Protect against e.cur wraparound.
+	for e.cur > (1<<30)+e.maxMatchOff {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.longTable[:] {
+				e.longTable[i] = tableEntry{}
+			}
+			e.cur = e.maxMatchOff
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.longTable[:] {
+			v := e.longTable[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.longTable[i].offset = v
+		}
+		e.cur = e.maxMatchOff
+	}
+
+	s := e.addBlock(src)
+	blk.size = len(src)
+	if len(src) < minNonLiteralBlockSize {
+		blk.extraLits = len(src)
+		blk.literals = blk.literals[:len(src)]
+		copy(blk.literals, src)
+		return
+	}
+
+	// Override src
+	src = e.hist
+	sLimit := int32(len(src)) - inputMargin
+	// stepSize is the number of bytes to skip on every main loop iteration.
+	// It should be >= 1.
+	stepSize := int32(e.o.targetLength)
+	if stepSize == 0 {
+		stepSize++
+	}
+
+	// TEMPLATE
+
+	const kSearchStrength = 8
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := s
+	cv := load6432(src, s)
+	// nextHash is the hash at s
+	nextHashS := hash5(cv, dFastShortTableBits)
+	nextHashL := hash8(cv, dFastLongTableBits)
+
+	// Relative offsets
+	offset1 := int32(blk.recentOffsets[0])
+	offset2 := int32(blk.recentOffsets[1])
+
+	addLiterals := func(s *seq, until int32) {
+		if until == nextEmit {
+			return
+		}
+		blk.literals = append(blk.literals, src[nextEmit:until]...)
+		s.litLen = uint32(until - nextEmit)
+	}
+	if debug {
+		println("recent offsets:", blk.recentOffsets)
+	}
+
+encodeLoop:
+	for {
+		var t int32
+		// We allow the encoder to optionally turn off repeat offsets across blocks
+		canRepeat := len(blk.sequences) > 2
+
+		for {
+			if debug && canRepeat && offset1 == 0 {
+				panic("offset0 was 0")
+			}
+
+			nextHashS = nextHashS & dFastShortTableMask
+			nextHashL = nextHashL & dFastLongTableMask
+			candidateL := e.longTable[nextHashL]
+			candidateS := e.table[nextHashS]
+
+			const repOff = 1
+			repIndex := s - offset1 + repOff
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
+
+			if canRepeat {
+				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+					// Consider history as well.
+					var seq seq
+					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+					seq.matchLen = uint32(lenght - zstdMinMatch)
+
+					// We might be able to match backwards.
+					// Extend as long as we can.
+					start := s + repOff
+					// We end the search early, so we don't risk 0 literals
+					// and have to do special offset treatment.
+					startLimit := nextEmit + 1
+
+					tMin := s - e.maxMatchOff
+					if tMin < 0 {
+						tMin = 0
+					}
+					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+						repIndex--
+						start--
+						seq.matchLen++
+					}
+					addLiterals(&seq, start)
+
+					// rep 0
+					seq.offset = 1
+					if debugSequences {
+						println("repeat sequence", seq, "next s:", s)
+					}
+					blk.sequences = append(blk.sequences, seq)
+					s += lenght + repOff
+					nextEmit = s
+					if s >= sLimit {
+						if debug {
+							println("repeat ended", s, lenght)
+
+						}
+						break encodeLoop
+					}
+					cv = load6432(src, s)
+					nextHashS = hash5(cv, dFastShortTableBits)
+					nextHashL = hash8(cv, dFastLongTableBits)
+					continue
+				}
+				const repOff2 = 1
+				// We deviate from the reference encoder and also check offset 2.
+				// Slower and not consistently better, so disabled.
+				// repIndex = s - offset2 + repOff2
+				if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) {
+					// Consider history as well.
+					var seq seq
+					lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src)
+
+					seq.matchLen = uint32(lenght - zstdMinMatch)
+
+					// We might be able to match backwards.
+					// Extend as long as we can.
+					start := s + repOff2
+					// We end the search early, so we don't risk 0 literals
+					// and have to do special offset treatment.
+					startLimit := nextEmit + 1
+
+					tMin := s - e.maxMatchOff
+					if tMin < 0 {
+						tMin = 0
+					}
+					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+						repIndex--
+						start--
+						seq.matchLen++
+					}
+					addLiterals(&seq, start)
+
+					// rep 2
+					seq.offset = 2
+					if debugSequences {
+						println("repeat sequence 2", seq, "next s:", s)
+					}
+					blk.sequences = append(blk.sequences, seq)
+					s += lenght + repOff2
+					nextEmit = s
+					if s >= sLimit {
+						if debug {
+							println("repeat ended", s, lenght)
+
+						}
+						break encodeLoop
+					}
+					cv = load6432(src, s)
+					nextHashS = hash5(cv, dFastShortTableBits)
+					nextHashL = hash8(cv, dFastLongTableBits)
+					// Swap offsets
+					offset1, offset2 = offset2, offset1
+					continue
+				}
+			}
+			// Find the offsets of our two matches.
+			coffsetL := s - (candidateL.offset - e.cur)
+			coffsetS := s - (candidateS.offset - e.cur)
+
+			// Check if we have a long match.
+			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+				// Found a long match, likely at least 8 bytes.
+				// Reference encoder checks all 8 bytes, we only check 4,
+				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+				t = candidateL.offset - e.cur
+				if debug && s <= t {
+					panic("s <= t")
+				}
+				if debug && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debug {
+					println("long match")
+				}
+				break
+			}
+
+			// Check if we have a short match.
+			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+				// found a regular match
+				// See if we can find a long match at s+1
+				const checkAt = 1
+				cv := load6432(src, s+checkAt)
+				nextHashL = hash8(cv, dFastLongTableBits)
+				candidateL = e.longTable[nextHashL]
+				coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+				// We can store it, since we have at least a 4 byte match.
+				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+					// Found a long match, likely at least 8 bytes.
+					// Reference encoder checks all 8 bytes, we only check 4,
+					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+					t = candidateL.offset - e.cur
+					s += checkAt
+					if debug {
+						println("long match (after short)")
+					}
+					break
+				}
+
+				t = candidateS.offset - e.cur
+				if debug && s <= t {
+					panic("s <= t")
+				}
+				if debug && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debug && t < 0 {
+					panic("t<0")
+				}
+				if debug {
+					println("short match")
+				}
+				break
+			}
+
+			// No match found, move forward in input.
+			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+			if s >= sLimit {
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+			nextHashS = hash5(cv, dFastShortTableBits)
+			nextHashL = hash8(cv, dFastLongTableBits)
+		}
+
+		// A 4-byte match has been found. Update recent offsets.
+		// We'll later see if more than 4 bytes.
+		offset2 = offset1
+		offset1 = s - t
+
+		if debug && s <= t {
+			panic("s <= t")
+		}
+
+		if debug && canRepeat && int(offset1) > len(src) {
+			panic("invalid offset")
+		}
+
+		// Extend the 4-byte match as long as possible.
+		l := e.matchlen(s+4, t+4, src) + 4
+
+		// Extend backwards
+		tMin := s - e.maxMatchOff
+		if tMin < 0 {
+			tMin = 0
+		}
+		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+			s--
+			t--
+			l++
+		}
+
+		// Write our sequence
+		var seq seq
+		seq.litLen = uint32(s - nextEmit)
+		seq.matchLen = uint32(l - zstdMinMatch)
+		if seq.litLen > 0 {
+			blk.literals = append(blk.literals, src[nextEmit:s]...)
+		}
+		seq.offset = uint32(s-t) + 3
+		s += l
+		if debugSequences {
+			println("sequence", seq, "next s:", s)
+		}
+		blk.sequences = append(blk.sequences, seq)
+		nextEmit = s
+		if s >= sLimit {
+			break encodeLoop
+		}
+
+		// Index match start + 2 and end - 2
+		index0 := s - l + 2
+		index1 := s - 2
+		if l == 4 {
+			// if l is 4, we would check the same place twice, so index s-1 instead.
+			index1++
+		}
+
+		cv0 := load6432(src, index0)
+		cv1 := load6432(src, index1)
+		entry0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+		entry1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+		e.table[hash5(cv0, dFastShortTableBits)&dFastShortTableMask] = entry0
+		e.longTable[hash8(cv0, dFastLongTableBits)&dFastLongTableMask] = entry0
+		e.table[hash5(cv1, dFastShortTableBits)&dFastShortTableMask] = entry1
+		e.longTable[hash8(cv1, dFastLongTableBits)&dFastLongTableMask] = entry1
+
+		cv = load6432(src, s)
+		nextHashS = hash5(cv, dFastShortTableBits)
+		nextHashL = hash8(cv, dFastLongTableBits)
+
+		// Check offset 2
+		if o2 := s - offset2; canRepeat && o2 > 0 && load3232(src, o2) == uint32(cv) {
+			// We have at least 4 byte match.
+			// No need to check backwards. We come straight from a match
+			l := 4 + e.matchlen(s+4, o2+4, src)
+			// Store this, since we have it.
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL&dFastLongTableMask] = entry
+			e.table[nextHashS&dFastShortTableMask] = entry
+			seq.matchLen = uint32(l) - zstdMinMatch
+			seq.litLen = 0
+			// Since litlen is always 0, this is offset 1.
+			seq.offset = 1
+			s += l
+			nextEmit = s
+			if debugSequences {
+				println("sequence", seq, "next s:", s)
+			}
+			blk.sequences = append(blk.sequences, seq)
+
+			// Swap offset 1 and 2.
+			offset1, offset2 = offset2, offset1
+			if s >= sLimit {
+				break encodeLoop
+			}
+			// Prepare next loop.
+			cv = load6432(src, s)
+			nextHashS = hash5(cv, dFastShortTableBits)
+			nextHashL = hash8(cv, dFastLongTableBits)
+		}
+	}
+
+	if int(nextEmit) < len(src) {
+		blk.literals = append(blk.literals, src[nextEmit:]...)
+		blk.extraLits = len(src) - int(nextEmit)
+	}
+	blk.recentOffsets[0] = uint32(offset1)
+	blk.recentOffsets[1] = uint32(offset2)
+	if debug {
+		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+	}
+}
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .12.10
 .13.0