Merge pull request #110851 from negz/more-like-mac-slow-s

Use checksums instead of fsyncs to avoid slow discovery caching on macOS
commit 8cecf183a6
Authored by Kubernetes Prow Robot on 2022-07-29 18:11:12 -07:00; committed by GitHub
4 changed files with 231 additions and 64 deletions
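To make the approach concrete before the diff: each cache entry is written as a SHA-256 digest followed by the payload, and the digest is re-checked on every read, so a torn or partial write surfaces as an ordinary cache miss rather than being prevented up front with fsync. A minimal standalone sketch of the idea (not the PR's code; writeEntry, readEntry, and the temp path are illustrative only):

package main

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"os"
)

// writeEntry stores sum(payload) || payload. No fsync is needed because a torn
// write is detected at read time. (Illustrative helper, not part of the PR.)
func writeEntry(path string, payload []byte) error {
	sum := sha256.Sum256(payload)
	return os.WriteFile(path, append(sum[:], payload...), 0o600)
}

// readEntry returns the payload and true only if the stored checksum matches.
func readEntry(path string) ([]byte, bool) {
	b, err := os.ReadFile(path)
	if err != nil || len(b) < sha256.Size {
		return nil, false
	}
	payload := b[sha256.Size:]
	sum := sha256.Sum256(payload)
	if !bytes.Equal(b[:sha256.Size], sum[:]) {
		return nil, false // Corrupt or truncated entry: treat as a miss.
	}
	return payload, true
}

func main() {
	path := "/tmp/sumcache-demo"
	_ = writeEntry(path, []byte(`{"kind":"APIGroupList"}`))
	if payload, ok := readEntry(path); ok {
		fmt.Printf("cache hit: %s\n", payload)
	}
}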


@@ -17,12 +17,14 @@ limitations under the License.
package disk

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"net/http"
	"os"
	"path/filepath"

	"github.com/gregjones/httpcache"
	"github.com/gregjones/httpcache/diskcache"
	"github.com/peterbourgon/diskv"
	"k8s.io/klog/v2"
)
@@ -41,7 +43,7 @@ func newCacheRoundTripper(cacheDir string, rt http.RoundTripper) http.RoundTripper {
		BasePath: cacheDir,
		TempDir:  filepath.Join(cacheDir, ".diskv-temp"),
	})
	t := httpcache.NewTransport(diskcache.NewWithDiskv(d))
	t := httpcache.NewTransport(&sumDiskCache{disk: d})
	t.Transport = rt
	return &cacheRoundTripper{rt: t}
@@ -63,3 +65,56 @@ func (rt *cacheRoundTripper) CancelRequest(req *http.Request) {
}

func (rt *cacheRoundTripper) WrappedRoundTripper() http.RoundTripper { return rt.rt.Transport }

// A sumDiskCache is a cache backend for github.com/gregjones/httpcache. It is
// similar to httpcache's diskcache package, but uses SHA256 sums to ensure
// cache integrity at read time rather than fsyncing each cache entry to
// increase the likelihood they will be persisted at write time. This avoids
// significant performance degradation on MacOS.
//
// See https://github.com/kubernetes/kubernetes/issues/110753 for more.
type sumDiskCache struct {
	disk *diskv.Diskv
}

// Get the requested key from the cache on disk. If Get encounters an error, or
// the returned value is not a SHA256 sum followed by bytes with a matching
// checksum it will return false to indicate a cache miss.
func (c *sumDiskCache) Get(key string) ([]byte, bool) {
	b, err := c.disk.Read(sanitize(key))
	if err != nil || len(b) < sha256.Size {
		return []byte{}, false
	}
	response := b[sha256.Size:]
	want := b[:sha256.Size] // The first 32 bytes of the file should be the SHA256 sum.
	got := sha256.Sum256(response)
	if !bytes.Equal(want, got[:]) {
		return []byte{}, false
	}
	return response, true
}

// Set writes the response to a file on disk. The filename will be the SHA256
// sum of the key. The file will contain a SHA256 sum of the response bytes,
// followed by said response bytes.
func (c *sumDiskCache) Set(key string, response []byte) {
	s := sha256.Sum256(response)
	_ = c.disk.Write(sanitize(key), append(s[:], response...)) // Nothing we can do with this error.
}

func (c *sumDiskCache) Delete(key string) {
	_ = c.disk.Erase(sanitize(key)) // Nothing we can do with this error.
}

// Sanitize an httpcache key such that it can be used as a diskv key, which must
// be a valid filename. The httpcache key will either be the requested URL (if
// the request method was GET) or "<method> <url>" for other methods, per the
// httpcache.cacheKey function.
func sanitize(key string) string {
	// These keys are not sensitive. We use sha256 to avoid a (potentially
	// malicious) collision causing the wrong cache data to be written or
	// accessed.
	return fmt.Sprintf("%x", sha256.Sum256([]byte(key)))
}
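Since Get treats any checksum mismatch as a miss, a corrupted entry is recovered from transparently: httpcache falls back to its underlying transport and the next Set overwrites the bad file. A rough sketch of that flow, assuming it lives in the same package as sumDiskCache (the function name, URL, and file handling are illustrative; the tests below verify the same behaviour):

// Illustrative only; error handling elided.
func demoCorruptEntryIsAMiss() {
	dir, _ := os.MkdirTemp("", "sum-cache-demo")
	defer os.RemoveAll(dir)

	d := diskv.New(diskv.Options{BasePath: dir, TempDir: filepath.Join(dir, ".diskv-temp")})
	c := &sumDiskCache{disk: d}

	key := "https://example.com/apis"
	c.Set(key, []byte("cached discovery body"))

	// Flip a byte in the backing file to simulate a torn or corrupted write.
	name := filepath.Join(dir, sanitize(key))
	b, _ := os.ReadFile(name)
	b[len(b)-1] ^= 0xff
	_ = os.WriteFile(name, b, 0o600)

	// The checksum no longer matches, so Get reports a miss; httpcache will
	// then fall back to the network and overwrite the corrupt entry.
	if _, ok := c.Get(key); !ok {
		fmt.Println("corrupt entry treated as a cache miss")
	}
}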


@@ -18,6 +18,7 @@ package disk
import (
	"bytes"
	"crypto/sha256"
	"io/ioutil"
	"net/http"
	"net/url"
@@ -25,6 +26,7 @@ import (
	"path/filepath"
	"testing"

	"github.com/peterbourgon/diskv"
	"github.com/stretchr/testify/assert"
)
@@ -40,6 +42,35 @@ func (rt *testRoundTripper) RoundTrip(req *http.Request) (*http.Response, error)
	return rt.Response, rt.Err
}

func BenchmarkDiskCache(b *testing.B) {
	cacheDir, err := ioutil.TempDir("", "cache-rt")
	if err != nil {
		b.Fatal(err)
	}
	defer os.RemoveAll(cacheDir)

	d := diskv.New(diskv.Options{
		PathPerm: os.FileMode(0750),
		FilePerm: os.FileMode(0660),
		BasePath: cacheDir,
		TempDir:  filepath.Join(cacheDir, ".diskv-temp"),
	})

	k := "localhost:8080/apis/batch/v1.json"
	v, err := ioutil.ReadFile("../../testdata/apis/batch/v1.json")
	if err != nil {
		b.Fatal(err)
	}

	c := sumDiskCache{disk: d}

	for n := 0; n < b.N; n++ {
		c.Set(k, v)
		c.Get(k)
		c.Delete(k)
	}
}
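If it helps to sanity-check the change locally, the benchmark above should be runnable with the standard tooling, e.g. go test -bench=DiskCache -benchmem from this package's directory; it drives a full Set/Get/Delete cycle through the checksummed backend, the same path that previously paid for a synced write per entry via diskcache.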
func TestCacheRoundTripper(t *testing.T) {
rt := &testRoundTripper{}
cacheDir, err := ioutil.TempDir("", "cache-rt")
@@ -145,3 +176,146 @@ func TestCacheRoundTripperPathPerm(t *testing.T) {
	})
	assert.NoError(err)
}
func TestSumDiskCache(t *testing.T) {
	assert := assert.New(t)

	// Ensure that we'll return a cache miss if the backing file doesn't exist.
	t.Run("NoSuchKey", func(t *testing.T) {
		cacheDir, err := ioutil.TempDir("", "cache-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(cacheDir)
		d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")})
		c := &sumDiskCache{disk: d}

		key := "testing"
		got, ok := c.Get(key)
		assert.False(ok)
		assert.Equal([]byte{}, got)
	})
	// Ensure that we'll return a cache miss if the backing file is empty.
	t.Run("EmptyFile", func(t *testing.T) {
		cacheDir, err := ioutil.TempDir("", "cache-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(cacheDir)
		d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")})
		c := &sumDiskCache{disk: d}

		key := "testing"
		f, err := os.Create(filepath.Join(cacheDir, sanitize(key)))
		if err != nil {
			t.Fatal(err)
		}
		f.Close()

		got, ok := c.Get(key)
		assert.False(ok)
		assert.Equal([]byte{}, got)
	})
	// Ensure that we'll return a cache miss if the backing file has an invalid
	// checksum.
	t.Run("InvalidChecksum", func(t *testing.T) {
		cacheDir, err := ioutil.TempDir("", "cache-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(cacheDir)
		d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")})
		c := &sumDiskCache{disk: d}

		key := "testing"
		value := []byte("testing")
		mismatchedValue := []byte("testink")
		sum := sha256.Sum256(value)

		// Create a file with the sum of 'value' followed by the bytes of
		// 'mismatchedValue'.
		f, err := os.Create(filepath.Join(cacheDir, sanitize(key)))
		if err != nil {
			t.Fatal(err)
		}
		f.Write(sum[:])
		f.Write(mismatchedValue)
		f.Close()

		// The mismatched checksum should result in a cache miss.
		got, ok := c.Get(key)
		assert.False(ok)
		assert.Equal([]byte{}, got)
	})
	// Ensure that our disk cache will happily cache over the top of an existing
	// value. We depend on this behaviour to recover from corrupted cache
	// entries. When Get detects a bad checksum it will return a cache miss.
	// This should cause httpcache to fall back to its underlying transport and
	// to subsequently cache the new value, overwriting the corrupt one.
	t.Run("OverwriteExistingKey", func(t *testing.T) {
		cacheDir, err := ioutil.TempDir("", "cache-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(cacheDir)
		d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")})
		c := &sumDiskCache{disk: d}

		key := "testing"
		value := []byte("cool value!")

		// Write a value.
		c.Set(key, value)
		got, ok := c.Get(key)

		// Ensure we can read back what we wrote.
		assert.True(ok)
		assert.Equal(value, got)

		differentValue := []byte("I'm different!")

		// Write a different value.
		c.Set(key, differentValue)
		got, ok = c.Get(key)

		// Ensure we can read back the different value.
		assert.True(ok)
		assert.Equal(differentValue, got)
	})
	// Ensure that deleting a key does in fact delete it.
	t.Run("DeleteKey", func(t *testing.T) {
		cacheDir, err := ioutil.TempDir("", "cache-test")
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll(cacheDir)
		d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")})
		c := &sumDiskCache{disk: d}

		key := "testing"
		value := []byte("coolValue")
		c.Set(key, value)

		// Ensure we successfully set the value.
		got, ok := c.Get(key)
		assert.True(ok)
		assert.Equal(value, got)

		c.Delete(key)

		// Ensure the value is gone.
		got, ok = c.Get(key)
		assert.False(ok)
		assert.Equal([]byte{}, got)

		// Ensure that deleting a non-existent value is a no-op.
		c.Delete(key)
	})
}


@@ -1,61 +0,0 @@
// Package diskcache provides an implementation of httpcache.Cache that uses the diskv package
// to supplement an in-memory map with persistent storage
//
package diskcache

import (
	"bytes"
	"crypto/md5"
	"encoding/hex"
	"github.com/peterbourgon/diskv"
	"io"
)

// Cache is an implementation of httpcache.Cache that supplements the in-memory map with persistent storage
type Cache struct {
	d *diskv.Diskv
}

// Get returns the response corresponding to key if present
func (c *Cache) Get(key string) (resp []byte, ok bool) {
	key = keyToFilename(key)
	resp, err := c.d.Read(key)
	if err != nil {
		return []byte{}, false
	}
	return resp, true
}

// Set saves a response to the cache as key
func (c *Cache) Set(key string, resp []byte) {
	key = keyToFilename(key)
	c.d.WriteStream(key, bytes.NewReader(resp), true)
}

// Delete removes the response with key from the cache
func (c *Cache) Delete(key string) {
	key = keyToFilename(key)
	c.d.Erase(key)
}

func keyToFilename(key string) string {
	h := md5.New()
	io.WriteString(h, key)
	return hex.EncodeToString(h.Sum(nil))
}

// New returns a new Cache that will store files in basePath
func New(basePath string) *Cache {
	return &Cache{
		d: diskv.New(diskv.Options{
			BasePath:     basePath,
			CacheSizeMax: 100 * 1024 * 1024, // 100MB
		}),
	}
}

// NewWithDiskv returns a new Cache using the provided Diskv as underlying
// storage.
func NewWithDiskv(d *diskv.Diskv) *Cache {
	return &Cache{d}
}

vendor/modules.txt

@@ -507,7 +506,6 @@ github.com/gorilla/websocket
# github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 => github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7
## explicit
github.com/gregjones/httpcache
github.com/gregjones/httpcache/diskcache
# github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 => github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
## explicit; go 1.14
github.com/grpc-ecosystem/go-grpc-middleware