diff --git a/discovery/cached/disk/round_tripper.go b/discovery/cached/disk/round_tripper.go index bda2e5cf..06d82627 100644 --- a/discovery/cached/disk/round_tripper.go +++ b/discovery/cached/disk/round_tripper.go @@ -17,12 +17,15 @@ limitations under the License. package disk import ( + "encoding/binary" + "fmt" + "hash/crc32" + "hash/fnv" "net/http" "os" "path/filepath" "github.com/gregjones/httpcache" - "github.com/gregjones/httpcache/diskcache" "github.com/peterbourgon/diskv" "k8s.io/klog/v2" ) @@ -41,7 +44,7 @@ func newCacheRoundTripper(cacheDir string, rt http.RoundTripper) http.RoundTripp BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp"), }) - t := httpcache.NewTransport(diskcache.NewWithDiskv(d)) + t := httpcache.NewTransport(&crcDiskCache{disk: d}) t.Transport = rt return &cacheRoundTripper{rt: t} @@ -63,3 +66,54 @@ func (rt *cacheRoundTripper) CancelRequest(req *http.Request) { } func (rt *cacheRoundTripper) WrappedRoundTripper() http.RoundTripper { return rt.rt.Transport } + +// A crcDiskCache is a cache backend for github.com/gregjones/httpcache. It is +// similar to httpcache's diskcache package, but uses checksums to ensure cache +// integrity rather than fsyncing each cache entry in order to avoid performance +// degradation on MacOS. +// +// See https://github.com/kubernetes/kubernetes/issues/110753 for more. +type crcDiskCache struct { + disk *diskv.Diskv +} + +// Get the requested key from the cache on disk. If Get encounters an error, or +// the returned value is not a CRC-32 checksum followed by bytes with a matching +// checksum it will return false to indicate a cache miss. +func (c *crcDiskCache) Get(key string) ([]byte, bool) { + b, err := c.disk.Read(sanitize(key)) + if err != nil || len(b) < binary.MaxVarintLen32 { + return []byte{}, false + } + + response := b[binary.MaxVarintLen32:] + sum, _ := binary.Uvarint(b[:binary.MaxVarintLen32]) + if crc32.ChecksumIEEE(response) != uint32(sum) { + return []byte{}, false + } + + return response, true +} + +// Set writes the response to a file on disk. The filename will be the FNV-32a +// hash of the key. The file will contain the CRC-32 checksum of the response +// bytes, followed by said response bytes. +func (c *crcDiskCache) Set(key string, response []byte) { + sum := make([]byte, binary.MaxVarintLen32) + _ = binary.PutUvarint(sum, uint64(crc32.ChecksumIEEE(response))) + _ = c.disk.Write(sanitize(key), append(sum, response...)) // Nothing we can do with this error. +} + +func (c *crcDiskCache) Delete(key string) { + _ = c.disk.Erase(sanitize(key)) // Nothing we can do with this error. +} + +// Sanitize an httpcache key such that it can be used as a diskv key, which must +// be a valid filename. The httpcache key will either be the requested URL (if +// the request method was GET) or " " for other methods, per the +// httpcache.cacheKey function. +func sanitize(key string) string { + h := fnv.New32a() + _, _ = h.Write([]byte(key)) // Writing to a hash never returns an error. + return fmt.Sprintf("%X", h.Sum32()) +} diff --git a/discovery/cached/disk/round_tripper_test.go b/discovery/cached/disk/round_tripper_test.go index 4b1d1144..245d83b6 100644 --- a/discovery/cached/disk/round_tripper_test.go +++ b/discovery/cached/disk/round_tripper_test.go @@ -18,6 +18,8 @@ package disk import ( "bytes" + "encoding/binary" + "hash/crc32" "io/ioutil" "net/http" "net/url" @@ -25,7 +27,6 @@ import ( "path/filepath" "testing" - "github.com/gregjones/httpcache/diskcache" "github.com/peterbourgon/diskv" "github.com/stretchr/testify/assert" ) @@ -42,9 +43,6 @@ func (rt *testRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) return rt.Response, rt.Err } -// NOTE(negz): We're adding a benchmark for an external dependency in order to -// prove that one that will be added in a subsequent commit improves write -// performance. func BenchmarkDiskCache(b *testing.B) { cacheDir, err := ioutil.TempDir("", "cache-rt") if err != nil { @@ -65,7 +63,7 @@ func BenchmarkDiskCache(b *testing.B) { b.Fatal(err) } - c := diskcache.NewWithDiskv(d) + c := crcDiskCache{disk: d} for n := 0; n < b.N; n++ { c.Set(k, v) @@ -179,3 +177,147 @@ func TestCacheRoundTripperPathPerm(t *testing.T) { }) assert.NoError(err) } + +func TestCRCDiskCache(t *testing.T) { + assert := assert.New(t) + + // Ensure that we'll return a cache miss if the backing file doesn't exist. + t.Run("NoSuchKey", func(t *testing.T) { + cacheDir, err := ioutil.TempDir("", "cache-crc") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(cacheDir) + d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")}) + crc := &crcDiskCache{disk: d} + + key := "testing" + + got, ok := crc.Get(key) + assert.False(ok) + assert.Equal([]byte{}, got) + }) + + // Ensure that we'll return a cache miss if the backing file is empty. + t.Run("EmptyFile", func(t *testing.T) { + cacheDir, err := ioutil.TempDir("", "cache-crc") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(cacheDir) + d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")}) + crc := &crcDiskCache{disk: d} + + key := "testing" + + f, err := os.Create(filepath.Join(cacheDir, sanitize(key))) + if err != nil { + t.Fatal(err) + } + f.Close() + + got, ok := crc.Get(key) + assert.False(ok) + assert.Equal([]byte{}, got) + }) + + // Ensure that we'll return a cache miss if the backing has an invalid + // checksum. + t.Run("InvalidChecksum", func(t *testing.T) { + cacheDir, err := ioutil.TempDir("", "cache-crc") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(cacheDir) + d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")}) + crc := &crcDiskCache{disk: d} + + key := "testing" + value := []byte("testing") + mismatchedValue := []byte("testink") + sum := make([]byte, binary.MaxVarintLen32) + binary.PutUvarint(sum, uint64(crc32.ChecksumIEEE(value))) + + // Create a file with the checksum of 'value' followed by the bytes of + // 'mismatchedValue'. + f, err := os.Create(filepath.Join(cacheDir, sanitize(key))) + if err != nil { + t.Fatal(err) + } + f.Write(sum) + f.Write(mismatchedValue) + f.Close() + + // The mismatched checksum should result in a cache miss. + got, ok := crc.Get(key) + assert.False(ok) + assert.Equal([]byte{}, got) + }) + + // Ensure that our disk cache will happily cache over the top of an existing + // value. We depend on this behaviour to recover from corrupted cache + // entries. When Get detects a bad checksum it will return a cache miss. + // This should cause httpcache to fall back to its underlying transport and + // to subsequently cache the new value, overwriting the corrupt one. + t.Run("OverwriteExistingKey", func(t *testing.T) { + cacheDir, err := ioutil.TempDir("", "cache-crc") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(cacheDir) + d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")}) + crc := &crcDiskCache{disk: d} + + key := "testing" + value := []byte("cool value!") + + // Write a value. + crc.Set(key, value) + got, ok := crc.Get(key) + + // Ensure we can read back what we wrote. + assert.True(ok) + assert.Equal(value, got) + + differentValue := []byte("I'm different!") + + // Write a different value. + crc.Set(key, differentValue) + got, ok = crc.Get(key) + + // Ensure we can read back the different value. + assert.True(ok) + assert.Equal(differentValue, got) + }) + + // Ensure that deleting a key does in fact delete it. + t.Run("DeleteKey", func(t *testing.T) { + cacheDir, err := ioutil.TempDir("", "cache-crc") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(cacheDir) + d := diskv.New(diskv.Options{BasePath: cacheDir, TempDir: filepath.Join(cacheDir, ".diskv-temp")}) + crc := &crcDiskCache{disk: d} + + key := "testing" + value := []byte("coolValue") + + crc.Set(key, value) + + // Ensure we successfully set the value. + got, ok := crc.Get(key) + assert.True(ok) + assert.Equal(value, got) + + crc.Delete(key) + + // Ensure the value is gone. + got, ok = crc.Get(key) + assert.False(ok) + assert.Equal([]byte{}, got) + + // Ensure that deleting a non-existent value is a no-op. + crc.Delete(key) + }) +}