diff options
author | Mikkel Krautz <mikkel@krautz.dk> | 2013-02-05 02:36:45 +0400 |
---|---|---|
committer | Mikkel Krautz <mikkel@krautz.dk> | 2013-02-05 02:36:45 +0400 |
commit | 3dc3b25f57fa975a3c219aa171e1bf0a36917dc2 (patch) | |
tree | a3ccc3a2772e7b9ce68859a2e2ec2f029b8d2c4c /pkg | |
parent | 8effbbc6b39545f59cf5334059456ee5bff91184 (diff) |
pkg/blobstore: modernize the blobstore package.
Diffstat (limited to 'pkg')
-rw-r--r-- | pkg/blobstore/Makefile | 14 | ||||
-rw-r--r-- | pkg/blobstore/blobreader.go | 69 | ||||
-rw-r--r-- | pkg/blobstore/blobreader_test.go | 41 | ||||
-rw-r--r-- | pkg/blobstore/blobstore.go | 309 | ||||
-rw-r--r-- | pkg/blobstore/blobstore_test.go | 148 |
5 files changed, 211 insertions, 370 deletions
diff --git a/pkg/blobstore/Makefile b/pkg/blobstore/Makefile deleted file mode 100644 index 3eba753..0000000 --- a/pkg/blobstore/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -include $(GOROOT)/src/Make.inc - -TARG = grumble/blobstore -GOFILES = \ - blobstore.go \ - blobreader.go - -ifeq ($(GOOS),windows) -GOFILES += lock_windows.go -else -GOFILES += lock_unix.go -endif - -include $(GOROOT)/src/Make.pkg diff --git a/pkg/blobstore/blobreader.go b/pkg/blobstore/blobreader.go index 52e71a6..6b3dc4f 100644 --- a/pkg/blobstore/blobreader.go +++ b/pkg/blobstore/blobreader.go @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Grumble Authors +// Copyright (c) 2011-2013 The Grumble Authors // The use of this source code is goverened by a BSD-style // license that can be found in the LICENSE-file. @@ -8,46 +8,77 @@ import ( "bytes" "crypto/sha1" "encoding/hex" - "errors" "hash" "io" ) -// blobReader is based on the principles of the checksumReader from the archive/zip -// package of the Go standard library. +// EOFHashMismatchError signals that a blobReader reached EOF, but that +// the calculated hash did not match the given blob key. This signals +// a successful read of the blob, but that the on-disk content is +// corrupted in some fashion. +type EOFHashMismatchError struct { + // Sum represents that was calculated during the read operation. + Sum []byte +} -// ErrHashMismatch is returned if a blobReader has read a file whose computed hash -// did not match its key. -var ErrHashMismatch = errors.New("hash mismatch") +func (hme EOFHashMismatchError) Error() string { + return "blobstore: EOF hash mismatch" +} -// blobReader reads a blob from disk, hashing all incoming data. On EOF, it checks -// whether the read data matches the key. +// blobReader implements an io.ReadCloser that reads a blob from disk +// and hashes all incoming data to ensure integrity. On EOF, it matches +// its calculated hash with the given blob key in order to detect data +// corruption. +// +// If a mismatch is detected on EOF, the blobReader will return +// the error ErrEOFHashMismatch instead of a regular io.EOF error. type blobReader struct { rc io.ReadCloser sum []byte hash hash.Hash } -func newBlobReader(rc io.ReadCloser, key string) (br *blobReader, err error) { +// newBlobReader returns a new blobReader reading from rc. +// The rc is expected to be a blobstore entry identified by +// the given key. (The blobstore is content addressible, and +// a blob's key represents the SHA1 of its content). +func newBlobReader(rc io.ReadCloser, key string) (*blobReader, error) { sum, err := hex.DecodeString(key) if err != nil { - return + return nil, err } return &blobReader{rc, sum, sha1.New()}, nil } -func (r *blobReader) Read(b []byte) (n int, err error) { - n, err = r.rc.Read(b) - r.hash.Write(b[:n]) +// Read implements the Read method of io.ReadCloser. +// This Read implementation passes on read calls to the +// wrapper io.ReadCloser and hashes all read content. +// When EOF is reached, the sum of the streaming hash +// hash is calculated and compared to the blob key given +// in newBlobReader. If the calculated hash does not match +// the blob key, the special error ErrEOFHashMismatch is +// returned to signal EOF, while also signalling a hash +// mismatch. +func (r *blobReader) Read(b []byte) (int, error) { + n, err := r.rc.Read(b) + _, werr := r.hash.Write(b[:n]) + if werr != nil { + return 0, werr + } if err != io.EOF { - return + return n, err } - if !bytes.Equal(r.sum, r.hash.Sum(nil)) { - err = ErrHashMismatch + // Match the calculated digest with the expected + // digest on EOF. + calcSum := r.hash.Sum(nil) + if !bytes.Equal(r.sum, calcSum) { + return 0, EOFHashMismatchError{Sum: calcSum} } - return + return n, io.EOF } +// Close implements the Close method of io.ReadCloser. +// This Close method simply closes the wrapped io.ReadCloser. func (r *blobReader) Close() error { return r.rc.Close() -} +}
\ No newline at end of file diff --git a/pkg/blobstore/blobreader_test.go b/pkg/blobstore/blobreader_test.go new file mode 100644 index 0000000..8b5702b --- /dev/null +++ b/pkg/blobstore/blobreader_test.go @@ -0,0 +1,41 @@ +// Copyright (c) 2013 The Grumble Authors +// The use of this source code is goverened by a BSD-style +// license that can be found in the LICENSE-file. + +package blobstore + +import ( + "bytes" + "io" + "io/ioutil" + "testing" +) + +type blobReaderTest struct { + Key string + ExpectedSum string + Data string +} + +var blobReaderTests = []blobReaderTest{ + { + Key: "a3da7877f94ad4cf58636a395fff77537cb8b919", + ExpectedSum: "a3da7877f94ad4cf58636a395fff77537cb8b919", + Data: "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + }, +} + +func TestBlobReader(t *testing.T) { + for _, test := range blobReaderTests { + rc := ioutil.NopCloser(bytes.NewBufferString(test.Data)) + br, err := newBlobReader(rc, test.Key) + if err != nil { + t.Errorf("unable to construct blob reader: %v", err) + continue + } + _, err = io.Copy(ioutil.Discard, br) + if err != nil { + t.Errorf("got error: %v", err) + } + } +}
\ No newline at end of file diff --git a/pkg/blobstore/blobstore.go b/pkg/blobstore/blobstore.go index f7bf0f4..a53c61c 100644 --- a/pkg/blobstore/blobstore.go +++ b/pkg/blobstore/blobstore.go @@ -2,8 +2,7 @@ // The use of this source code is goverened by a BSD-style // license that can be found in the LICENSE-file. -// This package implements a simple disk-persisted content-addressed -// blobstore. +// This package implements a simple disk-persisted content-addressed blobstore. package blobstore import ( @@ -13,263 +12,179 @@ import ( "io/ioutil" "os" "path/filepath" - "sync" ) -type BlobStore struct { - dir string -} - var ( - ErrBadFile = errors.New("a bad file exists in the blobstore directory. unable to create container directores.") - ErrNoSuchKey = errors.New("no such key") - ErrInvalidKey = errors.New("invalid key") -) + // ErrNoSuchKey signals that a blob with the given key does + // not exist in the BlobStore. + ErrNoSuchKey = errors.New("blobstore: no such key") -var ( - defaultStore *BlobStore - defaultMutex sync.Mutex + // ErrBadKey signals that the given key is not well formed. + ErrBadKey = errors.New("blobstore: bad key") ) -// Open an existing, or create a new BlobStore at path. -// Path must point to a directory, and must already exist. -// See NewBlobStore for more information. -func Open(path string) (err error) { - defaultMutex.Lock() - defer defaultMutex.Unlock() - - if defaultStore != nil { - panic("Default BlobStore already open") - } - - defaultStore, err = NewBlobStore(path) - return -} - -// Close the open default BlobStore. This removes the lockfile allowing -// other processes to open the BlobStore. -func Close() (err error) { - if defaultStore == nil { - panic("DefaultStore not open") - } - - err = defaultStore.Close() - if err != nil { - return - } - - defaultStore = nil - return -} - -// Lookup a blob by its key and return a buffer containing the contents -// of the blob. -func Get(key string) (buf []byte, err error) { - return defaultStore.Get(key) -} - -// Store a blob. If the blob was successfully stored, the returned key -// can be used to retrieve the buf from the BlobStore. -func Put(buf []byte) (key string, err error) { - return defaultStore.Put(buf) -} - -// Open an existing, or create a new BlobStore residing at path. -// Path must point to a directory, and must already exist. -func NewBlobStore(path string) (bs *BlobStore, err error) { - // Does the directory exist? - dir, err := os.Open(path) - if err != nil { - return - } - dir.Close() - - dirStructureExists := true - // Check whether a 'blobstore' file exists in the directory. - // The existence of the file signals that the directory already - // has the correct hierarchy structure. - bsf, err := os.Open(filepath.Join(path, "blobstore")) - if err != nil { - if os.IsNotExist(err) { - dirStructureExists = false - } - } else { - bsf.Close() - } - - if !dirStructureExists { - for i := 0; i < 256; i++ { - outer := filepath.Join(path, hex.EncodeToString([]byte{byte(i)})) - err = os.Mkdir(outer, 0700) - if os.IsExist(err) { - // The path already exists. Stat it to check whether it is indeed - // a directory. - fi, err := os.Stat(outer) - if err != nil { - return nil, err - } - if !fi.IsDir() { - return nil, ErrBadFile - } - } else if err != nil { - return nil, err - } - for j := 0; j < 256; j++ { - inner := filepath.Join(outer, hex.EncodeToString([]byte{byte(j)})) - err = os.Mkdir(inner, 0700) - if os.IsExist(err) { - // The path already exists. Stat it to check whether it is indeed - // a directory. - fi, err := os.Stat(outer) - if err != nil { - return nil, err - } - if !fi.IsDir() { - return nil, ErrBadFile - } - } else if err != nil { - return nil, err - } - } - } - - // Add a blobstore file to signal that a correct directory - // structure exists for this blobstore. - bsf, err = os.Create(filepath.Join(path, "blobstore")) - if err != nil { - return nil, err - } - bsf.Close() - } - - bs = &BlobStore{ - dir: path, - } - return bs, nil +// BlobStore represents a simple disk-persisted content addressible +// blob store that uses the file system for persistence. +// +// Blobs in the blobstore are indexed by their SHA1 hash. +// +// The BlobStore is backed by a directory on the filesystem. This +// directory contains subdirectories which contain keys (SHA1 hashes). +// Each subdirectory is named according to the first hex-encoded byte +// of the keys that subdirectory contains. +// +// For example, a file that has the content 'hello world' will have +// the SHA1 hash '2aae6c35c94fcfb415dbe95f408b9ce91ee846ed'. If our +// blobstore's backing directory is called 'blobstore', the blob with +// only 'hello world' in it will be stored as follows: +// +// blobstore/2a/2aae6c35c94fcfb415dbe95f408b9ce91ee846ed +// +// The BlobStore is self-synchronizing, relying on the filesystem +// operations to ensure atomicity. Thus, accessing a single BlobStore +// from multiple goroutines should have no ill side effects. +type BlobStore struct { + dir string } -// Close an open BlobStore. This removes the lockfile allowing -// other processes to open the BlobStore. -func (bs *BlobStore) Close() (err error) { - return nil +// Open opens an existing BlobStore. The path parameter must +// point to a directory that already exists for correct +// operation, however, the Open function does not check that +// this is the case. +func Open(path string) BlobStore { + return BlobStore{dir: path} } -// Checks that a given key is a valid key for the BlobStore. -// If it is, it returns the three components that make up the on-disk path -// the given key can be found or should be stored at. -func getKeyComponents(key string) (dir1, dir2, fn string, err error) { - // SHA1 digests are 40 bytes long when hex-encoded +// isValidKey checks whether key is a valid BlobStore key. +func isValidKey(key string) bool { + // SHA1 digests are 40 bytes long when hex-encoded. if len(key) != 40 { - err = ErrInvalidKey - return + return false } + // Check whether the string is valid hex-encoding. - _, err = hex.DecodeString(key) + _, err := hex.DecodeString(key) if err != nil { - err = ErrInvalidKey - return + return false } - return key[0:2], key[2:4], key[4:], nil + return true } -// Lookup the path hat a key would have on disk. -// Returns an error if the key is not a valid BlobStore key. -func (bs *BlobStore) pathForKey(key string) (fn string, err error) { - dir1, dir2, rest, err := getKeyComponents(key) - if err != nil { - return +// extractKeyComponents returns the directory and the filename that the +// blob identified by key should be stored under in the BlobStore. +// This function also checks whether the key is valid. If not, it returns +// ErrBadKey. +func extractKeyComponents(key string) (dir string, fn string, err error) { + if !isValidKey(key) { + return "", "", ErrBadKey } - - fn = filepath.Join(bs.dir, dir1, dir2, rest) - return + return key[0:2], key, nil } -// Lookup a blob by its key and return a buffer containing the contents -// of the blob. -func (bs *BlobStore) Get(key string) (buf []byte, err error) { - fn, err := bs.pathForKey(key) +// Get returns a byte slice containing the contents of +// the blob identified by key. If no such blob is found, +// Get returns ErrNoSuchKey. +func (bs BlobStore) Get(key string) ([]byte, error) { + dir, fn, err := extractKeyComponents(key) if err != nil { - return + return nil, err } - file, err := os.Open(fn) + blobfn := filepath.Join(bs.dir, dir, fn) + f, err := os.Open(blobfn) if os.IsNotExist(err) { - err = ErrNoSuchKey - return + return nil, ErrNoSuchKey } else if err != nil { - return + return nil, err } - br, err := newBlobReader(file, key) + br, err := newBlobReader(f, key) if err != nil { - file.Close() - return + f.Close() + return nil, err } defer br.Close() - buf, err = ioutil.ReadAll(br) + buf, err := ioutil.ReadAll(br) if err != nil { - return + return nil, err } - return + return buf, nil } -// Store a blob. If the blob was successfully stored, the returned key -// can be used to retrieve the buf from the BlobStore. -func (bs *BlobStore) Put(buf []byte) (key string, err error) { +// Put puts the contents of blob into the BlobStore. If +// the blob was successfully stored, the returned key can +// be used to retrieve the buf from the BlobStore at a +// later time. +func (bs BlobStore) Put(buf []byte) (key string, err error) { // Calculate the key for the blob. We can't really delay it more than this, // since we need to know the key for the blob to check whether it's already on // disk. h := sha1.New() - h.Write(buf) + _, err = h.Write(buf) + if err != nil { + return "", err + } key = hex.EncodeToString(h.Sum(nil)) // Get the components that make up the on-disk // path for the blob. - dir1, dir2, rest, err := getKeyComponents(key) + dir, fn, err := extractKeyComponents(key) if err != nil { - return + return "", err } - blobpath := filepath.Join(bs.dir, dir1, dir2, rest) - blobdir := filepath.Join(bs.dir, dir1, dir2) + blobdir := filepath.Join(bs.dir, dir) + blobpath := filepath.Join(blobdir, fn) // Check if the blob already exists. - file, err := os.Open(blobpath) + _, err = os.Stat(blobpath) if err == nil { - // File exists. Job's done. - file.Close() - return - } else { - if os.IsNotExist(err) { - // No such file exists on disk. Ready to rock! - } else { - return - } + // The file already exists. Our job is done. + return key, nil + } else if os.IsNotExist(err) { + // The blob does not exist on disk yet. + // Fallthrough. + } else if err != nil { + return "", err } - // Create a temporary file to write to. Once we're done, we - // can atomically rename the file to the correct key. - file, err = ioutil.TempFile(blobdir, rest) - if err != nil { - return + // Ensure that blobdir exist. + err = os.Mkdir(blobdir, 0750) + if err != nil && !os.IsExist(err) { + return "", err } - tmpfn := file.Name() + // Create a temporary file to write to. + // + // Once we're done, we can atomically rename the file + // to the correct key. + // + // This method is racy: two callers can attempt to write + // the same blob at the same time. This shouldn't affect + // the consistency of the final blob, but worst case, we've + // done some extra work. + f, err := ioutil.TempFile(blobdir, fn) + if err != nil { + return "", err + } - _, err = file.Write(buf) + tmpfn := f.Name() + _, err = f.Write(buf) if err != nil { - return + f.Close() + return "", err } - err = file.Sync() + err = f.Sync() if err != nil { + f.Close() return "", err } - err = file.Close() + err = f.Close() if err != nil { return "", err } @@ -277,7 +192,7 @@ func (bs *BlobStore) Put(buf []byte) (key string, err error) { err = os.Rename(tmpfn, blobpath) if err != nil { os.Remove(tmpfn) - return + return "", err } return key, nil diff --git a/pkg/blobstore/blobstore_test.go b/pkg/blobstore/blobstore_test.go index 030e0d0..f643c58 100644 --- a/pkg/blobstore/blobstore_test.go +++ b/pkg/blobstore/blobstore_test.go @@ -10,91 +10,9 @@ import ( "encoding/hex" "io/ioutil" "os" - "path/filepath" "testing" ) -func TestMakeAllCreateAll(t *testing.T) { - dir, err := ioutil.TempDir("", "blobstore") - if err != nil { - t.Error(err) - return - } - defer os.RemoveAll(dir) - - bs, err := NewBlobStore(dir) - if err != nil { - t.Error(err) - return - } - defer bs.Close() - - // Check whether the blobstore created all the directories... - for i := 0; i < 256; i++ { - for j := 0; j < 256; j++ { - dirname := filepath.Join(dir, hex.EncodeToString([]byte{byte(i)}), hex.EncodeToString([]byte{byte(j)})) - fi, err := os.Stat(dirname) - if err != nil { - t.Fatal(err) - } - if !fi.IsDir() { - t.Errorf("Not a directory") - } - } - } -} - -func TestAllInvalidFiles(t *testing.T) { - dir, err := ioutil.TempDir("", "blobstore") - if err != nil { - t.Error(err) - return - } - defer os.RemoveAll(dir) - - err = ioutil.WriteFile(filepath.Join(dir, "00"), []byte{0x0f, 0x00}, 0600) - if err != nil { - t.Error(err) - } - - _, err = NewBlobStore(dir) - if err == ErrBadFile { - // Success - } else if err != nil { - t.Error(err) - } else { - t.Error("NewBlobStore returned without error") - } -} - -func TestAllInvalidFilesLevel2(t *testing.T) { - dir, err := ioutil.TempDir("", "blobstore") - if err != nil { - t.Error(err) - return - } - defer os.RemoveAll(dir) - - err = os.Mkdir(filepath.Join(dir, "00"), 0700) - if err != nil { - t.Error(err) - } - - err = ioutil.WriteFile(filepath.Join(dir, "00", "00"), []byte{0x0f, 0x00}, 0600) - if err != nil { - t.Error(err) - } - - _, err = NewBlobStore(dir) - if err == ErrBadFile { - // Success - } else if err != nil { - t.Error(err) - } else { - t.Error("NewBlobStore returned without error") - } -} - func TestStoreRetrieve(t *testing.T) { dir, err := ioutil.TempDir("", "blobstore") if err != nil { @@ -103,12 +21,7 @@ func TestStoreRetrieve(t *testing.T) { } defer os.RemoveAll(dir) - bs, err := NewBlobStore(dir) - if err != nil { - t.Error(err) - return - } - defer bs.Close() + bs := OpenBlobStore(dir) data := []byte{0xde, 0xad, 0xca, 0xfe, 0xba, 0xbe, 0xbe, 0xef} @@ -136,12 +49,7 @@ func TestReadNonExistantKey(t *testing.T) { } defer os.RemoveAll(dir) - bs, err := NewBlobStore(dir) - if err != nil { - t.Error(err) - return - } - defer bs.Close() + bs := OpenBlobStore(dir) h := sha1.New() h.Write([]byte{0x42}) @@ -160,12 +68,7 @@ func TestReadInvalidKeyLength(t *testing.T) { } defer os.RemoveAll(dir) - bs, err := NewBlobStore(dir) - if err != nil { - t.Error(err) - return - } - defer bs.Close() + bs := OpenBlobStore(dir) key := "" for i := 0; i < 5; i++ { @@ -173,13 +76,13 @@ func TestReadInvalidKeyLength(t *testing.T) { } _, err = bs.Get(key) - if err != ErrInvalidKey { + if err != ErrBadKey { t.Error("Expected invalid key for %v, got %v", key, err) return } } -func TestReadInvalidKeyNonHex(t *testing.T) { +func TestReadBadKeyNonHex(t *testing.T) { dir, err := ioutil.TempDir("", "blobstore") if err != nil { t.Error(err) @@ -187,12 +90,7 @@ func TestReadInvalidKeyNonHex(t *testing.T) { } defer os.RemoveAll(dir) - bs, err := NewBlobStore(dir) - if err != nil { - t.Error(err) - return - } - defer bs.Close() + bs := OpenBlobStore(dir) key := "" for i := 0; i < 40; i++ { @@ -200,38 +98,8 @@ func TestReadInvalidKeyNonHex(t *testing.T) { } _, err = bs.Get(key) - if err != ErrInvalidKey { - t.Errorf("Expected invalid key for %v, got %v", key, err) + if err != ErrBadKey { + t.Errorf("Expected bad key for %v, got %v", key, err) return } } - -func TestDefaultBlobStore(t *testing.T) { - dir, err := ioutil.TempDir("", "blobstore") - if err != nil { - t.Error(err) - return - } - defer os.RemoveAll(dir) - - err = Open(dir) - if err != nil { - t.Error(err) - } - - data := []byte{0xf, 0x0, 0x0, 0xb, 0xa, 0xf} - - key, err := Put(data) - if err != nil { - t.Error(err) - } - - fetchedData, err := Get(key) - if err != nil { - t.Error(err) - } - - if !bytes.Equal(fetchedData, data) { - t.Errorf("stored data and retrieved data does not match: %v vs. %v", fetchedData, data) - } -} |