diff options
author | Christopher Haster <chaster@utexas.edu> | 2018-08-09 17:06:17 +0300 |
---|---|---|
committer | Christopher Haster <chaster@utexas.edu> | 2018-10-18 17:55:47 +0300 |
commit | 126ef8b07fc5f111fa2e3b4076441471dcd2e40e (patch) | |
tree | 19e99f8adb682f5480a0c018eef0da4b12f27fcf | |
parent | e4a0d586d5b38f092bd0c44ccece8659c6f4a025 (diff) |
Added allocation randomization for dynamic wear-leveling
This implements the second step of full dynamic wear-leveling, block
allocation randomization. This is the key part the uniformly distributes
wear across the filesystem, even through reboots.
The entropy actually comes from the filesystem itself, by xoring
together all of the CRCs in the metadata-pairs on the filesystem. While
this sounds like a ridiculous operation, it's easy to do when we already
scan the metadata-pairs at mount time.
This gives us a random number we can use for block allocation.
Unfortunately it's not a great general purpose random generator as the
output only changes every filesystem write. Fortunately that's exactly
when we need our allocator.
---
Additionally, the randomization created a mess for the testing
framework. Fortunately, this method of randomization is deterministic.
A very useful property for reproducing bugs.
-rw-r--r-- | emubd/lfs_emubd.c | 104 | ||||
-rw-r--r-- | emubd/lfs_emubd.h | 4 | ||||
-rw-r--r-- | lfs.c | 15 | ||||
-rw-r--r-- | lfs.h | 1 | ||||
-rwxr-xr-x | tests/corrupt.py | 66 | ||||
-rwxr-xr-x | tests/debug.py | 98 | ||||
-rwxr-xr-x | tests/stats.py | 4 | ||||
-rwxr-xr-x | tests/test_corrupt.sh | 13 | ||||
-rwxr-xr-x | tests/test_move.sh | 10 | ||||
-rwxr-xr-x | tests/test_orphan.sh | 2 |
10 files changed, 254 insertions, 63 deletions
diff --git a/emubd/lfs_emubd.c b/emubd/lfs_emubd.c index 682ad92..de63057 100644 --- a/emubd/lfs_emubd.c +++ b/emubd/lfs_emubd.c @@ -19,6 +19,40 @@ #include <inttypes.h> +// Emulated block device utils +static inline void lfs_emubd_tole32(lfs_emubd_t *emu) { + emu->cfg.read_size = lfs_tole32(emu->cfg.read_size); + emu->cfg.prog_size = lfs_tole32(emu->cfg.prog_size); + emu->cfg.block_size = lfs_tole32(emu->cfg.block_size); + emu->cfg.block_count = lfs_tole32(emu->cfg.block_count); + + emu->stats.read_count = lfs_tole32(emu->stats.read_count); + emu->stats.prog_count = lfs_tole32(emu->stats.prog_count); + emu->stats.erase_count = lfs_tole32(emu->stats.erase_count); + + for (int i = 0; i < sizeof(emu->history.blocks) / + sizeof(emu->history.blocks[0]); i++) { + emu->history.blocks[i] = lfs_tole32(emu->history.blocks[i]); + } +} + +static inline void lfs_emubd_fromle32(lfs_emubd_t *emu) { + emu->cfg.read_size = lfs_fromle32(emu->cfg.read_size); + emu->cfg.prog_size = lfs_fromle32(emu->cfg.prog_size); + emu->cfg.block_size = lfs_fromle32(emu->cfg.block_size); + emu->cfg.block_count = lfs_fromle32(emu->cfg.block_count); + + emu->stats.read_count = lfs_fromle32(emu->stats.read_count); + emu->stats.prog_count = lfs_fromle32(emu->stats.prog_count); + emu->stats.erase_count = lfs_fromle32(emu->stats.erase_count); + + for (int i = 0; i < sizeof(emu->history.blocks) / + sizeof(emu->history.blocks[0]); i++) { + emu->history.blocks[i] = lfs_fromle32(emu->history.blocks[i]); + } +} + + // Block device emulated on existing filesystem int lfs_emubd_create(const struct lfs_config *cfg, const char *path) { lfs_emubd_t *emu = cfg->context; @@ -46,20 +80,39 @@ int lfs_emubd_create(const struct lfs_config *cfg, const char *path) { } // Load stats to continue incrementing - snprintf(emu->child, LFS_NAME_MAX, "stats"); + snprintf(emu->child, LFS_NAME_MAX, ".stats"); FILE *f = fopen(emu->path, "r"); if (!f) { - return -errno; - } + memset(&emu->stats, 0, sizeof(emu->stats)); + } else { + size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f); + lfs_emubd_fromle32(emu); + if (res < 1) { + return -errno; + } - size_t res = fread(&emu->stats, sizeof(emu->stats), 1, f); - if (res < 1) { - return -errno; + err = fclose(f); + if (err) { + return -errno; + } } - err = fclose(f); - if (err) { - return -errno; + // Load history + snprintf(emu->child, LFS_NAME_MAX, ".history"); + f = fopen(emu->path, "r"); + if (!f) { + memset(&emu->history, 0, sizeof(emu->history)); + } else { + size_t res = fread(&emu->history, sizeof(emu->history), 1, f); + lfs_emubd_fromle32(emu); + if (res < 1) { + return -errno; + } + + err = fclose(f); + if (err) { + return -errno; + } } return 0; @@ -161,6 +214,13 @@ int lfs_emubd_prog(const struct lfs_config *cfg, lfs_block_t block, return -errno; } + // update history and stats + if (block != emu->history.blocks[0]) { + memcpy(&emu->history.blocks[1], &emu->history.blocks[0], + sizeof(emu->history) - sizeof(emu->history.blocks[0])); + emu->history.blocks[0] = block; + } + emu->stats.prog_count += 1; return 0; } @@ -206,13 +266,15 @@ int lfs_emubd_sync(const struct lfs_config *cfg) { lfs_emubd_t *emu = cfg->context; // Just write out info/stats for later lookup - snprintf(emu->child, LFS_NAME_MAX, "config"); + snprintf(emu->child, LFS_NAME_MAX, ".config"); FILE *f = fopen(emu->path, "w"); if (!f) { return -errno; } + lfs_emubd_tole32(emu); size_t res = fwrite(&emu->cfg, sizeof(emu->cfg), 1, f); + lfs_emubd_fromle32(emu); if (res < 1) { return -errno; } @@ -222,13 +284,33 @@ int lfs_emubd_sync(const struct lfs_config *cfg) { return -errno; } - snprintf(emu->child, LFS_NAME_MAX, "stats"); + snprintf(emu->child, LFS_NAME_MAX, ".stats"); f = fopen(emu->path, "w"); if (!f) { return -errno; } + lfs_emubd_tole32(emu); res = fwrite(&emu->stats, sizeof(emu->stats), 1, f); + lfs_emubd_fromle32(emu); + if (res < 1) { + return -errno; + } + + err = fclose(f); + if (err) { + return -errno; + } + + snprintf(emu->child, LFS_NAME_MAX, ".history"); + f = fopen(emu->path, "w"); + if (!f) { + return -errno; + } + + lfs_emubd_tole32(emu); + res = fwrite(&emu->history, sizeof(emu->history), 1, f); + lfs_emubd_fromle32(emu); if (res < 1) { return -errno; } diff --git a/emubd/lfs_emubd.h b/emubd/lfs_emubd.h index 0fd4387..64afa3e 100644 --- a/emubd/lfs_emubd.h +++ b/emubd/lfs_emubd.h @@ -46,6 +46,10 @@ typedef struct lfs_emubd { } stats; struct { + lfs_block_t blocks[4]; + } history; + + struct { uint32_t read_size; uint32_t prog_size; uint32_t block_size; @@ -879,6 +879,8 @@ static int32_t lfs_dir_fetchmatch(lfs_t *lfs, dir->tail[1] = temptail[1]; dir->split = tempsplit; dir->locals = templocals; + + lfs->seed ^= crc; crc = 0xffffffff; } else { err = lfs_bd_crc32(lfs, dir->pair[0], @@ -2874,6 +2876,7 @@ static int lfs_init(lfs_t *lfs, const struct lfs_config *cfg) { lfs->root[0] = 0xffffffff; lfs->root[1] = 0xffffffff; lfs->mlist = NULL; + lfs->seed = 0; lfs->globals.s.movepair[0] = 0xffffffff; lfs->globals.s.movepair[1] = 0xffffffff; lfs->globals.s.moveid = 0x3ff; @@ -2962,12 +2965,6 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) { return err; } - // setup free lookahead - lfs->free.off = 0; - lfs->free.size = 0; - lfs->free.i = 0; - lfs_alloc_ack(lfs); - // load superblock lfs_mdir_t root; err = lfs_dir_fetch(lfs, &root, (const lfs_block_t[2]){0, 1}); @@ -3065,6 +3062,12 @@ int lfs_mount(lfs_t *lfs, const struct lfs_config *cfg) { lfs->globals.s.moveid); } + // setup free lookahead + lfs->free.off = lfs->seed % lfs->cfg->block_size; + lfs->free.size = 0; + lfs->free.i = 0; + lfs_alloc_ack(lfs); + return 0; cleanup: @@ -382,6 +382,7 @@ typedef struct lfs { lfs_block_t root[2]; lfs_mlist_t *mlist; + uint32_t seed; lfs_global_t globals; lfs_global_t locals; diff --git a/tests/corrupt.py b/tests/corrupt.py index 76f07ce..5e2a9a3 100755 --- a/tests/corrupt.py +++ b/tests/corrupt.py @@ -3,37 +3,41 @@ import struct import sys import os +import argparse -def main(*paths): - # find most recent block - file = None - rev = None - for path in paths: - try: - nfile = open(path, 'r+b') - nrev, = struct.unpack('<I', nfile.read(4)) - - assert rev != nrev - if not file or ((rev - nrev) & 0x80000000): - file = nfile - rev = nrev - except IOError: - pass - - # go to last commit - tag = 0 - while True: - try: - ntag, = struct.unpack('<I', file.read(4)) - except struct.error: - break - - tag ^= ntag - file.seek(tag & 0xfff, os.SEEK_CUR) - - # lob off last 3 bytes - file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR) - file.truncate() +def corrupt(block): + with open(block, 'r+b') as file: + # skip rev + file.read(4) + + # go to last commit + tag = 0 + while True: + try: + ntag, = struct.unpack('<I', file.read(4)) + except struct.error: + break + + tag ^= ntag + file.seek(tag & 0xfff, os.SEEK_CUR) + + # lob off last 3 bytes + file.seek(-((tag & 0xfff) + 3), os.SEEK_CUR) + file.truncate() + +def main(args): + if args.n or not args.blocks: + with open('blocks/.history', 'rb') as file: + for i in range(int(args.n or 1)): + last, = struct.unpack('<I', file.read(4)) + args.blocks.append('blocks/%x' % last) + + for block in args.blocks: + print 'corrupting %s' % block + corrupt(block) if __name__ == "__main__": - main(*sys.argv[1:]) + parser = argparse.ArgumentParser() + parser.add_argument('-n') + parser.add_argument('blocks', nargs='*') + main(parser.parse_args()) diff --git a/tests/debug.py b/tests/debug.py new file mode 100755 index 0000000..58fb81e --- /dev/null +++ b/tests/debug.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python2 + +import struct +import binascii + +TYPES = { + (0x1ff, 0x001): 'reg', + (0x1ff, 0x002): 'dir', + (0x1ff, 0x011): 'superblock', + (0x1ff, 0x012): 'root', + (0x1ff, 0x030): 'delete', + (0x1f0, 0x080): 'globals', + (0x1ff, 0x0c0): 'tail soft', + (0x1ff, 0x0c1): 'tail hard', + (0x1ff, 0x0f0): 'crc', + (0x1ff, 0x040): 'struct dir', + (0x1ff, 0x041): 'struct inline', + (0x1ff, 0x042): 'struct ctz', + (0x100, 0x100): 'attr', +} + +def typeof(type): + for prefix in range(9): + mask = 0x1ff & ~((1 << prefix)-1) + if (mask, type & mask) in TYPES: + return TYPES[mask, type & mask] + ( + ' [%0*x]' % (prefix/4, type & ((1 << prefix)-1)) + if prefix else '') + else: + return '[%02x]' % type + +def main(*blocks): + # find most recent block + file = None + rev = None + crc = None + versions = [] + + for block in blocks: + try: + nfile = open(block, 'rb') + ndata = nfile.read(4) + ncrc = binascii.crc32(ndata) + nrev, = struct.unpack('<I', ndata) + + assert rev != nrev + if not file or ((rev - nrev) & 0x80000000): + file = nfile + rev = nrev + crc = ncrc + + versions.append((nrev, '%s (rev %d)' % (block, nrev))) + except IOError: + pass + + print "--- %s ---" % ', '.join(v for _,v in sorted(versions, reverse=True)) + + # go through each tag, print useful information + print "%-4s %-8s %-14s %3s %3s %s" % ( + 'off', 'tag', 'type', 'id', 'len', 'dump') + + tag = 0 + off = 4 + while True: + try: + data = file.read(4) + crc = binascii.crc32(data, crc) + ntag, = struct.unpack('<I', data) + except struct.error: + break + + tag ^= ntag + off += 4 + + type = (tag & 0x7fc00000) >> 22 + id = (tag & 0x003ff000) >> 12 + size = (tag & 0x00000fff) >> 0 + + data = file.read(size) + if type == 0x0f0: + crc = binascii.crc32(data[:4], crc) + else: + crc = binascii.crc32(data, crc) + + print '%04x: %08x %-14s %3s %3d %-23s %-8s' % ( + off, tag, + typeof(type) + (' bad!' if type == 0x0f0 and ~crc else ''), + id if id != 0x3ff else '.', size, + ' '.join('%02x' % ord(c) for c in data[:8]), + ''.join(c if c >= ' ' and c <= '~' else '.' for c in data[:8])) + + off += tag & 0xfff + if type == 0x0f0: + crc = 0 + +if __name__ == "__main__": + import sys + main(*sys.argv[1:]) diff --git a/tests/stats.py b/tests/stats.py index 2ba1fb6..ab21b59 100755 --- a/tests/stats.py +++ b/tests/stats.py @@ -7,7 +7,7 @@ import os import re def main(): - with open('blocks/config') as file: + with open('blocks/.config') as file: s = struct.unpack('<LLLL', file.read()) print 'read_size: %d' % s[0] print 'prog_size: %d' % s[1] @@ -18,7 +18,7 @@ def main(): os.path.getsize(os.path.join('blocks', f)) for f in os.listdir('blocks') if re.match('\d+', f)) - with open('blocks/stats') as file: + with open('blocks/.stats') as file: s = struct.unpack('<QQQ', file.read()) print 'read_count: %d' % s[0] print 'prog_count: %d' % s[1] diff --git a/tests/test_corrupt.sh b/tests/test_corrupt.sh index 1491dac..a73b7f8 100755 --- a/tests/test_corrupt.sh +++ b/tests/test_corrupt.sh @@ -71,24 +71,25 @@ echo "--- Sanity check ---" rm -rf blocks lfs_mktree lfs_chktree +BLOCKS="$(ls blocks | grep -vw '[01]')" echo "--- Block corruption ---" -for i in {2..33} +for b in $BLOCKS do rm -rf blocks mkdir blocks - ln -s /dev/zero blocks/$(printf '%x' $i) + ln -s /dev/zero blocks/$b lfs_mktree lfs_chktree done echo "--- Block persistance ---" -for i in {2..33} +for b in $BLOCKS do rm -rf blocks mkdir blocks lfs_mktree - chmod a-w blocks/$(printf '%x' $i) || true + chmod a-w blocks/$b lfs_mktree lfs_chktree done @@ -96,7 +97,7 @@ done echo "--- Big region corruption ---" rm -rf blocks mkdir blocks -for i in {2..255} +for i in {2..512} do ln -s /dev/zero blocks/$(printf '%x' $i) done @@ -106,7 +107,7 @@ lfs_chktree echo "--- Alternating corruption ---" rm -rf blocks mkdir blocks -for i in {2..511..2} +for i in {2..1024..2} do ln -s /dev/zero blocks/$(printf '%x' $i) done diff --git a/tests/test_move.sh b/tests/test_move.sh index 824d4d3..53f7e15 100755 --- a/tests/test_move.sh +++ b/tests/test_move.sh @@ -59,7 +59,7 @@ tests/test.py << TEST lfs_rename(&lfs, "b/hello", "c/hello") => 0; lfs_unmount(&lfs) => 0; TEST -tests/corrupt.py blocks/{4,5} +tests/corrupt.py -n 1 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0; @@ -86,8 +86,7 @@ tests/test.py << TEST lfs_rename(&lfs, "c/hello", "d/hello") => 0; lfs_unmount(&lfs) => 0; TEST -tests/corrupt.py blocks/{6,7} -tests/corrupt.py blocks/{8,9} +tests/corrupt.py -n 2 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0; @@ -166,7 +165,7 @@ tests/test.py << TEST lfs_rename(&lfs, "b/hi", "c/hi") => 0; lfs_unmount(&lfs) => 0; TEST -tests/corrupt.py blocks/{4,5} +tests/corrupt.py -n 1 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "b") => 0; @@ -193,8 +192,7 @@ tests/test.py << TEST lfs_rename(&lfs, "c/hi", "d/hi") => 0; lfs_unmount(&lfs) => 0; TEST -tests/corrupt.py blocks/{6,7} -tests/corrupt.py blocks/{8,9} +tests/corrupt.py -n 2 tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; lfs_dir_open(&lfs, &dir[0], "c") => 0; diff --git a/tests/test_orphan.sh b/tests/test_orphan.sh index e66e592..9c2cb7b 100755 --- a/tests/test_orphan.sh +++ b/tests/test_orphan.sh @@ -17,7 +17,7 @@ tests/test.py << TEST TEST # corrupt most recent commit, this should be the update to the previous # linked-list entry and should orphan the child -tests/corrupt.py blocks/{6,7} +tests/corrupt.py tests/test.py << TEST lfs_mount(&lfs, &cfg) => 0; |