diff --git a/cpp/rs/rs.cpp b/cpp/rs/rs.cpp index de5eac7a..ec360639 100644 --- a/cpp/rs/rs.cpp +++ b/cpp/rs/rs.cpp @@ -25,7 +25,22 @@ struct rs { }; static struct rs* cached[256] = { - nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr
,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, }; static uint8_t* rs_cauchy_matrix(uint8_t parity) { @@ -62,9 +77,6 @@ static uint8_t* rs_cauchy_matrix(uint8_t parity) { static struct rs* rs_new(uint8_t parity) { struct rs* r = (struct rs*)malloc_or_die(sizeof(struct rs), "cannot allocate 'struct rs'\n"); - if (r == nullptr) { - die("can't allocate 'struct rs'\n"); - } r->parity = parity; r->matrix = rs_cauchy_matrix(parity); return r; @@ -102,11 +114,11 @@ uint64_t rs_block_size(struct rs* r, uint64_t size) { return (size + (d - 1)) / d; } -void rs_compute_parity(struct rs* r, uint64_t block_size, const uint8_t** data, uint8_t** parity) { +void rs_compute_parity(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) { int D = rs_data_blocks(r->parity); int P = rs_parity_blocks(r->parity); // parity = r->matrix * data - for (size_t i = 0; i < block_size; i++) { + for (size_t i = 0; i < size; i++) { for (int j = 0; j < P; j++) { const uint8_t* col = &r->matrix[D*D + j*D]; parity[j][i] = 0; @@ -119,7 +131,7 @@ void rs_compute_parity(struct rs* r, uint64_t block_size, const uint8_t** data, void rs_recover( struct rs* r, - uint64_t block_size, + uint64_t size, const uint8_t* have_blocks, const uint8_t** have, uint8_t want_block, @@ -127,7 +139,6 @@ void rs_recover( ) { int d = rs_data_blocks(r->parity); int b = rs_blocks(r->parity); - bool have_all_data = true; // Preliminary checks for (int i = 0; i < d; i++) { if (have_blocks[i] >= b) { 
@@ -136,7 +147,6 @@ void rs_recover( if (have_blocks[i] == want_block) { die("have_blocks[%d]=%d == want_block=%d\n", i, have_blocks[i], want_block); } - have_all_data = have_all_data && have_blocks[i] < d; if (i == 0) { continue; } @@ -174,7 +184,7 @@ void rs_recover( } } // want = have_to_want * have - for (size_t i = 0; i < block_size; i++) { + for (size_t i = 0; i < size; i++) { want[i] = 0; for (int j = 0; j < d; j++) { want[i] ^= gf_mul(have_to_want[j], have[j][i]); diff --git a/cpp/rs/rs.h b/cpp/rs/rs.h index 2b1537c4..d83b9e96 100644 --- a/cpp/rs/rs.h +++ b/cpp/rs/rs.h @@ -19,9 +19,6 @@ // of a uint8_t, but this is a fairly irrelevant quirk of EggsFS, // although it does nicely enforce that we do not go beyond what's // resonable for data storage purposes (rather than for error correction). -// -// TODO allow to pass short data blocks so that the input won't have to -// be copied to compute the parity in the simple case. #ifndef EGGS_RS #define EGGS_RS @@ -56,31 +53,24 @@ struct rs* rs_get(uint8_t parity); uint8_t rs_parity(struct rs* rs); -// Tells us how big each block will be given a certain size we want to store. -// -// We'll have that `rs_block_size() * D >= size`, and the user -// is responsible with creating the data blocks by just filling the blocks -// with the data, and padding the remainder with zeros. -uint64_t rs_block_size(struct rs* rs, uint64_t size); - // Computes all parity blocks at once. This is what you use when you store // something for the first time. void rs_compute_parity( struct rs* rs, - uint64_t block_size, - const uint8_t** data, // input, uint8_t[D][block_size] - uint8_t** parity // output, uint8_t[P][block_size] + uint64_t size, + const uint8_t** data, // input, uint8_t[D][size] + uint8_t** parity // output, uint8_t[P][size] ); // Computes an arbitrary block given at least `D` other blocks. // This is what you use to recover a lost block. 
void rs_recover( struct rs* rs, - uint64_t block_size, + uint64_t size, const uint8_t* have_blocks, // [0, B)[D], must be sorted. - const uint8_t** blocks, // uint8_t[D][block_size] + const uint8_t** blocks, // uint8_t[D][size] uint8_t want_block, // [0, B) and not in `have_blocks` - uint8_t* block // uint8_t[block_size] + uint8_t* block // uint8_t[size] ); #ifdef __cplusplus diff --git a/cpp/shard/ShardDB.cpp b/cpp/shard/ShardDB.cpp index 3b5485be..46366391 100644 --- a/cpp/shard/ShardDB.cpp +++ b/cpp/shard/ShardDB.cpp @@ -1207,25 +1207,27 @@ struct ShardDBImpl { } } } else { - // consistency check for the general case - // - // For parity mode 1+M (mirroring), for any M, the server can validate - // that the CRC of the span equals the CRC of every block. - // For N+M in the general case, it can probably validate that the CRC of - // the span equals the concatenation of the CRCs of the N, and that the - // CRC of the 1st of the M equals the XOR of the CRCs of the N. - // It cannot validate anything about the rest of the M though (scrubbing - // can validate the rest, if it wants to, but it would be too expensive - // for the shard to validate). + // Consistency check for the general case. Given what we do in + // `rs.h`, we know that the span is the concatenation of the + // data blocks, and that the first parity block is the XOR of the + // data blocks. We can't check the rest without the data though. 
std::array blocksCrc32; memset(&blocksCrc32[0], 0, 4); + std::array parity0Crc32; + memset(&parity0Crc32[0], 0, 4); for (const auto& block: req.bodyBlocks.els) { blocksCrc32 = crc32cCombine(blocksCrc32, block.crc32.data, req.blockSize); + parity0Crc32[0] ^= block.crc32.data[0]; + parity0Crc32[1] ^= block.crc32.data[1]; + parity0Crc32[2] ^= block.crc32.data[2]; + parity0Crc32[3] ^= block.crc32.data[3]; } if (req.crc32.data != blocksCrc32) { return false; } - // TODO check parity block CRC32 + if (req.bodyBlocks.els[req.parity.dataBlocks()].crc32.data != parity0Crc32) { + return false; + } } return true; diff --git a/cpp/tests/tests.cpp b/cpp/tests/tests.cpp index 96ac1cbe..e5a204cb 100644 --- a/cpp/tests/tests.cpp +++ b/cpp/tests/tests.cpp @@ -639,54 +639,45 @@ TEST_CASE("crc32c") { TEST_CASE("RS") { uint64_t rand = 0; for (int i = 0; i < 16*16*100; i++) { - uint8_t numData = 2 + splitmix64(rand)%(16-2); - uint8_t numParity = 1 + splitmix64(rand)%(16-1); - std::vector data(1 + splitmix64(rand)%1000); - for (size_t i = 0; i < data.size(); i++) { + int numData = 2 + splitmix64(rand)%(16-2); + int numParity = 1 + splitmix64(rand)%(16-1); + int blockSize = 1 + splitmix64(rand)%100; + std::vector data(blockSize*(numData+numParity)); // all blocks + for (size_t i = 0; i < numData*blockSize; i++) { // fill data blocks with random bytes data[i] = splitmix64(rand) & 0xFF; } - Parity parity(numData, numParity); - auto rs = rs_get(parity.u8); - size_t blockSize = rs_block_size(rs, data.size()); - // prepare data blocks - std::vector blocks(blockSize*parity.blocks(), 0); - memcpy(&blocks[0], &data[0], data.size()); - std::vector dataBlocksPtrs(parity.dataBlocks()); - for (int i = 0; i < parity.dataBlocks(); i++) { - dataBlocksPtrs[i] = &blocks[i*blockSize]; + auto rs = rs_get(Parity(numData, numParity).u8); + std::vector blocksPtrs(numData+numParity); + for (int i = 0; i < numData+numParity; i++) { + blocksPtrs[i] = &data[i*blockSize]; } - // prepare parity blocks - std::vector 
parityBlocksPtrs(parity.parityBlocks()); - for (int i = 0; i < parity.parityBlocks(); i++) { - parityBlocksPtrs[i] = &blocks[(parity.dataBlocks()+i)*blockSize]; - } - rs_compute_parity(rs, blockSize, &dataBlocksPtrs[0], &parityBlocksPtrs[0]); + rs_compute_parity(rs, blockSize, (const uint8_t**)&blocksPtrs[0], &blocksPtrs[numData]); // verify that the first parity block is the XOR of all the original data for (size_t i = 0; i < blockSize; i++) { uint8_t expectedParity = 0; - for (int j = 0; j < parity.dataBlocks(); j++) { - expectedParity ^= dataBlocksPtrs[j][i]; + for (int j = 0; j < numData; j++) { + expectedParity ^= data[j*blockSize + i]; } - REQUIRE(expectedParity == parityBlocksPtrs[0][i]); + REQUIRE(expectedParity == data[numData*blockSize + i]); } // restore a random block, using random blocks. { - std::vector allBlocks(parity.blocks()); - for (int i = 0; i < parity.blocks(); i++) { + std::vector allBlocks(numData+numParity); + for (int i = 0; i < allBlocks.size(); i++) { allBlocks[i] = i; } - for (int i = 0; i < std::min(parity.dataBlocks()+1, parity.blocks()-1); i++) { - std::swap(allBlocks[i], allBlocks[i+1+ splitmix64(rand)%(parity.blocks()-1-i)]); + for (int i = 0; i < std::min(numData+1, numData+numParity-1); i++) { + std::swap(allBlocks[i], allBlocks[i+1+ splitmix64(rand)%(numData+numParity-1-i)]); } - std::sort(allBlocks.begin(), allBlocks.begin()+parity.dataBlocks()); - std::vector havePtrs(parity.dataBlocks()); - for (int i = 0; i < parity.dataBlocks(); i++) { - havePtrs[i] = &blocks[allBlocks[i]*blockSize]; + std::sort(allBlocks.begin(), allBlocks.begin()+numData); + std::vector havePtrs(numData); + for (int i = 0; i < numData; i++) { + havePtrs[i] = &data[allBlocks[i]*blockSize]; } - uint8_t wantBlock = allBlocks[parity.dataBlocks()]; + uint8_t wantBlock = allBlocks[numData]; std::vector recoveredBlock(blockSize); rs_recover(rs, blockSize, &allBlocks[0], &havePtrs[0], wantBlock, &recoveredBlock[0]); - std::vector expectedBlock(blocks.begin() + 
wantBlock*blockSize, blocks.begin() + (wantBlock+1)*blockSize); + std::vector expectedBlock(data.begin() + wantBlock*blockSize, data.begin() + (wantBlock+1)*blockSize); REQUIRE(expectedBlock == recoveredBlock); } } diff --git a/go/rs/rs.go b/go/rs/rs.go index 8427083b..c1c33253 100644 --- a/go/rs/rs.go +++ b/go/rs/rs.go @@ -73,30 +73,6 @@ func (r *Rs) Parity() Parity { return Parity(C.rs_parity(r.r)) } -func (r *Rs) BlockSize(size int) int { - if size < 0 { - panic(fmt.Errorf("negative size %v", size)) - } - return int(C.rs_block_size(r.r, C.ulong(size))) -} - -func (r *Rs) SplitData(data []byte) [][]byte { - if len(data) == 0 { - panic("empty data") - } - dataBlocks := make([][]byte, r.Parity().DataBlocks()) - blockSize := r.BlockSize(len(data)) - for i := range dataBlocks { - dataBlocks[i] = make([]byte, blockSize) - // some blocks might be entirely empty, if the data is small and the number - // of data blocks is large - if i*blockSize <= len(data) { - copy(dataBlocks[i], data[i*blockSize:]) - } - } - return dataBlocks -} - func (r *Rs) ComputeParity(data [][]byte) [][]byte { parity := make([][]byte, r.Parity().ParityBlocks()) blockSize := len(data[0]) @@ -115,7 +91,7 @@ func (r *Rs) checkBlockSize(data [][]byte, parity [][]byte) int { blockSize := len(data[0]) for i := range data { if len(data[i]) != blockSize { - panic(fmt.Errorf("bad block size, expected %v, got %v", blockSize, len(data[i]))) + panic(fmt.Errorf("differing block size, expected %v, got %v", blockSize, len(data[i]))) } } if parity != nil { @@ -124,14 +100,10 @@ func (r *Rs) checkBlockSize(data [][]byte, parity [][]byte) int { } for i := range parity { if len(parity[i]) != blockSize { - panic(fmt.Errorf("bad block size, expected %v, got %v", blockSize, len(parity[i]))) + panic(fmt.Errorf("differing block size, expected %v, got %v", blockSize, len(parity[i]))) } } } - expectedBlockSize := r.BlockSize(blockSize * len(data)) - if expectedBlockSize != blockSize { - panic(fmt.Errorf("bad block size, 
expected %v, got %v", expectedBlockSize, blockSize)) - } return blockSize } @@ -168,9 +140,22 @@ func (r *Rs) RecoverInto( ) { blockSize := r.checkBlockSize(blocks, nil) if len(block) != blockSize { - panic(fmt.Errorf("bad block size, expected %v, got %v", blockSize, len(block))) + panic(fmt.Errorf("differing block size, expected %v, got %v", blockSize, len(block))) + } + for i, haveBlock := range haveBlocks { + if int(haveBlock) >= r.Parity().DataBlocks()+r.Parity().ParityBlocks() { + panic(fmt.Errorf("have_blocks[%d]=%d >= %d", i, haveBlock, r.Parity().DataBlocks()+r.Parity().ParityBlocks())) + } + if haveBlock == wantBlock { + panic(fmt.Errorf("have_blocks[%d]=%d == want_block=%d", i, haveBlock, wantBlock)) + } + if i == 0 { + continue + } + if haveBlock <= haveBlocks[i-1] { + panic(fmt.Errorf("have_blocks[%d]=%d <= have_blocks[%d-1]=%d", i, haveBlock, i, haveBlocks[i-1])) + } } - // TODO check forr sortedness etc. blocksPtrs := (**C.uchar)(C.malloc(C.size_t(uintptr(r.Parity().DataBlocks()) * unsafe.Sizeof((*C.uchar)(nil))))) for i := range blocks { C.set_ptr(blocksPtrs, C.ulong(i), (*C.uchar)(&blocks[i][0])) diff --git a/go/rs/rs_test.go b/go/rs/rs_test.go index cd7f33c1..f338f94e 100644 --- a/go/rs/rs_test.go +++ b/go/rs/rs_test.go @@ -19,31 +19,23 @@ func TestComputeParity(t *testing.T) { for i := 0; i < 16*16*100; i++ { numData := int(2 + rand.Uint32()%(16-2)) numParity := int(1 + rand.Uint32()%(16-1)) - data := make([]byte, 1+int(rand.Uint32()%1000)) - rand.Read(data) + blockSize := 1 + int(rand.Uint32()%100) + data := make([]byte, blockSize*(numData+numParity)) + rand.Read(data[:blockSize*numData]) + blocks := make([][]byte, numData+numParity) + for i := range blocks { + blocks[i] = data[i*blockSize : (i+1)*blockSize] + } rs := GetRs(MkParity(uint8(numData), uint8(numParity))) - dataBlocks := rs.SplitData(data) - parityBlocks := rs.ComputeParity(dataBlocks) - allBlocks := append(append([][]byte{}, dataBlocks...), parityBlocks...) 
- assert.Equal(t, rs.Parity().DataBlocks(), len(dataBlocks)) - assert.Equal(t, rs.Parity().ParityBlocks(), len(parityBlocks)) - // Verify that the concatenation is the original data - concatData := []byte{} - for _, dataBlock := range dataBlocks { - concatData = append(concatData, dataBlock...) - } - assert.Equal(t, data, concatData[:len(data)]) - for _, b := range concatData[len(data):] { - assert.Equal(t, uint8(0), b) - } + rs.ComputeParityInto(blocks[:numData], blocks[numData:]) // Verify that the first parity block is the XOR of all the data blocks - expectedParity0 := make([]byte, len(dataBlocks[0])) - for _, dataBlock := range dataBlocks { - for i, b := range dataBlock { - expectedParity0[i] ^= b + expectedParity0 := make([]byte, blockSize) + for i := 0; i < numData; i++ { + for j, b := range blocks[i] { + expectedParity0[j] ^= b } } - assert.Equal(t, expectedParity0, parityBlocks[0]) + assert.Equal(t, expectedParity0, blocks[numData]) // Restore a random block { haveBlocks := make([]uint8, numData+numParity) @@ -62,10 +54,10 @@ func TestComputeParity(t *testing.T) { sort.Slice(haveBlocks, func(i, j int) bool { return haveBlocks[i] < haveBlocks[j] }) have := make([][]byte, numData) for i := range have { - have[i] = allBlocks[haveBlocks[i]] + have[i] = blocks[haveBlocks[i]] } recoveredBlock := rs.Recover(haveBlocks, have, wantBlock) - assert.Equal(t, allBlocks[wantBlock], recoveredBlock) + assert.Equal(t, blocks[wantBlock], recoveredBlock) } } }