Mirror of https://github.com/XTXMarkets/ternfs.git, synced 2026-01-06 11:00:10 -06:00
First version of kernel module
Initial version really by Pawel, but many changes in between. Big outstanding issues:

* span cache reclamation (unbounded memory otherwise...)
* bad block service detection and workarounds
* corrupted blocks detection and workaround

Co-authored-by: Paweł Dziepak <pawel.dziepak@xtxmarkets.com>
@@ -16,8 +16,8 @@ if(NOT CMAKE_BUILD_TYPE)
)
endif()

if (NOT (${CMAKE_BUILD_TYPE} MATCHES "^(debug|release|alpine|sanitized|valgrind)$"))
message(FATAL_ERROR "Build type must be one of debug, release, sanitized, alpine, got ${CMAKE_BUILD_TYPE}")
if (NOT (${CMAKE_BUILD_TYPE} MATCHES "^(debug|release|alpine|alpine-debug|sanitized|valgrind)$"))
message(FATAL_ERROR "Build type must be one of debug, release, sanitized, alpine, alpine-debug got ${CMAKE_BUILD_TYPE}")
endif()

set(CMAKE_CXX_STANDARD 20)
@@ -33,13 +33,13 @@ add_compile_options("$<$<CONFIG:valgrind>:-march=haswell;-maes;-mgfni>")
add_compile_options("$<$<NOT:$<CONFIG:valgrind>>:-march=skylake;-mgfni>")

# performance/debug stuff
add_compile_options("$<$<NOT:$<CONFIG:debug>>:-O3>")
add_compile_options("$<$<CONFIG:debug>:-O;-DEGGS_DEBUG>")
add_compile_options("$<$<NOT:$<CONFIG:debug,alpine-debug>>:-O3>")
add_compile_options("$<$<CONFIG:debug,alpine-debug>:-O;-DEGGS_DEBUG>")

# We build the release build statically in Alpine
add_compile_options("$<$<CONFIG:alpine>:-DEGGS_ALPINE>")
add_link_options("$<$<CONFIG:alpine>:-static>")
add_link_options("$<$<NOT:$<CONFIG:alpine>>:-no-pie>")
add_compile_options("$<$<CONFIG:alpine,alpine-debug>:-DEGGS_ALPINE>")
add_link_options("$<$<CONFIG:alpine,alpine-debug>:-static>")
add_link_options("$<$<NOT:$<CONFIG:alpine,alpine-debug>>:-no-pie>")

# sanitizer options
set(SANITIZE_OPTIONS "-fsanitize=undefined,address,integer,function;-fno-sanitize-recover=all;-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/ubsan-ignorelist")

@@ -5,7 +5,7 @@ from pathlib import Path
import subprocess

if len(sys.argv) < 2:
print(f'Usage: {sys.argv[0]} release|alpine|sanitized|debug|valgrind [NINJA_ARG ...]', file=sys.stderr)
print(f'Usage: {sys.argv[0]} release|alpine|alpine-debug|sanitized|debug|valgrind [NINJA_ARG ...]', file=sys.stderr)
sys.exit(2)

if len(sys.argv) == 1:
@@ -19,9 +19,9 @@ repo_dir = cpp_dir.parent
build_dir = cpp_dir / 'build' / build_type
build_dir.mkdir(parents=True, exist_ok=True)

if build_type == 'alpine' and 'IN_EGGS_BUILD_CONTAINER' not in os.environ:
if build_type in ('alpine', 'alpine-debug') and 'IN_EGGS_BUILD_CONTAINER' not in os.environ:
subprocess.run(
['docker', 'run', '--rm', '-i', '--mount', f'type=bind,src={repo_dir},dst=/eggsfs', 'REDACTED', '/eggsfs/cpp/build.py', 'alpine'] + sys.argv[2:],
['docker', 'run', '--rm', '-i', '--mount', f'type=bind,src={repo_dir},dst=/eggsfs', 'REDACTED', '/eggsfs/cpp/build.py', build_type] + sys.argv[2:],
check=True,
)
else:

@@ -264,7 +264,6 @@ struct MakeDirectoryStateMachine {
void createDirectoryInode() {
auto& shardReq = env.needsShard(MAKE_DIRECTORY_CREATE_DIR, state.dirId().shard()).setCreateDirectoryInode();
shardReq.id = state.dirId();
shardReq.info = req.info;
shardReq.ownerId = req.ownerId;
}

@@ -738,6 +737,7 @@ struct SoftUnlinkDirectoryStateMachine {
shardReq.name = req.name;
shardReq.targetId = req.targetId;
shardReq.wasMoved = false;
shardReq.creationTime = req.creationTime;
}

void afterRollback(EggsError err, const ShardRespContainer* resp) {
@@ -3,26 +3,6 @@

std::ostream& operator<<(std::ostream& out, const BincodeBytesRef& x) {
return goLangBytesFmt(out, x.data(), x.size());
/*
out << "b\"";
uint8_t len = x.size();
const uint8_t* data = (const uint8_t*)x.data();
for (int i = 0; i < len; i++) {
uint8_t ch = data[i];
if (isprint(ch)) {
out << ch;
} else if (ch == 0) {
out << "\\0";
} else {
const char cfill = out.fill();
out << std::hex << std::setfill('0');
out << "\\x" << std::setw(2) << (int)ch;
out << std::setfill(cfill) << std::dec;
}
}
out << "\"";
return out;
*/
}

std::ostream& operator<<(std::ostream& out, const BincodeBytes& x) {

@@ -4,6 +4,7 @@
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>
#include <iomanip>

// Throwing in static initialization is nasty, and there is no useful stacktrace
// Also use direct syscalls to write the error as iostream might not be initialized
@@ -48,4 +49,27 @@ std::ostream& goLangBytesFmt(std::ostream& out, const char* str, size_t len) {

std::ostream& operator<<(std::ostream& out, const GoLangBytesFmt& bytes) {
return goLangBytesFmt(out, bytes.str, bytes.len);
}

std::ostream& goLangQuotedStringFmt(std::ostream& out, const char* data, size_t len) {
out << "\"";
for (int i = 0; i < len; i++) {
uint8_t ch = data[i];
if (isprint(ch)) {
out << ch;
} else if (ch == 0) {
out << "\\0";
} else {
const char cfill = out.fill();
out << std::hex << std::setfill('0');
out << "\\x" << std::setw(2) << (int)ch;
out << std::setfill(cfill) << std::dec;
}
}
out << "\"";
return out;
}

std::ostream& operator<<(std::ostream& out, const GoLangQuotedStringFmt& bytes) {
return goLangQuotedStringFmt(out, bytes.str, bytes.len);
}
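
As a usage note for the new formatter: wrapping a byte range in GoLangQuotedStringFmt streams it as a double-quoted string with non-printable bytes escaped as \0 or \xNN. A minimal sketch; the header name here is an assumption, the struct and operator<< are the ones declared in the header hunk that follows:

    // Illustrative only: assumes the project header that declares
    // GoLangQuotedStringFmt (the header name is a guess here).
    #include <iostream>
    #include <string>
    #include "Common.hpp"

    int main() {
        std::string s("hello\x01world", 11);
        // Printable bytes pass through, the 0x01 byte is escaped, so this
        // prints: "hello\x01world" (with surrounding double quotes).
        std::cout << GoLangQuotedStringFmt(s.data(), s.size()) << "\n";
        return 0;
    }
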
@@ -52,4 +52,15 @@ struct GoLangBytesFmt {
GoLangBytesFmt(const char* str_, size_t len_) : str(str_), len(len_) {}
};

std::ostream& operator<<(std::ostream& out, const GoLangBytesFmt& bytes);

std::ostream& goLangQuotedStringFmt(std::ostream& out, const char* str, size_t len);

struct GoLangQuotedStringFmt {
const char* str;
size_t len;

GoLangQuotedStringFmt(const char* str_, size_t len_) : str(str_), len(len_) {}
};

std::ostream& operator<<(std::ostream& out, const GoLangQuotedStringFmt& bytes);
cpp/core/MsgsGen.cpp (1974): file diff suppressed because it is too large
cpp/core/MsgsGen.hpp (1092): file diff suppressed because it is too large
@@ -1,6 +1,6 @@
add_library(crc32c crc32c.h crc32c.cpp iscsi.hpp)
add_library(crc32c crc32c.h crc32c.c iscsi.h)

add_executable(crc32c-tables tables.cpp iscsi.hpp)
add_executable(crc32c-tables tables.cpp iscsi.h)

add_executable(crc32c-tests tests.cpp)
target_link_libraries(crc32c-tests PRIVATE crc32c)
@@ -1,11 +1,29 @@
|
||||
#ifndef __KERNEL__
|
||||
|
||||
#include "crc32c.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#define kernel_static
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint32_t u32;
|
||||
|
||||
#else
|
||||
|
||||
#define kernel_static static
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __clang__
|
||||
__attribute__((no_sanitize("integer")))
|
||||
static uint32_t crc32_fusion_kernel(uint32_t acc_a, const char* buf, size_t n_blocks) {
|
||||
#endif
|
||||
#ifdef __KERNEL__
|
||||
__attribute__((target("crc32,pclmul")))
|
||||
#endif
|
||||
static u32 crc32_fusion_kernel(u32 acc_a, const char* buf, size_t n_blocks) {
|
||||
size_t stride = n_blocks * 24 + 8;
|
||||
// Four chunks:
|
||||
// Chunk A: 0 through stride
|
||||
@@ -18,8 +36,8 @@ static uint32_t crc32_fusion_kernel(uint32_t acc_a, const char* buf, size_t n_bl
|
||||
__m128i x2 = _mm_loadu_si128((__m128i*)(buf2 + 16));
|
||||
__m128i x3 = _mm_loadu_si128((__m128i*)(buf2 + 32));
|
||||
__m128i x4 = _mm_loadu_si128((__m128i*)(buf2 + 48));
|
||||
uint32_t acc_b = 0;
|
||||
uint32_t acc_c = 0;
|
||||
u32 acc_b = 0;
|
||||
u32 acc_c = 0;
|
||||
// Parallel fold remaining blocks of 64 from D, and 24 from each of A/B/C.
|
||||
// k1 == magic(4*128+32-1)
|
||||
// k2 == magic(4*128-32-1)
|
||||
@@ -100,16 +118,16 @@ static uint32_t crc32_fusion_kernel(uint32_t acc_a, const char* buf, size_t n_bl
|
||||
stack_a = ~stack_a;
|
||||
stack_b = ~stack_b;
|
||||
stack_c = ~stack_c;
|
||||
uint32_t magic_a = ((uint32_t)0x80000000) >> (bits_a & 31); bits_a >>= 5;
|
||||
uint32_t magic_b = ((uint32_t)0x80000000) >> (bits_b & 31); bits_b >>= 5;
|
||||
uint32_t magic_c = ((uint32_t)0x80000000) >> (bits_c & 31); bits_c >>= 5;
|
||||
u32 magic_a = ((u32)0x80000000) >> (bits_a & 31); bits_a >>= 5;
|
||||
u32 magic_b = ((u32)0x80000000) >> (bits_b & 31); bits_b >>= 5;
|
||||
u32 magic_c = ((u32)0x80000000) >> (bits_c & 31); bits_c >>= 5;
|
||||
bits_a -= bits_b;
|
||||
bits_b -= bits_c;
|
||||
for (; bits_c; --bits_c) magic_a = _mm_crc32_u32(magic_a, 0), magic_b = _mm_crc32_u32(magic_b, 0), magic_c = _mm_crc32_u32(magic_c, 0);
|
||||
for (; bits_b; --bits_b) magic_a = _mm_crc32_u32(magic_a, 0), magic_b = _mm_crc32_u32(magic_b, 0);
|
||||
for (; bits_a; --bits_a) magic_a = _mm_crc32_u32(magic_a, 0);
|
||||
for (;;) {
|
||||
uint32_t low = stack_a & 1;
|
||||
u32 low = stack_a & 1;
|
||||
if (!(stack_a >>= 1)) break;
|
||||
__m128i x = _mm_cvtsi32_si128(magic_a);
|
||||
uint64_t y = _mm_cvtsi128_si64(_mm_clmulepi64_si128(x, x, 0));
|
||||
@@ -130,13 +148,18 @@ static uint32_t crc32_fusion_kernel(uint32_t acc_a, const char* buf, size_t n_bl
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
uint64_t abc = _mm_cvtsi128_si64(_mm_xor_si128(_mm_xor_si128(vec_c, vec_a), vec_b));
|
||||
// Apply missing <<32 and fold down to 32-bits.
|
||||
uint32_t crc = _mm_crc32_u64(0, _mm_extract_epi64(x1, 0));
|
||||
u32 crc = _mm_crc32_u64(0, _mm_extract_epi64(x1, 0));
|
||||
crc = _mm_crc32_u64(crc, abc ^ _mm_extract_epi64(x1, 1));
|
||||
return crc;
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
__attribute__((no_sanitize("integer")))
|
||||
uint32_t crc32c(uint32_t crc, const char* buf, size_t length) {
|
||||
#endif
|
||||
#ifdef __KERNEL__
|
||||
__attribute__((target("crc32")))
|
||||
#endif
|
||||
kernel_static u32 crc32c(u32 crc, const char* buf, size_t length) {
|
||||
crc = ~crc; // preset to -1 (distinguish leading zeros)
|
||||
if (length >= 31) {
|
||||
size_t n_blocks = (length - 16) / 136;
|
||||
@@ -149,7 +172,7 @@ uint32_t crc32c(uint32_t crc, const char* buf, size_t length) {
|
||||
const char* kernel_start = kernel_end - kernel_length;
|
||||
length -= kernel_start - buf;
|
||||
for (; buf != kernel_start; ++buf) {
|
||||
crc = _mm_crc32_u8(crc, *(const uint8_t*)buf);
|
||||
crc = _mm_crc32_u8(crc, *(const u8*)buf);
|
||||
}
|
||||
if (n_blocks) {
|
||||
length -= kernel_length;
|
||||
@@ -161,17 +184,34 @@ uint32_t crc32c(uint32_t crc, const char* buf, size_t length) {
|
||||
crc = _mm_crc32_u64(crc, *(const uint64_t*)buf);
|
||||
}
|
||||
for (; length; --length, ++buf) {
|
||||
crc = _mm_crc32_u8(crc, *(const uint8_t*)buf);
|
||||
crc = _mm_crc32_u8(crc, *(const u8*)buf);
|
||||
}
|
||||
return ~crc; // post-invert -- distinguish scaled multiples
|
||||
}
|
||||
|
||||
#include "iscsi.hpp"
|
||||
#ifdef __clang__
|
||||
__attribute__((no_sanitize("integer")))
|
||||
#endif
|
||||
#ifdef __KERNEL__
|
||||
__attribute__((target("crc32")))
|
||||
#endif
|
||||
kernel_static u32 crc32c_simple(u32 crc, const char* buf, size_t length) {
|
||||
crc = ~crc; // preset to -1 (distinguish leading zeros)
|
||||
for (; length >= 8; length -= 8, buf += 8) {
|
||||
crc = _mm_crc32_u64(crc, *(const uint64_t*)buf);
|
||||
}
|
||||
for (; length; --length, ++buf) {
|
||||
crc = _mm_crc32_u8(crc, *(const u8*)buf);
|
||||
}
|
||||
return ~crc; // post-invert -- distinguish scaled multiples
|
||||
}
|
||||
|
||||
#include "iscsi.h"
|
||||
|
||||
// In the comments below, multiplication is meant to be modulo ISCSI_POLY.
|
||||
|
||||
// Stores x^2^0, ..., x^2^31. Can be generated with `mult_mod_p`, see tables.cpp.
|
||||
static uint32_t CRC_POWER_TABLE[31] = {
|
||||
static u32 CRC_POWER_TABLE[31] = {
|
||||
0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000,
|
||||
0x82f63b78, 0x6ea2d55c, 0x18b8ea18, 0x510ac59a, 0xb82be955,
|
||||
0xb8fdb1e7, 0x88e56f72, 0x74c360a4, 0xe4172b16, 0x0d65762a,
|
||||
@@ -182,7 +222,7 @@ static uint32_t CRC_POWER_TABLE[31] = {
|
||||
};
|
||||
|
||||
// Stores x^-(2^0), ..., x^-(2^31). Can be generated with `mult_mod_p`, see tables.cpp.
|
||||
static uint32_t CRC_INVERSE_POWER_TABLE[31] = {
|
||||
static u32 CRC_INVERSE_POWER_TABLE[31] = {
|
||||
0x05ec76f1, 0x0bd8ede2, 0x2f63b788, 0xfde39562, 0xbef0965e,
|
||||
0xd610d67e, 0xe67cce65, 0xa268b79e, 0x134fb088, 0x32998d96,
|
||||
0xcedac2cc, 0x70118575, 0x0e004a40, 0xa7864c8b, 0xbc7be916,
|
||||
@@ -193,7 +233,7 @@ static uint32_t CRC_INVERSE_POWER_TABLE[31] = {
|
||||
};
|
||||
|
||||
// Return x^(n * 2^k), or in other words, the factor to use to extend a CRC with zeros.
|
||||
static uint32_t x2n_mod_p(size_t n, uint32_t k) {
|
||||
static u32 x2n_mod_p(size_t n, u32 k) {
|
||||
// CRC_POWER_TABLE has all powers of two; we can multiply combinations
// of these to achieve any power.
|
||||
|
||||
@@ -210,7 +250,7 @@ static uint32_t x2n_mod_p(size_t n, uint32_t k) {
|
||||
// We walk along the p_is above and keep adding to the result.
|
||||
//
|
||||
// Note that 2^2^(31 + k) = 2^2^k TODO clarify
|
||||
uint32_t p = 1u << 31;
|
||||
u32 p = 1u << 31;
|
||||
for (; n != 0; n >>= 1, k++) {
|
||||
if (n & 1) {
|
||||
// p(x) = p(x) * 2^(k % 31)
|
||||
@@ -221,8 +261,8 @@ static uint32_t x2n_mod_p(size_t n, uint32_t k) {
|
||||
}
|
||||
|
||||
// Return x^-(n * 2^k), or in other words, the factor to use to extend a CRC with zeros.
|
||||
static uint32_t x2n_mod_p_inv(size_t n, uint32_t k) {
|
||||
uint32_t p = 1u << 31;
|
||||
static u32 x2n_mod_p_inv(size_t n, u32 k) {
|
||||
u32 p = 1u << 31;
|
||||
for (; n != 0; n >>= 1, k++) {
|
||||
if (n & 1) {
|
||||
p = crc32c_mult_mod_p(CRC_INVERSE_POWER_TABLE[k & 0x1F], p);
|
||||
@@ -231,7 +271,7 @@ static uint32_t x2n_mod_p_inv(size_t n, uint32_t k) {
|
||||
return p;
|
||||
}
|
||||
|
||||
uint32_t crc32c_zero_extend(uint32_t crc, ssize_t zeros) {
|
||||
kernel_static u32 crc32c_zero_extend(u32 crc, ssize_t zeros) {
|
||||
if (zeros > 0) {
|
||||
return ~crc32c_mult_mod_p(x2n_mod_p(zeros, 3), ~crc);
|
||||
} else {
|
||||
@@ -239,17 +279,17 @@ uint32_t crc32c_zero_extend(uint32_t crc, ssize_t zeros) {
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t crc32c_append(uint32_t crc_a, uint32_t crc_b, size_t len_b) {
|
||||
kernel_static u32 crc32c_append(u32 crc_a, u32 crc_b, size_t len_b) {
|
||||
// We need to extend crc_a with len_b*8 zeros (len_b is the number
|
||||
// of bytes) (without the inversion).
|
||||
// This amounts to performing `crc_a * x^(len_b*8)`.
|
||||
return crc32c_mult_mod_p(x2n_mod_p(len_b, 3), crc_a) ^ crc_b;
|
||||
}
|
||||
|
||||
uint32_t crc32c_xor(uint32_t crc_a, uint32_t crc_b, size_t len) {
|
||||
kernel_static u32 crc32c_xor(u32 crc_a, u32 crc_b, size_t len) {
|
||||
// We need to extend crc_a with the crc of len*8 bits.
|
||||
// We could do this in 32 steps rather than 32+32 with a dedicated
|
||||
// table, but probably doesn't matter.
|
||||
uint32_t crc_0 = ~crc32c_mult_mod_p(~(uint32_t)0, x2n_mod_p(len, 3));
|
||||
u32 crc_0 = ~crc32c_mult_mod_p(~(u32)0, x2n_mod_p(len, 3));
|
||||
return crc_a ^ crc_b ^ crc_0;
|
||||
}
|
||||
}
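
The point of crc32c_append is that the CRC of a concatenation can be computed from the CRCs of its pieces without touching the data again. A minimal check of that property in the style of the existing tests (it assumes crc32c.h declares crc32c and crc32c_append with the signatures used above, and that the crc32c library is linked in):

    // Illustrative check, not part of the commit.
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include "crc32c.h"

    int main() {
        std::string a = "hello, ";
        std::string b = "world";
        // CRCs of the two pieces, each starting from 0.
        uint32_t crcA = crc32c(0, a.data(), a.size());
        uint32_t crcB = crc32c(0, b.data(), b.size());
        // CRC of the concatenation computed directly...
        std::string ab = a + b;
        uint32_t crcAB = crc32c(0, ab.data(), ab.size());
        // ...matches the combination of the two partial CRCs.
        assert(crcAB == crc32c_append(crcA, crcB, b.size()));
        return 0;
    }
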
cpp/crc32c/iscsi.h (24): new file
@@ -0,0 +1,24 @@
|
||||
#define ISCSI_POLY 0x82F63B78u
|
||||
|
||||
// Return a(x) multiplied by b(x) modulo ISCSI_POLY. For speed, this requires
|
||||
// that a(x) not be zero.
|
||||
static u32 crc32c_mult_mod_p(u32 a, u32 b) {
|
||||
// m goes from x^0 to x^31
|
||||
u32 m = 1u << 31;
|
||||
u32 p = 0;
|
||||
for (;;) {
|
||||
// If a(x) contains x^n, add b(x)*x^n
|
||||
if (a & m) {
|
||||
p ^= b;
|
||||
// Exit when there are no higher bits in a(x)
|
||||
if ((a & (m - 1)) == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Go from x^n to x^(n+1)
|
||||
m >>= 1;
|
||||
// Go from b(x)*x^n to b(x)*x^(n+1)
|
||||
b = (b & 1) ? ((b >> 1) ^ ISCSI_POLY) : b >> 1;
|
||||
}
|
||||
return p;
|
||||
}
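
One property worth spelling out: since m starts at 1u << 31 and, per the comment, m goes from x^0 to x^31, the top bit encodes the constant polynomial 1 and therefore acts as the multiplicative identity. A tiny self-check (a sketch only: it defines the kernel-style typedefs itself and then includes this header, the way crc32c.c does, so it gets a local copy of the static function):

    #include <cassert>
    #include <cstdint>

    typedef uint8_t u8;
    typedef uint32_t u32;

    #include "iscsi.h"

    int main() {
        u32 b = 0x12345678u;
        // 1u << 31 represents x^0 == 1 in this bit order, so multiplying by it
        // leaves the other operand unchanged. The first argument is non-zero,
        // as the comment above requires.
        assert(crc32c_mult_mod_p(0x80000000u, b) == b);
        return 0;
    }
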
@@ -30,6 +30,7 @@ int main() {
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
auto s1 = randString(1 + wyhash64(&rand)%100);
|
||||
uint32_t crc1 = crc32c(0, (const char*)s1.data(), s1.size());
|
||||
ASSERT(crc1 == crc32c_append(0, crc1, s1.size()));
|
||||
auto s2 = randString(1 + wyhash64(&rand)%100);
|
||||
uint32_t crc2 = crc32c(0, (const char*)s2.data(), s2.size());
|
||||
std::vector<uint8_t> s = s1;
|
||||
|
||||
@@ -1,4 +1,4 @@
add_library(rs rs.h rs.cpp gf.hpp gf_tables.cpp)
add_library(rs rs.h rs.cpp gf_tables.c)

add_executable(rs-tests tests.cpp)
target_link_libraries(rs-tests PRIVATE rs)
cpp/rs/gf.hpp (110)
@@ -1,110 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
extern const uint8_t rs_gf_inv_table[256];
|
||||
extern const uint8_t rs_gf_log_table[256];
|
||||
extern const uint8_t rs_gf_exp_table[256];
|
||||
|
||||
inline uint8_t gf_inv(uint8_t x) {
|
||||
return rs_gf_inv_table[x];
|
||||
}
|
||||
|
||||
inline uint8_t gf_mul(uint8_t x, uint8_t y) {
|
||||
if (x == 0 || y == 0) {
|
||||
return 0;
|
||||
}
|
||||
int i = rs_gf_log_table[x] + rs_gf_log_table[y];
|
||||
return rs_gf_exp_table[i > 254 ? i - 255 : i];
|
||||
}
|
||||
|
||||
inline void gf_mul_expand_factor(uint8_t x, uint8_t* expanded_x) {
|
||||
for (int i = 0; i < 16; i++) {
|
||||
expanded_x[i] = gf_mul(i, x);
|
||||
}
|
||||
for (int i = 0; i < 16; i++) {
|
||||
expanded_x[16 + i] = gf_mul(i << 4, x);
|
||||
}
|
||||
}
|
||||
|
||||
inline uint8_t gf_mul_expanded(uint8_t x, const uint8_t* expanded_y) {
|
||||
return expanded_y[x & 0x0f] ^ expanded_y[16 + ((x & 0xf0) >> 4)];
|
||||
}
|
||||
|
||||
inline __m256i gf_mul_expanded_avx2(__m256i x, __m256i expanded_y, __m256i low_nibble_mask) {
|
||||
__m256i expanded_y_lo = _mm256_permute2x128_si256(expanded_y, expanded_y, 0x00);
|
||||
__m256i expanded_y_hi = _mm256_permute2x128_si256(expanded_y, expanded_y, 0x11);
|
||||
|
||||
__m256i x_lo = _mm256_and_si256(x, low_nibble_mask);
|
||||
__m256i x_hi = _mm256_and_si256(_mm256_srli_epi16(x, 4), low_nibble_mask);
|
||||
|
||||
return _mm256_xor_si256(
|
||||
_mm256_shuffle_epi8(expanded_y_lo, x_lo),
|
||||
_mm256_shuffle_epi8(expanded_y_hi, x_hi)
|
||||
);
|
||||
}
|
||||
|
||||
// From <https://github.com/intel/isa-l/blob/33a2d9484595c2d6516c920ce39a694c144ddf69/erasure_code/ec_base.c#L110>,
|
||||
// just Gaussian elimination.
|
||||
//
|
||||
// TODO this is in row-major, it'd be nice to have it in column-major
|
||||
// so that the final operation in rs_recover is more natural.
|
||||
__attribute__((noinline))
|
||||
static bool rs_gf_invert_matrix(uint8_t* in_mat, uint8_t* out_mat, const int n) {
|
||||
int i, j, k;
|
||||
uint8_t temp;
|
||||
|
||||
// Set out_mat[] to the identity matrix
|
||||
memset(out_mat, 0, n*n);
|
||||
for (i = 0; i < n; i++) {
|
||||
out_mat[i * n + i] = 1;
|
||||
}
|
||||
|
||||
// Inverse
|
||||
for (i = 0; i < n; i++) {
|
||||
// Check for 0 in pivot element
|
||||
if (in_mat[i * n + i] == 0) {
|
||||
// Find a row with non-zero in current column and swap
|
||||
for (j = i + 1; j < n; j++) {
|
||||
if (in_mat[j * n + i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (j == n) { // Couldn't find means it's singular
|
||||
return false;
|
||||
}
|
||||
|
||||
for (k = 0; k < n; k++) { // Swap rows i,j
|
||||
temp = in_mat[i * n + k];
|
||||
in_mat[i * n + k] = in_mat[j * n + k];
|
||||
in_mat[j * n + k] = temp;
|
||||
|
||||
temp = out_mat[i * n + k];
|
||||
out_mat[i * n + k] = out_mat[j * n + k];
|
||||
out_mat[j * n + k] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
temp = gf_inv(in_mat[i * n + i]); // 1/pivot
|
||||
for (j = 0; j < n; j++) { // Scale row i by 1/pivot
|
||||
in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
|
||||
out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
if (j == i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
temp = in_mat[j * n + i];
|
||||
for (k = 0; k < n; k++) {
|
||||
out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
|
||||
in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
// generated with gf_tables.py
|
||||
#ifndef __KERNEL__
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
extern const uint8_t rs_gf_exp_table[256] = {
|
||||
const uint8_t rs_gf_exp_table[256] = {
|
||||
0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
|
||||
0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
|
||||
0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
|
||||
@@ -20,7 +22,7 @@ extern const uint8_t rs_gf_exp_table[256] = {
|
||||
0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01,
|
||||
};
|
||||
|
||||
extern const uint8_t rs_gf_log_table[256] = {
|
||||
const uint8_t rs_gf_log_table[256] = {
|
||||
0x00, 0xff, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
|
||||
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
|
||||
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
|
||||
@@ -39,7 +41,7 @@ extern const uint8_t rs_gf_log_table[256] = {
|
||||
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07,
|
||||
};
|
||||
|
||||
extern const uint8_t rs_gf_inv_table[256] = {
|
||||
const uint8_t rs_gf_inv_table[256] = {
|
||||
0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, 0xb0, 0xe1, 0xe5, 0xc7,
|
||||
0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2,
|
||||
0x3a, 0x6e, 0x5a, 0xf1, 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2,
|
||||
cpp/rs/rs.cpp (413)
@@ -6,108 +6,10 @@
|
||||
#include <array>
|
||||
|
||||
#include "rs.h"
|
||||
#include "gf.hpp"
|
||||
|
||||
#define die(...) do { fprintf(stderr, __VA_ARGS__); raise(SIGABRT); } while(false)
|
||||
|
||||
static void* malloc_or_die(size_t size, const char* what) {
|
||||
void* ptr = malloc(size);
|
||||
if (ptr == nullptr) {
|
||||
die(what);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
struct rs {
|
||||
uint8_t parity;
|
||||
// uint8_t[D*B], in column-major.
|
||||
uint8_t* matrix;
|
||||
// uint8_t[D*P][32], in column-major. These are the lookup tables
|
||||
// to perform multiplication quickly.
|
||||
uint8_t* expanded_matrix;
|
||||
};
|
||||
|
||||
static struct rs* rs_cached[256] = {
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
};
|
||||
|
||||
// Note that we pervasively assume that the first column of the parity columns
|
||||
// is 1s, which causes the first parity to be the XORs of the data. So you can't
|
||||
// really change how the matrix is generated.
|
||||
static void rs_cauchy_matrix(struct rs* r) {
|
||||
int D = rs_data_blocks(r->parity);
|
||||
int B = rs_blocks(r->parity);
|
||||
uint8_t* matrix = r->matrix;
|
||||
memset(matrix, 0, D*B);
|
||||
// Identity in the d*d upper half
|
||||
for (int i = 0; i < D; i++) {
|
||||
matrix[D*i + i] = 1;
|
||||
}
|
||||
// Fill in the rest using cauchy
|
||||
for (int col = D; col < B; col++) {
|
||||
for (int row = 0; row < D; row++) {
|
||||
matrix[col*D + row] = gf_inv(col ^ row);
|
||||
}
|
||||
}
|
||||
// Scale the columns
|
||||
for (int col = D; col < B; col++) {
|
||||
uint8_t factor = gf_inv(matrix[col*D]);
|
||||
for (int row = 0; row < D; row++) {
|
||||
matrix[col*D + row] = gf_mul(matrix[col*D + row], factor);
|
||||
}
|
||||
}
|
||||
// Scale the rows
|
||||
for (int row = 1; row < D; row++) {
|
||||
uint8_t factor = gf_inv(matrix[D*D + row]);
|
||||
for (int col = D; col < B; col++) {
|
||||
matrix[col*D + row] = gf_mul(matrix[col*D + row], factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct rs* rs_new(uint8_t parity) {
|
||||
int B = rs_blocks(parity);
|
||||
int D = rs_data_blocks(parity);
|
||||
int P = rs_parity_blocks(parity);
|
||||
struct rs* r = (struct rs*)malloc_or_die(sizeof(struct rs) + B*D + D*P*32, "rs_new\n");
|
||||
r->parity = parity;
|
||||
r->matrix = (uint8_t*)(r + 1);
|
||||
r->expanded_matrix = r->matrix + B*D;
|
||||
rs_cauchy_matrix(r);
|
||||
for (int p = 0; p < P; p++) {
|
||||
for (int d = 0; d < D; d++) {
|
||||
gf_mul_expand_factor(r->matrix[D*D + D*p + d], &r->expanded_matrix[D*32*p + 32*d]);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static void rs_delete(struct rs* r) {
|
||||
free(r);
|
||||
}
|
||||
|
||||
static std::array<uint32_t, 4> rs_cpuidex(uint32_t function_id, uint32_t subfunction_id) {
|
||||
uint32_t a, b, c, d;
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"0"(function_id),"2"(subfunction_id));
|
||||
return {a, b, c, d};
|
||||
}
|
||||
|
||||
static uint8_t rs_chosen_cpu_level = RS_CPU_SCALAR;
|
||||
#define rs_malloc malloc
|
||||
#define rs_free free
|
||||
|
||||
// See `valgrind.h`
|
||||
static uint64_t rs_valgrind_client_request(uint64_t defaultResult, uint64_t reqID, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) {
|
||||
@@ -129,20 +31,48 @@ static bool rs_detect_valgrind() {
|
||||
return rs_valgrind_client_request(0, 0x1001, 0, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
bool rs_has_cpu_level(rs_cpu_level level) {
|
||||
const auto cpuid7 = (rs_cpuidex(0, 0)[0] >= 7) ? rs_cpuidex(7, 0) : std::array<uint32_t, 4>{0, 0, 0, 0};
|
||||
switch (level) {
|
||||
case RS_CPU_SCALAR:
|
||||
return true;
|
||||
case RS_CPU_AVX2:
|
||||
return cpuid7[1] & (1<<5);
|
||||
case RS_CPU_GFNI:
|
||||
return cpuid7[2] & (1<<8) && !rs_detect_valgrind();
|
||||
default:
|
||||
die("bad CPU level %d\n", level);
|
||||
}
|
||||
// This will emit vbroadcastb
|
||||
__attribute__((no_sanitize("integer")))
|
||||
static inline __m256i broadcast_u8(uint8_t x) {
|
||||
return _mm256_set_epi8(
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x
|
||||
);
|
||||
}
|
||||
|
||||
#include "rs_core.c"
|
||||
|
||||
uint8_t rs_parity(struct rs* r) {
|
||||
return r->parity;
|
||||
}
|
||||
|
||||
static struct rs* rs_cached[256] = {
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
|
||||
};
|
||||
|
||||
bool rs_has_cpu_level(rs_cpu_level level) {
|
||||
return rs_has_cpu_level_core(level);
|
||||
}
|
||||
|
||||
static uint8_t rs_chosen_cpu_level = RS_CPU_SCALAR;
|
||||
|
||||
__attribute__((constructor))
|
||||
void rs_detect_cpu_level() {
|
||||
if (rs_has_cpu_level(RS_CPU_GFNI)) {
|
||||
@@ -172,124 +102,46 @@ struct rs* rs_get(uint8_t parity) {
|
||||
}
|
||||
struct rs* r = __atomic_load_n(&rs_cached[parity], __ATOMIC_RELAXED);
|
||||
if (__builtin_expect(r == nullptr, 0)) {
|
||||
r = rs_new(parity);
|
||||
r = rs_new_core(parity);
|
||||
if (r == nullptr) {
|
||||
die("could not allocate RS data");
|
||||
}
|
||||
struct rs* expected = nullptr;
|
||||
if (!__atomic_compare_exchange_n(&rs_cached[parity], &expected, r, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
|
||||
// somebody else got to it first
|
||||
rs_delete(r);
|
||||
rs_delete_core(r);
|
||||
r = __atomic_load_n(&rs_cached[parity], __ATOMIC_RELAXED);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
uint8_t rs_parity(struct rs* r) {
|
||||
return r->parity;
|
||||
template<int D, int P> __attribute__((noinline))
|
||||
static void rs_compute_parity_scalar_tmpl(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
rs_compute_parity_scalar(D, P, r, size, data, parity);
|
||||
}
|
||||
|
||||
// This will emit vbroadcastb
|
||||
__attribute__((no_sanitize("integer")))
|
||||
inline __m256i broadcast_u8(uint8_t x) {
|
||||
return _mm256_set_epi8(
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x,
|
||||
x, x, x, x, x, x, x, x
|
||||
);
|
||||
template<int D, int P> __attribute__((noinline))
|
||||
static void rs_compute_parity_avx2_tmpl(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
rs_compute_parity_avx2(D, P, r, size, data, parity);
|
||||
}
|
||||
|
||||
template<int D, int P>
|
||||
static void rs_compute_parity_single(struct rs* r, uint64_t i, const uint8_t** data, uint8_t** parity) {
|
||||
parity[0][i] = 0;
|
||||
for (int d = 0; d < D; d++) {
|
||||
parity[0][i] ^= data[d][i];
|
||||
}
|
||||
for (int p = 1; p < P; p++) {
|
||||
const uint8_t* factor = &r->expanded_matrix[D*32*p];
|
||||
parity[p][i] = 0;
|
||||
for (int d = 0; d < D; d++, factor += 32) {
|
||||
parity[p][i] ^= gf_mul_expanded(data[d][i], factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int D, int P>
|
||||
__attribute__((noinline))
|
||||
void rs_compute_parity_scalar(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
// parity = r->matrix * data
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
rs_compute_parity_single<D, P>(r, i, data, parity);
|
||||
}
|
||||
}
|
||||
|
||||
template<int D, int P>
|
||||
__attribute__((noinline))
|
||||
void rs_compute_parity_avx2(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
__m256i low_nibble_mask = broadcast_u8(0x0f);
|
||||
size_t avx_leftover = size % 32;
|
||||
size_t avx_size = size-avx_leftover;
|
||||
for (uint64_t i = 0; i < avx_size; i += 32) {
|
||||
{
|
||||
__m256i parity_0 = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
parity_0 = _mm256_xor_si256(parity_0, _mm256_loadu_si256((const __m256i*)(data[d] + i)));
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(parity[0] + i), parity_0);
|
||||
}
|
||||
for (int p = 1; p < P; p++) {
|
||||
__m256i parity_p = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
__m256i data_d = _mm256_loadu_si256((const __m256i*)(data[d] + i));
|
||||
__m256i factor = _mm256_loadu_si256((const __m256i*)&r->expanded_matrix[D*p*32 + 32*d]);
|
||||
parity_p = _mm256_xor_si256(parity_p, gf_mul_expanded_avx2(data_d, factor, low_nibble_mask));
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(parity[p] + i), parity_p);
|
||||
}
|
||||
}
|
||||
for (uint64_t i = avx_size; i < size; i++) {
|
||||
rs_compute_parity_single<D, P>(r, i, data, parity);
|
||||
}
|
||||
}
|
||||
|
||||
template<int D, int P>
|
||||
__attribute__((noinline))
|
||||
void rs_compute_parity_gfni(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
size_t avx_leftover = size % 32;
|
||||
size_t avx_size = size-avx_leftover;
|
||||
for (uint64_t i = 0; i < avx_size; i += 32) {
|
||||
{
|
||||
__m256i parity_0 = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
parity_0 = _mm256_xor_si256(parity_0, _mm256_loadu_si256((const __m256i*)(data[d] + i)));
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(parity[0] + i), parity_0);
|
||||
}
|
||||
for (int p = 1; p < P; p++) {
|
||||
__m256i parity_p = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
__m256i data_d = _mm256_loadu_si256((const __m256i*)(data[d] + i));
|
||||
__m256i factor = broadcast_u8(r->matrix[D*D + D*p + d]);
|
||||
parity_p = _mm256_xor_si256(parity_p, _mm256_gf2p8mul_epi8(data_d, factor));
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(parity[p] + i), parity_p);
|
||||
}
|
||||
}
|
||||
for (uint64_t i = avx_size; i < size; i++) {
|
||||
rs_compute_parity_single<D, P>(r, i, data, parity);
|
||||
}
|
||||
template<int D, int P> __attribute__((noinline))
|
||||
static void rs_compute_parity_gfni_tmpl(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
rs_compute_parity_gfni(D, P, r, size, data, parity);
|
||||
}
|
||||
|
||||
template<int D, int P>
|
||||
static void rs_compute_parity_tmpl(struct rs* r, uint64_t size, const uint8_t** data, uint8_t** parity) {
|
||||
switch (rs_cpu_level l = rs_get_cpu_level()) {
|
||||
case RS_CPU_SCALAR:
|
||||
rs_compute_parity_scalar<D, P>(r, size, data, parity);
|
||||
rs_compute_parity_scalar_tmpl<D, P>(r, size, data, parity);
|
||||
break;
|
||||
case RS_CPU_AVX2:
|
||||
rs_compute_parity_avx2<D, P>(r, size, data, parity);
|
||||
rs_compute_parity_avx2_tmpl<D, P>(r, size, data, parity);
|
||||
break;
|
||||
case RS_CPU_GFNI:
|
||||
rs_compute_parity_gfni<D, P>(r, size, data, parity);
|
||||
rs_compute_parity_gfni_tmpl<D, P>(r, size, data, parity);
|
||||
break;
|
||||
default:
|
||||
die("bad cpu_level %d\n", l);
|
||||
@@ -301,101 +153,32 @@ void rs_compute_parity(struct rs* r, uint64_t size, const uint8_t** data, uint8_
|
||||
rs_compute_parity_funcs[r->parity](r, size, data, parity);
|
||||
}
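
Since the public entry points are unchanged by this refactor, callers still obtain a codec with rs_get and fill parity blocks with rs_compute_parity. A minimal usage sketch, assuming rs.h exposes rs_get, rs_data_blocks, rs_parity_blocks and rs_compute_parity with the signatures visible in this file; the block size and parity value are arbitrary:

    #include <cstdint>
    #include <vector>
    #include "rs.h"

    int main() {
        // The parity byte packs the geometry: data blocks in the low nibble,
        // parity blocks in the high nibble (see rs_core.c below).
        const uint8_t parity = (3 << 4) | 10;  // 10 data blocks + 3 parity blocks
        struct rs* r = rs_get(parity);

        const uint64_t size = 4096;
        const int D = rs_data_blocks(parity);
        const int P = rs_parity_blocks(parity);

        std::vector<std::vector<uint8_t>> blocks(D + P, std::vector<uint8_t>(size, 0));
        std::vector<const uint8_t*> data(D);
        std::vector<uint8_t*> par(P);
        for (int d = 0; d < D; d++) { data[d] = blocks[d].data(); }
        for (int p = 0; p < P; p++) { par[p] = blocks[D + p].data(); }

        // Fills the P parity blocks from the D data blocks. Parity block 0 is the
        // plain XOR of the data blocks (the first parity column is all ones).
        rs_compute_parity(r, size, data.data(), par.data());
        return 0;
    }
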
template<int D>
|
||||
static void rs_recover_matmul_single(uint64_t i, const uint8_t** have, uint8_t* want, const uint8_t* have_to_want) {
|
||||
want[i] = 0;
|
||||
for (int j = 0; j < D; j++) {
|
||||
want[i] ^= gf_mul(have_to_want[j], have[j][i]);
|
||||
}
|
||||
template<int D> __attribute__((noinline))
|
||||
static void rs_recover_matmul_scalar_tmpl(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat) {
|
||||
rs_recover_matmul_scalar(D, size, have, want, mat);
|
||||
}
|
||||
|
||||
template<int D>
|
||||
static void rs_recover_matmul_single_expanded(uint64_t i, const uint8_t** have, uint8_t* want, const uint8_t* have_to_want_expanded) {
|
||||
want[i] = 0;
|
||||
for (int j = 0; j < D; j++) {
|
||||
want[i] ^= gf_mul_expanded(have[j][i], &have_to_want_expanded[j*32]);
|
||||
}
|
||||
template<int D> __attribute__((noinline))
|
||||
static void rs_recover_matmul_avx2_tmpl(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat) {
|
||||
rs_recover_matmul_avx2(D, size, have, want, mat);
|
||||
}
|
||||
|
||||
template<int D>
|
||||
__attribute__((noinline))
|
||||
static void rs_recover_matmul_scalar(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* have_to_want) {
|
||||
uint8_t have_to_want_expanded[D*32];
|
||||
for (int i = 0; i < D; i++) {
|
||||
gf_mul_expand_factor(have_to_want[i], &have_to_want_expanded[i*32]);
|
||||
}
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
rs_recover_matmul_single_expanded<D>(i, have, want, have_to_want_expanded);
|
||||
}
|
||||
}
|
||||
|
||||
template<int D>
|
||||
__attribute__((noinline))
|
||||
static void rs_recover_matmul_avx2(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* have_to_want) {
|
||||
__m256i have_to_want_expanded[D];
|
||||
for (int i = 0; i < D; i++) {
|
||||
gf_mul_expand_factor(have_to_want[i], (uint8_t*)&have_to_want_expanded[i]);
|
||||
}
|
||||
__m256i low_nibble_mask = broadcast_u8(0x0f);
|
||||
size_t avx_leftover = size % 32;
|
||||
size_t avx_size = size-avx_leftover;
|
||||
for (uint64_t i = 0; i < avx_size; i += 32) {
|
||||
__m256i want_i = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
want_i = _mm256_xor_si256(
|
||||
want_i,
|
||||
gf_mul_expanded_avx2(
|
||||
_mm256_loadu_si256((const __m256i*)(have[d] + i)),
|
||||
have_to_want_expanded[d],
|
||||
low_nibble_mask
|
||||
)
|
||||
);
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(want + i), want_i);
|
||||
}
|
||||
for (uint64_t i = avx_size; i < size; i++) {
|
||||
rs_recover_matmul_single_expanded<D>(i, have, want, (const uint8_t*)have_to_want_expanded);
|
||||
}
|
||||
}
|
||||
|
||||
template<int D>
|
||||
__attribute__((noinline))
|
||||
static void rs_recover_matmul_gfni(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* have_to_want) {
|
||||
__m256i have_to_want_avx[D];
|
||||
for (int i = 0; i < D; i++) {
|
||||
have_to_want_avx[i] = broadcast_u8(have_to_want[i]);
|
||||
}
|
||||
size_t avx_leftover = size % 32;
|
||||
size_t avx_size = size-avx_leftover;
|
||||
for (uint64_t i = 0; i < avx_size; i += 32) {
|
||||
__m256i want_i = _mm256_setzero_si256();
|
||||
for (int d = 0; d < D; d++) {
|
||||
want_i = _mm256_xor_si256(
|
||||
want_i,
|
||||
_mm256_gf2p8mul_epi8(
|
||||
_mm256_loadu_si256((const __m256i*)(have[d] + i)),
|
||||
have_to_want_avx[d]
|
||||
)
|
||||
);
|
||||
}
|
||||
_mm256_storeu_si256((__m256i*)(want + i), want_i);
|
||||
}
|
||||
for (uint64_t i = avx_size; i < size; i++) {
|
||||
rs_recover_matmul_single<D>(i, have, want, have_to_want);
|
||||
}
|
||||
template<int D> __attribute__((noinline))
|
||||
static void rs_recover_matmul_gfni_tmpl(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat) {
|
||||
rs_recover_matmul_gfni(D, size, have, want, mat);
|
||||
}
|
||||
|
||||
template<int D>
|
||||
static void rs_recover_matmul_tmpl(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat) {
|
||||
switch (rs_cpu_level l = rs_get_cpu_level()) {
|
||||
case RS_CPU_SCALAR:
|
||||
rs_recover_matmul_scalar<D>(size, have, want, mat);
|
||||
rs_recover_matmul_scalar_tmpl<D>(size, have, want, mat);
|
||||
break;
|
||||
case RS_CPU_AVX2:
|
||||
rs_recover_matmul_avx2<D>(size, have, want, mat);
|
||||
rs_recover_matmul_avx2_tmpl<D>(size, have, want, mat);
|
||||
break;
|
||||
case RS_CPU_GFNI:
|
||||
rs_recover_matmul_gfni<D>(size, have, want, mat);
|
||||
rs_recover_matmul_gfni_tmpl<D>(size, have, want, mat);
|
||||
break;
|
||||
default:
|
||||
die("bad cpu_level %d\n", l);
|
||||
@@ -403,7 +186,6 @@ static void rs_recover_matmul_tmpl(uint64_t size, const uint8_t** have, uint8_t*
|
||||
}
|
||||
|
||||
static void (*rs_recover_matmul_funcs[16])(uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat);
|
||||
|
||||
void rs_recover(
|
||||
struct rs* r,
|
||||
uint64_t size,
|
||||
@@ -412,55 +194,12 @@ void rs_recover(
|
||||
uint8_t want_block,
|
||||
uint8_t* want
|
||||
) {
|
||||
int D = rs_data_blocks(r->parity);
|
||||
int B = rs_blocks(r->parity);
|
||||
// Create some space
|
||||
uint8_t* scratch = (uint8_t*)malloc(D*D + D*D);
|
||||
uint8_t* mat_1 = scratch;
|
||||
uint8_t* mat_2 = scratch + D*D;
|
||||
// Preliminary checks
|
||||
for (int i = 0; i < D; i++) {
|
||||
if (have_blocks[i] >= B) {
|
||||
die("have_blocks[%d]=%d >= %d\n", i, have_blocks[i], B);
|
||||
rs_recover_core(
|
||||
r, size, have_blocks, have, want_block, want,
|
||||
[](int D, uint64_t size, const uint8_t** have, uint8_t* want, const uint8_t* mat) {
|
||||
rs_recover_matmul_funcs[D](size, have, want, mat);
|
||||
}
|
||||
if (have_blocks[i] == want_block) {
|
||||
die("have_blocks[%d]=%d == want_block=%d\n", i, have_blocks[i], want_block);
|
||||
}
|
||||
if (i > 0 && have_blocks[i] <= have_blocks[i-1]) {
|
||||
die("have_blocks[%d]=%d <= have_blocks[%d-1]=%d\n", i, have_blocks[i], i, have_blocks[i-1]);
|
||||
}
|
||||
}
|
||||
// below in the dimensionality annotation we paper over transposes
|
||||
// [DxD] matrix going from the data blocks to the blocks we currently have
|
||||
uint8_t* data_to_have = mat_1;
|
||||
for (int i = 0, have_cursor = 0; i < B; i++) {
|
||||
if (have_cursor >= D || have_blocks[have_cursor] != i) {
|
||||
continue;
|
||||
}
|
||||
memcpy(data_to_have + have_cursor*D, r->matrix + i*D, D);
|
||||
have_cursor++;
|
||||
}
|
||||
// [DxD] matrix going from what we have to the original data blocks
|
||||
uint8_t* have_to_data = mat_2;
|
||||
if (!rs_gf_invert_matrix(data_to_have, have_to_data, D)) {
|
||||
die("unexpected singular matrix\n");
|
||||
}
|
||||
data_to_have = nullptr;
|
||||
// [Dx1] matrix going from the data blocks to the block we want
|
||||
uint8_t* data_to_want = &r->matrix[want_block*D];
|
||||
// have_to_want = data_to_want * have_to_data
|
||||
// [Dx1] matrix going from `blocks` to the block we're into
|
||||
uint8_t* have_to_want = mat_1;
|
||||
for (int i = 0; i < D; i++) {
|
||||
have_to_want[i] = 0;
|
||||
for (int j = 0; j < D; j++) {
|
||||
have_to_want[i] ^= gf_mul(data_to_want[j], have_to_data[j*D + i]);
|
||||
}
|
||||
}
|
||||
// want = have_to_want * have
|
||||
rs_recover_matmul_funcs[D](size, have, want, have_to_want);
|
||||
// We're done.
|
||||
free(scratch);
|
||||
);
|
||||
}
|
||||
|
||||
__attribute__((constructor))
|
||||
|
||||
cpp/rs/rs_core.c (451): new file
@@ -0,0 +1,451 @@
|
||||
#ifndef __KERNEL__
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
|
||||
#endif
|
||||
|
||||
extern const u8 rs_gf_inv_table[256];
|
||||
extern const u8 rs_gf_log_table[256];
|
||||
extern const u8 rs_gf_exp_table[256];
|
||||
|
||||
static inline u8 gf_inv(u8 x) {
|
||||
return rs_gf_inv_table[x];
|
||||
}
|
||||
|
||||
static inline u8 gf_mul(u8 x, u8 y) {
|
||||
if (x == 0 || y == 0) {
|
||||
return 0;
|
||||
}
|
||||
int i = rs_gf_log_table[x] + rs_gf_log_table[y];
|
||||
return rs_gf_exp_table[i > 254 ? i - 255 : i];
|
||||
}
|
||||
|
||||
static inline void gf_mul_expand_factor(u8 x, u8* expanded_x) {
|
||||
int i;
|
||||
for (i = 0; i < 16; i++) {
|
||||
expanded_x[i] = gf_mul(i, x);
|
||||
}
|
||||
for (i = 0; i < 16; i++) {
|
||||
expanded_x[16 + i] = gf_mul(i << 4, x);
|
||||
}
|
||||
}
|
||||
|
||||
static inline u8 gf_mul_expanded(u8 x, const u8* expanded_y) {
|
||||
return expanded_y[x & 0x0f] ^ expanded_y[16 + ((x & 0xf0) >> 4)];
|
||||
}
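
The 32-byte expanded table is the multiply-by-x function split into a low-nibble and a high-nibble lookup, so gf_mul_expanded must agree with gf_mul for every byte (GF(2^8) multiplication distributes over XOR). A standalone self-check of that identity; the helpers are re-implemented locally because the ones above are static, and the log/exp tables are assumed to come from gf_tables.c:

    // Illustrative only: link against gf_tables.c for the two tables.
    #include <cassert>
    #include <cstdint>

    extern "C" {
    extern const uint8_t rs_gf_log_table[256];
    extern const uint8_t rs_gf_exp_table[256];
    }

    // Same log/exp multiply as gf_mul above.
    static uint8_t gf_mul_ref(uint8_t x, uint8_t y) {
        if (x == 0 || y == 0) { return 0; }
        int i = rs_gf_log_table[x] + rs_gf_log_table[y];
        return rs_gf_exp_table[i > 254 ? i - 255 : i];
    }

    int main() {
        for (int x = 0; x < 256; x++) {
            // Build the 32-byte table exactly as gf_mul_expand_factor does.
            uint8_t expanded[32];
            for (int i = 0; i < 16; i++) { expanded[i] = gf_mul_ref(i, x); }
            for (int i = 0; i < 16; i++) { expanded[16 + i] = gf_mul_ref(i << 4, x); }
            for (int a = 0; a < 256; a++) {
                // Low-nibble lookup XOR high-nibble lookup == full product.
                uint8_t got = expanded[a & 0x0f] ^ expanded[16 + ((a & 0xf0) >> 4)];
                assert(got == gf_mul_ref(a, x));
            }
        }
        return 0;
    }
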
__attribute__((target("avx2")))
|
||||
static inline __m256i gf_mul_expanded_avx2(__m256i x, __m256i expanded_y, __m256i low_nibble_mask) {
|
||||
__m256i expanded_y_lo = _mm256_permute2x128_si256(expanded_y, expanded_y, 0x00);
|
||||
__m256i expanded_y_hi = _mm256_permute2x128_si256(expanded_y, expanded_y, 0x11);
|
||||
|
||||
__m256i x_lo = _mm256_and_si256(x, low_nibble_mask);
|
||||
__m256i x_hi = _mm256_and_si256(_mm256_srli_epi16(x, 4), low_nibble_mask);
|
||||
|
||||
return _mm256_xor_si256(
|
||||
_mm256_shuffle_epi8(expanded_y_lo, x_lo),
|
||||
_mm256_shuffle_epi8(expanded_y_hi, x_hi)
|
||||
);
|
||||
}
|
||||
|
||||
static inline u8 rs_data_blocks_core(u8 parity) {
|
||||
return parity & 0x0F;
|
||||
}
|
||||
|
||||
static inline u8 rs_parity_blocks_core(u8 parity) {
|
||||
return parity >> 4;
|
||||
}
|
||||
|
||||
static inline u8 rs_blocks_core(u8 parity) {
|
||||
return rs_data_blocks_core(parity) + rs_parity_blocks_core(parity);
|
||||
}
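
In other words the whole stripe geometry is packed into the single parity byte: data block count in the low nibble, parity block count in the high nibble, total block count is their sum. A one-line sketch with an arbitrary example value:

    #include <cassert>
    #include <cstdint>

    int main() {
        // Example value only: 10 data blocks, 4 parity blocks.
        const uint8_t parity = (4 << 4) | 10;            // == 0x4a
        assert((parity & 0x0F) == 10);                   // rs_data_blocks_core(parity)
        assert((parity >> 4) == 4);                      // rs_parity_blocks_core(parity)
        assert(((parity & 0x0F) + (parity >> 4)) == 14); // rs_blocks_core(parity)
        return 0;
    }
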
// From <https://github.com/intel/isa-l/blob/33a2d9484595c2d6516c920ce39a694c144ddf69/erasure_code/ec_base.c#L110>,
|
||||
// just Gaussian elimination.
|
||||
//
|
||||
// TODO this is in row-major, it'd be nice to have it in column-major
|
||||
// so that the final operation in rs_recover is more natural.
|
||||
static bool rs_gf_invert_matrix(u8* in_mat, u8* out_mat, const int n) {
|
||||
int i, j, k;
|
||||
u8 temp;
|
||||
|
||||
// Set out_mat[] to the identity matrix
|
||||
memset(out_mat, 0, n*n);
|
||||
for (i = 0; i < n; i++) {
|
||||
out_mat[i * n + i] = 1;
|
||||
}
|
||||
|
||||
// Inverse
|
||||
for (i = 0; i < n; i++) {
|
||||
// Check for 0 in pivot element
|
||||
if (in_mat[i * n + i] == 0) {
|
||||
// Find a row with non-zero in current column and swap
|
||||
for (j = i + 1; j < n; j++) {
|
||||
if (in_mat[j * n + i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (j == n) { // Couldn't find means it's singular
|
||||
return false;
|
||||
}
|
||||
|
||||
for (k = 0; k < n; k++) { // Swap rows i,j
|
||||
temp = in_mat[i * n + k];
|
||||
in_mat[i * n + k] = in_mat[j * n + k];
|
||||
in_mat[j * n + k] = temp;
|
||||
|
||||
temp = out_mat[i * n + k];
|
||||
out_mat[i * n + k] = out_mat[j * n + k];
|
||||
out_mat[j * n + k] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
temp = gf_inv(in_mat[i * n + i]); // 1/pivot
|
||||
for (j = 0; j < n; j++) { // Scale row i by 1/pivot
|
||||
in_mat[i * n + j] = gf_mul(in_mat[i * n + j], temp);
|
||||
out_mat[i * n + j] = gf_mul(out_mat[i * n + j], temp);
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
if (j == i) {
|
||||
continue;
|
||||
}
|
||||
|
||||
temp = in_mat[j * n + i];
|
||||
for (k = 0; k < n; k++) {
|
||||
out_mat[j * n + k] ^= gf_mul(temp, out_mat[i * n + k]);
|
||||
in_mat[j * n + k] ^= gf_mul(temp, in_mat[i * n + k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct rs {
|
||||
u8 parity;
|
||||
// u8[D*B], in column-major.
|
||||
u8* matrix;
|
||||
// u8[D*P][32], in column-major. These are the lookup tables
|
||||
// to perform multiplication quickly.
|
||||
u8* expanded_matrix;
|
||||
};
|
||||
|
||||
// Note that we pervasively assume that the first column of the parity columns
|
||||
// is 1s, which causes the first parity to be the XORs of the data. So you can't
|
||||
// really change how the matrix is generated.
|
||||
static void rs_cauchy_matrix(struct rs* r) {
|
||||
int D = rs_data_blocks_core(r->parity);
|
||||
int B = rs_blocks_core(r->parity);
|
||||
u8* matrix = r->matrix;
|
||||
memset(matrix, 0, D*B);
|
||||
// Identity in the d*d upper half
|
||||
int i;
|
||||
for (i = 0; i < D; i++) {
|
||||
matrix[D*i + i] = 1;
|
||||
}
|
||||
// Fill in the rest using cauchy
|
||||
int col, row;
|
||||
for (col = D; col < B; col++) {
|
||||
for (row = 0; row < D; row++) {
|
||||
matrix[col*D + row] = gf_inv(col ^ row);
|
||||
}
|
||||
}
|
||||
// Scale the columns
|
||||
for (col = D; col < B; col++) {
|
||||
u8 factor = gf_inv(matrix[col*D]);
|
||||
for (row = 0; row < D; row++) {
|
||||
matrix[col*D + row] = gf_mul(matrix[col*D + row], factor);
|
||||
}
|
||||
}
|
||||
// Scale the rows
|
||||
for (row = 1; row < D; row++) {
|
||||
u8 factor = gf_inv(matrix[D*D + row]);
|
||||
for (col = D; col < B; col++) {
|
||||
matrix[col*D + row] = gf_mul(matrix[col*D + row], factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// out must be at least size 4
|
||||
static void rs_cpuidex(u32 function_id, u32 subfunction_id, u32* out) {
|
||||
u32 a, b, c, d;
|
||||
__asm("cpuid":"=a"(a),"=b"(b),"=c"(c),"=d"(d):"0"(function_id),"2"(subfunction_id));
|
||||
out[0] = a; out[1] = b; out[2] = c; out[3] = d;
|
||||
}
|
||||
|
||||
static bool rs_has_cpu_level_core(enum rs_cpu_level level) {
|
||||
u32 _0_0[4];
|
||||
rs_cpuidex(0, 0, _0_0);
|
||||
u32 _7_0[4];
|
||||
if (_0_0[0] >= 7) {
|
||||
rs_cpuidex(7, 0, _7_0);
|
||||
} else {
|
||||
memset(_7_0, 0, sizeof(_7_0));
|
||||
}
|
||||
switch (level) {
|
||||
case RS_CPU_SCALAR:
|
||||
return true;
|
||||
case RS_CPU_AVX2:
|
||||
return _7_0[1] & (1<<5);
|
||||
case RS_CPU_GFNI:
|
||||
return _7_0[2] & (1<<8) && !rs_detect_valgrind();
|
||||
default:
|
||||
die("bad CPU level %d\n", level);
|
||||
}
|
||||
}
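
This is what the public rs_has_cpu_level wrapper in rs.cpp forwards to, so callers can probe the available level before relying on the GFNI or AVX2 paths; rs_detect_cpu_level does the same at startup, starting with GFNI. A minimal sketch, assuming rs.h exposes the rs_cpu_level enum and rs_has_cpu_level:

    // Illustrative only.
    #include <cstdio>
    #include "rs.h"

    int main() {
        // Probe from the most specialised level down, like rs_detect_cpu_level().
        if (rs_has_cpu_level(RS_CPU_GFNI)) {
            std::printf("GFNI kernels available\n");
        } else if (rs_has_cpu_level(RS_CPU_AVX2)) {
            std::printf("AVX2 kernels available\n");
        } else {
            std::printf("falling back to scalar kernels\n");
        }
        return 0;
    }
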
#define rs_compute_parity_single(D, P, r, i, data, parity) do { \
|
||||
parity[0][i] = 0; \
|
||||
int d; \
|
||||
int p; \
|
||||
for (d = 0; d < D; d++) { \
|
||||
parity[0][i] ^= data[d][i]; \
|
||||
} \
|
||||
for (p = 1; p < P; p++) { \
|
||||
const u8* factor = &r->expanded_matrix[D*32*p]; \
|
||||
parity[p][i] = 0; \
|
||||
for (d = 0; d < D; d++, factor += 32) { \
|
||||
parity[p][i] ^= gf_mul_expanded(data[d][i], factor); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define rs_compute_parity_scalar(D, P, r, size, data, parity) do { \
|
||||
/* parity = r->matrix * data */ \
|
||||
u64 i; \
|
||||
for (i = 0; i < size; i++) { \
|
||||
rs_compute_parity_single(D, P, r, i, data, parity); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define rs_compute_parity_avx2(D, P, r, size, data, parity) do { \
|
||||
__m256i low_nibble_mask = broadcast_u8(0x0f); \
|
||||
size_t avx_leftover = size % 32; \
|
||||
size_t avx_size = size-avx_leftover; \
|
||||
u32 i; \
|
||||
int p; \
|
||||
int d; \
|
||||
for (i = 0; i < avx_size; i += 32) { \
|
||||
{ \
|
||||
__m256i parity_0 = _mm256_setzero_si256(); \
|
||||
for (d = 0; d < D; d++) { \
|
||||
parity_0 = _mm256_xor_si256(parity_0, _mm256_loadu_si256((const __m256i*)(data[d] + i))); \
|
||||
} \
|
||||
_mm256_storeu_si256((__m256i*)(parity[0] + i), parity_0); \
|
||||
} \
|
||||
for (p = 1; p < P; p++) { \
|
||||
__m256i parity_p = _mm256_setzero_si256(); \
|
||||
for (d = 0; d < D; d++) { \
|
||||
__m256i data_d = _mm256_loadu_si256((const __m256i*)(data[d] + i)); \
|
||||
__m256i factor = _mm256_loadu_si256((const __m256i*)&r->expanded_matrix[D*p*32 + 32*d]); \
|
||||
parity_p = _mm256_xor_si256(parity_p, gf_mul_expanded_avx2(data_d, factor, low_nibble_mask)); \
|
||||
} \
|
||||
_mm256_storeu_si256((__m256i*)(parity[p] + i), parity_p); \
|
||||
} \
|
||||
} \
|
||||
for (i = avx_size; i < size; i++) { \
|
||||
rs_compute_parity_single(D, P, r, i, data, parity); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define rs_compute_parity_gfni(D, P, r, size, data, parity) do { \
|
||||
size_t avx_leftover = size % 32; \
|
||||
size_t avx_size = size-avx_leftover; \
|
||||
u64 i; \
|
||||
int p; \
|
||||
int d; \
|
||||
for (i = 0; i < avx_size; i += 32) { \
|
||||
{ \
|
||||
__m256i parity_0 = _mm256_setzero_si256(); \
|
||||
for (d = 0; d < D; d++) { \
|
||||
parity_0 = _mm256_xor_si256(parity_0, _mm256_loadu_si256((const __m256i*)(data[d] + i))); \
|
||||
} \
|
||||
_mm256_storeu_si256((__m256i*)(parity[0] + i), parity_0); \
|
||||
} \
|
||||
for (p = 1; p < P; p++) { \
|
||||
__m256i parity_p = _mm256_setzero_si256(); \
|
||||
for (d = 0; d < D; d++) { \
|
||||
__m256i data_d = _mm256_loadu_si256((const __m256i*)(data[d] + i)); \
|
||||
__m256i factor = broadcast_u8(r->matrix[D*D + D*p + d]); \
|
||||
parity_p = _mm256_xor_si256(parity_p, _mm256_gf2p8mul_epi8(data_d, factor)); \
|
||||
} \
|
||||
_mm256_storeu_si256((__m256i*)(parity[p] + i), parity_p); \
|
||||
} \
|
||||
} \
|
||||
for (i = avx_size; i < size; i++) { \
|
||||
rs_compute_parity_single(D, P, r, i, data, parity); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define rs_recover_matmul_single(D, i, have, want, have_to_want) do { \
|
||||
want[i] = 0; \
|
||||
int j; \
|
||||
for (j = 0; j < D; j++) { \
|
||||
want[i] ^= gf_mul(have_to_want[j], have[j][i]); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define rs_recover_matmul_single_expanded(D, i, have, want, have_to_want_expanded) do { \
|
||||
want[i] = 0; \
|
||||
int j; \
|
||||
for (j = 0; j < D; j++) { \
|
||||
want[i] ^= gf_mul_expanded(have[j][i], &have_to_want_expanded[j*32]); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define rs_recover_matmul_scalar(D, size, have, want, have_to_want) do { \
|
||||
u8 have_to_want_expanded[D*32]; \
|
||||
int d; \
|
||||
for (d = 0; d < D; d++) { \
|
||||
gf_mul_expand_factor(have_to_want[d], &have_to_want_expanded[d*32]); \
|
||||
} \
|
||||
size_t i; \
|
||||
for (i = 0; i < size; i++) { \
|
||||
rs_recover_matmul_single_expanded(D, i, have, want, have_to_want_expanded); \
|
||||
} \
|
||||
} while (0) \
|
||||
|
||||
#define rs_recover_matmul_avx2(D, size, have, want, have_to_want) do { \
    __m256i have_to_want_expanded[D]; \
    int d; \
    for (d = 0; d < D; d++) { \
        gf_mul_expand_factor(have_to_want[d], ((u8*)&have_to_want_expanded[d])); \
    } \
    __m256i low_nibble_mask = broadcast_u8(0x0f); \
    size_t avx_leftover = size % 32; \
    size_t avx_size = size-avx_leftover; \
    u64 i; \
    for (i = 0; i < avx_size; i += 32) { \
        __m256i want_i = _mm256_setzero_si256(); \
        for (d = 0; d < D; d++) { \
            want_i = _mm256_xor_si256( \
                want_i, \
                gf_mul_expanded_avx2( \
                    _mm256_loadu_si256((const __m256i*)(have[d] + i)), \
                    have_to_want_expanded[d], \
                    low_nibble_mask \
                ) \
            ); \
        } \
        _mm256_storeu_si256((__m256i*)(want + i), want_i); \
    } \
    for (i = avx_size; i < size; i++) { \
        rs_recover_matmul_single_expanded(D, i, have, want, ((const u8*)have_to_want_expanded)); \
    } \
} while(0)

#define rs_recover_matmul_gfni(D, size, have, want, have_to_want) do { \
    __m256i have_to_want_avx[D]; \
    int d; \
    for (d = 0; d < D; d++) { \
        have_to_want_avx[d] = broadcast_u8(have_to_want[d]); \
    } \
    size_t avx_leftover = size % 32; \
    size_t avx_size = size-avx_leftover; \
    u64 i; \
    for (i = 0; i < avx_size; i += 32) { \
        __m256i want_i = _mm256_setzero_si256(); \
        for (d = 0; d < D; d++) { \
            want_i = _mm256_xor_si256( \
                want_i, \
                _mm256_gf2p8mul_epi8( \
                    _mm256_loadu_si256((const __m256i*)(have[d] + i)), \
                    have_to_want_avx[d] \
                ) \
            ); \
        } \
        _mm256_storeu_si256((__m256i*)(want + i), want_i); \
    } \
    for (i = avx_size; i < size; i++) { \
        rs_recover_matmul_single(D, i, have, want, have_to_want); \
    } \
} while (0)

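/*
 * rs_new_core makes a single allocation holding the struct itself, the BxD generator
 * matrix (whose first D rows are expected to map the data blocks to themselves, with the
 * last P rows holding the parity coefficients read above as r->matrix[D*D + D*p + d]),
 * and a D*P*32-byte expanded copy of the parity coefficients for the AVX2 path.
 */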
static struct rs* rs_new_core(u8 parity) {
    int B = rs_blocks_core(parity);
    int D = rs_data_blocks_core(parity);
    int P = rs_parity_blocks_core(parity);
    struct rs* r = (struct rs*)rs_malloc(sizeof(struct rs) + B*D + D*P*32);
    if (r == NULL) { return NULL; }
    r->parity = parity;
    r->matrix = (u8*)(r + 1);
    r->expanded_matrix = r->matrix + B*D;
    rs_cauchy_matrix(r);
    int p;
    int d;
    for (p = 0; p < P; p++) {
        for (d = 0; d < D; d++) {
            gf_mul_expand_factor(r->matrix[D*D + D*p + d], &r->expanded_matrix[D*32*p + 32*d]);
        }
    }
    return r;
}

static void rs_delete_core(struct rs* r) {
    rs_free(r);
}

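/*
 * Recovery in a nutshell: writing the generator matrix rows as M[b] (so block b = M[b] * data,
 * with data the original D data blocks) and have for the D surviving blocks listed in
 * have_blocks, the function below builds data_to_have from the rows of the blocks we have,
 * inverts it, and combines it with the row of the block we want:
 *
 *     have         = data_to_have * data
 *     have_to_want = data_to_want * data_to_have^-1
 *     want         = data_to_want * data = have_to_want * have
 *
 * all over GF(2^8). The final [1xD] * [Dxsize] product is delegated to recover_func.
 */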
static void rs_recover_core(
    struct rs* r,
    u64 size,
    const u8* have_blocks,
    const u8** have,
    u8 want_block,
    u8* want,
    void (*recover_func)(int D, u64 size, const u8** have, u8* want, const u8* mat)
) {
    int D = rs_data_blocks_core(r->parity);
    int B = rs_blocks_core(r->parity);
    // Create some space
    u8* scratch = (u8*)rs_malloc(D*D + D*D);
    u8* mat_1 = scratch;
    u8* mat_2 = scratch + D*D;
    // Preliminary checks
    int i, j, d, b;
    for (d = 0; d < D; d++) {
        if (have_blocks[d] >= B) {
            die("have_blocks[%d]=%d >= %d\n", d, have_blocks[d], B);
        }
        if (have_blocks[d] == want_block) {
            die("have_blocks[%d]=%d == want_block=%d\n", d, have_blocks[d], want_block);
        }
        if (d > 0 && have_blocks[d] <= have_blocks[d-1]) {
            die("have_blocks[%d]=%d <= have_blocks[%d-1]=%d\n", d, have_blocks[d], d, have_blocks[d-1]);
        }
    }
    // below in the dimensionality annotations we paper over transposes
    // [DxD] matrix going from the data blocks to the blocks we currently have
    u8* data_to_have = mat_1;
    int have_cursor;
    for (b = 0, have_cursor = 0; b < B; b++) {
        if (have_cursor >= D || have_blocks[have_cursor] != b) {
            continue;
        }
        memcpy(data_to_have + have_cursor*D, r->matrix + b*D, D);
        have_cursor++;
    }
    // [DxD] matrix going from what we have to the original data blocks
    u8* have_to_data = mat_2;
    if (!rs_gf_invert_matrix(data_to_have, have_to_data, D)) {
        die("unexpected singular matrix\n");
    }
    data_to_have = NULL;
    // [Dx1] matrix going from the data blocks to the block we want
    u8* data_to_want = &r->matrix[want_block*D];
    // have_to_want = data_to_want * have_to_data
    // [Dx1] matrix going from the blocks we have to the block we want
    u8* have_to_want = mat_1;
    for (i = 0; i < D; i++) {
        have_to_want[i] = 0;
        for (j = 0; j < D; j++) {
            have_to_want[i] ^= gf_mul(data_to_want[j], have_to_data[j*D + i]);
        }
    }
    // want = have_to_want * have
    recover_func(D, size, have, want, have_to_want);
    // We're done.
    rs_free(scratch);
}

@@ -156,6 +156,8 @@
//
// TODO fill in results

static constexpr uint64_t EGGSFS_PAGE_SIZE = 4096;

static auto wrappedSnapshot(rocksdb::DB* db) {
    auto deleter = [db](const rocksdb::Snapshot* snapshot) {
        db->ReleaseSnapshot(snapshot);
@@ -726,7 +728,11 @@ struct ShardDBImpl {
        beginKey().setOffset(req.byteOffset);
        {
            std::unique_ptr<rocksdb::Iterator> it(_db->NewIterator(options, _spansCf));
            for (it->SeekForPrev(beginKey.toSlice()); it->Valid(); it->Next()) {
            for (
                it->SeekForPrev(beginKey.toSlice());
                it->Valid() && (req.limit == 0 || resp.spans.els.size() < req.limit);
                it->Next()
            ) {
                auto key = ExternalValue<SpanKey>::FromSlice(it->key());
                if (key().fileId() != req.fileId) {
                    break;
@@ -1258,6 +1264,11 @@ struct ShardDBImpl {
            LOG_DEBUG(_env, "inline span has bad storage class %s", req.storageClass);
            return EggsError::BAD_SPAN_BODY;
        }

        if (req.byteOffset%EGGSFS_PAGE_SIZE != 0) {
            LOG_DEBUG(_env, "req.byteOffset=%s is not a multiple of PAGE_SIZE=%s", req.byteOffset, EGGSFS_PAGE_SIZE);
            return EggsError::BAD_SPAN_BODY;
        }

        uint32_t expectedCrc = crc32c(0, req.body.data(), req.body.size());
        expectedCrc = crc32c_zero_extend(expectedCrc, req.size - req.body.size());
@@ -1293,10 +1304,14 @@ struct ShardDBImpl {
            LOG_DEBUG(_env, "bad storage class %s for blocks span", (int)req.storageClass);
            return EggsError::BAD_SPAN_BODY;
        }
        if (req.byteOffset%EGGSFS_PAGE_SIZE != 0 || req.cellSize%EGGSFS_PAGE_SIZE != 0) {
            LOG_DEBUG(_env, "req.byteOffset=%s or cellSize=%s is not a multiple of PAGE_SIZE=%s", req.byteOffset, req.cellSize, EGGSFS_PAGE_SIZE);
            return EggsError::BAD_SPAN_BODY;
        }
        if (!_checkSpanBody(req)) {
            return EggsError::BAD_SPAN_BODY;
        }


        // start filling in entry
        entry.fileId = req.fileId;
        entry.byteOffset = req.byteOffset;
@@ -1888,6 +1903,7 @@ struct ShardDBImpl {
            return EggsError::MISMATCHING_TARGET;
        }
        if (edgeBody().creationTime() != creationTime) {
            LOG_DEBUG(_env, "expected time %s, got %s", edgeBody().creationTime(), creationTime);
            return EggsError::MISMATCHING_CREATION_TIME;
        }
        if (edgeBody().targetIdWithLocked().extra()) { // locked
@@ -1992,6 +2008,7 @@ struct ShardDBImpl {
        }
        ExternalValue<CurrentEdgeBody> edge(edgeValue);
        if (edge().creationTime() != entry.creationTime) {
            LOG_DEBUG(_env, "expected time %s, got %s", edge().creationTime(), entry.creationTime);
            return EggsError::MISMATCHING_CREATION_TIME;
        }
        if (edge().locked()) {
@@ -2046,6 +2063,7 @@ struct ShardDBImpl {
        }
        ExternalValue<CurrentEdgeBody> edge(edgeValue);
        if (edge().creationTime() != entry.creationTime) {
            LOG_DEBUG(_env, "expected time %s, got %s", edge().creationTime(), entry.creationTime);
            return EggsError::MISMATCHING_CREATION_TIME;
        }
        if (!edge().locked()) {
@@ -2732,7 +2750,13 @@ struct ShardDBImpl {
        const auto& cache = _blockServicesCache.at(blockServiceId.u64);
        auto expectedProof = cbcmac(cache.secretKey, (uint8_t*)buf, sizeof(buf));

        return proof.proof == expectedProof;
        bool good = proof.proof == expectedProof;

        if (!good) {
            LOG_DEBUG(_env, "bad block write proof, expected %s, got %s", BincodeFixedBytes<8>(expectedProof), proof);
        }

        return good;
    }

    std::array<uint8_t, 8> _blockEraseCertificate(uint32_t blockSize, const BlockBody block, const AES128Key& secretKey) {
@@ -3335,8 +3359,16 @@ DirectoryInfo defaultDirectoryInfo() {
    hddBlocks.storageClass = storageClassByName("HDD");
    addSegment(BLOCK_POLICY_TAG, blockPolicy);

    // Span policy: up to 10MiB: RS(4,4), up to 100MiB (max span size): RS(10,4)
    // Span policy:
    // * up to 64KiB: RS(1,4). This mirroring span simplifies things in the kernel (so that
    //   we never have cell sizes that are not multiples of span sizes). We still set things
    //   up so that we can lose 4 copies and still be fine.
    // * up to 10MiB: RS(4,4).
    // * up to 100MiB (max span size): RS(10,4).
    SpanPolicy spanPolicy;
    auto& tinySpans = spanPolicy.entries.els.emplace_back();
    tinySpans.maxSize = 1 << 16;
    tinySpans.parity = Parity(1, 4);
    auto& smallSpans = spanPolicy.entries.els.emplace_back();
    smallSpans.maxSize = 10 << 20;
    smallSpans.parity = Parity(4, 4);
