Combined hash and fill AES loop (#166)

Adds more parallelizm into AES loop so modern CPUs can take advantage of it. Also, scratchpad data moves between L1 and L3 caches only one time which saves time and energy per hash.
This commit is contained in:
SChernykh
2019-12-01 16:58:38 +01:00
committed by tevador
parent e3561d661e
commit 219c02e1e5
8 changed files with 140 additions and 3 deletions

View File

@@ -120,6 +120,12 @@ namespace randomx {
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, &reg.a, fill_state);
blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
}
template<class Allocator, bool softAes>
void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);