Combined hash and fill AES loop (#166)

Adds more parallelizm into AES loop so modern CPUs can take advantage of it. Also, scratchpad data moves between L1 and L3 caches only one time which saves time and energy per hash.
This commit is contained in:
SChernykh
2019-12-01 16:58:38 +01:00
committed by tevador
parent e3561d661e
commit 219c02e1e5
8 changed files with 140 additions and 3 deletions

View File

@@ -122,11 +122,16 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
void* noncePtr = blockTemplate + 39;
auto nonce = atomicNonce.fetch_add(1);
uint64_t tempHash[8];
store32(noncePtr, nonce);
randomx_calculate_hash_first(vm, tempHash, blockTemplate, sizeof(blockTemplate));
while (nonce < noncesCount) {
store32(noncePtr, nonce);
randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
nonce = atomicNonce.fetch_add(1);
store32(noncePtr, nonce);
randomx_calculate_hash_next(vm, tempHash, blockTemplate, sizeof(blockTemplate), &hash);
result.xorWith(hash);
}
}