Combined hash and fill AES loop (#166)

Adds more parallelism to the AES loop so modern CPUs can take advantage of it. Also, scratchpad data moves between the L1 and L3 caches only once, which saves time and energy per hash.
2026-03-06 06:37:33 -05:00 · 2019-12-01 16:58:38 +01:00
parent e3561d661e
commit 219c02e1e5
8 changed files with 140 additions and 3 deletions
--- a/src/tests/benchmark.cpp
+++ b/src/tests/benchmark.cpp
@@ -122,11 +122,16 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
 	void* noncePtr = blockTemplate + 39;
 	auto nonce = atomicNonce.fetch_add(1);

+	uint64_t tempHash[8];
+
+	store32(noncePtr, nonce);
+	randomx_calculate_hash_first(vm, tempHash, blockTemplate, sizeof(blockTemplate));
+
 	while (nonce < noncesCount) {
-		store32(noncePtr, nonce);
-		randomx_calculate_hash(vm, blockTemplate, sizeof(blockTemplate), &hash);
-		result.xorWith(hash);
 		nonce = atomicNonce.fetch_add(1);
+		store32(noncePtr, nonce);
+		randomx_calculate_hash_next(vm, tempHash, blockTemplate, sizeof(blockTemplate), &hash);
+		result.xorWith(hash);
 	}
 }