Added comments to hashAes1Rx4 and fillAes1Rx4

Fixed gcc compilation
Added performance numbers
This commit is contained in:
tevador
2019-02-09 19:32:53 +01:00
parent 2798d78717
commit b8ce504be6
12 changed files with 72 additions and 325 deletions

View File

@@ -1,292 +0,0 @@
/*
Copyright (c) 2019 tevador
This file is part of RandomX.
RandomX is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX. If not, see<http://www.gnu.org/licenses/>.
*/
#include "common.hpp"
#include "InterpretedVirtualMachine.hpp"
#include <iostream>
#include <iomanip>
#include <sstream>
namespace RandomX {
class Mul9Transform : public ITransform {
public:
Mul9Transform(int32_t cc) : c(cc) {
std::ostringstream oss;
oss << "mul9_" << std::hex << (cc & 255);
name = oss.str();
}
int32_t apply(int32_t x) const override {
return 9 * x + c;
}
const char* getName() const override {
return name.c_str();
}
std::ostream& printAsm(std::ostream& os) const override {
os << "lea ecx, [rcx+rcx*8" << std::showpos << c << "]" << std::noshowpos << std::endl;
return os;
}
std::ostream& printCxx(std::ostream& os) const override {
os << "static const Mul9Transform " << name << "(" << c << ");" << std::endl;
return os;
}
private:
int32_t c;
std::string name;
};
class AddTransform : public ITransform {
public:
AddTransform(int32_t cc) : c(cc) {
std::ostringstream oss;
oss << "add_" << std::hex << (cc & 255);
name = oss.str();
}
int32_t apply(int32_t x) const override {
return x + c;
}
const char* getName() const override {
return name.c_str();
}
std::ostream& printAsm(std::ostream& os) const override {
os << "db 64" << std::endl;
os << "add ecx, " << c << std::endl;
return os;
}
std::ostream& printCxx(std::ostream& os) const override {
os << "static const AddTransform " << name << "(" << c << ");" << std::endl;
return os;
}
private:
int32_t c;
std::string name;
};
class XorTransform : public ITransform {
public:
XorTransform(int32_t cc) : c(cc) {
std::ostringstream oss;
oss << "xor_" << std::hex << (cc & 255);
name = oss.str();
}
int32_t apply(int32_t x) const override {
return x ^ c;
}
const char* getName() const override {
return name.c_str();
}
std::ostream& printAsm(std::ostream& os) const override {
os << "db 64" << std::endl;
os << "xor ecx, " << c << std::endl;
return os;
}
std::ostream& printCxx(std::ostream& os) const override {
os << "static const XorTransform " << name << "(" << c << ");" << std::endl;
return os;
}
private:
int32_t c;
std::string name;
};
static const Mul9Transform mul9_6d(109);
static const XorTransform xor_60(96);
static const Mul9Transform mul9_ed(-19);
static const AddTransform add_9e(-98);
static const AddTransform add_eb(-21);
static const XorTransform xor_b0(-80);
static const Mul9Transform mul9_a4(-92);
static const AddTransform add_71(113);
static const Mul9Transform mul9_64(100);
static const AddTransform add_d9(-39);
static const XorTransform xor_78(120);
static const Mul9Transform mul9_89(-119);
static const AddTransform add_8f(-113);
static const AddTransform add_6f(111);
static const XorTransform xor_68(104);
static const Mul9Transform mul9_ad(-83);
static const Mul9Transform mul9_7f(127);
static const XorTransform xor_90(-112);
static const AddTransform add_59(89);
static const AddTransform add_e0(-32);
static const AddTransform add_68(104);
static const XorTransform xor_88(-120);
static const XorTransform xor_18(24);
static const Mul9Transform mul9_9(9);
static const AddTransform add_e1(-31);
static const XorTransform xor_f0(-16);
static const AddTransform add_44(68);
static const Mul9Transform mul9_92(-110);
static const XorTransform xor_40(64);
static const XorTransform xor_d8(-40);
static const XorTransform xor_f8(-8);
static const AddTransform add_f6(-10);
static const XorTransform xor_e0(-32);
static const AddTransform add_e(14);
static const Mul9Transform mul9_d2(-46);
static const XorTransform xor_98(-104);
static const Mul9Transform mul9_24(36);
static const AddTransform add_64(100);
static const Mul9Transform mul9_bf(-65);
static const Mul9Transform mul9_1b(27);
static const Mul9Transform mul9_5b(91);
static const AddTransform add_9b(-101);
static const AddTransform add_a2(-94);
static const Mul9Transform mul9_f6(-10);
static const XorTransform xor_50(80);
static const AddTransform add_94(-108);
static const AddTransform add_c6(-58);
static const XorTransform xor_30(48);
static const Mul9Transform mul9_49(73);
static const XorTransform xor_d0(-48);
static const XorTransform xor_20(32);
static const XorTransform xor_a0(-96);
static const AddTransform add_76(118);
static const AddTransform add_5b(91);
static const Mul9Transform mul9_12(18);
static const AddTransform add_f5(-11);
static const Mul9Transform mul9_3f(63);
static const AddTransform add_72(114);
static const Mul9Transform mul9_2d(45);
static const AddTransform add_bd(-67);
static const AddTransform add_35(53);
static const Mul9Transform mul9_9b(-101);
static const Mul9Transform mul9_ff(-1);
static const XorTransform xor_10(16);
static const Mul9Transform mul9_db(-37);
static const Mul9Transform mul9_e4(-28);
static const Mul9Transform mul9_c9(-55);
static const XorTransform xor_a8(-88);
static const XorTransform xor_b8(-72);
static const AddTransform add_24(36);
static const XorTransform xor_c8(-56);
static const AddTransform add_74(116);
static const XorTransform xor_58(88);
static const XorTransform xor_80(-128);
static const AddTransform add_32(50);
static const AddTransform add_69(105);
static const AddTransform add_db(-37);
static const XorTransform xor_70(112);
static const XorTransform xor_8(8);
static const XorTransform xor_e8(-24);
static const Mul9Transform mul9_76(118);
static const XorTransform xor_48(72);
static const XorTransform xor_c0(-64);
static const AddTransform add_28(40);
static const Mul9Transform mul9_b6(-74);
static const Mul9Transform mul9_52(82);
static const Mul9Transform mul9_36(54);
static const XorTransform xor_38(56);
static const XorTransform xor_28(40);
static const AddTransform add_57(87);
const ITransform* InterpretedVirtualMachine::addressTransformations[TransformationCount] = {
(ITransform*)&mul9_6d,
(ITransform*)&xor_60,
(ITransform*)&mul9_ed,
(ITransform*)&add_9e,
(ITransform*)&add_eb,
(ITransform*)&xor_b0,
(ITransform*)&mul9_a4,
(ITransform*)&add_71,
(ITransform*)&mul9_64,
(ITransform*)&add_d9,
(ITransform*)&xor_78,
(ITransform*)&mul9_89,
(ITransform*)&add_8f,
(ITransform*)&add_6f,
(ITransform*)&xor_68,
(ITransform*)&mul9_ad,
(ITransform*)&mul9_7f,
(ITransform*)&xor_90,
(ITransform*)&add_59,
(ITransform*)&add_e0,
(ITransform*)&add_68,
(ITransform*)&xor_88,
(ITransform*)&xor_18,
(ITransform*)&mul9_9,
(ITransform*)&add_e1,
(ITransform*)&xor_f0,
(ITransform*)&add_44,
(ITransform*)&mul9_92,
(ITransform*)&xor_40,
(ITransform*)&xor_d8,
(ITransform*)&xor_f8,
(ITransform*)&add_f6,
(ITransform*)&xor_e0,
(ITransform*)&add_e,
(ITransform*)&mul9_d2,
(ITransform*)&xor_98,
(ITransform*)&mul9_24,
(ITransform*)&add_64,
(ITransform*)&mul9_bf,
(ITransform*)&mul9_1b,
(ITransform*)&mul9_5b,
(ITransform*)&add_9b,
(ITransform*)&add_a2,
(ITransform*)&mul9_f6,
(ITransform*)&xor_50,
(ITransform*)&add_94,
(ITransform*)&add_c6,
(ITransform*)&xor_30,
(ITransform*)&mul9_49,
(ITransform*)&xor_d0,
(ITransform*)&xor_20,
(ITransform*)&xor_a0,
(ITransform*)&add_76,
(ITransform*)&add_5b,
(ITransform*)&mul9_12,
(ITransform*)&add_f5,
(ITransform*)&mul9_3f,
(ITransform*)&add_72,
(ITransform*)&mul9_2d,
(ITransform*)&add_bd,
(ITransform*)&add_35,
(ITransform*)&mul9_9b,
(ITransform*)&mul9_ff,
(ITransform*)&xor_10,
(ITransform*)&mul9_db,
(ITransform*)&mul9_e4,
(ITransform*)&mul9_c9,
(ITransform*)&xor_a8,
(ITransform*)&xor_b8,
(ITransform*)&add_24,
(ITransform*)&xor_c8,
(ITransform*)&add_74,
(ITransform*)&xor_58,
(ITransform*)&xor_80,
(ITransform*)&add_32,
(ITransform*)&add_69,
(ITransform*)&add_db,
(ITransform*)&xor_70,
(ITransform*)&xor_8,
(ITransform*)&xor_e8,
(ITransform*)&mul9_76,
(ITransform*)&xor_48,
(ITransform*)&xor_c0,
(ITransform*)&add_28,
(ITransform*)&mul9_b6,
(ITransform*)&mul9_52,
(ITransform*)&mul9_36,
(ITransform*)&xor_38,
(ITransform*)&xor_28,
(ITransform*)&add_57,
};
}

View File

@@ -450,6 +450,8 @@ namespace RandomX {
return "l";
case 7:
return "ge";
default:
UNREACHABLE;
}
}

View File

@@ -296,6 +296,8 @@ namespace RandomX {
return "lt";
case 7:
return "ge";
default:
UNREACHABLE;
}
}

View File

@@ -27,6 +27,7 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include <stdexcept>
#include <sstream>
#include <cmath>
#include <cfloat>
#include <thread>
#include "intrinPortable.h"
#ifdef STATS
@@ -262,7 +263,7 @@ namespace RandomX {
uint32_t spAddr0 = mem.mx;
uint32_t spAddr1 = mem.ma;
for(int iter = 0; iter < InstructionCount; ++iter) {
for(unsigned iter = 0; iter < InstructionCount; ++iter) {
//std::cout << "Iteration " << iter << std::endl;
spAddr0 ^= r[readReg0];
spAddr0 &= ScratchpadL3Mask64;

View File

@@ -705,6 +705,8 @@ namespace RandomX {
return 0x9c; //setl
case 7:
return 0x9d; //setge
default:
UNREACHABLE;
}
}

View File

@@ -83,7 +83,7 @@ namespace RandomX {
template<size_t N>
void emit(const uint8_t (&src)[N]) {
for (int i = 0; i < N; ++i) {
for (unsigned i = 0; i < N; ++i) {
code[codePos + i] = src[i];
}
codePos += N;

View File

@@ -28,9 +28,15 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
std::ostream& operator<<(std::ostream& os, const RandomX::RegisterFile& rf) {
for (int i = 0; i < RandomX::RegistersCount; ++i)
os << std::hex << "r" << i << " = " << rf.r[i] << std::endl << std::dec;
for (int i = 0; i < RandomX::RegistersCount; ++i)
for (int i = 0; i < 4; ++i)
os << std::hex << "f" << i << " = " << *(uint64_t*)&rf.f[i].hi << " (" << rf.f[i].hi << ")" << std::endl
<< " = " << *(uint64_t*)&rf.f[i].lo << " (" << rf.f[i].lo << ")" << std::endl << std::dec;
for (int i = 0; i < 4; ++i)
os << std::hex << "e" << i << " = " << *(uint64_t*)&rf.e[i].hi << " (" << rf.e[i].hi << ")" << std::endl
<< " = " << *(uint64_t*)&rf.e[i].lo << " (" << rf.e[i].lo << ")" << std::endl << std::dec;
for (int i = 0; i < 4; ++i)
os << std::hex << "a" << i << " = " << *(uint64_t*)&rf.a[i].hi << " (" << rf.a[i].hi << ")" << std::endl
<< " = " << *(uint64_t*)&rf.a[i].lo << " (" << rf.a[i].lo << ")" << std::endl << std::dec;
return os;
}

View File

@@ -93,6 +93,7 @@ namespace RandomX {
class ILightClientAsyncWorker {
public:
virtual ~ILightClientAsyncWorker() {}
virtual void prepareBlock(addr_t) = 0;
virtual void prepareBlocks(void* out, uint32_t startBlock, uint32_t blockCount) = 0;
virtual const uint64_t* getBlock(addr_t) = 0;

View File

@@ -19,6 +19,18 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#include "softAes.h"
/*
Calculate a 512-bit hash of 'input' using 4 lanes of AES.
The input is treated as a set of round keys for the encryption
of the initial state.
'inputSize' must be a multiple of 64.
For a 2 MiB input, this has the same security as 32768-round
AES encryption.
Hashing throughput: >20 GiB/s per CPU core with hardware AES
*/
template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
const uint8_t* inptr = (uint8_t*)input;
@@ -72,6 +84,16 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
/*
Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
The state is encrypted using a single AES round per 16 bytes of output
in 4 lanes.
'outputSize' must be a multiple of 64.
The modified state is written back to 'state' to allow multiple
calls to this function.
*/
template<bool softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
const uint8_t* outptr = (uint8_t*)buffer;

View File

@@ -254,7 +254,7 @@ int main(int argc, char** argv) {
}
if (RandomX::trace) {
std::cout << "Keys: " << std::endl;
for (int i = 0; i < dataset.cache->getKeys().size(); ++i) {
for (unsigned i = 0; i < dataset.cache->getKeys().size(); ++i) {
outputHex(std::cout, (char*)&dataset.cache->getKeys()[i], sizeof(__m128i));
}
std::cout << std::endl;
@@ -280,7 +280,7 @@ int main(int argc, char** argv) {
threads.push_back(std::thread(&RandomX::datasetInit<false>, cache, dataset, i * perThread, count));
}
}
for (int i = 0; i < threads.size(); ++i) {
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
@@ -318,10 +318,10 @@ int main(int argc, char** argv) {
std::cout << "Running benchmark (" << programCount << " programs) ..." << std::endl;
sw.restart();
if (threadCount > 1) {
for (int i = 0; i < vms.size(); ++i) {
for (unsigned i = 0; i < vms.size(); ++i) {
threads.push_back(std::thread(&mine, vms[i], std::ref(atomicNonce), std::ref(result), programCount, i, scratchpadMem + RandomX::ScratchpadSize * i));
}
for (int i = 0; i < threads.size(); ++i) {
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
@@ -336,10 +336,10 @@ int main(int argc, char** argv) {
if(programCount == 1000)
std::cout << "Reference result: 3e1c5f9b9d0bf8ffa250f860bf5f7ab76ac823b206ddee6a592660119a3640c6" << std::endl;
if (lightClient) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per program" << std::endl;
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
}
else {
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " hashes per second" << std::endl;
}
}
catch (std::exception& e) {