Merge pull request #47 from tevador/pr-doc

Documentation and tests
This commit is contained in:
tevador
2019-06-01 11:14:13 +02:00
committed by GitHub
13 changed files with 934 additions and 197 deletions

View File

@@ -37,16 +37,16 @@ namespace randomx {
(this->*handler)(os);
}
void Instruction::genAddressReg(std::ostream& os) const {
os << (getModMem() ? "L1" : "L2") << "[r" << (int)src << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
os << (getModMem() ? "L1" : "L2") << "[r" << srcIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
}
void Instruction::genAddressRegDst(std::ostream& os) const {
void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
if (getModCond() < StoreL3Condition)
os << (getModMem() ? "L1" : "L2");
else
os << "L3";
os << "[r" << (int)dst << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
}
void Instruction::genAddressImm(std::ostream& os) const {
@@ -54,159 +54,192 @@ namespace randomx {
}
void Instruction::h_IADD_RS(std::ostream& os) const {
os << "r" << (int)dst << ", r" << (int)src;
if(dst == RegisterNeedsDisplacement) {
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", r" << srcIndex;
if(dstIndex == RegisterNeedsDisplacement) {
os << ", " << (int32_t)getImm32();
}
os << ", SHFT " << (int)getModShift() << std::endl;
os << ", SHFT " << getModShift() << std::endl;
}
void Instruction::h_IADD_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_ISUB_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
else {
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
}
}
void Instruction::h_ISUB_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_IMUL_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
else {
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
}
}
void Instruction::h_IMUL_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_IMULH_R(std::ostream& os) const {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
void Instruction::h_IMULH_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_ISMULH_R(std::ostream& os) const {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
void Instruction::h_ISMULH_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_INEG_R(std::ostream& os) const {
os << "r" << (int)dst << std::endl;
auto dstIndex = dst % RegistersCount;
os << "r" << dstIndex << std::endl;
}
void Instruction::h_IXOR_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
else {
os << "r" << (int)dst << ", " << (int32_t)getImm32() << std::endl;
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
}
}
void Instruction::h_IXOR_M(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", ";
genAddressReg(os);
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", ";
genAddressReg(os, srcIndex);
os << std::endl;
}
else {
os << "r" << (int)dst << ", ";
os << "r" << dstIndex << ", ";
genAddressImm(os);
os << std::endl;
}
}
void Instruction::h_IROR_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
else {
os << "r" << (int)dst << ", " << (getImm32() & 63) << std::endl;
os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
}
}
void Instruction::h_IROL_R(std::ostream& os) const {
if (src != dst) {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
if (dstIndex != srcIndex) {
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
else {
os << "r" << (int)dst << ", " << (getImm32() & 63) << std::endl;
os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
}
}
void Instruction::h_IMUL_RCP(std::ostream& os) const {
os << "r" << (int)dst << ", " << getImm32() << std::endl;
auto dstIndex = dst % RegistersCount;
os << "r" << dstIndex << ", " << getImm32() << std::endl;
}
void Instruction::h_ISWAP_R(std::ostream& os) const {
os << "r" << (int)dst << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
}
void Instruction::h_FSWAP_R(std::ostream& os) const {
const char reg = (dst >= RegisterCountFlt) ? 'e' : 'f';
auto dstIndex = dst % RegisterCountFlt;
auto dstIndex = dst % RegistersCount;
const char reg = (dstIndex >= RegisterCountFlt) ? 'e' : 'f';
dstIndex %= RegisterCountFlt;
os << reg << dstIndex << std::endl;
}
@@ -218,8 +251,9 @@ namespace randomx {
void Instruction::h_FADD_M(std::ostream& os) const {
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegistersCount;
os << "f" << dstIndex << ", ";
genAddressReg(os);
genAddressReg(os, srcIndex);
os << std::endl;
}
@@ -231,8 +265,9 @@ namespace randomx {
void Instruction::h_FSUB_M(std::ostream& os) const {
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegistersCount;
os << "f" << dstIndex << ", ";
genAddressReg(os);
genAddressReg(os, srcIndex);
os << std::endl;
}
@@ -249,8 +284,9 @@ namespace randomx {
void Instruction::h_FDIV_M(std::ostream& os) const {
auto dstIndex = dst % RegisterCountFlt;
auto srcIndex = src % RegistersCount;
os << "e" << dstIndex << ", ";
genAddressReg(os);
genAddressReg(os, srcIndex);
os << std::endl;
}
@@ -260,40 +296,21 @@ namespace randomx {
}
void Instruction::h_CFROUND(std::ostream& os) const {
os << "r" << (int)src << ", " << (getImm32() & 63) << std::endl;
}
static inline const char* condition(int index) {
switch (index)
{
case 0:
return "be";
case 1:
return "ab";
case 2:
return "sg";
case 3:
return "ns";
case 4:
return "of";
case 5:
return "no";
case 6:
return "lt";
case 7:
return "ge";
default:
UNREACHABLE;
}
auto srcIndex = src % RegistersCount;
os << "r" << srcIndex << ", " << (getImm32() & 63) << std::endl;
}
void Instruction::h_CBRANCH(std::ostream& os) const {
os << "r" << (int)dst << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
}
void Instruction::h_ISTORE(std::ostream& os) const {
genAddressRegDst(os);
os << ", r" << (int)src << std::endl;
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
genAddressRegDst(os, dstIndex);
os << ", r" << srcIndex << std::endl;
}
void Instruction::h_NOP(std::ostream& os) const {

View File

@@ -109,9 +109,9 @@ namespace randomx {
void print(std::ostream&) const;
static const char* names[256];
static InstructionFormatter engine[256];
void genAddressReg(std::ostream& os) const;
void genAddressReg(std::ostream& os, int) const;
void genAddressImm(std::ostream& os) const;
void genAddressRegDst(std::ostream&) const;
void genAddressRegDst(std::ostream&, int) const;
void h_IADD_RS(std::ostream&) const;
void h_IADD_M(std::ostream&) const;
void h_ISUB_R(std::ostream&) const;

View File

@@ -70,10 +70,6 @@ void generateNative(uint32_t nonce) {
fillAes1Rx4<softAes>((void*)hash, randomx::ScratchpadSize, scratchpad);
alignas(16) randomx::Program prog;
fillAes1Rx4<softAes>((void*)hash, sizeof(prog), &prog);
for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
prog(i).dst %= 8;
prog(i).src %= 8;
}
std::cout << prog << std::endl;
}

View File

@@ -83,7 +83,7 @@ int main(int argc, char** argv) {
readIntOption("--seed", argc, argv, seed, 0);
readIntOption("--executionPorts", argc, argv, executionPorts, 4);
readIntOption("--memoryPorts", argc, argv, memoryPorts, 2);
readIntOption("--pipeline", argc, argv, pipeline, 3 + speculate);
readIntOption("--pipeline", argc, argv, pipeline, 3);
randomx::Program p, original;
double totalCycles = 0.0;
double jumpCount = 0;
@@ -113,7 +113,6 @@ int executeInOrder(randomx::Program& p, randomx::Program& original, bool print,
int flt_reg_ready[randomx::RegistersCount] = { 0 };
//each workgroup takes 1 or 2 cycles (2 cycles if any instruction has a memory operand)
while (index < RANDOMX_PROGRAM_SIZE) {
int memoryReads = 0;
int memoryAccesses = 0;
bool hasRound = false;
int workers = 0;
@@ -128,7 +127,10 @@ int executeInOrder(randomx::Program& p, randomx::Program& original, bool print,
if (has(instr, MASK_SRC, SRC_INT) && int_reg_ready[instr.src] > cycle)
break;
if (has(instr, MASK_SRC, SRC_MEM) && int_reg_ready[instr.src] > cycle)
if (has(instr, MASK_SRC, SRC_MEM) && int_reg_ready[instr.src] > cycle - 1)
break;
if (has(instr, MASK_DST, DST_MEM) && int_reg_ready[instr.dst] > cycle - 1)
break;
if (has(instr, MASK_DST, DST_FLT) && flt_reg_ready[instr.dst] > cycle)
@@ -160,20 +162,12 @@ int executeInOrder(randomx::Program& p, randomx::Program& original, bool print,
if (has(instr, MASK_EXT, OP_CFROUND))
hasRound = true;
if (has(instr, MASK_SRC, SRC_MEM)) {
memoryReads++;
if (has(instr, MASK_SRC, SRC_MEM) || has(instr, MASK_DST, DST_MEM)) {
memoryAccesses++;
if (print)
std::cout << std::setw(2) << (cycle + 2) << ": " << origi;
}
else {
if (print)
std::cout << std::setw(2) << (cycle + 1) << ": " << origi;
}
if (has(instr, MASK_DST, DST_MEM)) {
memoryAccesses++;
}
if (print)
std::cout << std::setw(2) << (cycle + 1) << ": " << origi;
//non-speculative execution must stall after branch
if (!speculate && has(instr, MASK_EXT, OP_BRANCH)) {
@@ -183,8 +177,6 @@ int executeInOrder(randomx::Program& p, randomx::Program& original, bool print,
}
//std::cout << " workers: " << workers << std::endl;
cycle++;
if (memoryReads)
cycle++;
}
if (speculate) {
//account for mispredicted branches
@@ -201,8 +193,8 @@ int executeInOrder(randomx::Program& p, randomx::Program& original, bool print,
int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool print, int executionPorts, int memoryPorts, bool speculate, int pipeline) {
int index = 0;
int busyExecutionPorts[RANDOMX_PROGRAM_SIZE] = { 0 };
int busyMemoryPorts[RANDOMX_PROGRAM_SIZE] = { 0 };
int busyExecutionPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 };
int busyMemoryPorts[2 * RANDOMX_PROGRAM_SIZE] = { 0 };
int int_reg_ready[randomx::RegistersCount] = { 0 };
int flt_reg_ready[randomx::RegistersCount] = { 0 };
int fprcReady = 0;
@@ -219,14 +211,15 @@ int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool prin
//check dependencies
if (has(instr, MASK_SRC, SRC_INT)) {
retireCycle = std::max(retireCycle, int_reg_ready[instr.src]);
int_reg_ready[instr.src] = retireCycle;
}
if (has(instr, MASK_SRC, SRC_MEM)) {
retireCycle = std::max(retireCycle, int_reg_ready[instr.src]);
retireCycle = std::max(retireCycle, int_reg_ready[instr.src] + 1);
//find free memory port
do {
while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) {
retireCycle++;
} while (busyMemoryPorts[retireCycle - 1] >= memoryPorts);
}
busyMemoryPorts[retireCycle - 1]++;
}
@@ -244,11 +237,13 @@ int executeOutOfOrder(randomx::Program& p, randomx::Program& original, bool prin
//execute
if (has(instr, MASK_DST, DST_MEM)) {
retireCycle = std::max(retireCycle, int_reg_ready[instr.dst] + 1);
//find free memory port
do {
while (busyMemoryPorts[retireCycle - 1] >= memoryPorts) {
retireCycle++;
} while (busyMemoryPorts[retireCycle - 1] >= memoryPorts);
}
busyMemoryPorts[retireCycle - 1]++;
retireCycle++;
}
if (has(instr, MASK_DST, DST_FLT)) {
@@ -625,7 +620,6 @@ int analyze(randomx::Program& p) {
CASE_REP(ISTORE) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.src = instr.src % randomx::RegistersCount;
instr.opcode |= SRC_INT;
instr.opcode |= DST_MEM;
if (instr.getModCond() < randomx::StoreL3Condition)
instr.imm32 = (instr.getModMem() ? randomx::ScratchpadL1Mask : randomx::ScratchpadL2Mask);

172
src/tests/runtime-distr.cpp Normal file
View File

@@ -0,0 +1,172 @@
#include <thread>
#include "utility.hpp"
#include "stopwatch.hpp"
#include "../dataset.hpp"
#include "../vm_compiled.hpp"
#include "../blake2/blake2.h"
//Pairs an integer index with a runtime value (both stored verbatim).
struct Outlier {
	int index;      //index supplied at construction
	double runtime; //runtime supplied at construction

	//Stores idx and rtime into the corresponding members.
	Outlier(int idx, double rtime) : index{ idx }, runtime{ rtime } {}
};
int main(int argc, char** argv) {
constexpr int distributionSize = 100;
int distribution[distributionSize + 1] = { 0 };
Stopwatch sw;
alignas(16) uint64_t hash[8];
uint64_t checksum = 0;
double totalRuntime = 0;
double maxRuntime = 0;
std::vector<Outlier> outliers;
outliers.reserve(25);
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
bool softAes, largePages, jit, verify;
int totalCount, initThreadCount;
double binSize, offset;
int32_t seed;
readOption("--verify", argc, argv, verify);
readOption("--jit", argc, argv, jit);
readOption("--softAes", argc, argv, softAes);
readIntOption("--nonces", argc, argv, totalCount, 10000);
readIntOption("--init", argc, argv, initThreadCount, 1);
readFloatOption("--binSize", argc, argv, binSize, 1e-3);
readFloatOption("--offset", argc, argv, offset, 0);
readIntOption("--seed", argc, argv, seed, 0);
readOption("--largePages", argc, argv, largePages);
if (!verify) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_FULL_MEM);
std::cout << "Measure program runtime" << std::endl;
}
else {
std::cout << "Measure verification time" << std::endl;
}
std::cout << " - histogram offset: " << offset << std::endl;
std::cout << " - histogram bin size: " << binSize << std::endl;
if (jit) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_JIT);
std::cout << " - JIT compiled mode" << std::endl;
}
else {
std::cout << " - interpreted mode" << std::endl;
}
if (softAes) {
std::cout << " - software AES mode" << std::endl;
}
else {
flags = (randomx_flags)(flags | RANDOMX_FLAG_HARD_AES);
std::cout << " - hardware AES mode" << std::endl;
}
if (largePages) {
flags = (randomx_flags)(flags | RANDOMX_FLAG_LARGE_PAGES);
std::cout << " - large pages mode" << std::endl;
}
else {
std::cout << " - small pages mode" << std::endl;
}
std::cout << "Initializing..." << std::endl;
randomx_cache *cache = randomx_alloc_cache(flags);
randomx_dataset *dataset = nullptr;
if (cache == nullptr) {
std::cout << "Cache allocation failed" << std::endl;
return 1;
}
randomx_init_cache(cache, &seed, sizeof seed);
if (!verify) {
blake2b(&hash, sizeof hash, &seed, sizeof seed, nullptr, 0);
dataset = randomx_alloc_dataset(flags);
if (dataset == nullptr) {
std::cout << "Dataset allocation failed" << std::endl;
return 1;
}
std::vector<std::thread> threads;
uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) {
auto perThread = datasetItemCount / initThreadCount;
auto remainder = datasetItemCount % initThreadCount;
uint32_t startItem = 0;
for (int i = 0; i < initThreadCount; ++i) {
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
startItem += count;
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
else {
randomx_init_dataset(dataset, cache, 0, datasetItemCount);
}
randomx_release_cache(cache);
cache = nullptr;
}
std::cout << "Running " << totalCount << " programs..." << std::endl;
randomx_vm* vm = randomx_create_vm(flags, cache, dataset);
if (!verify) {
vm->initScratchpad(&hash);
vm->resetRoundingMode();
}
for (int i = 0; i < totalCount; ++i) {
sw.restart();
if (verify)
randomx_calculate_hash(vm, &i, sizeof i, &hash);
else
vm->run(&hash);
double elapsed = sw.getElapsed();
//std::cout << "Elapsed: " << elapsed << std::endl;
totalRuntime += elapsed;
if (elapsed > maxRuntime)
maxRuntime = elapsed;
int bin = (elapsed - offset) / binSize;
bool outlier = false;
if (bin < 0) {
bin = 0;
outlier = true;
}
if (bin > distributionSize) {
bin = distributionSize;
outlier = true;
}
if (outlier && outliers.size() < outliers.capacity())
outliers.push_back(Outlier(i, elapsed));
distribution[bin]++;
if(!verify)
blake2b(hash, sizeof(hash), vm->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
checksum ^= hash[0];
}
for (int i = 0; i < distributionSize + 1; ++i) {
std::cout << i << " " << distribution[i] << std::endl;
}
std::cout << "Average runtime: " << totalRuntime / totalCount << std::endl;
std::cout << "Maximum runtime: " << maxRuntime << std::endl;
std::cout << "Checksum: " << checksum << std::endl;
std::cout << "Outliers: " << std::endl;
for (Outlier& ol : outliers) {
std::cout << " " << ol.index << ": " << ol.runtime << std::endl;
}
return 0;
}

View File

@@ -66,6 +66,15 @@ inline void readIntOption(const char* option, int argc, char** argv, int& out, i
out = defaultValue;
}
/*
	Looks for `option` among the arguments and parses the argument that follows it with atof.
	The first occurrence whose parsed value is strictly positive wins and is stored in `out`.
	If there is no such occurrence (option missing, no following argument, or every value <= 0),
	`out` is set to `defaultValue`.
*/
inline void readFloatOption(const char* option, int argc, char** argv, double& out, double defaultValue) {
	//the last argument cannot be an option name because it has no value after it
	const int lastCandidate = argc - 1;
	for (int pos = 0; pos < lastCandidate; ++pos) {
		if (strcmp(argv[pos], option) != 0)
			continue;
		out = atof(argv[pos + 1]);
		if (out > 0)
			return;
	}
	out = defaultValue;
}
inline void readInt(int argc, char** argv, int& out, int defaultValue) {
for (int i = 0; i < argc; ++i) {
if (*argv[i] != '-' && (out = atoi(argv[i])) > 0) {

View File

@@ -49,6 +49,10 @@ public:
//Returns the scratchpad member as a pointer to const (read-only view for the caller).
const void* getScratchpad() {
return scratchpad;
}
//Returns a const reference to the program member of this object.
const randomx::Program& getProgram() {
	return program;
}
protected:
void initialize();
alignas(64) randomx::Program program;