Vector FPU instructions

JitCompilerX86 - static code written in asm
Updated ALU/FPU tests
Updated instruction weights
This commit is contained in:
tevador
2018-12-31 19:06:45 +01:00
parent a09bee8d60
commit 3caecc7646
30 changed files with 3757 additions and 3618 deletions

View File

@@ -44,9 +44,11 @@ namespace RandomX {
*(((uint32_t*)&reg) + i) = gen();
}
FPINIT();
for (int i = 0; i < 8; ++i) {
reg.f[i].f64 = (double)reg.f[i].i64;
for (int i = 0; i < RegistersCount; ++i) {
reg.f[i].lo.f64 = (double)reg.f[i].lo.i64;
reg.f[i].hi.f64 = (double)reg.f[i].hi.i64;
}
//std::cout << reg;
p.initialize(gen);
mem.ma = (gen() ^ *(((uint32_t*)seed) + 4)) & ~7;
mem.mx = *(((uint32_t*)seed) + 5);
@@ -97,52 +99,36 @@ namespace RandomX {
// Loads operand B for instructions that carry a 32-bit immediate.
// Selector is (inst.locb & 7):
//   0-5 -> integer register reg.r[inst.regb % RegistersCount]
//   6-7 -> inst.imm32 stored through the signed 64-bit member of a temporary
// NOTE(review): every case label/statement below appears twice. This text looks
// like a rendered diff hunk (see the "@@ -97,52 +99,36 @@" header) whose removed
// and added lines are shown together without +/- markers; the two copies are
// textually identical here, so presumably only indentation changed. Confirm
// against the real file before treating this span as compilable code.
convertible_t InterpretedVirtualMachine::loadbr1(Instruction& inst) {
switch (inst.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
return reg.r[inst.regb % RegistersCount];
case 6:
case 7:
convertible_t temp;
temp.i64 = inst.imm32; //sign-extend imm32
return temp;
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
return reg.r[inst.regb % RegistersCount];
case 6:
case 7:
convertible_t temp;
temp.i64 = inst.imm32; //sign-extend imm32
return temp;
}
}
// Loads operand B for instructions that carry an 8-bit immediate: depending on
// (inst.locb & 7) it returns either reg.r[inst.regb % RegistersCount] or
// inst.imm8 stored through the unsigned 64-bit member of a temporary.
// NOTE(review): this span looks like a rendered diff hunk with the +/- markers
// lost, so more than one version is interleaved. Three pieces are visible:
//   1. a loadbr0 body mapping selectors 0-5 to the register and 6-7 to imm8;
//   2. a loadbf() definition that has no closing braces of its own in this view;
//   3. a second case list mapping 0-3 to the register and 4-7 to imm8.
// Presumably (1) and (2) are the removed lines and (3) is the replacement body
// of loadbr0 - confirm against the real file.
convertible_t InterpretedVirtualMachine::loadbr0(Instruction& inst) {
switch (inst.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
return reg.r[inst.regb % RegistersCount];
case 6:
case 7:
convertible_t temp;
temp.u64 = inst.imm8;
return temp;
}
}
// loadbf: returns a double taken from reg.f[inst.regb % RegistersCount].f64 for
// selectors 0-5, or inst.imm32 converted to double for selectors 6-7.
double InterpretedVirtualMachine::loadbf(Instruction& inst) {
switch (inst.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
return reg.f[inst.regb % RegistersCount].f64;
case 6:
case 7:
return (double)inst.imm32;
// NOTE(review): the cases from here on return convertible_t values, which does
// not match loadbf's double return type; presumably they belong to loadbr0.
case 0:
case 1:
case 2:
case 3:
return reg.r[inst.regb % RegistersCount];
case 4:
case 5:
case 6:
case 7:
convertible_t temp;
temp.u64 = inst.imm8;
return temp;
}
}
@@ -150,43 +136,61 @@ namespace RandomX {
addr_t addr;
switch (inst.locc & 7)
{
case 0:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL2];
case 0:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL2];
case 1:
case 2:
case 3:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL1];
case 1:
case 2:
case 3:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL1];
case 4:
case 5:
case 6:
case 7:
return reg.r[inst.regc % RegistersCount];
case 4:
case 5:
case 6:
case 7:
return reg.r[inst.regc % RegistersCount];
}
}
// NOTE(review): two function headers appear back to back (getcf and writecf)
// and their bodies are interleaved; this looks like a rendered diff hunk with
// the +/- markers lost. Presumably getcf is the removed function and writecf
// its replacement - confirm against the real file.
//
// Lines that `return` a reference match getcf's convertible_t& return type:
//   selector 0   -> scratchpad[addr % ScratchpadL2]
//   selector 1-3 -> scratchpad[addr % ScratchpadL1]
//   selector 4-7 -> reg.f[inst.regc % RegistersCount]
// Lines that assign into the scratchpad match writecf's void return type:
//   selector 4   -> scratchpad[addr % ScratchpadL2] = hi or lo member of regc
//   selector 5-7 -> scratchpad[addr % ScratchpadL1] = hi or lo member of regc
//   otherwise    -> nothing is stored
// In both, the selector is (inst.locc & 7) and
// addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc.
// In the assignments, regc.hi is chosen when (inst.locc & 8) is non-zero,
// regc.lo otherwise.
convertible_t& InterpretedVirtualMachine::getcf(Instruction& inst) {
void InterpretedVirtualMachine::writecf(Instruction& inst, fpu_reg_t& regc) {
addr_t addr;
switch (inst.locc & 7)
{
case 0:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL2];
case 4:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
scratchpad[addr % ScratchpadL2] = (inst.locc & 8) ? regc.hi : regc.lo;
break;
case 1:
case 2:
case 3:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
return scratchpad[addr % ScratchpadL1];
case 5:
case 6:
case 7:
addr = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
scratchpad[addr % ScratchpadL1] = (inst.locc & 8) ? regc.hi : regc.lo;
// No break after the store above: control reaches the `default: break;` below.
case 4:
case 5:
case 6:
case 7:
return reg.f[inst.regc % RegistersCount];
default:
break;
}
}
// Stores the `lo` member of regc into the scratchpad, or does nothing.
// The destination is chosen by (inst.locc & 7):
//   0-3 -> no store
//   4   -> scratchpad[address % ScratchpadL2]
//   5-7 -> scratchpad[address % ScratchpadL1]
// where address = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc.
void InterpretedVirtualMachine::writecflo(Instruction& inst, fpu_reg_t& regc) {
	const int target = inst.locc & 7;
	if (target < 4) {
		return; // selectors 0-3 leave the scratchpad untouched
	}
	const addr_t address = reg.r[inst.regc % RegistersCount].u32 ^ inst.addrc;
	if (target == 4) {
		scratchpad[address % ScratchpadL2] = regc.lo;
	}
	else {
		scratchpad[address % ScratchpadL1] = regc.lo;
	}
}
@@ -194,22 +198,18 @@ namespace RandomX {
if(trace) std::cout << std::hex << /*a.u64 << " " << b.u64 << " " <<*/ c.u64 << std::endl;
// FPU_RETIRE(x): evaluates x(a, b, c), hands the result to writecf(inst, c),
// prints a hex trace line when `trace` is set and, when `fpuCheck` is set,
// throws std::runtime_error if the result is NaN (value != itself) or is
// classified FP_SUBNORMAL. Expects `inst`, `a`, `b` and `c` in the expanding scope.
// NOTE(review): this span looks like a rendered diff hunk with the +/- markers
// lost. Lines using c.f64 / bc.f64 / c.u64 treat the operands as single values;
// the neighbouring lines using c.hi / c.lo / b.hi / b.lo treat them as two-part
// registers. Presumably the former are the removed lines and the latter their
// replacements - confirm against the real file. As shown, the macro would
// contain duplicated `if`/`else if` heads and would not compile.
#define FPU_RETIRE(x) x(a, b, c); \
writecf(inst, c); \
if(trace) { \
convertible_t bc; \
bc.f64 = b; \
std::cout << std::hex << /*a.u64 << " " << bc.u64 << " " <<*/ c.u64 << std::endl; \
std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl; \
} \
if(fpuCheck) { \
convertible_t bc; \
if(c.f64 != c.f64) { \
if(c.hi.f64 != c.hi.f64 || c.lo.f64 != c.lo.f64) { \
std::stringstream ss; \
bc.f64 = b; \
ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \
ss << "NaN result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
throw std::runtime_error(ss.str()); \
} else if (std::fpclassify(c.f64) == FP_SUBNORMAL) {\
} else if (std::fpclassify(c.hi.f64) == FP_SUBNORMAL || std::fpclassify(c.lo.f64) == FP_SUBNORMAL) {\
std::stringstream ss; \
bc.f64 = b; \
ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << bc.u64 << ") = " << c.u64; \
ss << "Denormal result of " << #x << "(" << std::hex << a.u64 << ", " << b.hi.u64 << " " << b.lo.u64 << ") = " << c.hi.u64 << " " << c.lo.u64 << std::endl; \
throw std::runtime_error(ss.str()); \
} \
}
@@ -220,8 +220,13 @@ namespace RandomX {
#define INC_COUNT(x)
#endif
// FPU_RETIRE_NB(x): evaluates x(a, b, c) and, when `trace` is set, prints c.u64
// in hex. Unlike FPU_RETIRE it performs no writecf call and no fpuCheck.
// NOTE(review): FPU_INST_NB further down shows both FPU_RETIRE_NB(x) and
// FPU_RETIRE_##x(x) as its retire step; presumably this macro is the
// pre-change form from a diff whose +/- markers were lost - confirm.
#define FPU_RETIRE_NB(x) x(a, b, c); \
if(trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
// FPU_RETIRE_FPSQRT(x): calls FPSQRT(a, b, c), passes c to writecf(inst, c) and,
// when `trace` is set, prints c.hi.u64 if (inst.locc & 8) is non-zero, otherwise
// c.lo.u64, in hex. The macro parameter x is not used in the expansion.
// The name matches the FPU_RETIRE_##x token paste in FPU_INST_NB for x == FPSQRT.
#define FPU_RETIRE_FPSQRT(x) FPSQRT(a, b, c); \
writecf(inst, c); \
if(trace) std::cout << std::hex << ((inst.locc & 8) ? c.hi.u64 : c.lo.u64) << std::endl;
// FPU_RETIRE_FPROUND(x): calls FPROUND(a, b, c), passes c to writecflo(inst, c)
// (which stores only c.lo) and, when `trace` is set, prints c.lo.u64 in hex.
// The macro parameter x is not used in the expansion.
// The name matches the FPU_RETIRE_##x token paste in FPU_INST_NB for x == FPROUND.
#define FPU_RETIRE_FPROUND(x) FPROUND(a, b, c); \
writecflo(inst, c); \
if(trace) std::cout << std::hex << c.lo.u64 << std::endl;
#define ALU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
@@ -242,17 +247,17 @@ namespace RandomX {
// FPU_INST(x): defines the handler InterpretedVirtualMachine::h_<x>. The handler
// runs INC_COUNT(x), loads operand a with loada(inst), binds operands b and c,
// then expands FPU_RETIRE(x).
// NOTE(review): b and c are each declared twice in this view - once as
// `double b = loadbf(inst)` / `convertible_t& c = getcf(inst)` and once as
// fpu_reg_t references into reg.f indexed by inst.regb / inst.regc modulo
// RegistersCount. This looks like a rendered diff with the +/- markers lost;
// presumably the fpu_reg_t lines replace the other pair - confirm.
#define FPU_INST(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
double b = loadbf(inst); \
convertible_t& c = getcf(inst); \
fpu_reg_t& b = reg.f[inst.regb % RegistersCount]; \
fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
FPU_RETIRE(x) \
}
// FPU_INST_NB(x): defines the handler InterpretedVirtualMachine::h_<x> for FPU
// instructions whose operand b is a default-initialized local rather than a
// loaded value. The handler runs INC_COUNT(x), loads a with loada(inst),
// declares b, binds c, then expands a retire macro.
// NOTE(review): b, c and the retire step each appear twice in this view
// (convertible_t / getcf / FPU_RETIRE_NB(x) versus fpu_reg_t / reg.f /
// FPU_RETIRE_##x(x)). This looks like a rendered diff with the +/- markers
// lost; presumably the fpu_reg_t lines and the token-pasted FPU_RETIRE_##x(x)
// are the replacements - confirm against the real file.
#define FPU_INST_NB(x) void InterpretedVirtualMachine::h_##x(Instruction& inst) { \
INC_COUNT(x) \
convertible_t a = loada(inst); \
convertible_t b; \
convertible_t& c = getcf(inst); \
FPU_RETIRE_NB(x) \
fpu_reg_t b; \
fpu_reg_t& c = reg.f[inst.regc % RegistersCount]; \
FPU_RETIRE_##x(x) \
}
// Expands ALU_INST to define InterpretedVirtualMachine::h_ADD_64.
ALU_INST(ADD_64)