8 branch conditions for CALL/RET

This commit is contained in:
tevador
2018-12-21 22:41:35 +01:00
parent 55afe9646f
commit 740c40b218
11 changed files with 1396 additions and 1051 deletions

View File

@@ -307,7 +307,7 @@ namespace RandomX {
}
}
else {
asmCode << "mov ecx, 1" << std::endl;
asmCode << "\tmov ecx, 1" << std::endl;
asmCode << "\tmov edx, " << regR32[instr.regb % RegistersCount] << std::endl;
asmCode << "\ttest edx, edx" << std::endl;
asmCode << "\tcmovne ecx, edx" << std::endl;
@@ -458,15 +458,36 @@ namespace RandomX {
gencf(instr);
}
// Returns the x86 conditional-jump mnemonic selected by the low three bits of
// instr.locb. The table is arranged in complementary pairs (jbe/ja, js/jns,
// jo/jno, jl/jge), so toggling bit 0 of the index via `invert` yields the
// opposite condition of the one that would otherwise be chosen.
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
	static const char* const mnemonics[8] = {
		"jbe", "ja",
		"js", "jns",
		"jo", "jno",
		"jl", "jge",
	};
	const int selector = (instr.locb & 7) ^ (invert ? 1 : 0);
	return mnemonics[selector];
}
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
gena(instr);
if ((instr.locb & 7) < 6) {
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\tjbe short taken_call_" << i << std::endl;
gencr(instr);
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
asmCode << "taken_call_" << i << ":" << std::endl;
}
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\t" << jumpCondition(instr);
asmCode << " short taken_call_" << i << std::endl;
gencr(instr);
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
asmCode << "taken_call_" << i << ":" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
}
@@ -478,10 +499,9 @@ namespace RandomX {
gena(instr);
asmCode << "\tcmp rsp, rbp" << std::endl;
asmCode << "\tje short not_taken_ret_" << i << std::endl;
if ((instr.locb & 7) < 6) {
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\tja short not_taken_ret_" << i << std::endl;
}
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\t" << jumpCondition(instr, true);
asmCode << " short not_taken_ret_" << i << std::endl;
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;
gencr(instr);
asmCode << "\tret 8" << std::endl;

View File

@@ -280,13 +280,10 @@ namespace RandomX {
void InterpretedVirtualMachine::h_CALL(Instruction& inst) {
convertible_t a = loada(inst);
convertible_t b = loadbr1(inst);
if (b.u32 <= (uint32_t)inst.imm32) {
if (JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
#ifdef STATS
if ((inst.locb & 7) <= 5)
count_CALL_taken++;
else
count_CALL_uncond++;
count_CALL_taken++;
count_jump_taken[inst.locb & 7]++;
#endif
stackPush(a);
stackPush(pc);
@@ -298,6 +295,7 @@ namespace RandomX {
convertible_t& c = getcr(inst);
#ifdef STATS
count_CALL_not_taken++;
count_jump_not_taken[inst.locb & 7]++;
#endif
c.u64 = a.u64;
if (trace) std::cout << std::hex << /*a.u64 << " " <<*/ c.u64 << std::endl;
@@ -308,12 +306,10 @@ namespace RandomX {
convertible_t a = loada(inst);
convertible_t b = loadbr1(inst);
convertible_t& c = getcr(inst);
if (stack.size() > 0 && b.u32 <= (uint32_t)inst.imm32) {
if (stack.size() > 0 && JMP_COND(inst.locb, reg.r[inst.regb % RegistersCount], inst.imm32)) {
#ifdef STATS
if ((inst.locb & 7) <= 5)
count_RET_taken++;
else
count_RET_uncond++;
count_RET_taken++;
count_jump_taken[inst.locb & 7]++;
#endif
auto raddr = stackPopAddress();
auto retval = stackPopValue();
@@ -324,8 +320,10 @@ namespace RandomX {
#ifdef STATS
if (stack.size() == 0)
count_RET_stack_empty++;
else
else {
count_RET_not_taken++;
count_jump_not_taken[inst.locb & 7]++;
}
#endif
c.u64 = a.u64;
}

View File

@@ -71,13 +71,13 @@ namespace RandomX {
int count_FPDIV;
int count_FPSQRT;
int count_FPROUND;
int count_CALL_uncond;
int count_CALL_taken;
int count_CALL_not_taken;
int count_RET_stack_empty;
int count_RET_uncond;
int count_RET_taken;
int count_RET_not_taken;
int count_jump_taken[8] = { 0 };
int count_jump_not_taken[8] = { 0 };
#endif
convertible_t loada(Instruction&);

View File

@@ -657,20 +657,41 @@ namespace RandomX {
gencf(instr);
}
void JitCompilerX86::h_CALL(Instruction& instr, int i) {
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8141)); //cmp regb, imm32
emitByte(0xf8 + (instr.regb % RegistersCount));
emit(instr.imm32);
if ((instr.locc & 7) <= 3) {
emit(uint16_t(0x1676)); //jmp
}
else {
emit(uint16_t(0x0576)); //jmp
}
gencr(instr);
emit(uint16_t(0x06eb)); //jmp to next
// Returns the opcode byte of the conditional jump selected by the low three
// bits of instr.locb. Entries are stored in complementary pairs, so toggling
// bit 0 of the index via `invert` selects the opposite condition.
static inline uint8_t jumpCondition(Instruction& instr, bool invert = false) {
	static const uint8_t opcodes[8] = {
		0x76, //jbe
		0x77, //ja
		0x78, //js
		0x79, //jns
		0x70, //jo
		0x71, //jno
		0x7c, //jl
		0x7d, //jge
	};
	const int selector = (instr.locb & 7) ^ (invert ? 1 : 0);
	return opcodes[selector];
}
void JitCompilerX86::h_CALL(Instruction& instr, int i) {
emit(uint16_t(0x8141)); //cmp regb, imm32
emitByte(0xf8 + (instr.regb % RegistersCount));
emit(instr.imm32);
emitByte(jumpCondition(instr));
if ((instr.locc & 7) <= 3) {
emitByte(0x16);
}
else {
emitByte(0x05);
}
gencr(instr);
emit(uint16_t(0x06eb)); //jmp to next
emitByte(0x50); //push rax
emitByte(0xe8); //call
i = wrapInstr(i + (instr.imm8 & 127) + 2);
@@ -685,22 +706,16 @@ namespace RandomX {
void JitCompilerX86::h_RET(Instruction& instr, int i) {
int crlen = 0;
int blen = 0;
if ((instr.locc & 7) <= 3) {
crlen = 17;
}
if ((instr.locb & 7) <= 5) {
blen = 9;
}
emit(0x74e53b48); //cmp rsp, rbp; je
emitByte(11 + blen + crlen);
if ((instr.locb & 7) <= 5) {
emit(uint16_t(0x8141)); //cmp regb, imm32
emitByte(0xf8 + (instr.regb % RegistersCount));
emit(instr.imm32);
emitByte(0x77); //jmp
emitByte(11 + crlen);
}
emitByte(20 + crlen);
emit(uint16_t(0x8141)); //cmp regb, imm32
emitByte(0xf8 + (instr.regb % RegistersCount));
emit(instr.imm32);
emitByte(jumpCondition(instr, true));
emitByte(11 + crlen);
emitByte(0x48);
emit(0x08244433); //xor rax,QWORD PTR [rsp+0x8]
gencr(instr);

View File

@@ -19,16 +19,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#pragma once
#define WT_ADD_64 16
#define WT_ADD_32 4
#define WT_SUB_64 16
#define WT_SUB_32 4
#define WT_MUL_64 15
#define WT_MULH_64 11
#define WT_MUL_32 11
#define WT_IMUL_32 11
#define WT_IMULH_64 11
#define WT_DIV_64 1
#define WT_ADD_64 10
#define WT_ADD_32 2
#define WT_SUB_64 10
#define WT_SUB_32 2
#define WT_MUL_64 21
#define WT_MULH_64 10
#define WT_MUL_32 15
#define WT_IMUL_32 15
#define WT_IMULH_64 10
#define WT_DIV_64 1
#define WT_IDIV_64 1
#define WT_AND_64 4
#define WT_AND_32 2
@@ -39,16 +39,16 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define WT_SHL_64 3
#define WT_SHR_64 3
#define WT_SAR_64 3
#define WT_ROL_64 9
#define WT_ROR_64 9
#define WT_ROL_64 6
#define WT_ROR_64 6
#define WT_FPADD 20
#define WT_FPSUB 20
#define WT_FPMUL 22
#define WT_FPDIV 8
#define WT_FPSQRT 6
#define WT_FPROUND 2
#define WT_CALL 17
#define WT_RET 15
#define WT_CALL 24
#define WT_RET 18
constexpr int wtSum = WT_ADD_64 + WT_ADD_32 + WT_SUB_64 + WT_SUB_32 + \
WT_MUL_64 + WT_MULH_64 + WT_MUL_32 + WT_IMUL_32 + WT_IMULH_64 + \

View File

@@ -57,6 +57,7 @@ namespace RandomX {
void SAR_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ROL_64(convertible_t& a, convertible_t& b, convertible_t& c);
void ROR_64(convertible_t& a, convertible_t& b, convertible_t& c);
bool JMP_COND(uint8_t, convertible_t&, int32_t);
void FPINIT();
void FPADD(convertible_t& a, double b, convertible_t& c);
void FPSUB(convertible_t& a, double b, convertible_t& c);

View File

@@ -126,6 +126,34 @@ along with RandomX. If not, see<http://www.gnu.org/licenses/>.
#define imulhi64 __imulhi64
#endif
// Subtracts b from a without invoking signed-overflow undefined behavior:
// the difference is formed on the unsigned representations and the result
// is then converted back to int32_t.
static inline int32_t safeSub(int32_t a, int32_t b) {
	const uint32_t minuend = static_cast<uint32_t>(a);
	const uint32_t subtrahend = static_cast<uint32_t>(b);
	return static_cast<int32_t>(minuend - subtrahend);
}
// subOverflow(a, b): true when the signed 32-bit subtraction a - b overflows.
//
// The compiler builtin is used when available. Availability is recorded in a
// private feature macro instead of redefining __has_builtin: that name is
// reserved for the compiler, and forcing it to 1 would make every later
// __has_builtin(...) query in this translation unit report true.
#if defined(__has_builtin)
#if __has_builtin(__builtin_sub_overflow)
#define RANDOMX_HAVE_BUILTIN_SUB_OVERFLOW
#endif
#elif defined(__GNUC__)
// GCC releases that predate __has_builtin: keep the original version check.
#if __GNUC__ >= 5
#define RANDOMX_HAVE_BUILTIN_SUB_OVERFLOW
#endif
#endif

#if defined(RANDOMX_HAVE_BUILTIN_SUB_OVERFLOW)
static inline bool __subOverflow(int32_t a, int32_t b) {
	int32_t temp; // the difference itself is not needed, only the overflow flag
	return __builtin_sub_overflow(a, b, &temp);
}
#define subOverflow __subOverflow
#endif

#ifndef subOverflow
// Portable fallback: with a positive b the wrapped difference must be smaller
// than a, otherwise it must not be; any disagreement means the subtraction
// overflowed.
static inline bool __subOverflow(int32_t a, int32_t b) {
	auto c = safeSub(a, b);
	return (c < a) != (b > 0);
}
#define subOverflow __subOverflow
#endif
static double FlushDenormal(double x) {
if (std::fpclassify(x) == FP_SUBNORMAL) {
return 0;
@@ -235,6 +263,28 @@ namespace RandomX {
c.u64 = ror64(a.u64, (b.u64 & 63));
}
// Evaluates one of eight branch conditions comparing regb against imm32.
// Bits 1-2 of `type` pick the base predicate; bit 0 negates it:
//   0/1: unsigned regb <= imm32         / unsigned regb > imm32
//   2/3: wrapped (regb - imm32) < 0     / >= 0
//   4/5: signed regb - imm32 overflows  / does not overflow
//   6/7: signed regb < imm32            / signed regb >= imm32
bool JMP_COND(uint8_t type, convertible_t& regb, int32_t imm32) {
	const int selector = type & 7;
	const bool negate = (selector & 1) != 0;
	bool base;
	switch (selector >> 1)
	{
	case 0:
		base = regb.u32 <= (uint32_t)imm32;
		break;
	case 1:
		base = safeSub(regb.i32, imm32) < 0;
		break;
	case 2:
		base = subOverflow(regb.i32, imm32);
		break;
	default:
		base = regb.i32 < imm32;
		break;
	}
	return base != negate;
}
void FPINIT() {
setRoundMode(FE_TONEAREST);
}

View File

@@ -248,8 +248,12 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(programCount == 1000)
std::cout << "Reference result: d62ed85c39030cd2c5704fca3a23019f1244f2b03447c9a6b39dea5390ed1d10" << std::endl;
std::cout << "Reference result: f6bf06465d5fa1b1dc919140b9e9f9e210b07ae6d662988458a172e9a267eb3f" << std::endl;
std::cout << "Performance: " << programCount / elapsed << " programs per second" << std::endl;
/*if (threadCount == 1 && !compiled) {
auto ivm = (RandomX::InterpretedVirtualMachine*)vms[0];
std::cout << ivm->getProgam();
}*/
}
catch (std::exception& e) {
std::cout << "ERROR: " << e.what() << std::endl;

File diff suppressed because it is too large Load Diff