Vector FPU instructions

JitCompilerX86 - static code written in asm
Updated ALU/FPU tests
Updated instruction weights
This commit is contained in:
tevador
2018-12-31 19:06:45 +01:00
parent a09bee8d60
commit 3caecc7646
30 changed files with 3757 additions and 3618 deletions

View File

@@ -54,7 +54,7 @@ namespace RandomX {
(this->*generator)(instr, i);
}
void AssemblyGeneratorX86::gena(Instruction& instr) {
void AssemblyGeneratorX86::genar(Instruction& instr) {
asmCode << "\txor " << regR[instr.rega % RegistersCount] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;
switch (instr.loca & 7)
{
@@ -63,7 +63,7 @@ namespace RandomX {
case 2:
case 3:
asmCode << "\tmov ecx, " << regR32[instr.rega % RegistersCount] << std::endl;
asmCode << "\tcall rx_read_dataset" << std::endl;
asmCode << "\tcall rx_read_dataset_r" << std::endl;
return;
case 4:
@@ -80,6 +80,33 @@ namespace RandomX {
}
}
// Emits the operand-A load sequence for a floating-point instruction.
// First the A register is xor-ed with the instruction's addra constant, then
// the low three bits of instr.loca pick the source:
//   0-3     -> register value passed in ecx to rx_read_dataset_f
//   4       -> address masked with (ScratchpadL2 - 1)
//   5, 6, 7 -> address masked with (ScratchpadL1 - 1)
// For the last two, cvtdq2pd converts the two packed 32-bit integers at
// [rsi + rax * 8] into two doubles in xmm0.
void AssemblyGeneratorX86::genaf(Instruction& instr) {
	const auto regIndex = instr.rega % RegistersCount;
	const auto location = instr.loca & 7;

	asmCode << "\txor " << regR[regIndex] << ", 0" << std::hex << instr.addra << "h" << std::dec << std::endl;

	if (location <= 3) {
		// Dataset read: the helper takes its argument in ecx.
		asmCode << "\tmov ecx, " << regR32[regIndex] << std::endl;
		asmCode << "\tcall rx_read_dataset_f" << std::endl;
		return;
	}

	// Memory read addressed off rsi; only the address mask differs between the two levels.
	asmCode << "\tmov eax, " << regR32[regIndex] << std::endl;
	if (location == 4) {
		asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
	}
	else {
		asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
	}
	asmCode << "\tcvtdq2pd xmm0, qword ptr [rsi + rax * 8]" << std::endl;
}
void AssemblyGeneratorX86::genbr0(Instruction& instr, const char* instrx86) {
switch (instr.locb & 7)
{
@@ -87,8 +114,6 @@ namespace RandomX {
case 1:
case 2:
case 3:
case 4:
case 5:
asmCode << "\tmov rcx, " << regR[instr.regb % RegistersCount] << std::endl;
asmCode << "\t" << instrx86 << " rax, cl" << std::endl;
return;
@@ -133,26 +158,7 @@ namespace RandomX {
}
// Emits the operand-B half of a floating-point operation: the x86 mnemonic
// passed in `instrx86` is applied to xmm0 with a source derived from `instr`.
// NOTE(review): this listing is a diff view, so pre-commit and post-commit lines
// are interleaved. The -2048 mask, the cvtsi2sd conversion and the whole switch
// look like the removed scalar path; the single statement after the switch looks
// like the new body. Confirm against the actual file.
void AssemblyGeneratorX86::genbf(Instruction& instr, const char* instrx86) {
// Clear the low 11 bits of rax, then convert the integer to a double in xmm0.
asmCode << "\tand rax, -2048" << std::endl;
asmCode << "\tcvtsi2sd xmm0, rax" << std::endl;
switch (instr.locb & 7)
{
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
// Register operand: B comes from the floating-point register selected by regb.
asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
return;
default:
// Immediate operand: imm32 is converted to double and its 64-bit pattern
// (read back through convertible_t, presumably a union - confirm) is moved
// into xmm1 by way of rax.
convertible_t bimm;
bimm.f64 = (double)instr.imm32;
asmCode << "\tmov rax, " << bimm.i64 << std::endl;
asmCode << "\tmovd xmm1, rax" << std::endl;
asmCode << "\t" << instrx86 << " xmm0, xmm1" << std::endl;
return;
}
// Register-only form: B always comes from the floating-point register selected by regb.
asmCode << "\t" << instrx86 << " xmm0, " << regF[instr.regb % RegistersCount] << std::endl;
}
void AssemblyGeneratorX86::gencr(Instruction& instr) {
@@ -165,7 +171,7 @@ namespace RandomX {
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
}
return;
@@ -178,76 +184,75 @@ namespace RandomX {
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmov qword ptr [rsi + rax * 8], rcx" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rcx" << std::endl;
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rcx" << std::endl;
}
return;
default:
asmCode << "\tmov " << regR[instr.regc % RegistersCount] << ", rax" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
}
}
}
// Emits the result-store code for a floating-point instruction. The low three
// bits of instr.locc select the destination; memory destinations are addressed
// as [rsi + rax * 8], where eax is register C xor addrc, masked with
// (ScratchpadL2 - 1) or (ScratchpadL1 - 1). When `trace` is set, an extra store
// to [rsi + rdi * 8 + <offset>] is emitted.
// NOTE(review): this listing is a diff view, so pre-commit and post-commit lines
// are interleaved (hence two signature lines and two sets of case labels).
// Confirm against the actual file which lines are live.
void AssemblyGeneratorX86::gencf(Instruction& instr) {
// NOTE(review): a default argument on an out-of-class definition conflicts with
// a default given on the in-class declaration - verify against the header.
void AssemblyGeneratorX86::gencf(Instruction& instr, bool alwaysLow = false) {
// Unless alwaysLow is set, copy the whole of xmm0 into the destination register.
if(!alwaysLow)
asmCode << "\tmovaps " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
// Memory stores write the high half (movhpd) only when bit 3 of locc is set and
// alwaysLow is false; otherwise they write the low half (movlpd).
const char* store = (!alwaysLow && (instr.locc & 8)) ? "movhpd" : "movlpd";
switch (instr.locc & 7)
{
case 0:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
break;
case 4:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL2 - 1) << std::endl;
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break;
case 1:
case 2:
case 3:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\tmovd qword ptr [rsi + rax * 8], xmm0" << std::endl;
break;
default:
asmCode << "\tmovsd " << regF[instr.regc % RegistersCount] << ", xmm0" << std::endl;
break;
case 5:
case 6:
case 7:
asmCode << "\tmov eax, " << regR32[instr.regc % RegistersCount] << std::endl;
asmCode << "\txor eax, 0" << std::hex << instr.addrc << "h" << std::dec << std::endl;
asmCode << "\tand eax, " << (ScratchpadL1 - 1) << std::endl;
asmCode << "\t" << store << " qword ptr [rsi + rax * 8], " << regF[instr.regc % RegistersCount] << std::endl;
break;
}
// Optional trace record; the two lines differ in offset (262144 vs 262136)
// and in the store instruction used.
if (trace) {
asmCode << "\tmovd qword ptr [rsi + rdi * 8 + 262144], xmm0" << std::endl;
asmCode << "\t" << store << " qword ptr [rsi + rdi * 8 + 262136], " << regF[instr.regc % RegistersCount] << std::endl;
}
}
// Handler for ADD_64: emits the operand-A code, then "add rax, " followed by
// whatever genbr1 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_ADD_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tadd rax, ";
genbr1(instr);
gencr(instr);
}
// Handler for ADD_32: emits the operand-A code, then "add eax, " followed by
// whatever genbr132 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_ADD_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tadd eax, ";
genbr132(instr);
gencr(instr);
}
// Handler for SUB_64: emits the operand-A code, then "sub rax, " followed by
// whatever genbr1 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_SUB_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tsub rax, ";
genbr1(instr);
gencr(instr);
}
// Handler for SUB_32: emits the operand-A code, then "sub eax, " followed by
// whatever genbr132 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_SUB_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tsub eax, ";
genbr132(instr);
gencr(instr);
}
void AssemblyGeneratorX86::h_MUL_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\timul rax, ";
if ((instr.locb & 7) >= 6) {
asmCode << "rax, ";
@@ -257,7 +262,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MULH_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\tmul rcx" << std::endl;
@@ -266,7 +271,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_MUL_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tmov ecx, eax" << std::endl;
asmCode << "\tmov eax, ";
genbr132(instr);
@@ -275,7 +280,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMUL_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tmovsxd rcx, eax" << std::endl;
if ((instr.locb & 7) >= 6) {
asmCode << "\tmov rax, " << instr.imm32 << std::endl;
@@ -288,7 +293,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IMULH_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tmov rcx, ";
genbr1(instr);
asmCode << "\timul rcx" << std::endl;
@@ -297,7 +302,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_DIV_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
if ((instr.locb & 7) >= 6) {
if (instr.imm32 == 0) {
asmCode << "\tmov ecx, 1" << std::endl;
@@ -318,7 +323,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_IDIV_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tmov edx, ";
genbr132(instr);
asmCode << "\tcmp edx, -1" << std::endl;
@@ -339,123 +344,125 @@ namespace RandomX {
}
// Handler for AND_64: emits the operand-A code, then "and rax, " followed by
// whatever genbr1 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_AND_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tand rax, ";
genbr1(instr);
gencr(instr);
}
// Handler for AND_32: emits the operand-A code, then "and eax, " followed by
// whatever genbr132 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_AND_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tand eax, ";
genbr132(instr);
gencr(instr);
}
// Handler for OR_64: emits the operand-A code, then "or rax, " followed by
// whatever genbr1 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_OR_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tor rax, ";
genbr1(instr);
gencr(instr);
}
// Handler for OR_32: emits the operand-A code, then "or eax, " followed by
// whatever genbr132 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_OR_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tor eax, ";
genbr132(instr);
gencr(instr);
}
// Handler for XOR_64: emits the operand-A code, then "xor rax, " followed by
// whatever genbr1 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_XOR_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\txor rax, ";
genbr1(instr);
gencr(instr);
}
// Handler for XOR_32: emits the operand-A code, then "xor eax, " followed by
// whatever genbr132 appends for operand B, then the result store via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_XOR_32(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\txor eax, ";
genbr132(instr);
gencr(instr);
}
// Handler for SHL_64: emits the operand-A code, lets genbr0 emit the operand-B
// code using the "shl" mnemonic, then stores the result via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_SHL_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
genbr0(instr, "shl");
gencr(instr);
}
// Handler for SHR_64: emits the operand-A code, lets genbr0 emit the operand-B
// code using the "shr" mnemonic, then stores the result via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_SHR_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
genbr0(instr, "shr");
gencr(instr);
}
// Handler for SAR_64: emits the operand-A code, lets genbr0 emit the operand-B
// code using the "sar" mnemonic, then stores the result via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_SAR_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
genbr0(instr, "sar");
gencr(instr);
}
// Handler for ROL_64: emits the operand-A code, lets genbr0 emit the operand-B
// code using the "rol" mnemonic, then stores the result via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_ROL_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
genbr0(instr, "rol");
gencr(instr);
}
// Handler for ROR_64: emits the operand-A code, lets genbr0 emit the operand-B
// code using the "ror" mnemonic, then stores the result via gencr.
// The parameter `i` is not used in this body.
// NOTE(review): gena(instr) and genar(instr) both appear because this listing is
// a diff view of the gena -> genar rename; the real file presumably keeps only
// one of them - confirm.
void AssemblyGeneratorX86::h_ROR_64(Instruction& instr, int i) {
gena(instr);
genar(instr);
genbr0(instr, "ror");
gencr(instr);
}
// Handler for FPADD: emits the operand-A code, the addition against operand B
// through genbf, and the result store via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena + "addsd" look like the removed
// scalar path and genaf + "addpd" like the added packed path; the real file
// presumably contains only one pair - confirm.
void AssemblyGeneratorX86::h_FPADD(Instruction& instr, int i) {
gena(instr);
genbf(instr, "addsd");
genaf(instr);
genbf(instr, "addpd");
gencf(instr);
}
// Handler for FPSUB: emits the operand-A code, the subtraction against operand B
// through genbf, and the result store via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena + "subsd" look like the removed
// scalar path and genaf + "subpd" like the added packed path; the real file
// presumably contains only one pair - confirm.
void AssemblyGeneratorX86::h_FPSUB(Instruction& instr, int i) {
gena(instr);
genbf(instr, "subsd");
genaf(instr);
genbf(instr, "subpd");
gencf(instr);
}
// Handler for FPMUL: emits the operand-A code, the multiplication against
// operand B through genbf, and the result store via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena, "or rax, 2048" and "mulsd"
// look like the removed scalar path; genaf, "mulpd" and the three-instruction
// mask sequence look like the added packed path - confirm against the real file.
void AssemblyGeneratorX86::h_FPMUL(Instruction& instr, int i) {
gena(instr);
asmCode << "\tor rax, 2048" << std::endl;
genbf(instr, "mulsd");
genaf(instr);
genbf(instr, "mulpd");
// cmpeqpd of a value with itself yields all-ones for ordinary lanes and zero for
// NaN lanes, so the andps below replaces any NaN result lane with +0.0.
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
asmCode << "\tandps xmm0, xmm1" << std::endl;
gencf(instr);
}
// Handler for FPDIV: emits the operand-A code, the division by operand B
// through genbf, and the result store via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena, "or rax, 2048" and "divsd"
// look like the removed scalar path; genaf, "divpd" and the three-instruction
// mask sequence look like the added packed path - confirm against the real file.
void AssemblyGeneratorX86::h_FPDIV(Instruction& instr, int i) {
gena(instr);
asmCode << "\tor rax, 2048" << std::endl;
genbf(instr, "divsd");
genaf(instr);
genbf(instr, "divpd");
// cmpeqpd of a value with itself yields all-ones for ordinary lanes and zero for
// NaN lanes, so the andps below replaces any NaN result lane with +0.0.
asmCode << "\tmovaps xmm1, xmm0" << std::endl;
asmCode << "\tcmpeqpd xmm1, xmm1" << std::endl;
asmCode << "\tandps xmm0, xmm1" << std::endl;
gencf(instr);
}
// Handler for FPSQRT: emits the operand-A code, a square root of xmm0, and the
// result store via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena, the rcx mask, cvtsi2sd and
// sqrtsd look like the removed scalar path; genaf, the andps with xmm10 and
// sqrtpd look like the added packed path - confirm against the real file.
void AssemblyGeneratorX86::h_FPSQRT(Instruction& instr, int i) {
gena(instr);
// 9223372036854773760 == 0x7FFFFFFFFFFFF800: clears the sign bit and the low
// 11 bits of rax, so the integer converted below is non-negative.
asmCode << "\tmov rcx, 9223372036854773760" << std::endl;
asmCode << "\tand rax, rcx" << std::endl;
asmCode << "\tcvtsi2sd xmm0, rax" << std::endl;
asmCode << "\tsqrtsd xmm0, xmm0" << std::endl;
genaf(instr);
// NOTE(review): xmm10 presumably holds a mask that clears the sign bits so the
// sqrtpd input is non-negative; its value is set outside this listing - confirm.
asmCode << "\tandps xmm0, xmm10" << std::endl;
asmCode << "\tsqrtpd xmm0, xmm0" << std::endl;
gencf(instr);
}
// Handler for FPROUND: emits the operand-A code, converts the masked value of
// rax to a double, loads a new MXCSR value derived from the low bits of rax,
// and stores the converted value via gencf.
// The parameter `i` is not used in this body.
// NOTE(review): this listing is a diff view. gena/genar, the two cvtsi2sd lines
// and the two gencf calls are old/new pairs; the real file presumably keeps only
// one of each - confirm.
void AssemblyGeneratorX86::h_FPROUND(Instruction& instr, int i) {
gena(instr);
genar(instr);
// rcx keeps the full value for the conversion; eax is reused to build MXCSR.
asmCode << "\tmov rcx, rax" << std::endl;
// Move the two lowest bits of eax into bits 13-14 (the MXCSR rounding-control
// field) and keep only those bits (24576 == 0x6000).
asmCode << "\tshl eax, 13" << std::endl;
// Clear the low 11 bits of the value that is converted to double.
asmCode << "\tand rcx, -2048" << std::endl;
asmCode << "\tand eax, 24576" << std::endl;
asmCode << "\tcvtsi2sd xmm0, rcx" << std::endl;
asmCode << "\tcvtsi2sd " << regF[instr.regc % RegistersCount] << ", rcx" << std::endl;
// 40896 == 0x9FC0: all exception mask bits (0x1F80) plus flush-to-zero (0x8000)
// and denormals-are-zero (0x0040).
asmCode << "\tor eax, 40896" << std::endl;
// ldmxcsr takes only a memory operand, so the value is staged just below rsp.
asmCode << "\tmov dword ptr [rsp - 8], eax" << std::endl;
asmCode << "\tldmxcsr dword ptr [rsp - 8]" << std::endl;
gencf(instr);
gencf(instr, true);
}
static inline const char* jumpCondition(Instruction& instr, bool invert = false) {
@@ -481,7 +488,7 @@ namespace RandomX {
}
void AssemblyGeneratorX86::h_CALL(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tcmp " << regR32[instr.regb % RegistersCount] << ", " << instr.imm32 << std::endl;
asmCode << "\t" << jumpCondition(instr);
asmCode << " short taken_call_" << i << std::endl;
@@ -489,14 +496,14 @@ namespace RandomX {
asmCode << "\tjmp rx_i_" << wrapInstr(i + 1) << std::endl;
asmCode << "taken_call_" << i << ":" << std::endl;
if (trace) {
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262144], rax" << std::endl;
asmCode << "\tmov qword ptr [rsi + rdi * 8 + 262136], rax" << std::endl;
}
asmCode << "\tpush rax" << std::endl;
asmCode << "\tcall rx_i_" << wrapInstr(i + (instr.imm8 & 127) + 2) << std::endl;
}
void AssemblyGeneratorX86::h_RET(Instruction& instr, int i) {
gena(instr);
genar(instr);
asmCode << "\tcmp rsp, rbp" << std::endl;
asmCode << "\tje short not_taken_ret_" << i << std::endl;
asmCode << "\txor rax, qword ptr [rsp + 8]" << std::endl;