diff --git a/doc/isa-ops.md b/doc/isa-ops.md index 1ab9591..79ac307 100644 --- a/doc/isa-ops.md +++ b/doc/isa-ops.md @@ -48,11 +48,16 @@ Memory operands are loaded as 8-byte values from the address indicated by `src`. |5/256|FADD_M|F|mem|`(dst0, dst1) = (dst0 + [src][0], dst1 + [src][1])`| |20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`| |5/256|FSUB_M|F|mem|`(dst0, dst1) = (dst0 - [src][0], dst1 - [src][1])`| -|6/256|FNEG_R|F|-|`(dst0, dst1) = (-dst0, -dst1)`| +|6/256|FSCAL_R|F|-|<code>(dst0, dst1) = (-2<sup>x0</sup> * dst0, -2<sup>x1</sup> * dst1)</code>| |20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`| |4/256|FDIV_M|E|mem|`(dst0, dst1) = (dst0 / [src][0], dst1 / [src][1])`| |6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`| +#### FSCAL_R +This instruction negates the number and multiplies it by <code>2<sup>x</sup></code>. `x` is calculated by taking the 5 least significant digits of the biased exponent and interpreting them as a binary number using the digit set `{-1, +1}` as opposed to the traditional `{0, 1}`. The possible values of `x` are all odd numbers from -31 to +31. + +The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`. + #### Denormal and NaN values Due to restrictions on the values of the floating point registers, no operation results in `NaN`. `FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number. 
diff --git a/src/AssemblyGeneratorX86.cpp b/src/AssemblyGeneratorX86.cpp index bb50718..1e51fac 100644 --- a/src/AssemblyGeneratorX86.cpp +++ b/src/AssemblyGeneratorX86.cpp @@ -373,7 +373,7 @@ namespace RandomX { } //1 uOP - void AssemblyGeneratorX86::h_FNEG_R(Instruction& instr, int i) { + void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) { instr.dst %= 4; asmCode << "\txorps " << regF[instr.dst] << ", " << signMask << std::endl; } @@ -522,7 +522,7 @@ namespace RandomX { INST_HANDLE(FADD_M) INST_HANDLE(FSUB_R) INST_HANDLE(FSUB_M) - INST_HANDLE(FNEG_R) + INST_HANDLE(FSCAL_R) //Floating point group E INST_HANDLE(FMUL_R) diff --git a/src/AssemblyGeneratorX86.hpp b/src/AssemblyGeneratorX86.hpp index 0c1844e..affd65c 100644 --- a/src/AssemblyGeneratorX86.hpp +++ b/src/AssemblyGeneratorX86.hpp @@ -70,7 +70,7 @@ namespace RandomX { void h_FADD_M(Instruction&, int); void h_FSUB_R(Instruction&, int); void h_FSUB_M(Instruction&, int); - void h_FNEG_R(Instruction&, int); + void h_FSCAL_R(Instruction&, int); void h_FMUL_R(Instruction&, int); void h_FMUL_M(Instruction&, int); void h_FDIV_R(Instruction&, int); diff --git a/src/Instruction.cpp b/src/Instruction.cpp index bdcaf39..4296c88 100644 --- a/src/Instruction.cpp +++ b/src/Instruction.cpp @@ -237,7 +237,7 @@ namespace RandomX { os << std::endl; } - void Instruction::h_FNEG_R(std::ostream& os) const { + void Instruction::h_FSCAL_R(std::ostream& os) const { auto dstIndex = dst % 4; os << "f" << dstIndex << std::endl; } @@ -362,7 +362,7 @@ namespace RandomX { INST_NAME(FADD_M) INST_NAME(FSUB_R) INST_NAME(FSUB_M) - INST_NAME(FNEG_R) + INST_NAME(FSCAL_R) //Floating point group E INST_NAME(FMUL_R) @@ -413,7 +413,7 @@ namespace RandomX { INST_HANDLE(FADD_M) INST_HANDLE(FSUB_R) INST_HANDLE(FSUB_M) - INST_HANDLE(FNEG_R) + INST_HANDLE(FSCAL_R) //Floating point group E INST_HANDLE(FMUL_R) diff --git a/src/Instruction.hpp b/src/Instruction.hpp index 5cfd833..a38e3e6 100644 --- a/src/Instruction.hpp +++ 
b/src/Instruction.hpp @@ -54,7 +54,7 @@ namespace RandomX { constexpr int FADD_M = 22; constexpr int FSUB_R = 23; constexpr int FSUB_M = 24; - constexpr int FNEG_R = 25; + constexpr int FSCAL_R = 25; constexpr int FMUL_R = 26; constexpr int FMUL_M = 27; constexpr int FDIV_R = 28; @@ -116,7 +116,7 @@ namespace RandomX { void h_FADD_M(std::ostream&) const; void h_FSUB_R(std::ostream&) const; void h_FSUB_M(std::ostream&) const; - void h_FNEG_R(std::ostream&) const; + void h_FSCAL_R(std::ostream&) const; void h_FMUL_R(std::ostream&) const; void h_FMUL_M(std::ostream&) const; void h_FDIV_R(std::ostream&) const; diff --git a/src/InterpretedVirtualMachine.cpp b/src/InterpretedVirtualMachine.cpp index c5a6d53..71c03af 100644 --- a/src/InterpretedVirtualMachine.cpp +++ b/src/InterpretedVirtualMachine.cpp @@ -203,8 +203,8 @@ namespace RandomX { *ibc.fdst = _mm_sub_pd(*ibc.fdst, fsrc); } break; - case InstructionType::FNEG_R: { - const __m128d signMask = _mm_castsi128_pd(_mm_set1_epi64x(1ULL << 63)); + case InstructionType::FSCAL_R: { + const __m128d signMask = _mm_castsi128_pd(_mm_set1_epi64x(0x81F0000000000000)); *ibc.fdst = _mm_xor_pd(*ibc.fdst, signMask); } break; @@ -657,10 +657,10 @@ namespace RandomX { ibc.memMask = ((instr.mod % 4) ? 
ScratchpadL1Mask : ScratchpadL2Mask); } break; - CASE_REP(FNEG_R) { + CASE_REP(FSCAL_R) { auto dst = instr.dst % 4; ibc.fdst = &f[dst]; - ibc.type = InstructionType::FNEG_R; + ibc.type = InstructionType::FSCAL_R; } break; CASE_REP(FMUL_R) { diff --git a/src/JitCompilerX86.cpp b/src/JitCompilerX86.cpp index 97afb2e..c725c6e 100644 --- a/src/JitCompilerX86.cpp +++ b/src/JitCompilerX86.cpp @@ -605,7 +605,7 @@ namespace RandomX { emitByte(0xc4 + 8 * instr.dst); } - void JitCompilerX86::h_FNEG_R(Instruction& instr) { + void JitCompilerX86::h_FSCAL_R(Instruction& instr) { instr.dst %= 4; emit(REX_XORPS); emitByte(0xc7 + 8 * instr.dst); @@ -761,7 +761,7 @@ namespace RandomX { INST_HANDLE(FADD_M) INST_HANDLE(FSUB_R) INST_HANDLE(FSUB_M) - INST_HANDLE(FNEG_R) + INST_HANDLE(FSCAL_R) INST_HANDLE(FMUL_R) INST_HANDLE(FMUL_M) INST_HANDLE(FDIV_R) diff --git a/src/JitCompilerX86.hpp b/src/JitCompilerX86.hpp index fedcf20..5936dcf 100644 --- a/src/JitCompilerX86.hpp +++ b/src/JitCompilerX86.hpp @@ -114,7 +114,7 @@ namespace RandomX { void h_FADD_M(Instruction&); void h_FSUB_R(Instruction&); void h_FSUB_M(Instruction&); - void h_FNEG_R(Instruction&); + void h_FSCAL_R(Instruction&); void h_FMUL_R(Instruction&); void h_FMUL_M(Instruction&); void h_FDIV_R(Instruction&); diff --git a/src/asm/program_xmm_constants.inc b/src/asm/program_xmm_constants.inc index 38c897c..79d05a4 100644 --- a/src/asm/program_xmm_constants.inc +++ b/src/asm/program_xmm_constants.inc @@ -3,4 +3,4 @@ minDbl: absMask: db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127 signMask: - db 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 128 \ No newline at end of file + db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129 \ No newline at end of file diff --git a/src/instructionWeights.hpp b/src/instructionWeights.hpp index c336b29..74b6211 100644 --- a/src/instructionWeights.hpp +++ b/src/instructionWeights.hpp @@ -49,7 +49,7 @@ along with RandomX. If not, see. 
#define WT_FADD_M 5 #define WT_FSUB_R 20 #define WT_FSUB_M 5 -#define WT_FNEG_R 6 +#define WT_FSCAL_R 6 //Floating point group E #define WT_FMUL_R 20 @@ -74,7 +74,7 @@ WT_ISUB_M + WT_IMUL_9C + WT_IMUL_R + WT_IMUL_M + WT_IMULH_R + \ WT_IMULH_M + WT_ISMULH_R + WT_ISMULH_M + WT_IDIV_C + WT_ISDIV_C + \ WT_INEG_R + WT_IXOR_R + WT_IXOR_M + WT_IROR_R + WT_IROL_R + \ WT_ISWAP_R + WT_FSWAP_R + WT_FADD_R + WT_FADD_M + WT_FSUB_R + WT_FSUB_M + \ -WT_FNEG_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \ +WT_FSCAL_R + WT_FMUL_R + WT_FMUL_M + WT_FDIV_R + WT_FDIV_M + \ WT_FSQRT_R + WT_COND_R + WT_COND_M + WT_CFROUND + WT_ISTORE + WT_FSTORE + WT_NOP; static_assert(wtSum == 256, diff --git a/src/program.inc b/src/program.inc index ac8957b..3c73b24 100644 --- a/src/program.inc +++ b/src/program.inc @@ -229,7 +229,7 @@ mov eax, r13d and eax, 16376 xor r8, qword ptr [rsi+rax] - ; FNEG_R f2 + ; FSCAL_R f2 xorps xmm2, xmm15 ; IDIV_C r5, 2577129788 mov rax, 15371395512010654233 @@ -429,7 +429,7 @@ ror r10, cl ; ISUB_R r4, -1079131550 sub r12, -1079131550 - ; FNEG_R f3 + ; FSCAL_R f3 xorps xmm3, xmm15 ; COND_R r4, ns(r5, -362284631) xor ecx, ecx @@ -440,7 +440,7 @@ subpd xmm2, xmm8 ; IXOR_R r4, r5 xor r12, r13 - ; FNEG_R f1 + ; FSCAL_R f1 xorps xmm1, xmm15 ; FADD_R f0, a0 addpd xmm0, xmm8 @@ -605,7 +605,7 @@ mov eax, r9d and eax, 262136 mov qword ptr [rsi+rax], r8 - ; FNEG_R f0 + ; FSCAL_R f0 xorps xmm0, xmm15 ; FMUL_R e0, a3 mulpd xmm4, xmm11 @@ -620,7 +620,7 @@ addpd xmm0, xmm8 ; FMUL_R e1, a2 mulpd xmm5, xmm10 - ; FNEG_R f3 + ; FSCAL_R f3 xorps xmm3, xmm15 ; FADD_R f1, a1 addpd xmm1, xmm9