diff --git a/doc/specs.md b/doc/specs.md
index c977f84..9328273 100644
--- a/doc/specs.md
+++ b/doc/specs.md
@@ -519,7 +519,7 @@ This instructions adds the values of two registers (modulo 264). The
These instructions output the high 64 bits of the whole 128-bit multiplication result. The result differs for signed and unsigned multiplication (IMULH is unsigned, ISMULH is signed). The variants with a register source operand perform a squaring operation if `dst` equals `src`.
#### 5.2.6 IMUL_RCP
-This instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as rcp = $2^{x}$ / imm32 by choosing the largest integer `x` such that rcp < $2^{64}$. If `imm32` equals 0, IMUL_RCP is a no-op.
+If `imm32` equals 0 or is a power of 2, IMUL_RCP is a no-op. In other cases, the instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as rcp = $2^{x}$ / imm32 by choosing the largest integer `x` such that rcp < $2^{64}$.
#### 5.2.7 INEG_R
Performs two's complement negation of the destination register.
@@ -607,7 +607,7 @@ This instruction performs a conditional jump in the Program Buffer. It uses an i
A register is considered as modified by an instruction in the following cases:
* It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R.
-* It is the destination register of IMUL_RCP and `imm32` is not zero.
+* It is the destination register of IMUL_RCP and `imm32` is neither zero nor a power of 2.
* It is the source or the destination register of ISWAP_R and the destination and source registers are distinct.
* The CBRANCH instruction is considered to modify all integer registers.
diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp
index 030ea60..4af5b73 100644
--- a/src/assembly_generator_x86.cpp
+++ b/src/assembly_generator_x86.cpp
@@ -428,9 +428,10 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
- if (instr.getImm32() != 0) {
+ uint64_t divisor = instr.getImm32();
+ if (!isPowerOf2(divisor)) {
registerUsage[instr.dst].lastUsed = i;
- asmCode << "\tmov rax, " << randomx_reciprocal(instr.getImm32()) << std::endl;
+ asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
traceint(instr);
}
diff --git a/src/common.hpp b/src/common.hpp
index ccbd301..56023ce 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -126,6 +126,10 @@ namespace randomx {
return minIndex;
}
+ inline bool isPowerOf2(uint64_t x) { // True when x has at most one bit set. NOTE: despite the name, this is also true for x == 0.
+ return (x & (x - 1)) == 0; // x - 1 clears the lowest set bit and sets every bit below it; for x == 0 the unsigned subtraction wraps to all ones, so the AND is still 0.
+ }
+
constexpr int mantissaSize = 52;
constexpr int exponentSize = 11;
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp
index b8ae884..82a5503 100644
--- a/src/jit_compiler_x86.cpp
+++ b/src/jit_compiler_x86.cpp
@@ -266,8 +266,6 @@ namespace randomx {
SuperscalarProgram& prog = programs[j];
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
- instr.src %= RegistersCount;
- instr.dst %= RegistersCount;
generateSuperscalarCode(instr, reciprocalCache);
}
emit(codeShhLoad, codeSshLoadSize);
@@ -614,10 +612,11 @@ namespace randomx {
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
- if (instr.getImm32() != 0) {
+ uint64_t divisor = instr.getImm32();
+ if (!isPowerOf2(divisor)) {
registerUsage[instr.dst].lastUsed = i;
emit(MOV_RAX_I);
- emit64(randomx_reciprocal(instr.getImm32()));
+ emit64(randomx_reciprocal(divisor));
emit(REX_IMUL_RM);
emitByte(0xc0 + 8 * instr.dst);
}
diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp
index b24a275..ea85f60 100644
--- a/src/vm_interpreted.cpp
+++ b/src/vm_interpreted.cpp
@@ -435,8 +435,8 @@ namespace randomx {
} break;
CASE_REP(IMUL_RCP) {
- uint32_t divisor = instr.getImm32();
- if (divisor != 0) {
+ uint64_t divisor = instr.getImm32();
+ if (!isPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;
ibc.idst = &r[dst];