From b1f1e1d6ad1482ffb907482858333e70819fa1e3 Mon Sep 17 00:00:00 2001 From: tevador Date: Sat, 4 May 2019 18:00:17 +0200 Subject: [PATCH] Fixed IMUL_RCP if divisor is a power of 2 --- doc/specs.md | 4 ++-- src/assembly_generator_x86.cpp | 5 +++-- src/common.hpp | 4 ++++ src/jit_compiler_x86.cpp | 7 +++---- src/vm_interpreted.cpp | 4 ++-- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/specs.md b/doc/specs.md index c977f84..9328273 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -519,7 +519,7 @@ This instructions adds the values of two registers (modulo 2<sup>64</sup>). The These instructions output the high 64 bits of the whole 128-bit multiplication result. The result differs for signed and unsigned multiplication (IMULH is unsigned, ISMULH is signed). The variants with a register source operand perform a squaring operation if `dst` equals `src`. #### 5.2.6 IMUL_RCP -This instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as rcp = 2<sup>x</sup> / imm32 by choosing the largest integer `x` such that rcp < 2<sup>64</sup>. If `imm32` equals 0, IMUL_RCP is a no-op. +If `imm32` equals 0 or is a power of 2, IMUL_RCP is a no-op. In other cases, the instruction multiplies the destination register by a reciprocal of `imm32` (the immediate value is zero-extended and treated as unsigned). The reciprocal is calculated as rcp = 2<sup>x</sup> / imm32 by choosing the largest integer `x` such that rcp < 2<sup>64</sup>. #### 5.2.7 INEG_R Performs two's complement negation of the destination register. @@ -607,7 +607,7 @@ This instruction performs a conditional jump in the Program Buffer. It uses an i A register is considered as modified by an instruction in the following cases: * It is the destination register of an integer instruction except IMUL_RCP and ISWAP_R. -* It is the destination register of IMUL_RCP and `imm32` is not zero. 
+* It is the destination register of IMUL_RCP and `imm32` is neither zero nor a power of 2. * It is the source or the destination register of ISWAP_R and the destination and source registers are distinct. * The CBRANCH instruction is considered to modify all integer registers. diff --git a/src/assembly_generator_x86.cpp b/src/assembly_generator_x86.cpp index 030ea60..4af5b73 100644 --- a/src/assembly_generator_x86.cpp +++ b/src/assembly_generator_x86.cpp @@ -428,9 +428,10 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { - if (instr.getImm32() != 0) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { registerUsage[instr.dst].lastUsed = i; - asmCode << "\tmov rax, " << randomx_reciprocal(instr.getImm32()) << std::endl; + asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl; traceint(instr); } diff --git a/src/common.hpp b/src/common.hpp index ccbd301..56023ce 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -126,6 +126,10 @@ namespace randomx { return minIndex; } + inline bool isPowerOf2(uint64_t x) { + return (x & (x - 1)) == 0; + } + constexpr int mantissaSize = 52; constexpr int exponentSize = 11; constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1; diff --git a/src/jit_compiler_x86.cpp b/src/jit_compiler_x86.cpp index b8ae884..82a5503 100644 --- a/src/jit_compiler_x86.cpp +++ b/src/jit_compiler_x86.cpp @@ -266,8 +266,6 @@ namespace randomx { SuperscalarProgram& prog = programs[j]; for (unsigned i = 0; i < prog.getSize(); ++i) { Instruction& instr = prog(i); - instr.src %= RegistersCount; - instr.dst %= RegistersCount; generateSuperscalarCode(instr, reciprocalCache); } emit(codeShhLoad, codeSshLoadSize); @@ -614,10 +612,11 @@ namespace randomx { } void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { - if (instr.getImm32() != 0) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { 
registerUsage[instr.dst].lastUsed = i; emit(MOV_RAX_I); - emit64(randomx_reciprocal(instr.getImm32())); + emit64(randomx_reciprocal(divisor)); emit(REX_IMUL_RM); emitByte(0xc0 + 8 * instr.dst); } diff --git a/src/vm_interpreted.cpp b/src/vm_interpreted.cpp index b24a275..ea85f60 100644 --- a/src/vm_interpreted.cpp +++ b/src/vm_interpreted.cpp @@ -435,8 +435,8 @@ namespace randomx { } break; CASE_REP(IMUL_RCP) { - uint32_t divisor = instr.getImm32(); - if (divisor != 0) { + uint64_t divisor = instr.getImm32(); + if (!isPowerOf2(divisor)) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::IMUL_R; ibc.idst = &r[dst];