android_kernel_xiaomi_sm8350/arch/mips/include/asm/div64.h
Maciej W. Rozycki 2c44110300 MIPS: Avoid handcoded DIVU in `__div64_32' altogether
commit 25ab14cbe9d1b66fda44c71a2db7582a31b6f5cd upstream.

Remove the inline asm with a DIVU instruction from `__div64_32' and use
plain C code for the intended DIVMOD calculation instead.  GCC is smart
enough to know that both the quotient and the remainder are calculated
with single DIVU, so with ISAs up to R5 the same instruction is actually
produced with overall similar code.

For R6 compiled code will work, but separate DIVU and MODU instructions
will be produced, which are also interlocked, so scalar implementations
will likely not perform as well as older ISAs with their asynchronous MD
unit.  Likely still faster then the generic algorithm though.

This removes a compilation error for R6 however where the original DIVU
instruction is not supported anymore and the MDU accumulator registers
have been removed and consequently GCC complains as to a constraint it
cannot find a register for:

In file included from ./include/linux/math.h:5,
                 from ./include/linux/kernel.h:13,
                 from mm/page-writeback.c:15:
./include/linux/math64.h: In function 'div_u64_rem':
./arch/mips/include/asm/div64.h:76:17: error: inconsistent operand constraints in an 'asm'
   76 |                 __asm__("divu   $0, %z1, %z2"                           \
      |                 ^~~~~~~
./include/asm-generic/div64.h:245:25: note: in expansion of macro '__div64_32'
  245 |                 __rem = __div64_32(&(n), __base);       \
      |                         ^~~~~~~~~~
./include/linux/math64.h:91:22: note: in expansion of macro 'do_div'
   91 |         *remainder = do_div(dividend, divisor);
      |                      ^~~~~~

This has passed correctness verification with test_div64 and reduced the
module's average execution time down to 1.0404s from 1.0445s with R3400
@40MHz.  The module's MIPS I machine code has also shrunk by 12 bytes or
3 instructions.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-05-19 10:08:32 +02:00

92 lines
2.2 KiB
C

/*
* Copyright (C) 2000, 2004, 2021 Maciej W. Rozycki
* Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
#ifndef __ASM_DIV64_H
#define __ASM_DIV64_H
#include <asm/bitsperlong.h>
#if BITS_PER_LONG == 32
/*
* No traps on overflows for any of these...
*/
#define do_div64_32(res, high, low, base) ({ \
unsigned long __cf, __tmp, __tmp2, __i; \
unsigned long __quot32, __mod32; \
\
__asm__( \
" .set push \n" \
" .set noat \n" \
" .set noreorder \n" \
" move %2, $0 \n" \
" move %3, $0 \n" \
" b 1f \n" \
" li %4, 0x21 \n" \
"0: \n" \
" sll $1, %0, 0x1 \n" \
" srl %3, %0, 0x1f \n" \
" or %0, $1, %5 \n" \
" sll %1, %1, 0x1 \n" \
" sll %2, %2, 0x1 \n" \
"1: \n" \
" bnez %3, 2f \n" \
" sltu %5, %0, %z6 \n" \
" bnez %5, 3f \n" \
"2: \n" \
" addiu %4, %4, -1 \n" \
" subu %0, %0, %z6 \n" \
" addiu %2, %2, 1 \n" \
"3: \n" \
" bnez %4, 0b \n" \
" srl %5, %1, 0x1f \n" \
" .set pop" \
: "=&r" (__mod32), "=&r" (__tmp), \
"=&r" (__quot32), "=&r" (__cf), \
"=&r" (__i), "=&r" (__tmp2) \
: "Jr" (base), "0" (high), "1" (low)); \
\
(res) = __quot32; \
__mod32; \
})
#define __div64_32(n, base) ({ \
unsigned long __upper, __low, __high, __radix; \
unsigned long long __quot; \
unsigned long long __div; \
unsigned long __mod; \
\
__div = (*n); \
__radix = (base); \
\
__high = __div >> 32; \
__low = __div; \
\
if (__high < __radix) { \
__upper = __high; \
__high = 0; \
} else { \
__upper = __high % __radix; \
__high /= __radix; \
} \
\
__mod = do_div64_32(__low, __upper, __low, __radix); \
\
__quot = __high; \
__quot = __quot << 32 | __low; \
(*n) = __quot; \
__mod; \
})
#endif /* BITS_PER_LONG == 32 */
#include <asm-generic/div64.h>
#endif /* __ASM_DIV64_H */