b8d3f2448b
apply_alternatives() uses memcpy() to apply alternatives. This has the
unfortunate effect that, while applying the memcpy alternative to memcpy
itself, memcpy tries to overwrite itself with nops - which causes a #UD
fault as it overwrites half of an instruction in the copy loop, and from
that point on the only possible outcome is a triple fault and reboot. So
let's overwrite only the first two instructions of memcpy - as long as the
main memcpy loop is not in the first two bytes, it will work fine.

Signed-off-by: Petr Vandrovec <petr@vandrovec.name>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
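For illustration, here is a rough, hypothetical C sketch of the patching logic the message describes. The names and types below (alt_entry, apply_alternatives_sketch) are invented for this example and are not the kernel's actual definitions; the point is only that memcpy()/memset() run while the patch is being applied, so the patched region must not include the copy loop that is executing, and limiting it to the first two bytes (one short jmp) avoids that.

/*
 * Hypothetical sketch only - a simplified stand-in for alternatives
 * patching, with invented names; not the kernel's code.
 */
#include <stdint.h>
#include <string.h>

struct alt_entry {
	uint8_t *instr;          /* code to patch (e.g. start of memcpy)  */
	uint8_t *replacement;    /* replacement bytes (e.g. jmp memcpy_c) */
	uint8_t  instrlen;       /* bytes to rewrite at the patch site    */
	uint8_t  replacementlen; /* bytes provided by the replacement     */
};

static void apply_alternatives_sketch(struct alt_entry *a, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++) {
		/*
		 * memcpy() itself is the routine being patched here.  If
		 * instrlen covered memcpy's whole body, this call would
		 * rewrite the very copy loop it is executing and fault in
		 * the middle of an instruction.  Keeping instrlen at 2 -
		 * the size of the short jmp - rewrites only the entry.
		 */
		memcpy(a[i].instr, a[i].replacement, a[i].replacementlen);
		/* pad any remaining patched bytes with NOPs (0x90) */
		memset(a[i].instr + a[i].replacementlen, 0x90,
		       a[i].instrlen - a[i].replacementlen);
	}
}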
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * rax original destination
 */

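/*
 * memcpy_c: string-instruction variant.  Arguments follow the C prototype
 * (dest in %rdi, src in %rsi, count in %rdx; %rax returns dest).  Copies
 * count/8 quadwords with rep movsq, then the remaining count%8 bytes with
 * rep movsb.  Selected via the alternatives entry at the end of this file
 * on CPUs with X86_FEATURE_REP_GOOD.
 */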
	ALIGN
memcpy_c:
	CFI_STARTPROC
	movq %rdi,%rax
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	rep movsq
	movl %edx,%ecx
	rep movsb
	ret
	CFI_ENDPROC
ENDPROC(memcpy_c)

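/*
 * Default open-coded version: copies 64-byte blocks in an unrolled loop
 * of eight quadword load/store pairs, then any remaining whole quadwords
 * (.Lhandle_tail), then the last 0-7 bytes one at a time (.Lloop_1).
 * Replaced at boot by a jmp to memcpy_c on X86_FEATURE_REP_GOOD CPUs.
 */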
ENTRY(__memcpy)
ENTRY(memcpy)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	movq %rdi,%rax

	movl %edx,%ecx
	shrl $6,%ecx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	decl %ecx

	movq (%rsi),%r11
	movq 8(%rsi),%r8

	movq %r11,(%rdi)
	movq %r8,1*8(%rdi)

	movq 2*8(%rsi),%r9
	movq 3*8(%rsi),%r10

	movq %r9,2*8(%rdi)
	movq %r10,3*8(%rdi)

	movq 4*8(%rsi),%r11
	movq 5*8(%rsi),%r8

	movq %r11,4*8(%rdi)
	movq %r8,5*8(%rdi)

	movq 6*8(%rsi),%r9
	movq 7*8(%rsi),%r10

	movq %r9,6*8(%rdi)
	movq %r10,7*8(%rdi)

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

.Lhandle_tail:
	movl %edx,%ecx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	.p2align 4
.Lloop_8:
	decl %ecx
	movq (%rsi),%r8
	movq %r8,(%rdi)
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
	movb (%rsi),%r8b
	movb %r8b,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
.Lfinal:
	CFI_ENDPROC
ENDPROC(memcpy)
ENDPROC(__memcpy)

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

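	/*
	 * The replacement below is a single 2-byte short jmp to memcpy_c.
	 * The .altinstructions entry tells the patcher to rewrite only the
	 * first two bytes of memcpy with it, for the reason given in the
	 * comment further down.
	 */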
	.section .altinstr_replacement,"ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad memcpy
	.quad 1b
	.byte X86_FEATURE_REP_GOOD
	/* Replace only beginning, memcpy is used to apply alternatives, so it
	 * is silly to overwrite itself with nops - reboot is only outcome... */
	.byte 2b - 1b
	.byte 2b - 1b
	.previous
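For readers following the unrolled loop, here is a rough C rendering of the same block/quadword/byte split. It is an illustrative sketch, not kernel code: memcpy_sketch is an invented name, and the 8-byte memcpy() calls merely stand in for the movq load/store pairs; the 32-bit count truncation done by the assembly is also ignored.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative sketch of the open-coded path above; not the kernel's code. */
static void *memcpy_sketch(void *dest, const void *src, size_t count)
{
	uint8_t *d = dest;
	const uint8_t *s = src;

	/* 64-byte blocks: eight quadword moves per iteration (.Lloop_64) */
	for (size_t n = count >> 6; n; n--, s += 64, d += 64)
		for (int i = 0; i < 8; i++)
			memcpy(d + 8 * i, s + 8 * i, 8);

	/* remaining whole quadwords (.Lhandle_tail / .Lloop_8) */
	for (size_t n = (count & 63) >> 3; n; n--, s += 8, d += 8)
		memcpy(d, s, 8);

	/* trailing 0-7 bytes (.Lloop_1) */
	for (size_t n = count & 7; n; n--)
		*d++ = *s++;

	return dest;	/* %rax: original destination */
}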