android_kernel_xiaomi_sm8350/arch/x86_64/lib/copy_page.S

/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
	
/* Don't use streaming store because it's better when the target
   ends up in cache. */
	    
/* Could vary the prefetch distance based on SMP/UP */

	.globl copy_page
/*
 * copy_page - copy one 4096-byte page in 64-byte chunks.
 *
 * In:     %rdi = destination, %rsi = source
 * Out:    no return value; %rdi and %rsi are left advanced by 4096
 * Clobb:  %rax, %rcx, %rdx, %r8-%r11, flags
 * Saved:  %rbx and %r12 are used as scratch, so they are spilled to the
 *         stack on entry and reloaded before returning
 * Stack:  2*8 bytes
 *
 * Each iteration loads eight quadwords from the source into registers and
 * then stores them to the destination.  The first (4096/64)-5 iterations
 * also prefetch the source five chunks ahead; the last five iterations do
 * not prefetch.
 */
	.p2align 4
copy_page:
	subq	$2*8,%rsp
	movq	%rbx,(%rsp)
	movq	%r12,1*8(%rsp)

	/* Prefetching loop: %ecx counts the remaining 64-byte chunks. */
	movl	$(4096/64)-5,%ecx
	.p2align 4
.Loop64:
	/*
	 * Decrement first: none of the mov/prefetch/lea instructions below
	 * modify the flags, so ZF from here still decides the jnz at the
	 * bottom of the loop.
	 */
	decl	%ecx

	movq	(%rsi),%rax
	movq	8(%rsi),%rbx
	movq	16(%rsi),%rdx
	movq	24(%rsi),%r8
	movq	32(%rsi),%r9
	movq	40(%rsi),%r10
	movq	48(%rsi),%r11
	movq	56(%rsi),%r12

	/*
	 * Five chunks ahead of the current one.  On the final pass of this
	 * loop that is the last 64-byte chunk of the source page.
	 */
	prefetcht0 5*64(%rsi)

	movq	%rax,(%rdi)
	movq	%rbx,8(%rdi)
	movq	%rdx,16(%rdi)
	movq	%r8,24(%rdi)
	movq	%r9,32(%rdi)
	movq	%r10,40(%rdi)
	movq	%r11,48(%rdi)
	movq	%r12,56(%rdi)

	/* lea instead of add so the flags from decl survive. */
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi

	jnz	.Loop64

	/* Tail loop: the remaining five chunks, copied without prefetch. */
	movl	$5,%ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	(%rsi),%rax
	movq	8(%rsi),%rbx
	movq	16(%rsi),%rdx
	movq	24(%rsi),%r8
	movq	32(%rsi),%r9
	movq	40(%rsi),%r10
	movq	48(%rsi),%r11
	movq	56(%rsi),%r12

	movq	%rax,(%rdi)
	movq	%rbx,8(%rdi)
	movq	%rdx,16(%rdi)
	movq	%r8,24(%rdi)
	movq	%r9,32(%rdi)
	movq	%r10,40(%rdi)
	movq	%r11,48(%rdi)
	movq	%r12,56(%rdi)

	leaq	64(%rdi),%rdi
	leaq	64(%rsi),%rsi

	jnz	.Loop2

	/* Reload the two spilled registers and release the frame. */
	movq	(%rsp),%rbx
	movq	1*8(%rsp),%r12
	addq	$2*8,%rsp
	ret

	/* Some CPUs run faster using the string copy instructions.
	   It is also a lot simpler. Use this when possible */

#include <asm/cpufeature.h>

	/*
	 * Emit one 8-byte-aligned record into the .altinstructions section
	 * that pairs copy_page with the copy_page_c code: two quadword
	 * addresses followed by three bytes.
	 * NOTE(review): the per-field meanings below are inferred from the
	 * operands; the record layout is defined outside this file - confirm
	 * against the kernel's alternative-instruction structure.
	 */
	.section .altinstructions,"a"
	.align 8
	.quad  copy_page		/* address of the unrolled routine (presumably the code to be patched) */
	.quad  copy_page_c		/* address of the string-copy variant (presumably the replacement) */
	.byte  X86_FEATURE_REP_GOOD	/* feature selector; not defined in this file, presumably from <asm/cpufeature.h> */
	.byte  copy_page_c_end-copy_page_c	/* size in bytes of the copy_page_c code */
	.byte  copy_page_c_end-copy_page_c	/* same size again - presumably original vs. replacement length, TODO confirm */
	.previous

	/*
	 * String-instruction variant of the page copy, kept in the
	 * .altinstr_replacement section.  The copy_page_c/copy_page_c_end
	 * labels bracket the code so that its size can be computed.
	 *
	 * In:  %rdi = destination, %rsi = source
	 */
	.section .altinstr_replacement,"ax"
copy_page_c:
	movl	$(4096/8),%ecx		/* quadword count for 4096 bytes; the 32-bit write zero-extends into %rcx */
	rep movsq			/* copy %rcx quadwords from (%rsi) to (%rdi) */
	ret
copy_page_c_end:
	.previous
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 18:20:36 -04:00			`/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */`

			`/* Don't use streaming store because it's better when the target`
			`ends up in cache. */`

			`/* Could vary the prefetch distance based on SMP/UP */`

			`.globl copy_page`
			`.p2align 4`
			`copy_page:`
[PATCH] x86_64: Undo the earlier changes to remove unrolled copy/memset functions. They cause quite bad performance regressions on Netburst. This is temporary until we can get new optimized functions for these CPUs. This undoes changes that were done in 2.6.15 and in 2.6.16-rc1, essentially bringing the code back to 2.6.14 level. Only change is I renamed the X86_FEATURE_K8_C flag to X86_FEATURE_REP_GOOD and fixed the check for the flag and also fixed some comments. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-02-03 15:51:02 -05:00			`subq $3*8,%rsp`
			`movq %rbx,(%rsp)`
			`movq %r12,1*8(%rsp)`
			`movq %r13,2*8(%rsp)`

			`movl $(4096/64)-5,%ecx`
			`.p2align 4`
			`.Loop64:`
			`dec %rcx`

			`movq (%rsi), %rax`
			`movq 8 (%rsi), %rbx`
			`movq 16 (%rsi), %rdx`
			`movq 24 (%rsi), %r8`
			`movq 32 (%rsi), %r9`
			`movq 40 (%rsi), %r10`
			`movq 48 (%rsi), %r11`
			`movq 56 (%rsi), %r12`

			`prefetcht0 5*64(%rsi)`

			`movq %rax, (%rdi)`
			`movq %rbx, 8 (%rdi)`
			`movq %rdx, 16 (%rdi)`
			`movq %r8, 24 (%rdi)`
			`movq %r9, 32 (%rdi)`
			`movq %r10, 40 (%rdi)`
			`movq %r11, 48 (%rdi)`
			`movq %r12, 56 (%rdi)`

			`leaq 64 (%rsi), %rsi`
			`leaq 64 (%rdi), %rdi`

			`jnz .Loop64`

			`movl $5,%ecx`
			`.p2align 4`
			`.Loop2:`
			`decl %ecx`

			`movq (%rsi), %rax`
			`movq 8 (%rsi), %rbx`
			`movq 16 (%rsi), %rdx`
			`movq 24 (%rsi), %r8`
			`movq 32 (%rsi), %r9`
			`movq 40 (%rsi), %r10`
			`movq 48 (%rsi), %r11`
			`movq 56 (%rsi), %r12`

			`movq %rax, (%rdi)`
			`movq %rbx, 8 (%rdi)`
			`movq %rdx, 16 (%rdi)`
			`movq %r8, 24 (%rdi)`
			`movq %r9, 32 (%rdi)`
			`movq %r10, 40 (%rdi)`
			`movq %r11, 48 (%rdi)`
			`movq %r12, 56 (%rdi)`

			`leaq 64(%rdi),%rdi`
			`leaq 64(%rsi),%rsi`

			`jnz .Loop2`

			`movq (%rsp),%rbx`
			`movq 1*8(%rsp),%r12`
			`movq 2*8(%rsp),%r13`
			`addq $3*8,%rsp`
			`ret`

			`/* Some CPUs run faster using the string copy instructions.`
			`It is also a lot simpler. Use this when possible */`

			`#include <asm/cpufeature.h>`

			`.section .altinstructions,"a"`
			`.align 8`
			`.quad copy_page`
			`.quad copy_page_c`
			`.byte X86_FEATURE_REP_GOOD`
			`.byte copy_page_c_end-copy_page_c`
			`.byte copy_page_c_end-copy_page_c`
			`.previous`

			`.section .altinstr_replacement,"ax"`
			`copy_page_c:`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 18:20:36 -04:00			`movl $4096/8,%ecx`
			`rep`
			`movsq`
			`ret`
[PATCH] x86_64: Undo the earlier changes to remove unrolled copy/memset functions. They cause quite bad performance regressions on Netburst. This is temporary until we can get new optimized functions for these CPUs. This undoes changes that were done in 2.6.15 and in 2.6.16-rc1, essentially bringing the code back to 2.6.14 level. Only change is I renamed the X86_FEATURE_K8_C flag to X86_FEATURE_REP_GOOD and fixed the check for the flag and also fixed some comments. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-02-03 15:51:02 -05:00			`copy_page_c_end:`
			`.previous`