]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 | 2 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
038b0a6d | 3 | |
8d379dad | 4 | #include <linux/linkage.h> |
cd4d09ec | 5 | #include <asm/cpufeatures.h> |
59e97e4d | 6 | #include <asm/alternative-asm.h> |
784d5699 | 7 | #include <asm/export.h> |
8d379dad | 8 | |
090a3f61 BP |
9 | /* |
10 | * Some CPUs run faster using the string copy instructions (sane microcode). | |
11 | * It is also a lot simpler. Use this when possible. But don't use the string | |
12 | * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the | |
13 | * prefetch distance based on SMP/UP. | |
14 | */ | |
8d379dad | 15 | ALIGN |
090a3f61 | 16 | ENTRY(copy_page) /* copy one 4096-byte page: %rdi = destination, %rsi = source */ |
090a3f61 | 17 | ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD /* without REP_GOOD take the register-based copy; with it the jmp is replaced by nothing and we fall through */ |
269833bd ML |
18 | movl $4096/8, %ecx /* 512 quadwords per page; the 32-bit write also zeroes the upper half of %rcx */ |
19 | rep movsq /* copy %rcx quadwords from (%rsi) to (%rdi); NOTE(review): forward copy assumes DF is clear on entry - confirm */ | |
8d379dad | 20 | ret |
090a3f61 | 21 | ENDPROC(copy_page) |
784d5699 | 22 | EXPORT_SYMBOL(copy_page) |
1da177e4 | 23 | |
090a3f61 | 24 | ENTRY(copy_page_regs) /* register-based page copy: 4096 bytes from (%rsi) to (%rdi), 64 bytes per iteration through eight GPRs */ |
269833bd | 25 | subq $2*8, %rsp /* reserve two 8-byte stack slots for the callee-saved registers used as scratch */ |
269833bd | 26 | movq %rbx, (%rsp) /* save %rbx */ |
269833bd | 27 | movq %r12, 1*8(%rsp) /* save %r12 */ |
7bcd3f34 | 28 | |
269833bd | 29 | movl $(4096/64)-5, %ecx /* 59 chunks go through the prefetching loop; the last 5 are left to .Loop2 */ |
7bcd3f34 AK |
30 | .p2align 4 /* align the loop head to 16 bytes */ |
31 | .Loop64: | |
269833bd ML |
32 | dec %rcx /* sets ZF for the jnz at the loop bottom; the mov/prefetch/lea in between do not modify flags */ |
33 | movq 0x8*0(%rsi), %rax /* load one 64-byte chunk (8 quadwords) from the source */ | |
34 | movq 0x8*1(%rsi), %rbx | |
35 | movq 0x8*2(%rsi), %rdx | |
36 | movq 0x8*3(%rsi), %r8 | |
37 | movq 0x8*4(%rsi), %r9 | |
38 | movq 0x8*5(%rsi), %r10 | |
39 | movq 0x8*6(%rsi), %r11 | |
40 | movq 0x8*7(%rsi), %r12 | |
7bcd3f34 AK |
41 | |
42 | prefetcht0 5*64(%rsi) /* prefetch source 5 chunks (320 bytes) ahead; with 59 iterations the furthest prefetch is at offset 4032, still inside the page */ | |
43 ||
269833bd ML |
44 | movq %rax, 0x8*0(%rdi) /* store the 64-byte chunk to the destination */ |
45 | movq %rbx, 0x8*1(%rdi) | |
46 | movq %rdx, 0x8*2(%rdi) | |
47 | movq %r8, 0x8*3(%rdi) | |
48 | movq %r9, 0x8*4(%rdi) | |
49 | movq %r10, 0x8*5(%rdi) | |
50 | movq %r11, 0x8*6(%rdi) | |
51 | movq %r12, 0x8*7(%rdi) | |
7bcd3f34 | 52 | |
269833bd ML |
53 | leaq 64 (%rsi), %rsi /* advance source by one chunk; lea is used so the ZF from dec is preserved */ |
54 | leaq 64 (%rdi), %rdi /* advance destination by one chunk */ | |
7bcd3f34 | 55 | |
269833bd | 56 | jnz .Loop64 /* repeat until the counter decremented at the loop top reached zero */ |
7bcd3f34 | 57 | |
269833bd | 58 | movl $5, %ecx /* remaining 5 chunks (320 bytes) are copied without prefetch */ |
7bcd3f34 AK |
59 | .p2align 4 /* align the loop head to 16 bytes */ |
60 | .Loop2: | |
269833bd ML |
61 | decl %ecx /* sets ZF for the jnz at the loop bottom, as in .Loop64 */ |
62 ||
63 | movq 0x8*0(%rsi), %rax /* load one 64-byte chunk from the source */ | |
64 | movq 0x8*1(%rsi), %rbx | |
65 | movq 0x8*2(%rsi), %rdx | |
66 | movq 0x8*3(%rsi), %r8 | |
67 | movq 0x8*4(%rsi), %r9 | |
68 | movq 0x8*5(%rsi), %r10 | |
69 | movq 0x8*6(%rsi), %r11 | |
70 | movq 0x8*7(%rsi), %r12 | |
71 ||
72 | movq %rax, 0x8*0(%rdi) /* store the 64-byte chunk to the destination */ | |
73 | movq %rbx, 0x8*1(%rdi) | |
74 | movq %rdx, 0x8*2(%rdi) | |
75 | movq %r8, 0x8*3(%rdi) | |
76 | movq %r9, 0x8*4(%rdi) | |
77 | movq %r10, 0x8*5(%rdi) | |
78 | movq %r11, 0x8*6(%rdi) | |
79 | movq %r12, 0x8*7(%rdi) | |
80 ||
81 | leaq 64(%rdi), %rdi /* advance destination; lea keeps the ZF from decl intact */ | |
82 | leaq 64(%rsi), %rsi /* advance source */ | |
7bcd3f34 AK |
83 | jnz .Loop2 /* repeat until all 5 tail chunks are copied */ |
84 ||
269833bd | 85 | movq (%rsp), %rbx /* restore %rbx */ |
269833bd | 86 | movq 1*8(%rsp), %r12 /* restore %r12 */ |
269833bd | 87 | addq $2*8, %rsp /* release the two save slots */ |
7bcd3f34 | 88 | ret |
090a3f61 | 89 | ENDPROC(copy_page_regs) |