]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
f5d45dc9 SH |
2 | /* |
3 | * arch/openrisc/lib/memcpy.c | |
4 | * | |
5 | * Optimized memory copy routines for openrisc. These are mostly copied | |
6 | * from other sources but slightly extended based on ideas discussed in | |
7 | * #openrisc. | |
8 | * | |
9 | * The word unroll implementation is an extension to the arm byte | |
10 | * unrolled implementation, but using word copies (if things are | |
11 | * properly aligned) | |
12 | * | |
13 | * The great arm loop unroll algorithm can be found at: | |
14 | * arch/arm/boot/compressed/string.c | |
15 | */ | |
16 | ||
17 | #include <linux/export.h> | |
18 | ||
19 | #include <linux/string.h> | |
20 | ||
e34f671d | 21 | #ifdef CONFIG_OR1K_1200 |
f5d45dc9 SH |
/*
 * Do memcpy with word copies and loop unrolling. This gives the
 * best performance on the OR1200 and MOR1KX architectures.
 *
 * @dest: start of the destination buffer
 * @src:  start of the source buffer (only read, never written)
 * @n:    number of bytes to copy
 *
 * Bytes are copied front to back; no special handling is done for
 * overlapping buffers.
 *
 * Returns @dest.
 */
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
	__kernel_size_t i;
	unsigned char *d;
	const unsigned char *s;
	uint32_t *dest_w = dest;
	const uint32_t *src_w = src;

	/*
	 * If both source and dest are word aligned copy words. The
	 * addresses are OR-ed together so one mask tests both, and
	 * unsigned long is used because it is wide enough for a pointer.
	 */
	if (!(((unsigned long)dest_w | (unsigned long)src_w) & 3)) {
		/* Copy 32 bytes per loop */
		for (i = n >> 5; i > 0; i--) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		/*
		 * The low five bits of n describe what is left; peel them
		 * off from the largest chunk (16 bytes) down to one word.
		 */
		if (n & (1 << 4)) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		if (n & (1 << 3)) {
			*dest_w++ = *src_w++;
			*dest_w++ = *src_w++;
		}

		if (n & (1 << 2))
			*dest_w++ = *src_w++;

		/* Continue with byte pointers for the last 0-3 bytes */
		d = (unsigned char *)dest_w;
		s = (const unsigned char *)src_w;
	} else {
		d = (unsigned char *)dest_w;
		s = (const unsigned char *)src_w;

		/* Not word aligned: copy 8 bytes per loop */
		for (i = n >> 3; i > 0; i--) {
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
		}

		if (n & (1 << 2)) {
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
			*d++ = *s++;
		}
	}

	/* Tail shared by both paths: bit 1 means two bytes, bit 0 one */
	if (n & (1 << 1)) {
		*d++ = *s++;
		*d++ = *s++;
	}

	if (n & 1)
		*d = *s;

	return dest;
}
97 | #else | |
/*
 * Use word copies but no loop unrolling as we cannot assume there
 * will be benefits on the architecture.
 *
 * @dest: start of the destination buffer
 * @src:  start of the source buffer (only read, never written)
 * @n:    number of bytes to copy
 *
 * Bytes are copied front to back; no special handling is done for
 * overlapping buffers.
 *
 * Returns @dest.
 */
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
	unsigned char *d;
	const unsigned char *s;
	uint32_t *dest_w = dest;
	const uint32_t *src_w = src;

	/*
	 * If both source and dest are word aligned copy words. The
	 * addresses are OR-ed together so one mask tests both, and
	 * unsigned long is used because it is wide enough for a pointer.
	 */
	if (!(((unsigned long)dest_w | (unsigned long)src_w) & 3)) {
		for (; n >= 4; n -= 4)
			*dest_w++ = *src_w++;
	}

	/*
	 * The word pointers are either untouched (unaligned case) or sit
	 * just past the last copied word, so the byte loop resumes there.
	 */
	d = (unsigned char *)dest_w;
	s = (const unsigned char *)src_w;

	/* For remaining or if not aligned, copy bytes */
	for (; n >= 1; n -= 1)
		*d++ = *s++;

	return dest;
}
123 | #endif | |
124 | ||
125 | EXPORT_SYMBOL(memcpy); |