// SPDX-License-Identifier: GPL-2.0
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */

#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>

static inline unsigned short from32to16(unsigned a)
{
	unsigned short b = a >> 16;
	asm("addw %w2,%w0\n\t"
	    "adcw $0,%w0\n"
	    : "=r" (b)
	    : "0" (b), "r" (a));
	return b;
}
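
/*
 * Plain-C equivalent of the fold above (an illustrative sketch only; the
 * helper name from32to16_ref is hypothetical and not part of this file):
 * add the two 16-bit halves and wrap any carry back into the low bits.
 *
 *	static inline unsigned short from32to16_ref(unsigned a)
 *	{
 *		unsigned sum = (a & 0xffff) + (a >> 16);
 *		return (unsigned short)((sum & 0xffff) + (sum >> 16));
 *	}
 */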

/*
 * Do a 64-bit checksum on an arbitrary memory area.
 * Returns a 32-bit checksum.
 *
 * This isn't as time critical as it used to be because many NICs
 * do hardware checksumming these days.
 *
 * Things tried and found to not make it faster:
 * Manual Prefetching
 * Unrolling to a 128-byte inner loop.
 * Using interleaving with more registers to break the carry chains.
 */
static unsigned do_csum(const unsigned char *buff, unsigned len)
{
	unsigned odd, count;
	unsigned long result = 0;

	if (unlikely(len == 0))
		return result;
	odd = 1 & (unsigned long) buff;
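	/*
	 * If the buffer starts on an odd address, pull the first byte into
	 * the high half of the running sum; the byte-swap at the end of
	 * this function rotates the result back into place.
	 */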
	if (unlikely(odd)) {
		result = *buff << 8;
		len--;
		buff++;
	}
	count = len >> 1;		/* nr of 16-bit words.. */
	if (count) {
		if (2 & (unsigned long) buff) {
			result += *(unsigned short *)buff;
			count--;
			len -= 2;
			buff += 2;
		}
		count >>= 1;		/* nr of 32-bit words.. */
		if (count) {
			unsigned long zero;
			unsigned count64;
			if (4 & (unsigned long) buff) {
				result += *(unsigned int *) buff;
				count--;
				len -= 4;
				buff += 4;
			}
			count >>= 1;	/* nr of 64-bit words.. */

			/* main loop using 64-byte blocks */
			zero = 0;
			count64 = count >> 3;
			while (count64) {
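				/*
				 * One addq and seven adcqs accumulate a
				 * 64-byte block in a single carry chain;
				 * the trailing adcq of zero folds the final
				 * carry back into the running sum.
				 */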
				asm("addq 0*8(%[src]),%[res]\n\t"
				    "adcq 1*8(%[src]),%[res]\n\t"
				    "adcq 2*8(%[src]),%[res]\n\t"
				    "adcq 3*8(%[src]),%[res]\n\t"
				    "adcq 4*8(%[src]),%[res]\n\t"
				    "adcq 5*8(%[src]),%[res]\n\t"
				    "adcq 6*8(%[src]),%[res]\n\t"
				    "adcq 7*8(%[src]),%[res]\n\t"
				    "adcq %[zero],%[res]"
				    : [res] "=r" (result)
				    : [src] "r" (buff), [zero] "r" (zero),
				      "[res]" (result));
				buff += 64;
				count64--;
			}

			/* last up to 7 8-byte blocks */
			count %= 8;
			while (count) {
				asm("addq %1,%0\n\t"
				    "adcq %2,%0\n"
				    : "=r" (result)
				    : "m" (*(unsigned long *)buff),
				      "r" (zero), "0" (result));
				--count;
				buff += 8;
			}
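			/*
			 * Fold the 64-bit accumulator down to 32 bits so the
			 * plain C additions below cannot lose a carry.
			 */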
			result = add32_with_carry(result>>32,
						  result&0xffffffff);

			if (len & 4) {
				result += *(unsigned int *) buff;
				buff += 4;
			}
		}
		if (len & 2) {
			result += *(unsigned short *) buff;
			buff += 2;
		}
	}
	if (len & 1)
		result += *buff;
	result = add32_with_carry(result>>32, result & 0xffffffff);
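	/*
	 * If the start was odd, everything above was summed with each byte
	 * shifted one position within its 16-bit word; fold to 16 bits and
	 * swap the bytes so the result matches an even-aligned sum.
	 */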
	if (unlikely(odd)) {
		result = from32to16(result);
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	}
	return result;
}
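
/*
 * Reference model (an illustrative sketch, not part of the build): for an
 * even-aligned buffer, the routine above is equivalent, once folded down to
 * 16 bits, to the straightforward RFC 1071 style sum below.  The helper
 * name do_csum_ref is hypothetical.
 *
 *	static unsigned do_csum_ref(const unsigned char *p, unsigned len)
 *	{
 *		unsigned long sum = 0;
 *		unsigned i;
 *
 *		for (i = 0; i + 1 < len; i += 2)
 *			sum += *(const unsigned short *)(p + i);
 *		if (len & 1)
 *			sum += p[len - 1];
 *		while (sum >> 32)
 *			sum = (sum & 0xffffffff) + (sum >> 32);
 *		return (unsigned)sum;
 *	}
 */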

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * returns a 32-bit number suitable for feeding into itself
 * or csum_tcpudp_magic
 *
 * this function must be called with even lengths, except
 * for the last fragment, which may be odd
 *
 * it's best to have buff aligned on a 64-bit boundary
 */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
	return (__force __wsum)add32_with_carry(do_csum(buff, len),
						(__force u32)sum);
}
EXPORT_SYMBOL(csum_partial);
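
/*
 * Usage sketch (illustrative; frag0/frag1 and their lengths are
 * hypothetical names): checksumming data split across two fragments.
 * Every call but the last must cover an even number of bytes; the running
 * __wsum is threaded through and only folded to 16 bits at the very end.
 *
 *	__wsum sum = 0;
 *	sum = csum_partial(frag0, frag0_len, sum);	// frag0_len must be even
 *	sum = csum_partial(frag1, frag1_len, sum);	// last fragment may be odd
 *	__sum16 check = csum_fold(sum);
 */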

/*
 * this routine is used for miscellaneous IP-like checksums, mainly
 * in icmp.c
 */
__sum16 ip_compute_csum(const void *buff, int len)
{
	return csum_fold(csum_partial(buff, len, 0));
}
EXPORT_SYMBOL(ip_compute_csum);
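
/*
 * Usage sketch (illustrative; icmph and icmp_len are hypothetical names for
 * a struct icmphdr pointer and the full message length), loosely along the
 * lines of how ICMP messages are checksummed:
 *
 *	icmph->checksum = 0;
 *	icmph->checksum = ip_compute_csum(icmph, icmp_len);
 */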