]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
51533b61 MS |
2 | /* |
3 | * A fast checksum+copy routine using movem | |
41f9412b | 4 | * Copyright (c) 1998-2007 Axis Communications AB |
51533b61 MS |
5 | * |
6 | * Authors: Bjorn Wesen | |
7 | * | |
8 | * csum_partial_copy_nocheck(const char *src, char *dst, | |
9 | * int len, unsigned int sum) | |
10 | */ | |
11 | ||
;; csum_partial_copy_nocheck: copy r12 bytes from [r10] to [r11] while
;; adding the copied data into the running checksum in r13.  The
;; resulting sum is handed back in r10.
;;
;; Work is done in three phases:
;;   _mloop   - blocks of 10 longwords (10*4 bytes) moved with movem
;;   _wloop   - remaining 16-bit words
;;   _do_byte - one final odd byte, if any
;;
;; r0-r8 are saved on the stack around the movem loop and restored after
;; it; r9 is used as a scratch register throughout.
;;
;; NOTE(review): the instruction written directly after each branch and
;; each ret looks like it relies on the CRIS delay slot (the word store
;; after "bge _wloop" and the "move.d $r13,$r10" after "ret" only make
;; sense if they execute) -- confirm before reordering any of these pairs.
12 | .globl csum_partial_copy_nocheck | |
5f9ac92f | 13 | .type csum_partial_copy_nocheck,@function |
51533b61 MS |
14 | csum_partial_copy_nocheck: |
15 | ||
16 | ;; r10 - src | |
17 | ;; r11 - dst | |
18 | ;; r12 - length | |
19 | ;; r13 - checksum | |
20 | ||
41f9412b JN |
21 | ;; Optimized for large packets: lengths below 10*4 bytes skip the movem loop |
22 | subq 10*4, $r12 | |
23 | blt _word_loop | |
;; acr becomes the movem loop counter (length minus one block)
24 | move.d $r12, $acr | |
51533b61 MS |
25 | |
;; reserve 9 longwords, start the addc chain with carry clear, save r0-r8
26 | subq 9*4,$sp | |
41f9412b | 27 | clearf c |
51533b61 MS |
28 | movem $r8,[$sp] |
29 | ||
30 | ;; do a movem copy and checksum | |
51533b61 MS |
31 | 1: ;; A failing userspace access (the read) will have this as PC. |
32 | _mloop: movem [$r10+],$r9 ; read 10 longwords | |
41f9412b | 33 | addoq -10*4, $acr, $acr ; loop counter in latency cycle |
51533b61 MS |
34 | movem $r9,[$r11+] ; write 10 longwords |
35 | ||
36 | ;; perform dword checksumming on the 10 longwords (carry is chained from one addc to the next) | |
41f9412b | 37 | addc $r0,$r13 |
51533b61 MS |
38 | addc $r1,$r13 |
39 | addc $r2,$r13 | |
40 | addc $r3,$r13 | |
41 | addc $r4,$r13 | |
42 | addc $r5,$r13 | |
43 | addc $r6,$r13 | |
44 | addc $r7,$r13 | |
45 | addc $r8,$r13 | |
46 | addc $r9,$r13 | |
47 | ||
41f9412b JN |
48 | ;; test $acr, without trashing carry. |
49 | move.d $acr, $acr | |
50 | bpl _mloop | |
51 | ;; r12 := acr; the value is needed after mloop and in the exception handlers. | |
52 | move.d $acr, $r12 | |
51533b61 | 53 | |
41f9412b JN |
54 | ;; fold the last carry into r13 |
55 | addc 0, $r13 | |
51533b61 MS |
56 | movem [$sp+],$r8 ; restore regs |
57 | ||
58 | _word_loop: | |
41f9412b | 59 | addq 10*4,$r12 ; compensate for last loop underflowing length (or undo the initial subq if the movem loop was skipped) |
51533b61 MS |
60 | |
61 | ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below | |
62 | ;; r9 can be used as temporary. | |
51533b61 MS |
63 | move.d $r13,$r9 |
64 | lsrq 16,$r9 ; r9 = checksum >> 16 | |
65 | and.d 0xffff,$r13 ; checksum = checksum & 0xffff | |
51533b61 | 66 | |
41f9412b | 67 | subq 2, $r12 |
51533b61 | 68 | blt _no_words |
41f9412b | 69 | add.d $r9,$r13 ; checksum += r9 (the high half shifted down above) |
51533b61 MS |
70 | |
71 | ;; copy and checksum the rest of the words | |
51533b61 MS |
72 | 2: ;; A failing userspace access for the read below will have this as PC. |
73 | _wloop: move.w [$r10+],$r9 | |
74 | addu.w $r9,$r13 | |
75 | subq 2,$r12 | |
76 | bge _wloop | |
;; store the word that was loaded at _wloop
77 | move.w $r9,[$r11+] | |
78 | ||
51533b61 | 79 | _no_words: |
41f9412b JN |
;; undo the last subq 2; a non-zero remainder means one byte is left
80 | addq 2,$r12 |
81 | bne _do_byte | |
51533b61 MS |
82 | nop |
;; nothing left: hand the checksum back in r10
83 | ret | |
84 | move.d $r13,$r10 | |
85 | ||
86 | _do_byte: | |
87 | ;; copy and checksum the last byte | |
88 | 3: ;; A failing userspace access for the read below will have this as PC. | |
89 | move.b [$r10],$r9 | |
90 | addu.b $r9,$r13 | |
91 | move.b $r9,[$r11] | |
;; hand the checksum back in r10
92 | ret | |
93 | move.d $r13,$r10 | |
5f9ac92f JN |
94 | |
95 | .size csum_partial_copy_nocheck, . - csum_partial_copy_nocheck |