define(`HP', `r0') C Overlaps unused modulo argument
define(`RP', `r1')
-
-define(`T0', `r2')
-define(`T1', `r3')
-define(`T2', `r4')
-define(`T3', `r5')
-define(`T4', `r6')
-define(`T5', `r7')
-define(`T6', `r8')
-define(`T7', `r10')
+C XP (r2) is the pointer used for the loads below; RP (r1) is only used
+C for the final store. T0-T7 are renumbered to r3-r8, r10, r11 so that
+C r2 is free for XP.
+define(`XP', `r2')
+
+define(`T0', `r3')
+define(`T1', `r4')
+define(`T2', `r5')
+define(`T3', `r6')
+define(`T4', `r7')
+define(`T5', `r8')
+define(`T6', `r10')
+define(`T7', `r11')
define(`H0', `T0') C Overlaps T0 and T1
define(`H1', `T1')
define(`C2', `HP')
.align 2
PROLOGUE(_nettle_ecc_secp192r1_modp)
- push {r4,r5,r6,r7,r8,r10}
+ C r11 joins the saved set because T7 now lives in r11
+ push {r4,r5,r6,r7,r8,r10,r11}
C Reduce two words at a time
- add HP, RP, #48
- add RP, RP, #8
+ add HP, XP, #48
+ add XP, XP, #8
ldmdb HP!, {H0,H1}
- ldm RP, {T2,T3,T4,T5,T6,T7}
+ ldm XP, {T2,T3,T4,T5,T6,T7}
mov C4, #0
adds T4, T4, H0
adcs T5, T5, H1
C Need to add carry to T0 and T2, do T2 later
adc C2, C2, #0
- ldmdb RP!, {T0, T1}
+ C Writeback steps XP back by the 8 bytes added above
+ ldmdb XP!, {T0, T1}
adcs T0, T0, T6
adcs T1, T1, T7
adcs T2, T2, T6
+ C The result is stored through RP; every load in the visible lines
+ C goes through XP or HP (which is derived from XP)
stm RP, {T0,T1,T2,T3,T4,T5}
- pop {r4,r5,r6,r7,r8,r10}
+ pop {r4,r5,r6,r7,r8,r10,r11}
bx lr
EPILOGUE(_nettle_ecc_secp192r1_modp)
.file "ecc-secp224r1-modp.asm"
.arm
-define(`RP', `r1')
-define(`H', `r0') C Overlaps unused modulo argument
+define(`RP', `r1') C Overlaps T0
+define(`XP', `r2')
+define(`H', `r0') C Overlaps unused modulo argument
-define(`T0', `r2')
+define(`T0', `r1')
define(`T1', `r3')
define(`T2', `r4')
define(`T3', `r5')
.align 2
PROLOGUE(_nettle_ecc_secp224r1_modp)
- push {r4,r5,r6,r7,r8,r10,r11,lr}
+ C Pushes RP last
+ C T0 reuses r1, so the result pointer is kept on the stack. r1 has
+ C the lowest register number in the list, so it is stored at the
+ C lowest address and is the first word popped by the pop {XP} below.
+ push {r1,r4,r5,r6,r7,r8,r10,r11,lr}
- add L2, RP, #28
+ add L2, XP, #28
ldm L2, {T0,T1,T2,T3,T4,T5,T6}
mov H, #0
sbc H, #0
C Now subtract from low half
- ldm RP!, {L0,L1,L2}
+ ldm XP!, {L0,L1,L2}
C Clear carry, with the sbcs, this is the 1.
- adds RP, #0
+ adds XP, #0
sbcs T0, L0, T0
sbcs T1, L1, T1
sbcs T2, L2, T2
- ldm RP!, {T3,L0,L1,L2}
+ ldm XP!, {T3,L0,L1,L2}
sbcs T3, T3, N3
sbcs T4, L0, T4
sbcs T5, L1, T5
sbcs T6, T6, #0
sbcs H, H, H
+ C The input pointer is no longer needed; reuse XP for the saved RP
+ pop {XP} C Original RP
+
C Final borrow, subtract (B^3 - 1) |H|
subs T0, T0, H
sbcs T1, T1, H
sbcs T5, T5, #0
sbcs T6, T6, #0
- stmdb RP, {T0,T1,T2,T3,T4,T5,T6}
+ C XP was reloaded with the saved result pointer, so the 7 limbs are
+ C stored ascending from it
+ stm XP, {T0,T1,T2,T3,T4,T5,T6}
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp224r1_modp)
.file "ecc-secp256r1-redc.asm"
.arm
-define(`RP', `r1')
+define(`RP', `r1') C Overlaps T1 below
+define(`XP', `r2')
-define(`T0', `r0') C Overlaps unused modulo argument
-define(`T1', `r2')
+define(`T0', `r0') C Overlaps unused modulo argument
+define(`T1', `r1')
define(`T2', `r3')
define(`T3', `r4')
define(`T4', `r5')
.align 2
PROLOGUE(_nettle_ecc_secp256r1_redc)
- push {r4,r5,r6,r7,r8,r10,r11,lr}
+ C Pushes RP last
+ C T1 reuses r1, so the result pointer is kept on the stack until the
+ C final store. As the lowest-numbered register in the list, r1 is
+ C the first word popped by the pop {XP} below.
+ push {r1, r4,r5,r6,r7,r8,r10,r11,lr}
- ldm RP!, {T0,T1,T2,T3,T4,T5,T6,T7}
+ ldm XP!, {T0,T1,T2,T3,T4,T5,T6,T7}
C Set <F3,F2,F1> to the high 4 limbs of (B^2-B+1)<T2,T1,T0>
C T2 T1
mov T3, T6
adcs T4, T7, F0
- ldm RP!, {T5,T6,T7}
+ ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
mov T3, T6
adcs T4, T7, F0
- ldm RP!, {T5,T6,T7}
+ ldm XP!, {T5,T6,T7}
adcs T5, T5, F1
adcs T6, T6, F2
adcs T7, T7, F3
adcs T5, T5, #0
adcs T6, T6, T0
adcs T7, T7, F0
- ldm RP!, {T0, T1}
+ ldm XP!, {T0, T1}
mov F3, #0
adcs F1, F1, T0
adcs F2, F2, T1
adc F3, F3, #0
rsb F3, F3, #0
+ C pop leaves the condition flags unchanged, so the adcs that follows
+ C still sees the carry from the adcs above
+ pop {XP} C Original RP
+
adcs T0, T2, #0
adcs T1, T3, #0
adcs T2, T4, #0
adcs T6, F1, F3
adcs T7, F2, #0
- sub RP, RP, #64
- stm RP, {T0,T1,T2,T3,T4,T5,T6,T7}
+ C XP holds the saved result pointer, so no pointer adjustment is
+ C needed before the store
+ stm XP, {T0,T1,T2,T3,T4,T5,T6,T7}
pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp256r1_redc)
.arm
define(`RP', `r1')
+C r2 becomes XP, the pointer used for the loads and in-place updates
+C below; T1-T3 and F0-F4 are renumbered to r3-r8, r10, r11.
+define(`XP', `r2')
+
define(`T0', `r0')
-define(`T1', `r2')
-define(`T2', `r3')
-define(`T3', `r4')
-define(`F0', `r5')
-define(`F1', `r6')
-define(`F2', `r7')
-define(`F3', `r8')
-define(`F4', `r10')
+define(`T1', `r3')
+define(`T2', `r4')
+define(`T3', `r5')
+define(`F0', `r6')
+define(`F1', `r7')
+define(`F2', `r8')
+define(`F3', `r10')
+define(`F4', `r11')
define(`N', `r12')
define(`H', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp384r1_modp)
- push {r4,r5,r6,r7,r8,r10,lr}
+ C r11 is saved too, since F4 now lives in r11
+ push {r4,r5,r6,r7,r8,r10,r11,lr}
- add RP, RP, #80
- ldm RP, {T0, T1, T2, T3} C 20-23
+ add XP, XP, #80
+ ldm XP, {T0, T1, T2, T3} C 20-23
C First get top 4 limbs, which need folding twice, as
C
adcs F4, F4, #0
C Add in to high part
- sub RP, RP, #32
- ldm RP, {T0, T1, T2, T3} C 12-15
+ sub XP, XP, #32
+ ldm XP, {T0, T1, T2, T3} C 12-15
mov H, #0
adds F0, T0, F0
adcs F1, T1, F1
adcs F4, F4, #0 C Do F4 later
C Add to low part, keeping carry (positive or negative) in H
- sub RP, RP, #48
- ldm RP, {T0, T1, T2, T3} C 0-3
+ sub XP, XP, #48
+ ldm XP, {T0, T1, T2, T3} C 0-3
mov H, #0
adds T0, T0, F0
adcs T1, T1, F1
adds T3, T3, F0
adc H, H, #0
- stm RP!, {T0,T1,T2,T3} C 0-3
+ stm XP!, {T0,T1,T2,T3} C 0-3
mov N, #2
.Loop:
- ldm RP, {T0,T1,T2,T3} C 4-7
+ ldm XP, {T0,T1,T2,T3} C 4-7
C First, propagate carry
adds T0, T0, H
adc H, H, #0
C +B^3 terms
- ldr F0, [RP, #+48] C 16
+ ldr F0, [XP, #+48] C 16
adds T0, T0, F1
adcs T1, T1, F2
adcs T2, T2, F3
adc H, H, #0
C -B
- ldr F1, [RP, #+52] C 17-18
- ldr F2, [RP, #+56]
+ ldr F1, [XP, #+52] C 17-18
+ ldr F2, [XP, #+56]
subs T0, T0, F3
sbcs T1, T1, F0
sbcs T2, T2, F1
sbcs H, H, #0
C +1
- ldr F3, [RP, #+60] C 19
+ ldr F3, [XP, #+60] C 19
adds T0, T0, F0
adcs T1, T1, F1
adcs T2, T2, F2
adcs T3, T3, F3
adc H, H, #0
subs N, N, #1
- stm RP!, {T0,T1,T2,T3}
+ stm XP!, {T0,T1,T2,T3}
bne .Loop
C Fold high limbs, we need to add in
C
C We always have F4 >= 0, but we can have H < 0.
C Sign extension gets tricky when F4 = 0 and H < 0.
- sub RP, RP, #48
+ sub XP, XP, #48
- ldm RP, {T0,T1,T2,T3} C 0-3
+ ldm XP, {T0,T1,T2,T3} C 0-3
C H H 0 -H H
C ----------------
adcs T3, T3, F3
adc H, H, F0 C 0+cy H+cy -2+cy
- stm RP!, {T0,T1,T2,T3} C 0-3
- ldm RP, {T0,T1,T2,T3} C 4-7
+ stm XP!, {T0,T1,T2,T3} C 0-3
+ ldm XP, {T0,T1,T2,T3} C 4-7
C F4 0 -F4
C ---------
adcs T2, T2, F2
adcs T3, T3, F3
- stm RP!, {T0,T1,T2,T3} C 4-7
- ldm RP, {T0,T1,T2,T3} C 8-11
+ stm XP!, {T0,T1,T2,T3} C 4-7
+ ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, F4
adcs T1, T1, H
adcs T3, T3, H
adc H, H, #0
- stm RP, {T0,T1,T2,T3} C 8-11
+ stm XP, {T0,T1,T2,T3} C 8-11
C Final (unlikely) carry
- sub RP, RP, #32
- ldm RP, {T0,T1,T2,T3} C 0-3
+ C In this last pass the loads advance XP (ldm XP!) while the stores
+ C advance RP (stm RP!): limbs are read through XP and written
+ C through RP
+ sub XP, XP, #32
+ ldm XP!, {T0,T1,T2,T3} C 0-3
C Fold H into F0-F4
mov F0, H
asr H, #31
adcs T3, T3, F3
stm RP!, {T0,T1,T2,T3} C 0-3
- ldm RP, {T0,T1,T2,T3} C 4-7
+ ldm XP!, {T0,T1,T2,T3} C 4-7
adcs T0, T0, F4
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
stm RP!, {T0,T1,T2,T3} C 4-7
- ldm RP, {T0,T1,T2,T3} C 8-11
+ C No writeback on the last load/store pair; neither pointer is used
+ C again before the return
+ ldm XP, {T0,T1,T2,T3} C 8-11
adcs T0, T0, H
adcs T1, T1, H
adcs T2, T2, H
adcs T3, T3, H
- stm RP!, {T0,T1,T2,T3} C 8-11
- pop {r4,r5,r6,r7,r8,r10,pc}
+ stm RP, {T0,T1,T2,T3} C 8-11
+ pop {r4,r5,r6,r7,r8,r10,r11,pc}
EPILOGUE(_nettle_ecc_secp384r1_modp)
define(`HP', `r0')
define(`RP', `r1')
-define(`T0', `r2')
-define(`T1', `r3')
-define(`T2', `r4')
-define(`F0', `r5')
-define(`F1', `r6')
-define(`F2', `r7')
-define(`F3', `r8')
+C r2 becomes XP, the pointer used for the loads and in-place updates
+C below; T0-T2 and F0-F3 are renumbered to r3-r8, r10.
+define(`XP', `r2')
+define(`T0', `r3')
+define(`T1', `r4')
+define(`T2', `r5')
+define(`F0', `r6')
+define(`F1', `r7')
+define(`F2', `r8')
+define(`F3', `r10')
define(`H', `r12')
define(`N', `lr')
.align 2
PROLOGUE(_nettle_ecc_secp521r1_modp)
- push {r4,r5,r6,r7,r8,lr}
+ C r10 is saved too, since F3 now lives in r10
+ push {r4,r5,r6,r7,r8,r10,lr}
C Use that B^17 = 2^23 (mod p)
- ldr F3, [RP, #+68] C 17
- add HP, RP, #72 C 18
- ldr T0, [RP] C 0
+ ldr F3, [XP, #+68] C 17
+ add HP, XP, #72 C 18
+ ldr T0, [XP] C 0
adds T0, T0, F3, lsl #23
- str T0, [RP], #+4
+ str T0, [XP], #+4
mov N, #5
C 5 iterations, reading limbs 18-20, 21-23, 24-26, 27-29, 30-32
C and adding to limbs 1-3, 4-6, 7-9, 10-12, 13-15
.Loop:
- ldm RP, {T0,T1,T2} C 1+3*k -- 3+3*k
+ ldm XP, {T0,T1,T2} C 1+3*k -- 3+3*k
lsr F0, F3, #9
ldm HP!, {F1,F2,F3} C 18+3*k -- 20+3*k
orr F0, F0, F1, lsl #23
adcs T1, T1, F1
adcs T2, T2, F2
sub N, N, #1
- stm RP!,{T0,T1,T2}
+ stm XP!,{T0,T1,T2}
teq N, #0
bne .Loop
- ldr F0, [RP], #-64 C 16
+ C Post-indexed load: reads limb 16, then moves XP back 64 bytes to
+ C limb 0
+ ldr F0, [XP], #-64 C 16
ldr F1, [HP] C 33
ldr T0, .Lc511
lsr F1, F1, #18
adc F1, F1, #0
- ldm RP, {T0, T1} C 0-1
+ C From here on limbs are loaded through XP (with writeback) and
+ C stored through RP
+ ldm XP!, {T0, T1} C 0-1
adds T0, T0, F0
adcs T1, T1, F1
stm RP!, {T0, T1}
- ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 2-8
+ ldm XP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
adcs F2, F2, #0
adcs F3, F3, #0
stm RP!, {T0,T1,T2,F0,F1,F2,F3} C 2-8
- ldm RP, {T0,T1,T2,F0,F1,F2,F3} C 9-15
+ ldm XP, {T0,T1,T2,F0,F1,F2,F3} C 9-15
adcs T0, T0, #0
adcs T1, T1, #0
adcs T2, T2, #0
adcs H, H, #0
stm RP, {T0,T1,T2,F0,F1,F2,F3,H} C 9-16
- pop {r4,r5,r6,r7,r8,pc}
+ pop {r4,r5,r6,r7,r8,r10,pc}
EPILOGUE(_nettle_ecc_secp521r1_modp)