ifelse(eval($# > 3), 1,
`OPN_XXXY($1, $2, shift(shift(shift($@))))dnl
')')
+
+C FIXME: If we allow clobber of F, no need for T register.
+C Polynomial reduction D = R + x^{-64} F mod P
+C where x^{-64} = x^{64} + P1 (mod P)
+C GHASH_REDUCE(D, R, F, P1, T)
+C
+C Operands (all vector registers):
+C   D   output; overwritten with the reduced value
+C   R   input; only read
+C   F   input; only read
+C   P1  input; the polynomial constant multiplied with F; only read
+C   T   scratch; clobbered
+C
+C Instruction by instruction:
+C   T = vpmsumd product of F and P1
+C   D = F with its two doublewords swapped
+C   T = T xor R
+C   D = D xor T
+C
+C Register constraints that follow from the order above:
+C   D must not be the same register as R, since D is written by
+C   xxswapd before R is read.
+C   D must not be the same register as T, since D is written while
+C   T still holds the vpmsumd product.
+C   D is named through VSR() for xxswapd and directly for vxor, so
+C   both spellings must refer to the same physical register.
+define(`GHASH_REDUCE', `
+ vpmsumd $5,$3,$4
+ xxswapd VSR($1),VSR($3)
+ vxor $5, $5, $2
+ vxor $1, $1, $5
+')
vxor F,F,F3
vxor R,R,R3
- C reduction
- vpmsumd T,F,POLY_L
- xxswapd VSR(D),VSR(F)
- vxor R,R,T
- vxor D,R,D
+ GHASH_REDUCE(D, R, F, POLY_L, T)
addi DATA,DATA,0x40
bdnz L4x_loop
vxor F,F,F2
vxor R,R,R2
- C reduction
- vpmsumd T,F,POLY_L
- xxswapd VSR(D),VSR(F)
- vxor R,R,T
- vxor D,R,D
+ GHASH_REDUCE(D, R, F, POLY_L, T)
addi DATA,DATA,0x20
clrldi BLOCKS,BLOCKS,63 C 'set the high-order 63 bits to zeros'
vpmsumd F,H1L,C0
vpmsumd R,H1M,C0
- C reduction
- vpmsumd T,F,POLY_L
- xxswapd VSR(D),VSR(F)
- vxor R,R,T
- vxor D,R,D
+ GHASH_REDUCE(D, R, F, POLY_L, T)
addi DATA,DATA,0x10
clrldi BLOCKS,BLOCKS,60 C 'set the high-order 60 bits to zeros'