--- /dev/null
+/* The f2 phase of sha1 */
+/* !!! arch x86_32 */
+/* !!! signature n */
+/* !!! count 0 mod 5 */
+
+/* Run with loopmix -f -s -m -i sha1-f2.nlms */
+
+/* Current version can be loop-mixed down to 31 cycles. */
+dnl Register roles used by the round macro.
+dnl
+dnl SA..SE hold the five 32-bit state words; the caller of ROUND decides
+dnl which of them plays a, b, c, d and e in any given round.
+define(`SA',     `%eax')
+define(`SB',     `%ebx')
+define(`SC',     `%ecx')
+define(`SD',     `%edx')
+define(`SE',     `%ebp')
+dnl DATA is the base of the 16-word block that ROUND reads and updates.
+define(`DATA',   `%esp')
+dnl TMP is the scratch register for every intermediate value.
+define(`TMP',    `%edi')
+dnl define(`TMP2',`%esi')
+dnl KVALUE is added to e in every round (the round constant, going by
+dnl its name); %esi was previously reserved for the disabled TMP2 above.
+define(`KVALUE', `%esi')
+
+dnl COUNT is the 32-bit loop counter, addressed relative to %esp after the
+dnl prologue of loop_entry: 64 bytes of scratch + 4 pushed registers
+dnl (16 bytes) + 4 bytes return address = 84.
+dnl NOTE(review): presumably the first stack argument of loop_entry --
+dnl confirm against the caller generated by loopmix.
+define(`COUNT',  `84(%esp)')
+
+dnl OFFSET(i): byte displacement of 32-bit word number i, that is 4*i.
+dnl For i = 0 the expansion is the empty string, so the memory operand is
+dnl written without any displacement.
+define(`OFFSET', `ifelse($1, 0, `', `eval(4 * $1)')')
+
+dnl ROUND(a, b, c, d, e, i)
+dnl
+dnl One round of the f2 phase, where the mixing function is b ^ c ^ d.
+dnl
+dnl   a, b, c, d, e   registers that hold the five state words in this round
+dnl   i               round number; the data block is addressed as a
+dnl                   circular buffer of 16 words, so only i mod 16 and
+dnl                   fixed offsets from it are used
+dnl
+dnl Steps, in the order they appear in the body:
+dnl   1. Expand the message word in place:
+dnl        w = rol(data[i] ^ data[i+2] ^ data[i+8] ^ data[i+13], 1)
+dnl      with all subscripts taken mod 16, and store w back to data[i].
+dnl   2. e += KVALUE + w
+dnl   3. e += b ^ c ^ d
+dnl   4. e += rol(a, 5)
+dnl   5. b  = rol(b, 30), in place
+dnl
+dnl Writes: e, b, TMP, the word at data[i mod 16], and the flags.
+dnl Reads only: a, c, d, KVALUE.
+dnl
+dnl The immediates are spelled with a quoted dollar sign so that m4 does
+dnl not substitute macro arguments into the rotate counts.
+define(`ROUND', `
+ mov OFFSET(eval($6 % 16)) (DATA), TMP
+ xor OFFSET(eval(($6 + 2) % 16)) (DATA), TMP
+ xor OFFSET(eval(($6 + 8) % 16)) (DATA), TMP
+ xor OFFSET(eval(($6 + 13) % 16)) (DATA), TMP
+ rol `$'1, TMP
+ mov TMP, OFFSET(eval($6 % 16)) (DATA)
+
+ add KVALUE, $5
+ add TMP, $5
+
+ mov $2, TMP
+ xor $3, TMP
+ xor $4, TMP
+ add TMP, $5
+
+ mov $1, TMP
+ rol `$'5, TMP
+ add TMP, $5
+
+ rol `$'30, $2
+')
+
+.text
+/* NOTE(review): the second operand of .p2align is the fill byte, so this
+   pads with 0x0f bytes.  If a maximum skip of 15 was meant it would be
+   spelled 4,,15.  Confirm the intent before changing it. */
+.p2align 4,15
+.globl loop_entry
+/* loop_entry
+
+   Timing harness for the f2 rounds: each pass of the loop runs rounds
+   20 through 24 and then subtracts 5 from the counter, repeating until
+   the counter is exactly zero.
+
+   In:   one 32-bit counter on the stack.  Once the prologue has run it
+         sits 84 bytes above %esp (4 bytes return address, 16 bytes of
+         saved registers, 64 bytes of scratch).  It is expected to be a
+         positive multiple of 5, matching the count directive at the top
+         of the file, because the loop only exits on an exact zero.
+   Out:  no result is produced on purpose.
+   Saved and restored: %ebx, %ebp, %esi, %edi.
+   Left modified: %eax, %ecx, %edx, flags, and the counter slot itself.
+
+   Neither the working registers nor the 64-byte scratch area are set up
+   here, so the values being computed are arbitrary. */
+loop_entry:
+ push %ebx
+ push %ebp
+ push %esi
+ push %edi
+
+ /* 64 bytes of scratch: the 16-word block addressed through %esp. */
+ sub $64, %esp
+
+.align 32
+loop_begin:
+ /* Five rounds per pass.  The role of each register rotates by one
+    position from one round to the next. */
+ ROUND(SA, SB, SC, SD, SE, 20)
+ ROUND(SE, SA, SB, SC, SD, 21)
+ ROUND(SD, SE, SA, SB, SC, 22)
+ ROUND(SC, SD, SE, SA, SB, 23)
+ ROUND(SB, SC, SD, SE, SA, 24)
+ /* Explicit l suffix: with an immediate source and a memory destination
+    there is no register operand for the assembler to take the operand
+    size from. */
+ subl $5, COUNT
+ jnz loop_begin
+
+loop_end:
+ /* Undo the prologue in reverse order. */
+ add $64, %esp
+ pop %edi
+ pop %esi
+ pop %ebp
+ pop %ebx
+ ret
+