and \$0x0F,%al # wrap-around $num in a 16-byte block
leaq ($num,$ivp),%r11 # process $left iv bytes
- vmovdqu8 (%r11),%xmm0
- vmovdqu8 ($inp),%xmm1 # process $left input bytes
+ vmovdqu8 (%r11),%xmm0{%k1}{z} # masked load of $left iv bytes; bytes masked off by k1 are zeroed and cannot fault
+ vmovdqu8 ($inp),%xmm1{%k1}{z} # masked load of $left input bytes; replaces the full 16-byte load so nothing past the tail is read
vpxor %xmm0,%xmm1,%xmm2 # CipherFeedBack XOR
vmovdqu8 %xmm2,($out){%k1} # write $left output bytes
vmovdqu8 %xmm2,(%r11){%k1} # blend $left output bytes into iv
and \$0x0F,%al # wrap-around in a 16-byte block
leaq ($num,$ivp),%r11 # process $left iv bytes
- vmovdqu8 (%r11),%xmm0
- vmovdqu8 ($inp),%xmm1 # process $left input bytes
+ vmovdqu8 (%r11),%xmm0{%k1}{z} # masked load of $left iv bytes; bytes masked off by k1 are zeroed and cannot fault
+ vmovdqu8 ($inp),%xmm1{%k1}{z} # masked load of $left input bytes (also fed back into iv below); nothing past the tail is read
vpxor %xmm0,%xmm1,%xmm2 # CipherFeedBack XOR
vmovdqu8 %xmm2,($out){%k1} # write $left output bytes
vmovdqu8 %xmm1,(%r11){%k1} # blend $left input bytes into iv