# Select per-lane corrections by the 4-bit carry pattern: scale the
# nibble by 32 (one 256-bit table entry), load the blend mask, and let
# vblendvpd substitute the pre-computed "minus (2^52-1)" value in the
# flagged lanes.
and \$0xf, %r14
vpsubq .Lmask52x4(%rip), $_R0, $T0
shl \$5, %r14
- vmovapd (%rdx, %r14), $T1
+ vmovapd (%rdx,%r14), $T1
vblendvpd $T1, $T0, $_R0, $_R0
shr \$4, %r13b
and \$0xf, %r13
vpsubq .Lmask52x4(%rip), $_R0h, $T0
shl \$5, %r13
- vmovapd (%rdx, %r13), $T1
+ vmovapd (%rdx,%r13), $T1
vblendvpd $T1, $T0, $_R0h, $_R0h
mov %r12b, %r11b
and \$0xf, %r12
vpsubq .Lmask52x4(%rip), $_R1, $T0
shl \$5, %r12
- vmovapd (%rdx, %r12), $T1
+ vmovapd (%rdx,%r12), $T1
vblendvpd $T1, $T0, $_R1, $_R1
shr \$4, %r11b
and \$0xf, %r11
vpsubq .Lmask52x4(%rip), $_R1h, $T0
shl \$5, %r11
- vmovapd (%rdx, %r11), $T1
+ vmovapd (%rdx,%r11), $T1
vblendvpd $T1, $T0, $_R1h, $_R1h
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R2, $T0
shl \$5, %r10
- vmovapd (%rdx, %r10), $T1
+ vmovapd (%rdx,%r10), $T1
vblendvpd $T1, $T0, $_R2, $_R2
# Add carries according to the obtained mask: a flagged lane takes
# R - (2^52-1), i.e. +1 with the carry-out removed once the result is
# truncated back to 52 bits (the vpand below).
and \$0xf, %r14
vpsubq .Lmask52x4(%rip), $_R0, $T0
shl \$5, %r14
- vmovapd (%rdx, %r14), $T1
+ vmovapd (%rdx,%r14), $T1
vblendvpd $T1, $T0, $_R0, $_R0
shr \$4, %r10b
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R0h, $T0
shl \$5, %r10
- vmovapd (%rdx, %r10), $T1
+ vmovapd (%rdx,%r10), $T1
vblendvpd $T1, $T0, $_R0h, $_R0h
mov %r13b, %r10b
and \$0xf, %r13
vpsubq .Lmask52x4(%rip), $_R1, $T0
shl \$5, %r13
- vmovapd (%rdx, %r13), $T1
+ vmovapd (%rdx,%r13), $T1
vblendvpd $T1, $T0, $_R1, $_R1
shr \$4, %r10b
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R1h, $T0
shl \$5, %r10
- vmovapd (%rdx, %r10), $T1
+ vmovapd (%rdx,%r10), $T1
vblendvpd $T1, $T0, $_R1h, $_R1h
mov %r12b, %r10b
and \$0xf, %r12
vpsubq .Lmask52x4(%rip), $_R2, $T0
shl \$5, %r12
- vmovapd (%rdx, %r12), $T1
+ vmovapd (%rdx,%r12), $T1
vblendvpd $T1, $T0, $_R2, $_R2
shr \$4, %r10b
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R2h, $T0
shl \$5, %r10
- vmovapd (%rdx, %r10), $T1
+ vmovapd (%rdx,%r10), $T1
vblendvpd $T1, $T0, $_R2h, $_R2h
mov %r11b, %r10b
and \$0xf, %r11
vpsubq .Lmask52x4(%rip), $_R3, $T0
shl \$5, %r11
- vmovapd (%rdx, %r11), $T1
+ vmovapd (%rdx,%r11), $T1
vblendvpd $T1, $T0, $_R3, $_R3
shr \$4, %r10b
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R3h, $T0
shl \$5, %r10
- vmovapd (%rdx, %r10), $T1
+ vmovapd (%rdx,%r10), $T1
vblendvpd $T1, $T0, $_R3h, $_R3h
vpand .Lmask52x4(%rip), $_R0, $_R0
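# Same nibble-indexed mask selection as above, with the table pointer
# held in %r8 for this block.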
and \$0xf, %r14
vpsubq .Lmask52x4(%rip), $_R0, $tmp
shl \$5, %r14
- vmovapd (%r8, %r14), $tmp2
+ vmovapd (%r8,%r14), $tmp2
vblendvpd $tmp2, $tmp, $_R0, $_R0
shr \$4, %r9b
and \$0xf, %r9
vpsubq .Lmask52x4(%rip), $_R0h, $tmp
shl \$5, %r9
- vmovapd (%r8, %r9), $tmp2
+ vmovapd (%r8,%r9), $tmp2
vblendvpd $tmp2, $tmp, $_R0h, $_R0h
movb %r13b, %r9b
and \$0xf, %r13
vpsubq .Lmask52x4(%rip), $_R1, $tmp
shl \$5, %r13
- vmovapd (%r8, %r13), $tmp2
+ vmovapd (%r8,%r13), $tmp2
vblendvpd $tmp2, $tmp, $_R1, $_R1
shr \$4, %r9b
and \$0xf, %r9
vpsubq .Lmask52x4(%rip), $_R1h, $tmp
shl \$5, %r9
- vmovapd (%r8, %r9), $tmp2
+ vmovapd (%r8,%r9), $tmp2
vblendvpd $tmp2, $tmp, $_R1h, $_R1h
movb %r12b, %r9b
and \$0xf, %r12
vpsubq .Lmask52x4(%rip), $_R2, $tmp
shl \$5, %r12
- vmovapd (%r8, %r12), $tmp2
+ vmovapd (%r8,%r12), $tmp2
vblendvpd $tmp2, $tmp, $_R2, $_R2
shr \$4, %r9b
and \$0xf, %r9
vpsubq .Lmask52x4(%rip), $_R2h, $tmp
shl \$5, %r9
- vmovapd (%r8, %r9), $tmp2
+ vmovapd (%r8,%r9), $tmp2
vblendvpd $tmp2, $tmp, $_R2h, $_R2h
movb %r11b, %r9b
and \$0xf, %r11
vpsubq .Lmask52x4(%rip), $_R3, $tmp
shl \$5, %r11
- vmovapd (%r8, %r11), $tmp2
+ vmovapd (%r8,%r11), $tmp2
vblendvpd $tmp2, $tmp, $_R3, $_R3
shr \$4, %r9b
and \$0xf, %r9
vpsubq .Lmask52x4(%rip), $_R3h, $tmp
shl \$5, %r9
- vmovapd (%r8, %r9), $tmp2
+ vmovapd (%r8,%r9), $tmp2
vblendvpd $tmp2, $tmp, $_R3h, $_R3h
movb %r10b, %r9b
and \$0xf, %r10
vpsubq .Lmask52x4(%rip), $_R4, $tmp
shl \$5, %r10
- vmovapd (%r8, %r10), $tmp2
+ vmovapd (%r8,%r10), $tmp2
vblendvpd $tmp2, $tmp, $_R4, $_R4
shr \$4, %r9b
and \$0xf, %r9
vpsubq .Lmask52x4(%rip), $_R4h, $tmp
shl \$5, %r9
- vmovapd (%r8, %r9), $tmp2
+ vmovapd (%r8,%r9), $tmp2
vblendvpd $tmp2, $tmp, $_R4h, $_R4h
pop %r8
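For readers tracing the repeated stanza, a minimal scalar C model of the
select-and-subtract step may help. The table layout (bit j of the 4-bit
index flags vector lane j), the function name, and the driver values are
illustrative assumptions, not taken from the source; the real flag
computation happens earlier in the routine and is not shown here.

#include <stdint.h>
#include <stdio.h>

#define MASK52 ((1ULL << 52) - 1)   /* per-lane value of .Lmask52x4 */

/* Model of one stanza: vpsubq pre-computes R - (2^52-1) in all lanes,
 * the nibble (scaled by 32, one 256-bit table entry) selects a blend
 * mask, and vblendvpd takes the subtracted value in flagged lanes. */
static void apply_carry_mask(uint64_t r[4], unsigned nibble)
{
    for (int lane = 0; lane < 4; lane++) {
        if ((nibble >> lane) & 1)
            r[lane] -= MASK52;      /* wraps mod 2^64, like vpsubq */
    }
}

int main(void)
{
    /* toy pattern: lane 0 saturated, carry absorbed by lane 1 */
    uint64_t r[4] = { MASK52, 5, 7, 9 };
    apply_carry_mask(r, 0x3);       /* lanes 0 and 1 flagged */
    for (int i = 0; i < 4; i++)
        r[i] &= MASK52;             /* the trailing vpand truncation */
    printf("%llx %llx %llx %llx\n", (unsigned long long)r[0],
           (unsigned long long)r[1], (unsigned long long)r[2],
           (unsigned long long)r[3]);   /* prints: 0 6 7 9 */
    return 0;
}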