Add POWER optimizations for the AES-XTS algorithm
author     sanumesh <sanumesh@in.ibm.com>
           Mon, 20 May 2024 14:35:55 +0000 (09:35 -0500)
committer  Tomas Mraz <tomas@openssl.org>
           Fri, 7 Jun 2024 06:06:35 +0000 (08:06 +0200)
Reviewed-by: Paul Dale <ppzgs1@gmail.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/24531)

(cherry picked from commit 858dfdfc67ea50fbe9ba38250daf306d5d0370a3)
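For context: XTS derives the tweak for each 16-byte block by multiplying the
previous tweak by x in GF(2^128), reduced modulo x^128 + x^7 + x^2 + x + 1.
The patch does not change that math; it only changes how the PowerPC vector
code evaluates it. A minimal byte-wise C sketch of the textbook update
(little-endian byte order, as in IEEE P1619), for reference:

    #include <stdint.h>

    /* Textbook XTS tweak update: tweak *= x in GF(2^128), reduced by
     * x^128 + x^7 + x^2 + x + 1, i.e. XOR 0x87 into the low byte when
     * a bit is carried out of the top. */
    static void xts_mult_x(uint8_t tweak[16])
    {
        uint8_t carry = 0;
        for (int i = 0; i < 16; i++) {
            uint8_t c = tweak[i] >> 7;                 /* bit shifted out */
            tweak[i] = (uint8_t)(tweak[i] << 1) | carry;
            carry = c;
        }
        if (carry)
            tweak[0] ^= 0x87;                          /* reduction poly */
    }

The assembly below computes this for six blocks per loop iteration.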

crypto/aes/asm/aesp8-ppc.pl

index 60cf86f52aed20b33101ee69123ca5031d6e92c3..38b9405a283b7fa8c8c8b26fd6817e3e3ce4289a 100755 (executable)
@@ -99,11 +99,12 @@ rcon:
 .long  0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
 .long  0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
 .long  0,0,0,0                                         ?asis
+.long  0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
 Lconsts:
        mflr    r0
        bcl     20,31,\$+4
        mflr    $ptr     #vvvvv "distance between . and rcon
-       addi    $ptr,$ptr,-0x48
+       addi    $ptr,$ptr,-0x58
        mtlr    r0
        blr
        .long   0
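The new 16-byte row appended to the constant table is the control vector for
vpermxor, and the Lconsts displacement grows from -0x48 to -0x58 because the
table is now 16 bytes longer. Per byte, vpermxor produces VRA[high nibble of
control] XOR VRB[low nibble of control]; with the control bytes
0x0f,0x10,0x21,...,0xfe that works out to a[i] ^ b[(i+15) % 16], so one
instruction fuses the old rotate-by-15 (vsldoi) and vxor. A hedged C model,
using big-endian lane numbering:

    #include <stdint.h>
    #include <stdio.h>

    /* Byte-wise model of vpermxor vt,va,vb,vc (big-endian lanes): each
     * result byte is va[high nibble of vc] XOR vb[low nibble of vc]. */
    static void vpermxor(uint8_t vt[16], const uint8_t va[16],
                         const uint8_t vb[16], const uint8_t vc[16])
    {
        for (int i = 0; i < 16; i++)
            vt[i] = va[vc[i] >> 4] ^ vb[vc[i] & 0x0f];
    }

    int main(void)
    {
        /* The table row added above: 0x0f102132_43546576_8798a9ba_cbdcedfe */
        const uint8_t ctrl[16] = { 0x0f,0x10,0x21,0x32,0x43,0x54,0x65,0x76,
                                   0x87,0x98,0xa9,0xba,0xcb,0xdc,0xed,0xfe };
        uint8_t a[16], b[16], r[16];
        for (int i = 0; i < 16; i++) { a[i] = (uint8_t)(i * 17); b[i] = (uint8_t)(255 - i); }
        vpermxor(r, a, b, ctrl);
        for (int i = 0; i < 16; i++)      /* equals XOR with b rotated one byte */
            if (r[i] != (a[i] ^ b[(i + 15) % 16])) return 1;
        puts("vpermxor ctrl == vxor with vsldoi(b,b,15)");
        return 0;
    }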
@@ -2405,7 +2406,7 @@ ___
 my $key_=$key2;
 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
     $x00=0 if ($flavour =~ /osx/);
-my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
+my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5)=map("v$_",(0..5));
 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
 my $rndkey0="v23";     # v24-v25 rotating buffer for first found keys
@@ -2460,6 +2461,18 @@ _aesp8_xts_encrypt6x:
        li              $x70,0x70
        mtspr           256,r0
 
+       # Reverse eighty7 to 0x010101..87
+       xxlor           2, 32+$eighty7, 32+$eighty7
+       vsldoi          $eighty7,$tmp,$eighty7,1        # 0x010101..87
+       xxlor           1, 32+$eighty7, 32+$eighty7
+
+       # Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
+       mr              $x70, r6
+       bl              Lconsts
+       lxvw4x          0, $x40, r6             # load XOR contents
+       mr              r6, $x70
+       li              $x70,0x70
+
        subi            $rounds,$rounds,3       # -4 in total
 
        lvx             $rndkey0,$x00,$key1     # load key schedule
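Three VSX scratch registers act as a small constant cache in this prologue:
vs2 keeps the original 0x870101..01 mask, vs1 keeps the byte-reversed
0x010101..87 produced by the vsldoi, and vs0 receives the vpermxor control
word from the constant table. r6 is parked in the $x70 GPR across `bl
Lconsts` because Lconsts returns the table base in r6, after which both r6
and $x70 are restored. A small C model of the vsldoi step, assuming $tmp
still holds an all-0x01 vector at this point (as the value quoted in the
comment requires):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Model of: vsldoi vt,va,vb,1 (big-endian lanes) — take bytes 1..16
     * of the 32-byte concatenation va||vb. */
    static void vsldoi1(uint8_t vt[16], const uint8_t va[16], const uint8_t vb[16])
    {
        uint8_t cat[32];
        memcpy(cat, va, 16);
        memcpy(cat + 16, vb, 16);
        memcpy(vt, cat + 1, 16);
    }

    int main(void)
    {
        uint8_t tmp[16], e87[16], out[16];
        memset(tmp, 0x01, 16);      /* assumed: $tmp = 0x010101..01 splat */
        memset(e87, 0x01, 16);
        e87[0] = 0x87;              /* $eighty7 = 0x870101..01 */
        vsldoi1(out, tmp, e87);     /* -> 0x010101....0187 */
        printf("out[14]=%02x out[15]=%02x\n", out[14], out[15]); /* 01 87 */
        return 0;
    }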
@@ -2502,69 +2515,77 @@ Load_xts_enc_key:
        ?vperm          v31,v31,$twk5,$keyperm
        lvx             v25,$x10,$key_          # pre-load round[2]
 
+       # Switch to use the following codes with 0x010101..87 to generate tweak.
+       #     eighty7 = 0x010101..87
+       # vsrab         tmp, tweak, seven       # next tweak value, right shift 7 bits
+       # vand          tmp, tmp, eighty7       # last byte with carry
+       # vaddubm       tweak, tweak, tweak     # left shift 1 bit (x2)
+       # xxlor         vsx, 0, 0
+       # vpermxor      tweak, tweak, tmp, vsx
+
         vperm          $in0,$inout,$inptail,$inpperm
         subi           $inp,$inp,31            # undo "caller"
        vxor            $twk0,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
        vand            $tmp,$tmp,$eighty7
         vxor           $out0,$in0,$twk0
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in1, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in1
 
         lvx_u          $in1,$x10,$inp
        vxor            $twk1,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in1,$in1,$in1,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out1,$in1,$twk1
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in2, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in2
 
         lvx_u          $in2,$x20,$inp
         andi.          $taillen,$len,15
        vxor            $twk2,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in2,$in2,$in2,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out2,$in2,$twk2
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in3, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in3
 
         lvx_u          $in3,$x30,$inp
         sub            $len,$len,$taillen
        vxor            $twk3,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in3,$in3,$in3,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out3,$in3,$twk3
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in4, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in4
 
         lvx_u          $in4,$x40,$inp
         subi           $len,$len,0x60
        vxor            $twk4,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in4,$in4,$in4,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out4,$in4,$twk4
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in5, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in5
 
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
        vxor            $twk5,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in5,$in5,$in5,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out5,$in5,$twk5
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in0, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in0
 
        vxor            v31,v31,$rndkey0
        mtctr           $rounds
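The pattern above repeats for each of the six tweaks: vsrab still builds the
per-byte carry mask and vaddubm still doubles each byte, but vand now uses
the reversed 0x010101..87 mask and the vsldoi/vxor pair collapses into a
single vpermxor, with the control vector copied from vs0 into whichever $inN
register is momentarily free. The two orderings agree because rotating the
masked value by one byte equals masking the rotated value with the rotated
mask. A hedged, self-contained C check of that equivalence (lane numbering
as in the earlier model; this compares the two instruction sequences against
each other, not against a particular endianness convention):

    #include <stdint.h>
    #include <string.h>

    static void vpermxor(uint8_t vt[16], const uint8_t va[16],
                         const uint8_t vb[16], const uint8_t vc[16])
    {
        for (int i = 0; i < 16; i++)
            vt[i] = va[vc[i] >> 4] ^ vb[vc[i] & 0x0f];
    }

    /* Old update: vsrab, vaddubm, vsldoi-by-15, vand 0x870101..01, vxor. */
    static void next_tweak_old(uint8_t tw[16])
    {
        uint8_t mask[16], rot[16];
        for (int i = 0; i < 16; i++) mask[i] = (tw[i] & 0x80) ? 0xff : 0x00;
        for (int i = 0; i < 16; i++) tw[i]   = (uint8_t)(tw[i] << 1);
        for (int i = 0; i < 16; i++) rot[i]  = mask[(i + 15) % 16];
        for (int i = 0; i < 16; i++) tw[i]  ^= rot[i] & (i == 0 ? 0x87 : 0x01);
    }

    /* New update: vsrab, vaddubm, vand 0x010101..87, one vpermxor. */
    static void next_tweak_new(uint8_t tw[16], const uint8_t ctrl[16])
    {
        uint8_t mask[16], r[16];
        for (int i = 0; i < 16; i++) mask[i]  = (tw[i] & 0x80) ? 0xff : 0x00;
        for (int i = 0; i < 16; i++) tw[i]    = (uint8_t)(tw[i] << 1);
        for (int i = 0; i < 16; i++) mask[i] &= (i == 15 ? 0x87 : 0x01);
        vpermxor(r, tw, mask, ctrl);
        memcpy(tw, r, 16);
    }

    int main(void)
    {
        const uint8_t ctrl[16] = { 0x0f,0x10,0x21,0x32,0x43,0x54,0x65,0x76,
                                   0x87,0x98,0xa9,0xba,0xcb,0xdc,0xed,0xfe };
        uint8_t a[16], b[16];
        for (int i = 0; i < 16; i++) a[i] = b[i] = (uint8_t)(i * 31 + 7);
        next_tweak_old(a);
        next_tweak_new(b, ctrl);
        return memcmp(a, b, 16) ? 1 : 0;   /* exit 0: both updates agree */
    }

In the round loop below, `xxlor 32+$eighty7, 1, 1` re-materializes the
reversed mask from vs1 after the cipher rounds, presumably because that
vector register is reused while the rounds run.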
@@ -2590,6 +2611,8 @@ Loop_xts_enc6x:
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_enc6x
 
+       xxlor           32+$eighty7, 1, 1               # 0x010101..87
+
        subic           $len,$len,96            # $len-=96
         vxor           $in0,$twk0,v31          # xor with last round key
        vcipher         $out0,$out0,v24
@@ -2599,7 +2622,6 @@ Loop_xts_enc6x:
         vaddubm        $tweak,$tweak,$tweak
        vcipher         $out2,$out2,v24
        vcipher         $out3,$out3,v24
-        vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out4,$out4,v24
        vcipher         $out5,$out5,v24
 
@@ -2607,7 +2629,8 @@ Loop_xts_enc6x:
         vand           $tmp,$tmp,$eighty7
        vcipher         $out0,$out0,v25
        vcipher         $out1,$out1,v25
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in1, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in1
        vcipher         $out2,$out2,v25
        vcipher         $out3,$out3,v25
         vxor           $in1,$twk1,v31
@@ -2618,13 +2641,13 @@ Loop_xts_enc6x:
 
        and             r0,r0,$len
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out0,$out0,v26
        vcipher         $out1,$out1,v26
         vand           $tmp,$tmp,$eighty7
        vcipher         $out2,$out2,v26
        vcipher         $out3,$out3,v26
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in2, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in2
        vcipher         $out4,$out4,v26
        vcipher         $out5,$out5,v26
 
@@ -2638,7 +2661,6 @@ Loop_xts_enc6x:
         vaddubm        $tweak,$tweak,$tweak
        vcipher         $out0,$out0,v27
        vcipher         $out1,$out1,v27
-        vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out2,$out2,v27
        vcipher         $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
@@ -2646,7 +2668,8 @@ Loop_xts_enc6x:
        vcipher         $out5,$out5,v27
 
        addi            $key_,$sp,$FRAME+15     # rewind $key_
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in3, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in3
        vcipher         $out0,$out0,v28
        vcipher         $out1,$out1,v28
         vxor           $in3,$twk3,v31
@@ -2655,7 +2678,6 @@ Loop_xts_enc6x:
        vcipher         $out2,$out2,v28
        vcipher         $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vcipher         $out4,$out4,v28
        vcipher         $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
@@ -2663,7 +2685,8 @@ Loop_xts_enc6x:
 
        vcipher         $out0,$out0,v29
        vcipher         $out1,$out1,v29
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in4, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in4
        vcipher         $out2,$out2,v29
        vcipher         $out3,$out3,v29
         vxor           $in4,$twk4,v31
@@ -2673,14 +2696,14 @@ Loop_xts_enc6x:
        vcipher         $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
 
        vcipher         $out0,$out0,v30
        vcipher         $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vcipher         $out2,$out2,v30
        vcipher         $out3,$out3,v30
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in5, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in5
        vcipher         $out4,$out4,v30
        vcipher         $out5,$out5,v30
         vxor           $in5,$twk5,v31
@@ -2690,7 +2713,6 @@ Loop_xts_enc6x:
        vcipherlast     $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vcipherlast     $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vcipherlast     $out2,$out2,$in2
@@ -2703,7 +2725,10 @@ Loop_xts_enc6x:
        vcipherlast     $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
-        vxor           $tweak,$tweak,$tmp
+        xxlor          10, 32+$in0, 32+$in0
+        xxlor          32+$in0, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in0
+        xxlor          32+$in0, 10, 10
        vcipherlast     $tmp,$out5,$in5         # last block might be needed
                                                # in stealing mode
         le?vperm       $in3,$in3,$in3,$leperm
@@ -2736,6 +2761,8 @@ Loop_xts_enc6x:
        mtctr           $rounds
        beq             Loop_xts_enc6x          # did $len-=96 borrow?
 
+       xxlor           32+$eighty7, 2, 2               # 0x870101..01
+
        addic.          $len,$len,0x60
        beq             Lxts_enc6x_zero
        cmpwi           $len,0x20
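When the main loop exits, `xxlor 32+$eighty7, 2, 2` restores the original
0x870101..01 layout from vs2, presumably because the tail paths
(Lxts_enc6x_zero and the short-length cases, not shown in this diff) still
generate tweaks with the unmodified vsldoi/vxor sequence. The decrypt
routine below applies the identical transformation.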
@@ -3112,6 +3139,18 @@ _aesp8_xts_decrypt6x:
        li              $x70,0x70
        mtspr           256,r0
 
+       # Reverse eighty7 to 0x010101..87
+       xxlor           2, 32+$eighty7, 32+$eighty7
+       vsldoi          $eighty7,$tmp,$eighty7,1        # 0x010101..87
+       xxlor           1, 32+$eighty7, 32+$eighty7
+
+       # Load XOR contents. 0x0f102132435465768798a9bacbdcedfe
+       mr              $x70, r6
+       bl              Lconsts
+       lxvw4x          0, $x40, r6             # load XOR contents
+       mr              r6, $x70
+       li              $x70,0x70
+
        subi            $rounds,$rounds,3       # -4 in total
 
        lvx             $rndkey0,$x00,$key1     # load key schedule
@@ -3159,64 +3198,64 @@ Load_xts_dec_key:
        vxor            $twk0,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
        vand            $tmp,$tmp,$eighty7
         vxor           $out0,$in0,$twk0
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in1, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in1
 
         lvx_u          $in1,$x10,$inp
        vxor            $twk1,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in1,$in1,$in1,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out1,$in1,$twk1
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in2, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in2
 
         lvx_u          $in2,$x20,$inp
         andi.          $taillen,$len,15
        vxor            $twk2,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in2,$in2,$in2,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out2,$in2,$twk2
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in3, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in3
 
         lvx_u          $in3,$x30,$inp
         sub            $len,$len,$taillen
        vxor            $twk3,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in3,$in3,$in3,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out3,$in3,$twk3
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in4, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in4
 
         lvx_u          $in4,$x40,$inp
         subi           $len,$len,0x60
        vxor            $twk4,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in4,$in4,$in4,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out4,$in4,$twk4
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in5, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in5
 
         lvx_u          $in5,$x50,$inp
         addi           $inp,$inp,0x60
        vxor            $twk5,$tweak,$rndkey0
        vsrab           $tmp,$tweak,$seven      # next tweak value
        vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
         le?vperm       $in5,$in5,$in5,$leperm
        vand            $tmp,$tmp,$eighty7
         vxor           $out5,$in5,$twk5
-       vxor            $tweak,$tweak,$tmp
+       xxlor           32+$in0, 0, 0
+       vpermxor        $tweak, $tweak, $tmp, $in0
 
        vxor            v31,v31,$rndkey0
        mtctr           $rounds
@@ -3242,6 +3281,8 @@ Loop_xts_dec6x:
        lvx             v25,$x10,$key_          # round[4]
        bdnz            Loop_xts_dec6x
 
+       xxlor           32+$eighty7, 1, 1
+
        subic           $len,$len,96            # $len-=96
         vxor           $in0,$twk0,v31          # xor with last round key
        vncipher        $out0,$out0,v24
@@ -3251,7 +3292,6 @@ Loop_xts_dec6x:
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out2,$out2,v24
        vncipher        $out3,$out3,v24
-        vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v24
        vncipher        $out5,$out5,v24
 
@@ -3259,7 +3299,8 @@ Loop_xts_dec6x:
         vand           $tmp,$tmp,$eighty7
        vncipher        $out0,$out0,v25
        vncipher        $out1,$out1,v25
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in1, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in1
        vncipher        $out2,$out2,v25
        vncipher        $out3,$out3,v25
         vxor           $in1,$twk1,v31
@@ -3270,13 +3311,13 @@ Loop_xts_dec6x:
 
        and             r0,r0,$len
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out0,$out0,v26
        vncipher        $out1,$out1,v26
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v26
        vncipher        $out3,$out3,v26
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in2, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in2
        vncipher        $out4,$out4,v26
        vncipher        $out5,$out5,v26
 
@@ -3290,7 +3331,6 @@ Loop_xts_dec6x:
         vaddubm        $tweak,$tweak,$tweak
        vncipher        $out0,$out0,v27
        vncipher        $out1,$out1,v27
-        vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out2,$out2,v27
        vncipher        $out3,$out3,v27
         vand           $tmp,$tmp,$eighty7
@@ -3298,7 +3338,8 @@ Loop_xts_dec6x:
        vncipher        $out5,$out5,v27
 
        addi            $key_,$sp,$FRAME+15     # rewind $key_
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in3, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in3
        vncipher        $out0,$out0,v28
        vncipher        $out1,$out1,v28
         vxor           $in3,$twk3,v31
@@ -3307,7 +3348,6 @@ Loop_xts_dec6x:
        vncipher        $out2,$out2,v28
        vncipher        $out3,$out3,v28
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vncipher        $out4,$out4,v28
        vncipher        $out5,$out5,v28
        lvx             v24,$x00,$key_          # re-pre-load round[1]
@@ -3315,7 +3355,8 @@ Loop_xts_dec6x:
 
        vncipher        $out0,$out0,v29
        vncipher        $out1,$out1,v29
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in4, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in4
        vncipher        $out2,$out2,v29
        vncipher        $out3,$out3,v29
         vxor           $in4,$twk4,v31
@@ -3325,14 +3366,14 @@ Loop_xts_dec6x:
        vncipher        $out5,$out5,v29
        lvx             v25,$x10,$key_          # re-pre-load round[2]
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
 
        vncipher        $out0,$out0,v30
        vncipher        $out1,$out1,v30
         vand           $tmp,$tmp,$eighty7
        vncipher        $out2,$out2,v30
        vncipher        $out3,$out3,v30
-        vxor           $tweak,$tweak,$tmp
+        xxlor          32+$in5, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in5
        vncipher        $out4,$out4,v30
        vncipher        $out5,$out5,v30
         vxor           $in5,$twk5,v31
@@ -3342,7 +3383,6 @@ Loop_xts_dec6x:
        vncipherlast    $out0,$out0,$in0
         lvx_u          $in0,$x00,$inp          # load next input block
         vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
        vncipherlast    $out1,$out1,$in1
         lvx_u          $in1,$x10,$inp
        vncipherlast    $out2,$out2,$in2
@@ -3355,7 +3395,10 @@ Loop_xts_dec6x:
        vncipherlast    $out4,$out4,$in4
         le?vperm       $in2,$in2,$in2,$leperm
         lvx_u          $in4,$x40,$inp
-        vxor           $tweak,$tweak,$tmp
+        xxlor          10, 32+$in0, 32+$in0
+        xxlor          32+$in0, 0, 0
+        vpermxor       $tweak, $tweak, $tmp, $in0
+        xxlor          32+$in0, 10, 10
        vncipherlast    $out5,$out5,$in5
         le?vperm       $in3,$in3,$in3,$leperm
         lvx_u          $in5,$x50,$inp
@@ -3386,6 +3429,8 @@ Loop_xts_dec6x:
        mtctr           $rounds
        beq             Loop_xts_dec6x          # did $len-=96 borrow?
 
+       xxlor           32+$eighty7, 2, 2
+
        addic.          $len,$len,0x60
        beq             Lxts_dec6x_zero
        cmpwi           $len,0x20