git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 19 Mar 2021 10:43:29 +0000 (11:43 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 19 Mar 2021 10:43:29 +0000 (11:43 +0100)
added patches:
crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
crypto-x86-regularize-glue-function-prototypes.patch
net-dsa-b53-support-setting-learning-on-port.patch
net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch

queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch [new file with mode: 0644]
queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch [new file with mode: 0644]
queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch [new file with mode: 0644]
queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch [new file with mode: 0644]
queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch b/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
new file mode 100644 (file)
index 0000000..ce0e47f
--- /dev/null
@@ -0,0 +1,208 @@
+From 032d049ea0f45b45c21f3f02b542aa18bc6b6428 Mon Sep 17 00:00:00 2001
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Fri, 27 Nov 2020 10:44:52 +0100
+Subject: crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 032d049ea0f45b45c21f3f02b542aa18bc6b6428 upstream.
+
+CMP $0,%reg can't set the overflow flag, so we can use the shorter TEST %reg,%reg
+instruction when only the zero and sign flags are checked (E,L,LE,G,GE conditions).
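
For illustration only (not part of the patch): a minimal sketch of the substitution, using a hypothetical register and branch label. Both sequences leave ZF and SF in the same state and keep OF clear, so the E/L/LE/G/GE conditionals behave identically, while the TEST form encodes one byte shorter.

    # before: CMP against an immediate zero (REX.W + 83 /7 ib, 4 bytes)
    cmp     $0, %r13
    je      .Ldone                # taken when %r13 == 0 (ZF set)

    # after: TEST of the register against itself (REX.W + 85 /r, 3 bytes);
    # ZF and SF are computed from the same value and OF is 0 either way,
    # so the branch condition is unchanged.
    test    %r13, %r13
    je      .Ldone
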
+
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ard Biesheuvel <ardb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/aesni-intel_asm.S        |   20 ++++++++++----------
+ arch/x86/crypto/aesni-intel_avx-x86_64.S |   20 ++++++++++----------
+ 2 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -319,7 +319,7 @@ _initial_blocks_\@:
+       # Main loop - Encrypt/Decrypt remaining blocks
+-      cmp     $0, %r13
++      test    %r13, %r13
+       je      _zero_cipher_left_\@
+       sub     $64, %r13
+       je      _four_cipher_left_\@
+@@ -438,7 +438,7 @@ _multiple_of_16_bytes_\@:
+       mov PBlockLen(%arg2), %r12
+-      cmp $0, %r12
++      test %r12, %r12
+       je _partial_done\@
+       GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+@@ -475,7 +475,7 @@ _T_8_\@:
+       add     $8, %r10
+       sub     $8, %r11
+       psrldq  $8, %xmm0
+-      cmp     $0, %r11
++      test    %r11, %r11
+       je      _return_T_done_\@
+ _T_4_\@:
+       movd    %xmm0, %eax
+@@ -483,7 +483,7 @@ _T_4_\@:
+       add     $4, %r10
+       sub     $4, %r11
+       psrldq  $4, %xmm0
+-      cmp     $0, %r11
++      test    %r11, %r11
+       je      _return_T_done_\@
+ _T_123_\@:
+       movd    %xmm0, %eax
+@@ -620,7 +620,7 @@ _get_AAD_blocks\@:
+       /* read the last <16B of AAD */
+ _get_AAD_rest\@:
+-      cmp        $0, %r11
++      test       %r11, %r11
+       je         _get_AAD_done\@
+       READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+@@ -641,7 +641,7 @@ _get_AAD_done\@:
+ .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+       AAD_HASH operation
+       mov     PBlockLen(%arg2), %r13
+-      cmp     $0, %r13
++      test    %r13, %r13
+       je      _partial_block_done_\@  # Leave Macro if no partial blocks
+       # Read in input data without over reading
+       cmp     $16, \PLAIN_CYPH_LEN
+@@ -693,7 +693,7 @@ _no_extra_mask_1_\@:
+       PSHUFB_XMM      %xmm2, %xmm3
+       pxor    %xmm3, \AAD_HASH
+-      cmp     $0, %r10
++      test    %r10, %r10
+       jl      _partial_incomplete_1_\@
+       # GHASH computation for the last <16 Byte block
+@@ -728,7 +728,7 @@ _no_extra_mask_2_\@:
+       PSHUFB_XMM %xmm2, %xmm9
+       pxor    %xmm9, \AAD_HASH
+-      cmp     $0, %r10
++      test    %r10, %r10
+       jl      _partial_incomplete_2_\@
+       # GHASH computation for the last <16 Byte block
+@@ -748,7 +748,7 @@ _encode_done_\@:
+       PSHUFB_XMM      %xmm2, %xmm9
+ .endif
+       # output encrypted Bytes
+-      cmp     $0, %r10
++      test    %r10, %r10
+       jl      _partial_fill_\@
+       mov     %r13, %r12
+       mov     $16, %r13
+@@ -2731,7 +2731,7 @@ ENDPROC(aesni_ctr_enc)
+  */
+ ENTRY(aesni_xts_crypt8)
+       FRAME_BEGIN
+-      cmpb $0, %cl
++      testb %cl, %cl
+       movl $0, %ecx
+       movl $240, %r10d
+       leaq _aesni_enc4, %r11
+--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+@@ -370,7 +370,7 @@ _initial_num_blocks_is_0\@:
+ _initial_blocks_encrypted\@:
+-        cmp     $0, %r13
++        test    %r13, %r13
+         je      _zero_cipher_left\@
+         sub     $128, %r13
+@@ -529,7 +529,7 @@ _multiple_of_16_bytes\@:
+         vmovdqu HashKey(arg2), %xmm13
+         mov PBlockLen(arg2), %r12
+-        cmp $0, %r12
++        test %r12, %r12
+         je _partial_done\@
+       #GHASH computation for the last <16 Byte block
+@@ -574,7 +574,7 @@ _T_8\@:
+         add     $8, %r10
+         sub     $8, %r11
+         vpsrldq $8, %xmm9, %xmm9
+-        cmp     $0, %r11
++        test    %r11, %r11
+         je     _return_T_done\@
+ _T_4\@:
+         vmovd   %xmm9, %eax
+@@ -582,7 +582,7 @@ _T_4\@:
+         add     $4, %r10
+         sub     $4, %r11
+         vpsrldq     $4, %xmm9, %xmm9
+-        cmp     $0, %r11
++        test    %r11, %r11
+         je     _return_T_done\@
+ _T_123\@:
+         vmovd     %xmm9, %eax
+@@ -626,7 +626,7 @@ _get_AAD_blocks\@:
+       cmp     $16, %r11
+       jge     _get_AAD_blocks\@
+       vmovdqu \T8, \T7
+-      cmp     $0, %r11
++      test    %r11, %r11
+       je      _get_AAD_done\@
+       vpxor   \T7, \T7, \T7
+@@ -645,7 +645,7 @@ _get_AAD_rest8\@:
+       vpxor   \T1, \T7, \T7
+       jmp     _get_AAD_rest8\@
+ _get_AAD_rest4\@:
+-      cmp     $0, %r11
++      test    %r11, %r11
+       jle      _get_AAD_rest0\@
+       mov     (%r10), %eax
+       movq    %rax, \T1
+@@ -750,7 +750,7 @@ _done_read_partial_block_\@:
+ .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+         AAD_HASH ENC_DEC
+         mov   PBlockLen(arg2), %r13
+-        cmp   $0, %r13
++        test  %r13, %r13
+         je    _partial_block_done_\@  # Leave Macro if no partial blocks
+         # Read in input data without over reading
+         cmp   $16, \PLAIN_CYPH_LEN
+@@ -802,7 +802,7 @@ _no_extra_mask_1_\@:
+         vpshufb       %xmm2, %xmm3, %xmm3
+         vpxor %xmm3, \AAD_HASH, \AAD_HASH
+-        cmp   $0, %r10
++        test  %r10, %r10
+         jl    _partial_incomplete_1_\@
+         # GHASH computation for the last <16 Byte block
+@@ -837,7 +837,7 @@ _no_extra_mask_2_\@:
+         vpshufb %xmm2, %xmm9, %xmm9
+         vpxor %xmm9, \AAD_HASH, \AAD_HASH
+-        cmp   $0, %r10
++        test  %r10, %r10
+         jl    _partial_incomplete_2_\@
+         # GHASH computation for the last <16 Byte block
+@@ -857,7 +857,7 @@ _encode_done_\@:
+         vpshufb       %xmm2, %xmm9, %xmm9
+ .endif
+         # output encrypted Bytes
+-        cmp   $0, %r10
++        test  %r10, %r10
+         jl    _partial_fill_\@
+         mov   %r13, %r12
+         mov   $16, %r13
diff --git a/queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch b/queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
new file mode 100644 (file)
index 0000000..81a659b
--- /dev/null
@@ -0,0 +1,274 @@
+From 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 31 Dec 2020 17:41:54 +0100
+Subject: crypto: x86/aes-ni-xts - use direct calls to and 4-way stride
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 upstream.
+
+The XTS asm helper arrangement is a bit odd: the 8-way stride helper
+consists of back-to-back calls to the 4-way core transforms, which
+are called indirectly, based on a boolean that indicates whether we
+are performing encryption or decryption.
+
+Given how costly indirect calls are on x86, let's switch to direct
+calls, and given how the 8-way stride doesn't really add anything
+substantial, use a 4-way stride instead, and make the asm core
+routine deal with any multiple of 4 blocks. Since 512 byte sectors
+or 4 KB blocks are the typical quantities XTS operates on, increase
+the stride exported to the glue helper to 512 bytes as well.
+
+As a result, the number of indirect calls is reduced from 3 per 64 bytes
+of in/output to 1 per 512 bytes of in/output, which produces a 65% speedup
+when operating on 1 KB blocks (measured on an Intel(R) Core(TM) i7-8650U CPU).
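
For illustration only (not part of the patch): a minimal C model of the new call shape, with made-up "toy_" names and the XTS tweak computation omitted. The glue layer now hands the helper a whole 512-byte stride (32 * AES_BLOCK_SIZE) through a single call, and the helper walks it four blocks (64 bytes) per iteration via direct calls to the 4-way core transform, mirroring the .Lxts_enc_loop4 / "sub $64, LEN; ja" structure in the diff below.

typedef unsigned char u8;

enum { AES_BLOCK_SIZE = 16 };

/* Stand-in for the 4-way core transform (_aesni_enc4 in the asm). */
static void toy_enc4(const void *key, u8 *dst, const u8 *src)
{
	(void)key;				/* real code uses the AES key schedule */
	for (unsigned int i = 0; i < 4 * AES_BLOCK_SIZE; i++)
		dst[i] = src[i];		/* placeholder for the 4-way encrypt */
}

/* Shape of the new helper: len is any multiple of 4 blocks; the glue layer
 * passes 512 bytes at a time, so it makes only one call per 512 bytes. */
static void toy_xts_crypt(const void *key, u8 *dst, const u8 *src,
			  unsigned int len)
{
	while (len) {				/* mirrors .Lxts_enc_loop4 */
		toy_enc4(key, dst, src);	/* direct call, no retpoline */
		src += 4 * AES_BLOCK_SIZE;
		dst += 4 * AES_BLOCK_SIZE;
		len -= 4 * AES_BLOCK_SIZE;	/* sub $64, LEN ; ja ... */
	}
}

int main(void)
{
	u8 in[512] = { 0 }, out[512];		/* one 512-byte stride = 32 blocks */

	toy_xts_crypt((void *)0, out, in, sizeof(in));
	return 0;
}
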
+
+Fixes: 9697fa39efd3f ("x86/retpoline/crypto: Convert crypto assembler indirect jumps")
+Tested-by: Eric Biggers <ebiggers@google.com> # x86_64
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+[ardb: rebase onto stable/linux-5.4.y]
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/crypto/aesni-intel_asm.S  |  115 ++++++++++++++++++++++---------------
+ arch/x86/crypto/aesni-intel_glue.c |   25 ++++----
+ 2 files changed, 84 insertions(+), 56 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -2726,25 +2726,18 @@ ENDPROC(aesni_ctr_enc)
+       pxor CTR, IV;
+ /*
+- * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
+- *                     const u8 *src, bool enc, le128 *iv)
++ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *                      const u8 *src, unsigned int len, le128 *iv)
+  */
+-ENTRY(aesni_xts_crypt8)
++ENTRY(aesni_xts_encrypt)
+       FRAME_BEGIN
+-      testb %cl, %cl
+-      movl $0, %ecx
+-      movl $240, %r10d
+-      leaq _aesni_enc4, %r11
+-      leaq _aesni_dec4, %rax
+-      cmovel %r10d, %ecx
+-      cmoveq %rax, %r11
+       movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+       movups (IVP), IV
+       mov 480(KEYP), KLEN
+-      addq %rcx, KEYP
++.Lxts_enc_loop4:
+       movdqa IV, STATE1
+       movdqu 0x00(INP), INC
+       pxor INC, STATE1
+@@ -2768,71 +2761,103 @@ ENTRY(aesni_xts_crypt8)
+       pxor INC, STATE4
+       movdqu IV, 0x30(OUTP)
+-      CALL_NOSPEC %r11
++      call _aesni_enc4
+       movdqu 0x00(OUTP), INC
+       pxor INC, STATE1
+       movdqu STATE1, 0x00(OUTP)
+-      _aesni_gf128mul_x_ble()
+-      movdqa IV, STATE1
+-      movdqu 0x40(INP), INC
+-      pxor INC, STATE1
+-      movdqu IV, 0x40(OUTP)
+-
+       movdqu 0x10(OUTP), INC
+       pxor INC, STATE2
+       movdqu STATE2, 0x10(OUTP)
+-      _aesni_gf128mul_x_ble()
+-      movdqa IV, STATE2
+-      movdqu 0x50(INP), INC
+-      pxor INC, STATE2
+-      movdqu IV, 0x50(OUTP)
+-
+       movdqu 0x20(OUTP), INC
+       pxor INC, STATE3
+       movdqu STATE3, 0x20(OUTP)
+-      _aesni_gf128mul_x_ble()
+-      movdqa IV, STATE3
+-      movdqu 0x60(INP), INC
+-      pxor INC, STATE3
+-      movdqu IV, 0x60(OUTP)
+-
+       movdqu 0x30(OUTP), INC
+       pxor INC, STATE4
+       movdqu STATE4, 0x30(OUTP)
+       _aesni_gf128mul_x_ble()
+-      movdqa IV, STATE4
+-      movdqu 0x70(INP), INC
+-      pxor INC, STATE4
+-      movdqu IV, 0x70(OUTP)
+-      _aesni_gf128mul_x_ble()
++      add $64, INP
++      add $64, OUTP
++      sub $64, LEN
++      ja .Lxts_enc_loop4
++
+       movups IV, (IVP)
+-      CALL_NOSPEC %r11
++      FRAME_END
++      ret
++ENDPROC(aesni_xts_encrypt)
++
++/*
++ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *                      const u8 *src, unsigned int len, le128 *iv)
++ */
++ENTRY(aesni_xts_decrypt)
++      FRAME_BEGIN
++
++      movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
++      movups (IVP), IV
++
++      mov 480(KEYP), KLEN
++      add $240, KEYP
++
++.Lxts_dec_loop4:
++      movdqa IV, STATE1
++      movdqu 0x00(INP), INC
++      pxor INC, STATE1
++      movdqu IV, 0x00(OUTP)
++
++      _aesni_gf128mul_x_ble()
++      movdqa IV, STATE2
++      movdqu 0x10(INP), INC
++      pxor INC, STATE2
++      movdqu IV, 0x10(OUTP)
++
++      _aesni_gf128mul_x_ble()
++      movdqa IV, STATE3
++      movdqu 0x20(INP), INC
++      pxor INC, STATE3
++      movdqu IV, 0x20(OUTP)
++
++      _aesni_gf128mul_x_ble()
++      movdqa IV, STATE4
++      movdqu 0x30(INP), INC
++      pxor INC, STATE4
++      movdqu IV, 0x30(OUTP)
++
++      call _aesni_dec4
+-      movdqu 0x40(OUTP), INC
++      movdqu 0x00(OUTP), INC
+       pxor INC, STATE1
+-      movdqu STATE1, 0x40(OUTP)
++      movdqu STATE1, 0x00(OUTP)
+-      movdqu 0x50(OUTP), INC
++      movdqu 0x10(OUTP), INC
+       pxor INC, STATE2
+-      movdqu STATE2, 0x50(OUTP)
++      movdqu STATE2, 0x10(OUTP)
+-      movdqu 0x60(OUTP), INC
++      movdqu 0x20(OUTP), INC
+       pxor INC, STATE3
+-      movdqu STATE3, 0x60(OUTP)
++      movdqu STATE3, 0x20(OUTP)
+-      movdqu 0x70(OUTP), INC
++      movdqu 0x30(OUTP), INC
+       pxor INC, STATE4
+-      movdqu STATE4, 0x70(OUTP)
++      movdqu STATE4, 0x30(OUTP)
++
++      _aesni_gf128mul_x_ble()
++
++      add $64, INP
++      add $64, OUTP
++      sub $64, LEN
++      ja .Lxts_dec_loop4
++
++      movups IV, (IVP)
+       FRAME_END
+       ret
+-ENDPROC(aesni_xts_crypt8)
++ENDPROC(aesni_xts_decrypt)
+ #endif
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -97,6 +97,12 @@ asmlinkage void aesni_cbc_dec(struct cry
+ #define AVX_GEN2_OPTSIZE 640
+ #define AVX_GEN4_OPTSIZE 4096
++asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
++                                const u8 *in, unsigned int len, u8 *iv);
++
++asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
++                                const u8 *in, unsigned int len, u8 *iv);
++
+ #ifdef CONFIG_X86_64
+ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
+@@ -104,9 +110,6 @@ static void (*aesni_ctr_enc_tfm)(struct
+ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+                             const u8 *in, unsigned int len, u8 *iv);
+-asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
+-                               const u8 *in, bool enc, le128 *iv);
+-
+ /* asmlinkage void aesni_gcm_enc()
+  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+  * struct gcm_context_data.  May be uninitialized.
+@@ -558,14 +561,14 @@ static void aesni_xts_dec(const void *ct
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
+ }
+-static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
++static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      aesni_xts_crypt8(ctx, dst, src, true, iv);
++      aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
+ }
+-static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
++static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      aesni_xts_crypt8(ctx, dst, src, false, iv);
++      aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
+ }
+ static const struct common_glue_ctx aesni_enc_xts = {
+@@ -573,8 +576,8 @@ static const struct common_glue_ctx aesn
+       .fpu_blocks_limit = 1,
+       .funcs = { {
+-              .num_blocks = 8,
+-              .fn_u = { .xts = aesni_xts_enc8 }
++              .num_blocks = 32,
++              .fn_u = { .xts = aesni_xts_enc32 }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .xts = aesni_xts_enc }
+@@ -586,8 +589,8 @@ static const struct common_glue_ctx aesn
+       .fpu_blocks_limit = 1,
+       .funcs = { {
+-              .num_blocks = 8,
+-              .fn_u = { .xts = aesni_xts_dec8 }
++              .num_blocks = 32,
++              .fn_u = { .xts = aesni_xts_dec32 }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .xts = aesni_xts_dec }
diff --git a/queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch b/queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch
new file mode 100644 (file)
index 0000000..911381d
--- /dev/null
@@ -0,0 +1,2056 @@
+From 9c1e8836edbbaf3656bc07437b59c04be034ac4e Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 26 Nov 2019 22:08:02 -0800
+Subject: crypto: x86 - Regularize glue function prototypes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 9c1e8836edbbaf3656bc07437b59c04be034ac4e upstream.
+
+The crypto glue performed function prototype casting via macros to make
+indirect calls to assembly routines. Instead of performing casts at the
+call sites (which trips Control Flow Integrity prototype checking), switch
+each prototype to a common standard set of arguments which allows the
+removal of the existing macros. In order to keep pointer math unchanged,
+internal casting between u128 pointers and u8 pointers is added.
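
For illustration only (not part of the patch): a standalone sketch, with made-up "toy_" names, of the pattern being removed and the one replacing it. Under CFI, an indirect call is only permitted through a function pointer whose type matches the callee's actual prototype, so the cast-at-the-call-site style (GLUE_FUNC_CAST, removed by this patch) is rejected, while the regularized prototype with an internal context cast is not.

#include <stdint.h>

typedef uint8_t u8;

struct toy_ctx { uint32_t key[4]; };	/* stands in for struct camellia_ctx etc. */

/* Common prototype stored in the glue function tables. */
typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);

/* Old shape: the routine takes its own context type, so the glue code had
 * to cast the *function pointer* to fit the table:
 *	.fn_u = { .ecb = GLUE_FUNC_CAST(toy_enc_old) }
 * The pointer's type then disagrees with the callee's real prototype,
 * which is what the CFI check on the indirect call rejects. */
void toy_enc_old(struct toy_ctx *ctx, u8 *dst, const u8 *src)
{
	for (int i = 0; i < 16; i++)
		dst[i] = src[i] ^ (u8)ctx->key[i & 3];
}

/* New shape: the routine itself uses the common arguments and casts the
 * opaque context pointer internally, keeping the pointer math unchanged. */
static void toy_enc_new(const void *ctx, u8 *dst, const u8 *src)
{
	const struct toy_ctx *c = ctx;

	for (int i = 0; i < 16; i++)
		dst[i] = src[i] ^ (u8)c->key[i & 3];
}

int main(void)
{
	struct toy_ctx c = { { 1, 2, 3, 4 } };
	u8 in[16] = { 0 }, out[16];
	common_glue_func_t fn = toy_enc_new;	/* no cast needed */

	fn(&c, out, in);			/* indirect call, types match */
	return 0;
}
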
+
+Co-developed-by: João Moreira <joao.moreira@intel.com>
+Signed-off-by: João Moreira <joao.moreira@intel.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Eric Biggers <ebiggers@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ard Biesheuvel <ardb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/aesni-intel_asm.S          |    8 +-
+ arch/x86/crypto/aesni-intel_glue.c         |   45 ++++++----------
+ arch/x86/crypto/camellia_aesni_avx2_glue.c |   78 +++++++++++++---------------
+ arch/x86/crypto/camellia_aesni_avx_glue.c  |   72 +++++++++++---------------
+ arch/x86/crypto/camellia_glue.c            |   45 ++++++++--------
+ arch/x86/crypto/cast6_avx_glue.c           |   68 +++++++++++-------------
+ arch/x86/crypto/glue_helper.c              |   23 +++++---
+ arch/x86/crypto/serpent_avx2_glue.c        |   65 +++++++++++------------
+ arch/x86/crypto/serpent_avx_glue.c         |   63 +++++++++++------------
+ arch/x86/crypto/serpent_sse2_glue.c        |   30 ++++++-----
+ arch/x86/crypto/twofish_avx_glue.c         |   79 ++++++++++++-----------------
+ arch/x86/crypto/twofish_glue_3way.c        |   37 +++++++------
+ arch/x86/include/asm/crypto/camellia.h     |   61 ++++++++++------------
+ arch/x86/include/asm/crypto/glue_helper.h  |   18 ++----
+ arch/x86/include/asm/crypto/serpent-avx.h  |   20 +++----
+ arch/x86/include/asm/crypto/serpent-sse2.h |   28 ++++------
+ arch/x86/include/asm/crypto/twofish.h      |   19 ++----
+ crypto/cast6_generic.c                     |   18 +++---
+ crypto/serpent_generic.c                   |    6 +-
+ include/crypto/cast6.h                     |    4 -
+ include/crypto/serpent.h                   |    4 -
+ include/crypto/xts.h                       |    2 
+ 22 files changed, 377 insertions(+), 416 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -1946,7 +1946,7 @@ ENTRY(aesni_set_key)
+ ENDPROC(aesni_set_key)
+ /*
+- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
++ * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
+  */
+ ENTRY(aesni_enc)
+       FRAME_BEGIN
+@@ -2137,7 +2137,7 @@ _aesni_enc4:
+ ENDPROC(_aesni_enc4)
+ /*
+- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
++ * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
+  */
+ ENTRY(aesni_dec)
+       FRAME_BEGIN
+@@ -2726,8 +2726,8 @@ ENDPROC(aesni_ctr_enc)
+       pxor CTR, IV;
+ /*
+- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+- *                     bool enc, u8 *iv)
++ * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *                     const u8 *src, bool enc, le128 *iv)
+  */
+ ENTRY(aesni_xts_crypt8)
+       FRAME_BEGIN
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -83,10 +83,8 @@ struct gcm_context_data {
+ asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+                            unsigned int key_len);
+-asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
+-                        const u8 *in);
+-asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
+-                        const u8 *in);
++asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
++asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
+ asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
+                             const u8 *in, unsigned int len);
+ asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
+@@ -106,8 +104,8 @@ static void (*aesni_ctr_enc_tfm)(struct
+ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+                             const u8 *in, unsigned int len, u8 *iv);
+-asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
+-                               const u8 *in, bool enc, u8 *iv);
++asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
++                               const u8 *in, bool enc, le128 *iv);
+ /* asmlinkage void aesni_gcm_enc()
+  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+@@ -550,29 +548,24 @@ static int xts_aesni_setkey(struct crypt
+ }
+-static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
++static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      aesni_enc(ctx, out, in);
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
+ }
+-static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
+ }
+-static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
++      aesni_xts_crypt8(ctx, dst, src, true, iv);
+ }
+-static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
+-}
+-
+-static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+-{
+-      aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
++      aesni_xts_crypt8(ctx, dst, src, false, iv);
+ }
+ static const struct common_glue_ctx aesni_enc_xts = {
+@@ -581,10 +574,10 @@ static const struct common_glue_ctx aesn
+       .funcs = { {
+               .num_blocks = 8,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
++              .fn_u = { .xts = aesni_xts_enc8 }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
++              .fn_u = { .xts = aesni_xts_enc }
+       } }
+ };
+@@ -594,10 +587,10 @@ static const struct common_glue_ctx aesn
+       .funcs = { {
+               .num_blocks = 8,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
++              .fn_u = { .xts = aesni_xts_dec8 }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
++              .fn_u = { .xts = aesni_xts_dec }
+       } }
+ };
+@@ -606,8 +599,7 @@ static int xts_encrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&aesni_enc_xts, req,
+-                                 XTS_TWEAK_CAST(aesni_xts_tweak),
++      return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
+                                  aes_ctx(ctx->raw_tweak_ctx),
+                                  aes_ctx(ctx->raw_crypt_ctx),
+                                  false);
+@@ -618,8 +610,7 @@ static int xts_decrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&aesni_dec_xts, req,
+-                                 XTS_TWEAK_CAST(aesni_xts_tweak),
++      return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
+                                  aes_ctx(ctx->raw_tweak_ctx),
+                                  aes_ctx(ctx->raw_crypt_ctx),
+                                  true);
+--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
++++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
+@@ -19,20 +19,17 @@
+ #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
+ /* 32-way AVX2/AES-NI parallel cipher functions */
+-asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-
+-asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                 const u8 *src, le128 *iv);
+-
+-asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
+-asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
++asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
++                                 le128 *iv);
++
++asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
++asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
+ static const struct common_glue_ctx camellia_enc = {
+       .num_funcs = 4,
+@@ -40,16 +37,16 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
++              .fn_u = { .ecb = camellia_ecb_enc_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
++              .fn_u = { .ecb = camellia_ecb_enc_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++              .fn_u = { .ecb = camellia_enc_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++              .fn_u = { .ecb = camellia_enc_blk }
+       } }
+ };
+@@ -59,16 +56,16 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
++              .fn_u = { .ctr = camellia_ctr_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
++              .fn_u = { .ctr = camellia_ctr_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++              .fn_u = { .ctr = camellia_crypt_ctr_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++              .fn_u = { .ctr = camellia_crypt_ctr }
+       } }
+ };
+@@ -78,13 +75,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
++              .fn_u = { .xts = camellia_xts_enc_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
++              .fn_u = { .xts = camellia_xts_enc_16way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
++              .fn_u = { .xts = camellia_xts_enc }
+       } }
+ };
+@@ -94,16 +91,16 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
++              .fn_u = { .ecb = camellia_ecb_dec_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
++              .fn_u = { .ecb = camellia_ecb_dec_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++              .fn_u = { .ecb = camellia_dec_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .ecb = camellia_dec_blk }
+       } }
+ };
+@@ -113,16 +110,16 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
++              .fn_u = { .cbc = camellia_cbc_dec_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
++              .fn_u = { .cbc = camellia_cbc_dec_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++              .fn_u = { .cbc = camellia_decrypt_cbc_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .cbc = camellia_dec_blk }
+       } }
+ };
+@@ -132,13 +129,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
++              .fn_u = { .xts = camellia_xts_dec_32way }
+       }, {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
++              .fn_u = { .xts = camellia_xts_dec_16way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
++              .fn_u = { .xts = camellia_xts_dec }
+       } }
+ };
+@@ -161,8 +158,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -180,8 +176,7 @@ static int xts_encrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&camellia_enc_xts, req,
+-                                 XTS_TWEAK_CAST(camellia_enc_blk),
++      return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+@@ -190,8 +185,7 @@ static int xts_decrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&camellia_dec_xts, req,
+-                                 XTS_TWEAK_CAST(camellia_enc_blk),
++      return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
++++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
+@@ -18,41 +18,36 @@
+ #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
+ /* 16-way parallel cipher functions (avx/aes-ni) */
+-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
++asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
+-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
++asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
+-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
++asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
+-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                 const u8 *src, le128 *iv);
++asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
++                                 le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_ctr_16way);
+-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
++asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
+-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
++asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
+-void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(camellia_enc_blk));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
+ }
+ EXPORT_SYMBOL_GPL(camellia_xts_enc);
+-void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(camellia_dec_blk));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
+ }
+ EXPORT_SYMBOL_GPL(camellia_xts_dec);
+@@ -62,13 +57,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
++              .fn_u = { .ecb = camellia_ecb_enc_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++              .fn_u = { .ecb = camellia_enc_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++              .fn_u = { .ecb = camellia_enc_blk }
+       } }
+ };
+@@ -78,13 +73,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
++              .fn_u = { .ctr = camellia_ctr_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++              .fn_u = { .ctr = camellia_crypt_ctr_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++              .fn_u = { .ctr = camellia_crypt_ctr }
+       } }
+ };
+@@ -94,10 +89,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
++              .fn_u = { .xts = camellia_xts_enc_16way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
++              .fn_u = { .xts = camellia_xts_enc }
+       } }
+ };
+@@ -107,13 +102,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
++              .fn_u = { .ecb = camellia_ecb_dec_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++              .fn_u = { .ecb = camellia_dec_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .ecb = camellia_dec_blk }
+       } }
+ };
+@@ -123,13 +118,13 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
++              .fn_u = { .cbc = camellia_cbc_dec_16way }
+       }, {
+               .num_blocks = 2,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++              .fn_u = { .cbc = camellia_decrypt_cbc_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .cbc = camellia_dec_blk }
+       } }
+ };
+@@ -139,10 +134,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
++              .fn_u = { .xts = camellia_xts_dec_16way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
++              .fn_u = { .xts = camellia_xts_dec }
+       } }
+ };
+@@ -165,8 +160,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -206,8 +200,7 @@ static int xts_encrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&camellia_enc_xts, req,
+-                                 XTS_TWEAK_CAST(camellia_enc_blk),
++      return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+@@ -216,8 +209,7 @@ static int xts_decrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&camellia_dec_xts, req,
+-                                 XTS_TWEAK_CAST(camellia_enc_blk),
++      return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+--- a/arch/x86/crypto/camellia_glue.c
++++ b/arch/x86/crypto/camellia_glue.c
+@@ -18,19 +18,17 @@
+ #include <asm/crypto/glue_helper.h>
+ /* regular block cipher functions */
+-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-                                 const u8 *src, bool xor);
++asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
++                                 bool xor);
+ EXPORT_SYMBOL_GPL(__camellia_enc_blk);
+-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+-                               const u8 *src);
++asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_dec_blk);
+ /* 2-way parallel cipher functions */
+-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-                                      const u8 *src, bool xor);
++asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
++                                      bool xor);
+ EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
+-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-                                    const u8 *src);
++asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
+ static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+@@ -1267,8 +1265,10 @@ static int camellia_setkey_skcipher(stru
+       return camellia_setkey(&tfm->base, key, key_len);
+ }
+-void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
++void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s)
+ {
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       u128 iv = *src;
+       camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+@@ -1277,9 +1277,11 @@ void camellia_decrypt_cbc_2way(void *ctx
+ }
+ EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
+-void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblk;
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       if (dst != src)
+               *dst = *src;
+@@ -1291,9 +1293,11 @@ void camellia_crypt_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
+-void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblks[2];
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       if (dst != src) {
+               dst[0] = src[0];
+@@ -1315,10 +1319,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++              .fn_u = { .ecb = camellia_enc_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++              .fn_u = { .ecb = camellia_enc_blk }
+       } }
+ };
+@@ -1328,10 +1332,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = 2,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++              .fn_u = { .ctr = camellia_crypt_ctr_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++              .fn_u = { .ctr = camellia_crypt_ctr }
+       } }
+ };
+@@ -1341,10 +1345,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = 2,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++              .fn_u = { .ecb = camellia_dec_blk_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .ecb = camellia_dec_blk }
+       } }
+ };
+@@ -1354,10 +1358,10 @@ static const struct common_glue_ctx came
+       .funcs = { {
+               .num_blocks = 2,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++              .fn_u = { .cbc = camellia_decrypt_cbc_2way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++              .fn_u = { .cbc = camellia_dec_blk }
+       } }
+ };
+@@ -1373,8 +1377,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+--- a/arch/x86/crypto/cast6_avx_glue.c
++++ b/arch/x86/crypto/cast6_avx_glue.c
+@@ -20,20 +20,17 @@
+ #define CAST6_PARALLEL_BLOCKS 8
+-asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
+-                                 const u8 *src);
+-asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-                                 const u8 *src);
+-
+-asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-                                 const u8 *src);
+-asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
++asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
+                              le128 *iv);
+-asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
+-                                 const u8 *src, le128 *iv);
+-asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-                                 const u8 *src, le128 *iv);
++asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
++                                 le128 *iv);
++asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
++                                 le128 *iv);
+ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+                                const u8 *key, unsigned int keylen)
+@@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct
+       return cast6_setkey(&tfm->base, key, keylen);
+ }
+-static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(__cast6_encrypt));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt);
+ }
+-static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(__cast6_decrypt));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt);
+ }
+-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblk;
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
+@@ -70,10 +67,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
++              .fn_u = { .ecb = cast6_ecb_enc_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
++              .fn_u = { .ecb = __cast6_encrypt }
+       } }
+ };
+@@ -83,10 +80,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
++              .fn_u = { .ctr = cast6_ctr_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
++              .fn_u = { .ctr = cast6_crypt_ctr }
+       } }
+ };
+@@ -96,10 +93,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
++              .fn_u = { .xts = cast6_xts_enc_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
++              .fn_u = { .xts = cast6_xts_enc }
+       } }
+ };
+@@ -109,10 +106,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
++              .fn_u = { .ecb = cast6_ecb_dec_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
++              .fn_u = { .ecb = __cast6_decrypt }
+       } }
+ };
+@@ -122,10 +119,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
++              .fn_u = { .cbc = cast6_cbc_dec_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
++              .fn_u = { .cbc = __cast6_decrypt }
+       } }
+ };
+@@ -135,10 +132,10 @@ static const struct common_glue_ctx cast
+       .funcs = { {
+               .num_blocks = CAST6_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
++              .fn_u = { .xts = cast6_xts_dec_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
++              .fn_u = { .xts = cast6_xts_dec }
+       } }
+ };
+@@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -199,8 +195,7 @@ static int xts_encrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&cast6_enc_xts, req,
+-                                 XTS_TWEAK_CAST(__cast6_encrypt),
++      return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+@@ -209,8 +204,7 @@ static int xts_decrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&cast6_dec_xts, req,
+-                                 XTS_TWEAK_CAST(__cast6_encrypt),
++      return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+--- a/arch/x86/crypto/glue_helper.c
++++ b/arch/x86/crypto/glue_helper.c
+@@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const st
+                               src -= num_blocks - 1;
+                               dst -= num_blocks - 1;
+-                              gctx->funcs[i].fn_u.cbc(ctx, dst, src);
++                              gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst,
++                                                      (const u8 *)src);
+                               nbytes -= func_bytes;
+                               if (nbytes < bsize)
+@@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct com
+                       /* Process multi-block batch */
+                       do {
+-                              gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
++                              gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst,
++                                                      (const u8 *)src,
++                                                      &ctrblk);
+                               src += num_blocks;
+                               dst += num_blocks;
+                               nbytes -= func_bytes;
+@@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct com
+               be128_to_le128(&ctrblk, (be128 *)walk.iv);
+               memcpy(&tmp, walk.src.virt.addr, nbytes);
+-              gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
++              gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp,
++                                                        (const u8 *)&tmp,
+                                                         &ctrblk);
+               memcpy(walk.dst.virt.addr, &tmp, nbytes);
+               le128_to_be128((be128 *)walk.iv, &ctrblk);
+@@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bi
+               if (nbytes >= func_bytes) {
+                       do {
+-                              gctx->funcs[i].fn_u.xts(ctx, dst, src,
++                              gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst,
++                                                      (const u8 *)src,
+                                                       walk->iv);
+                               src += num_blocks;
+@@ -354,8 +359,8 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+-void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
+-                             common_glue_func_t fn)
++void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src,
++                             le128 *iv, common_glue_func_t fn)
+ {
+       le128 ivblk = *iv;
+@@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx
+       gf128mul_x_ble(iv, &ivblk);
+       /* CC <- T xor C */
+-      u128_xor(dst, src, (u128 *)&ivblk);
++      u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk);
+       /* PP <- D(Key2,CC) */
+-      fn(ctx, (u8 *)dst, (u8 *)dst);
++      fn(ctx, dst, dst);
+       /* P <- T xor PP */
+-      u128_xor(dst, dst, (u128 *)&ivblk);
++      u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk);
+ }
+ EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
+--- a/arch/x86/crypto/serpent_avx2_glue.c
++++ b/arch/x86/crypto/serpent_avx2_glue.c
+@@ -19,18 +19,16 @@
+ #define SERPENT_AVX2_PARALLEL_BLOCKS 16
+ /* 16-way AVX2 parallel cipher functions */
+-asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
+-                                    const u8 *src);
+-asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
+-                                    const u8 *src);
+-asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
++asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+-asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
++asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+                                 le128 *iv);
+-asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
+-                                    const u8 *src, le128 *iv);
+-asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
+-                                    const u8 *src, le128 *iv);
++asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++                                    le128 *iv);
++asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++                                    le128 *iv);
+ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+                                  const u8 *key, unsigned int keylen)
+@@ -44,13 +42,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
++              .fn_u = { .ecb = serpent_ecb_enc_16way }
+       }, {
+               .num_blocks = 8,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
++              .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++              .fn_u = { .ecb = __serpent_encrypt }
+       } }
+ };
+@@ -60,13 +58,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
++              .fn_u = { .ctr = serpent_ctr_16way }
+       },  {
+               .num_blocks = 8,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
++              .fn_u = { .ctr = serpent_ctr_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
++              .fn_u = { .ctr = __serpent_crypt_ctr }
+       } }
+ };
+@@ -76,13 +74,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
++              .fn_u = { .xts = serpent_xts_enc_16way }
+       }, {
+               .num_blocks = 8,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
++              .fn_u = { .xts = serpent_xts_enc_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
++              .fn_u = { .xts = serpent_xts_enc }
+       } }
+ };
+@@ -92,13 +90,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
++              .fn_u = { .ecb = serpent_ecb_dec_16way }
+       }, {
+               .num_blocks = 8,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
++              .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .ecb = __serpent_decrypt }
+       } }
+ };
+@@ -108,13 +106,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
++              .fn_u = { .cbc = serpent_cbc_dec_16way }
+       }, {
+               .num_blocks = 8,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
++              .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .cbc = __serpent_decrypt }
+       } }
+ };
+@@ -124,13 +122,13 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = 16,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
++              .fn_u = { .xts = serpent_xts_dec_16way }
+       }, {
+               .num_blocks = 8,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
++              .fn_u = { .xts = serpent_xts_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
++              .fn_u = { .xts = serpent_xts_dec }
+       } }
+ };
+@@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_r
+       struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       return glue_xts_req_128bit(&serpent_enc_xts, req,
+-                                 XTS_TWEAK_CAST(__serpent_encrypt),
+-                                 &ctx->tweak_ctx, &ctx->crypt_ctx, false);
++                                 __serpent_encrypt, &ctx->tweak_ctx,
++                                 &ctx->crypt_ctx, false);
+ }
+ static int xts_decrypt(struct skcipher_request *req)
+@@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_r
+       struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       return glue_xts_req_128bit(&serpent_dec_xts, req,
+-                                 XTS_TWEAK_CAST(__serpent_encrypt),
+-                                 &ctx->tweak_ctx, &ctx->crypt_ctx, true);
++                                 __serpent_encrypt, &ctx->tweak_ctx,
++                                 &ctx->crypt_ctx, true);
+ }
+ static struct skcipher_alg serpent_algs[] = {
+--- a/arch/x86/crypto/serpent_avx_glue.c
++++ b/arch/x86/crypto/serpent_avx_glue.c
+@@ -20,33 +20,35 @@
+ #include <asm/crypto/serpent-avx.h>
+ /* 8-way parallel cipher functions */
+-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
+-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
+-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
+-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+-                                   const u8 *src, le128 *iv);
++asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
++                                   le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
+-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src, le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
+-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src, le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
+-void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblk;
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
+@@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
+-void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(__serpent_encrypt));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt);
+ }
+ EXPORT_SYMBOL_GPL(serpent_xts_enc);
+-void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(__serpent_decrypt));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt);
+ }
+ EXPORT_SYMBOL_GPL(serpent_xts_dec);
+@@ -102,10 +102,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
++              .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++              .fn_u = { .ecb = __serpent_encrypt }
+       } }
+ };
+@@ -115,10 +115,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
++              .fn_u = { .ctr = serpent_ctr_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
++              .fn_u = { .ctr = __serpent_crypt_ctr }
+       } }
+ };
+@@ -128,10 +128,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
++              .fn_u = { .xts = serpent_xts_enc_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
++              .fn_u = { .xts = serpent_xts_enc }
+       } }
+ };
+@@ -141,10 +141,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
++              .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .ecb = __serpent_decrypt }
+       } }
+ };
+@@ -154,10 +154,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
++              .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .cbc = __serpent_decrypt }
+       } }
+ };
+@@ -167,10 +167,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
++              .fn_u = { .xts = serpent_xts_dec_8way_avx }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
++              .fn_u = { .xts = serpent_xts_dec }
+       } }
+ };
+@@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_r
+       struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       return glue_xts_req_128bit(&serpent_enc_xts, req,
+-                                 XTS_TWEAK_CAST(__serpent_encrypt),
+-                                 &ctx->tweak_ctx, &ctx->crypt_ctx, false);
++                                 __serpent_encrypt, &ctx->tweak_ctx,
++                                 &ctx->crypt_ctx, false);
+ }
+ static int xts_decrypt(struct skcipher_request *req)
+@@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_r
+       struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       return glue_xts_req_128bit(&serpent_dec_xts, req,
+-                                 XTS_TWEAK_CAST(__serpent_encrypt),
+-                                 &ctx->tweak_ctx, &ctx->crypt_ctx, true);
++                                 __serpent_encrypt, &ctx->tweak_ctx,
++                                 &ctx->crypt_ctx, true);
+ }
+ static struct skcipher_alg serpent_algs[] = {
+--- a/arch/x86/crypto/serpent_sse2_glue.c
++++ b/arch/x86/crypto/serpent_sse2_glue.c
+@@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struc
+       return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+ }
+-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
++static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s)
+ {
+       u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       unsigned int j;
+       for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+@@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(voi
+               u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+ }
+-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblk;
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       le128_to_be128(&ctrblk, iv);
+       le128_inc(iv);
+@@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx,
+       u128_xor(dst, src, (u128 *)&ctrblk);
+ }
+-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
++static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s,
+                                  le128 *iv)
+ {
+       be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       unsigned int i;
+       for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+@@ -79,10 +85,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
++              .fn_u = { .ecb = serpent_enc_blk_xway }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++              .fn_u = { .ecb = __serpent_encrypt }
+       } }
+ };
+@@ -92,10 +98,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
++              .fn_u = { .ctr = serpent_crypt_ctr_xway }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
++              .fn_u = { .ctr = serpent_crypt_ctr }
+       } }
+ };
+@@ -105,10 +111,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
++              .fn_u = { .ecb = serpent_dec_blk_xway }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .ecb = __serpent_decrypt }
+       } }
+ };
+@@ -118,10 +124,10 @@ static const struct common_glue_ctx serp
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
++              .fn_u = { .cbc = serpent_decrypt_cbc_xway }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++              .fn_u = { .cbc = __serpent_decrypt }
+       } }
+ };
+@@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
++      return glue_cbc_encrypt_req_128bit(__serpent_encrypt,
+                                          req);
+ }
+--- a/arch/x86/crypto/twofish_avx_glue.c
++++ b/arch/x86/crypto/twofish_avx_glue.c
+@@ -22,20 +22,17 @@
+ #define TWOFISH_PARALLEL_BLOCKS 8
+ /* 8-way parallel cipher functions */
+-asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src);
+-asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src);
+-
+-asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src);
+-asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
+-                               const u8 *src, le128 *iv);
+-
+-asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src, le128 *iv);
+-asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src, le128 *iv);
++asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
++                               le128 *iv);
++
++asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
++                                   le128 *iv);
++asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
++                                   le128 *iv);
+ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+                                  const u8 *key, unsigned int keylen)
+@@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struc
+       return twofish_setkey(&tfm->base, key, keylen);
+ }
+-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-                                      const u8 *src)
++static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __twofish_enc_blk_3way(ctx, dst, src, false);
+ }
+-static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(twofish_enc_blk));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk);
+ }
+-static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-      glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-                                GLUE_FUNC_CAST(twofish_dec_blk));
++      glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk);
+ }
+ struct twofish_xts_ctx {
+@@ -93,13 +87,13 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
++              .fn_u = { .ecb = twofish_ecb_enc_8way }
+       }, {
+               .num_blocks = 3,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
++              .fn_u = { .ecb = twofish_enc_blk_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
++              .fn_u = { .ecb = twofish_enc_blk }
+       } }
+ };
+@@ -109,13 +103,13 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
++              .fn_u = { .ctr = twofish_ctr_8way }
+       }, {
+               .num_blocks = 3,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
++              .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
++              .fn_u = { .ctr = twofish_enc_blk_ctr }
+       } }
+ };
+@@ -125,10 +119,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
++              .fn_u = { .xts = twofish_xts_enc_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
++              .fn_u = { .xts = twofish_xts_enc }
+       } }
+ };
+@@ -138,13 +132,13 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
++              .fn_u = { .ecb = twofish_ecb_dec_8way }
+       }, {
+               .num_blocks = 3,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
++              .fn_u = { .ecb = twofish_dec_blk_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
++              .fn_u = { .ecb = twofish_dec_blk }
+       } }
+ };
+@@ -154,13 +148,13 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
++              .fn_u = { .cbc = twofish_cbc_dec_8way }
+       }, {
+               .num_blocks = 3,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
++              .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
++              .fn_u = { .cbc = twofish_dec_blk }
+       } }
+ };
+@@ -170,10 +164,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
++              .fn_u = { .xts = twofish_xts_dec_8way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
++              .fn_u = { .xts = twofish_xts_dec }
+       } }
+ };
+@@ -189,8 +183,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -208,8 +201,7 @@ static int xts_encrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&twofish_enc_xts, req,
+-                                 XTS_TWEAK_CAST(twofish_enc_blk),
++      return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+@@ -218,8 +210,7 @@ static int xts_decrypt(struct skcipher_r
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+-      return glue_xts_req_128bit(&twofish_dec_xts, req,
+-                                 XTS_TWEAK_CAST(twofish_enc_blk),
++      return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk,
+                                  &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+--- a/arch/x86/crypto/twofish_glue_3way.c
++++ b/arch/x86/crypto/twofish_glue_3way.c
+@@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struc
+       return twofish_setkey(&tfm->base, key, keylen);
+ }
+-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-                                      const u8 *src)
++static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __twofish_enc_blk_3way(ctx, dst, src, false);
+ }
+-static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
++static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
+                                           const u8 *src)
+ {
+       __twofish_enc_blk_3way(ctx, dst, src, true);
+ }
+-void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
++void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
+ {
+       u128 ivs[2];
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       ivs[0] = src[0];
+       ivs[1] = src[1];
+@@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx,
+ }
+ EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
+-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblk;
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       if (dst != src)
+               *dst = *src;
+@@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
+-void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+-                            le128 *iv)
++void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+       be128 ctrblks[3];
++      u128 *dst = (u128 *)d;
++      const u128 *src = (const u128 *)s;
+       if (dst != src) {
+               dst[0] = src[0];
+@@ -94,10 +98,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = 3,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
++              .fn_u = { .ecb = twofish_enc_blk_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
++              .fn_u = { .ecb = twofish_enc_blk }
+       } }
+ };
+@@ -107,10 +111,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = 3,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
++              .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
++              .fn_u = { .ctr = twofish_enc_blk_ctr }
+       } }
+ };
+@@ -120,10 +124,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = 3,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
++              .fn_u = { .ecb = twofish_dec_blk_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
++              .fn_u = { .ecb = twofish_dec_blk }
+       } }
+ };
+@@ -133,10 +137,10 @@ static const struct common_glue_ctx twof
+       .funcs = { {
+               .num_blocks = 3,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
++              .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
+       }, {
+               .num_blocks = 1,
+-              .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
++              .fn_u = { .cbc = twofish_dec_blk }
+       } }
+ };
+@@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_r
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-      return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+-                                         req);
++      return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ }
+ static int cbc_decrypt(struct skcipher_request *req)
+--- a/arch/x86/include/asm/crypto/camellia.h
++++ b/arch/x86/include/asm/crypto/camellia.h
+@@ -32,65 +32,60 @@ extern int xts_camellia_setkey(struct cr
+                              unsigned int keylen);
+ /* regular block cipher functions */
+-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-                                 const u8 *src, bool xor);
+-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+-                               const u8 *src);
++asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
++                                 bool xor);
++asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ /* 2-way parallel cipher functions */
+-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-                                      const u8 *src, bool xor);
+-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-                                    const u8 *src);
++asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
++                                      bool xor);
++asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
+ /* 16-way parallel cipher functions (avx/aes-ni) */
+-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-
+-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src);
+-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                 const u8 *src, le128 *iv);
+-
+-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
+-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-                                     const u8 *src, le128 *iv);
++asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-                                  const u8 *src)
++asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
++                                 le128 *iv);
++
++asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
++asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++                                     le128 *iv);
++
++static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __camellia_enc_blk(ctx, dst, src, false);
+ }
+-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+-                                      const u8 *src)
++static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __camellia_enc_blk(ctx, dst, src, true);
+ }
+-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
++static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
+                                        const u8 *src)
+ {
+       __camellia_enc_blk_2way(ctx, dst, src, false);
+ }
+-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
++static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
+                                            const u8 *src)
+ {
+       __camellia_enc_blk_2way(ctx, dst, src, true);
+ }
+ /* glue helpers */
+-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
+-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
++extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
+                              le128 *iv);
+-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
++extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
+                                   le128 *iv);
+-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
++extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
++                           le128 *iv);
++extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
++                           le128 *iv);
+ #endif /* ASM_X86_CAMELLIA_H */
+--- a/arch/x86/include/asm/crypto/glue_helper.h
++++ b/arch/x86/include/asm/crypto/glue_helper.h
+@@ -11,18 +11,13 @@
+ #include <asm/fpu/api.h>
+ #include <crypto/b128ops.h>
+-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
+-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
+-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
++typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
++typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
++typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
+-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
++typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
+                                      le128 *iv);
+-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
+-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
+-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
+-
+ struct common_glue_func_entry {
+       unsigned int num_blocks; /* number of blocks that @fn will process */
+       union {
+@@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const str
+                              common_glue_func_t tweak_fn, void *tweak_ctx,
+                              void *crypt_ctx, bool decrypt);
+-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
+-                                    le128 *iv, common_glue_func_t fn);
++extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
++                                    const u8 *src, le128 *iv,
++                                    common_glue_func_t fn);
+ #endif /* _CRYPTO_GLUE_HELPER_H */
+--- a/arch/x86/include/asm/crypto/serpent-avx.h
++++ b/arch/x86/include/asm/crypto/serpent-avx.h
+@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
+       struct serpent_ctx crypt_ctx;
+ };
+-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src);
+-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+-                                   const u8 *src, le128 *iv);
++asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
++                                   le128 *iv);
+-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src, le128 *iv);
+-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
+                                        const u8 *src, le128 *iv);
+-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
+                               le128 *iv);
+-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
++extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
++extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+ extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                             unsigned int keylen);
+--- a/arch/x86/include/asm/crypto/serpent-sse2.h
++++ b/arch/x86/include/asm/crypto/serpent-sse2.h
+@@ -9,25 +9,23 @@
+ #define SERPENT_PARALLEL_BLOCKS 4
+-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+                                      const u8 *src, bool xor);
+-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+                                    const u8 *src);
+-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-                                      const u8 *src)
++static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __serpent_enc_blk_4way(ctx, dst, src, false);
+ }
+-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+-                                          const u8 *src)
++static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
++                                          u8 *dst, const u8 *src)
+ {
+       __serpent_enc_blk_4way(ctx, dst, src, true);
+ }
+-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-                                      const u8 *src)
++static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+       serpent_dec_blk_4way(ctx, dst, src);
+ }
+@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(
+ #define SERPENT_PARALLEL_BLOCKS 8
+-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+                                      const u8 *src, bool xor);
+-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+                                    const u8 *src);
+-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-                                 const u8 *src)
++static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+       __serpent_enc_blk_8way(ctx, dst, src, false);
+ }
+-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+-                                     const u8 *src)
++static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
++                                          u8 *dst, const u8 *src)
+ {
+       __serpent_enc_blk_8way(ctx, dst, src, true);
+ }
+-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-                                 const u8 *src)
++static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+       serpent_dec_blk_8way(ctx, dst, src);
+ }
+--- a/arch/x86/include/asm/crypto/twofish.h
++++ b/arch/x86/include/asm/crypto/twofish.h
+@@ -7,22 +7,19 @@
+ #include <crypto/b128ops.h>
+ /* regular block cipher functions from twofish_x86_64 module */
+-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+-                              const u8 *src);
+-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+-                              const u8 *src);
++asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ /* 3-way parallel cipher functions */
+-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-                                     const u8 *src, bool xor);
+-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-                                   const u8 *src);
++asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
++                                     bool xor);
++asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
+ /* helpers from twofish_x86_64-3way module */
+-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
+-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
++extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
+                               le128 *iv);
+-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
++extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
+                                    le128 *iv);
+ #endif /* ASM_X86_TWOFISH_H */
+--- a/crypto/cast6_generic.c
++++ b/crypto/cast6_generic.c
+@@ -154,7 +154,7 @@ int cast6_setkey(struct crypto_tfm *tfm,
+ EXPORT_SYMBOL_GPL(cast6_setkey);
+ /*forward quad round*/
+-static inline void Q(u32 *block, u8 *Kr, u32 *Km)
++static inline void Q(u32 *block, const u8 *Kr, const u32 *Km)
+ {
+       u32 I;
+       block[2] ^= F1(block[3], Kr[0], Km[0]);
+@@ -164,7 +164,7 @@ static inline void Q(u32 *block, u8 *Kr,
+ }
+ /*reverse quad round*/
+-static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
++static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km)
+ {
+       u32 I;
+       block[3] ^= F1(block[0], Kr[3], Km[3]);
+@@ -173,13 +173,14 @@ static inline void QBAR(u32 *block, u8 *
+       block[2] ^= F1(block[3], Kr[0], Km[0]);
+ }
+-void __cast6_encrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
++void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
+ {
++      const struct cast6_ctx *c = ctx;
+       const __be32 *src = (const __be32 *)inbuf;
+       __be32 *dst = (__be32 *)outbuf;
+       u32 block[4];
+-      u32 *Km;
+-      u8 *Kr;
++      const u32 *Km;
++      const u8 *Kr;
+       block[0] = be32_to_cpu(src[0]);
+       block[1] = be32_to_cpu(src[1]);
+@@ -211,13 +212,14 @@ static void cast6_encrypt(struct crypto_
+       __cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+ }
+-void __cast6_decrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
++void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
+ {
++      const struct cast6_ctx *c = ctx;
+       const __be32 *src = (const __be32 *)inbuf;
+       __be32 *dst = (__be32 *)outbuf;
+       u32 block[4];
+-      u32 *Km;
+-      u8 *Kr;
++      const u32 *Km;
++      const u8 *Kr;
+       block[0] = be32_to_cpu(src[0]);
+       block[1] = be32_to_cpu(src[1]);
+--- a/crypto/serpent_generic.c
++++ b/crypto/serpent_generic.c
+@@ -449,8 +449,9 @@ int serpent_setkey(struct crypto_tfm *tf
+ }
+ EXPORT_SYMBOL_GPL(serpent_setkey);
+-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
++void __serpent_encrypt(const void *c, u8 *dst, const u8 *src)
+ {
++      const struct serpent_ctx *ctx = c;
+       const u32 *k = ctx->expkey;
+       const __le32 *s = (const __le32 *)src;
+       __le32  *d = (__le32 *)dst;
+@@ -514,8 +515,9 @@ static void serpent_encrypt(struct crypt
+       __serpent_encrypt(ctx, dst, src);
+ }
+-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
++void __serpent_decrypt(const void *c, u8 *dst, const u8 *src)
+ {
++      const struct serpent_ctx *ctx = c;
+       const u32 *k = ctx->expkey;
+       const __le32 *s = (const __le32 *)src;
+       __le32  *d = (__le32 *)dst;
+--- a/include/crypto/cast6.h
++++ b/include/crypto/cast6.h
+@@ -19,7 +19,7 @@ int __cast6_setkey(struct cast6_ctx *ctx
+                  unsigned int keylen, u32 *flags);
+ int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+-void __cast6_encrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
+-void __cast6_decrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
++void __cast6_encrypt(const void *ctx, u8 *dst, const u8 *src);
++void __cast6_decrypt(const void *ctx, u8 *dst, const u8 *src);
+ #endif
+--- a/include/crypto/serpent.h
++++ b/include/crypto/serpent.h
+@@ -22,7 +22,7 @@ int __serpent_setkey(struct serpent_ctx
+                    unsigned int keylen);
+ int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
+-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
++void __serpent_encrypt(const void *ctx, u8 *dst, const u8 *src);
++void __serpent_decrypt(const void *ctx, u8 *dst, const u8 *src);
+ #endif
+--- a/include/crypto/xts.h
++++ b/include/crypto/xts.h
+@@ -8,8 +8,6 @@
+ #define XTS_BLOCK_SIZE 16
+-#define XTS_TWEAK_CAST(x) ((void (*)(void *, u8*, const u8*))(x))
+-
+ static inline int xts_check_key(struct crypto_tfm *tfm,
+                               const u8 *key, unsigned int keylen)
+ {
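As a side illustration (not taken from any of the queued patches): every hunk above makes the same move, dropping the GLUE_FUNC_CAST()/GLUE_CBC_FUNC_CAST()/GLUE_CTR_FUNC_CAST()/GLUE_XTS_FUNC_CAST() and XTS_TWEAK_CAST() macros and giving each ECB/CBC/CTR/XTS helper the one (const void *ctx, u8 *dst, const u8 *src, ...) shape, with the context cast back to its real type inside the function instead of the function pointer being cast at the call site. A minimal compile-and-run sketch of that convention, using made-up toy_ctx/toy_encrypt names (only common_glue_func_t mirrors the real typedef):

#include <stdint.h>

typedef uint8_t u8;

struct toy_ctx { uint32_t expkey[4]; };

/* one shared prototype for every glue entry point */
typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);

/* the implementation recovers its real context type internally */
static void toy_encrypt(const void *c, u8 *dst, const u8 *src)
{
	const struct toy_ctx *ctx = c;

	dst[0] = src[0] ^ (u8)ctx->expkey[0];
}

int main(void)
{
	struct toy_ctx ctx = { .expkey = { 0xab, 0, 0, 0 } };
	common_glue_func_t fn = toy_encrypt;	/* no function-pointer cast needed */
	u8 in = 0x01, out = 0;

	fn(&ctx, &out, &in);
	return out == (0x01 ^ 0xab) ? 0 : 1;
}

Storing a correctly typed pointer, rather than forcing one through a cast macro, is what lets indirect-call checking such as Control Flow Integrity match the call against the prototype.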
diff --git a/queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch b/queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch
new file mode 100644 (file)
index 0000000..25fde3a
--- /dev/null
@@ -0,0 +1,107 @@
+From f9b3827ee66cfcf297d0acd6ecf33653a5f297ef Mon Sep 17 00:00:00 2001
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 22 Feb 2021 14:30:10 -0800
+Subject: net: dsa: b53: Support setting learning on port
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+commit f9b3827ee66cfcf297d0acd6ecf33653a5f297ef upstream.
+
+Add support for setting the learning attribute on a port, and make sure
+that standalone ports start up with learning disabled.
+
+We can remove the code in bcm_sf2 that configured the ports' learning
+attribute because we want the standalone ports to have learning disabled
+by default, and port 7 cannot be bridged, so its learning attribute will
+not change past its initial configuration.
+
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/dsa/b53/b53_common.c |   18 ++++++++++++++++++
+ drivers/net/dsa/b53/b53_regs.h   |    1 +
+ drivers/net/dsa/bcm_sf2.c        |    5 -----
+ 3 files changed, 19 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -514,6 +514,19 @@ void b53_imp_vlan_setup(struct dsa_switc
+ }
+ EXPORT_SYMBOL(b53_imp_vlan_setup);
++static void b53_port_set_learning(struct b53_device *dev, int port,
++                                bool learning)
++{
++      u16 reg;
++
++      b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, &reg);
++      if (learning)
++              reg &= ~BIT(port);
++      else
++              reg |= BIT(port);
++      b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg);
++}
++
+ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
+ {
+       struct b53_device *dev = ds->priv;
+@@ -527,6 +540,7 @@ int b53_enable_port(struct dsa_switch *d
+       cpu_port = ds->ports[port].cpu_dp->index;
+       b53_br_egress_floods(ds, port, true, true);
++      b53_port_set_learning(dev, port, false);
+       if (dev->ops->irq_enable)
+               ret = dev->ops->irq_enable(dev, port);
+@@ -645,6 +659,7 @@ static void b53_enable_cpu_port(struct b
+       b53_brcm_hdr_setup(dev->ds, port);
+       b53_br_egress_floods(dev->ds, port, true, true);
++      b53_port_set_learning(dev, port, false);
+ }
+ static void b53_enable_mib(struct b53_device *dev)
+@@ -1704,6 +1719,8 @@ int b53_br_join(struct dsa_switch *ds, i
+       b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan);
+       dev->ports[port].vlan_ctl_mask = pvlan;
++      b53_port_set_learning(dev, port, true);
++
+       return 0;
+ }
+ EXPORT_SYMBOL(b53_br_join);
+@@ -1751,6 +1768,7 @@ void b53_br_leave(struct dsa_switch *ds,
+               vl->untag |= BIT(port) | BIT(cpu_port);
+               b53_set_vlan_entry(dev, pvid, vl);
+       }
++      b53_port_set_learning(dev, port, false);
+ }
+ EXPORT_SYMBOL(b53_br_leave);
+--- a/drivers/net/dsa/b53/b53_regs.h
++++ b/drivers/net/dsa/b53/b53_regs.h
+@@ -115,6 +115,7 @@
+ #define B53_UC_FLOOD_MASK             0x32
+ #define B53_MC_FLOOD_MASK             0x34
+ #define B53_IPMC_FLOOD_MASK           0x36
++#define B53_DIS_LEARNING              0x3c
+ /*
+  * Override Ports 0-7 State on devices with xMII interfaces (8 bit)
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -172,11 +172,6 @@ static int bcm_sf2_port_setup(struct dsa
+       reg &= ~P_TXQ_PSM_VDD(port);
+       core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
+-      /* Enable learning */
+-      reg = core_readl(priv, CORE_DIS_LEARN);
+-      reg &= ~BIT(port);
+-      core_writel(priv, reg, CORE_DIS_LEARN);
+-
+       /* Enable Broadcom tags for that port if requested */
+       if (priv->brcm_tag_mask & BIT(port))
+               b53_brcm_hdr_setup(ds, port);
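A quick standalone sketch of the learning control added above (not kernel code, just the bitmask pattern of b53_port_set_learning(), with a plain uint16_t standing in for the B53_DIS_LEARNING register accessed through b53_read16()/b53_write16()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t dis_learning_reg;	/* stand-in for B53_CTRL_PAGE, B53_DIS_LEARNING */

static void port_set_learning(int port, bool learning)
{
	/* a set bit disables address learning on that port */
	if (learning)
		dis_learning_reg &= ~(1u << port);
	else
		dis_learning_reg |= 1u << port;
}

int main(void)
{
	port_set_learning(5, false);	/* standalone port: learning stays off */
	port_set_learning(5, true);	/* port joins a bridge: learning back on */
	printf("DIS_LEARNING = 0x%04x\n", (unsigned)dis_learning_reg);
	return 0;
}

The patch calls the real helper with learning=false from b53_enable_port(), b53_enable_cpu_port() and b53_br_leave(), and with learning=true from b53_br_join(), which is exactly the enable/disable pairing sketched here.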
diff --git a/queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch b/queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
new file mode 100644 (file)
index 0000000..18c1937
--- /dev/null
@@ -0,0 +1,83 @@
+From 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 Mon Sep 17 00:00:00 2001
+From: DENG Qingfang <dqfext@gmail.com>
+Date: Tue, 2 Mar 2021 00:01:59 +0800
+Subject: net: dsa: tag_mtk: fix 802.1ad VLAN egress
+
+From: DENG Qingfang <dqfext@gmail.com>
+
+commit 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 upstream.
+
+A different TPID bit is used for 802.1ad VLAN frames.
+
+Reported-by: Ilario Gelmetti <iochesonome@gmail.com>
+Fixes: f0af34317f4b ("net: dsa: mediatek: combine MediaTek tag with VLAN tag")
+Signed-off-by: DENG Qingfang <dqfext@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/dsa/tag_mtk.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/net/dsa/tag_mtk.c
++++ b/net/dsa/tag_mtk.c
+@@ -13,6 +13,7 @@
+ #define MTK_HDR_LEN           4
+ #define MTK_HDR_XMIT_UNTAGGED         0
+ #define MTK_HDR_XMIT_TAGGED_TPID_8100 1
++#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2
+ #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
+ #define MTK_HDR_XMIT_DP_BIT_MASK      GENMASK(5, 0)
+ #define MTK_HDR_XMIT_SA_DIS           BIT(6)
+@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(stru
+                                   struct net_device *dev)
+ {
+       struct dsa_port *dp = dsa_slave_to_port(dev);
++      u8 xmit_tpid;
+       u8 *mtk_tag;
+-      bool is_vlan_skb = true;
+       unsigned char *dest = eth_hdr(skb)->h_dest;
+       bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+                               !is_broadcast_ether_addr(dest);
+@@ -33,13 +34,20 @@ static struct sk_buff *mtk_tag_xmit(stru
+        * the both special and VLAN tag at the same time and then look up VLAN
+        * table with VID.
+        */
+-      if (!skb_vlan_tagged(skb)) {
++      switch (skb->protocol) {
++      case htons(ETH_P_8021Q):
++              xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
++              break;
++      case htons(ETH_P_8021AD):
++              xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
++              break;
++      default:
+               if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+                       return NULL;
++              xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
+               skb_push(skb, MTK_HDR_LEN);
+               memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+-              is_vlan_skb = false;
+       }
+       mtk_tag = skb->data + 2 * ETH_ALEN;
+@@ -47,8 +55,7 @@ static struct sk_buff *mtk_tag_xmit(stru
+       /* Mark tag attribute on special tag insertion to notify hardware
+        * whether that's a combined special tag with 802.1Q header.
+        */
+-      mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
+-                   MTK_HDR_XMIT_UNTAGGED;
++      mtk_tag[0] = xmit_tpid;
+       mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+       /* Disable SA learning for multicast frames */
+@@ -56,7 +63,7 @@ static struct sk_buff *mtk_tag_xmit(stru
+               mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
+       /* Tag control information is kept for 802.1Q */
+-      if (!is_vlan_skb) {
++      if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
+               mtk_tag[2] = 0;
+               mtk_tag[3] = 0;
+       }
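A quick standalone sketch of the TPID selection the fix above introduces (not kernel code: skb->protocol is really a big-endian __be16 compared against htons(), plain host-order constants are used here instead):

#include <stdint.h>
#include <stdio.h>

#define ETH_P_8021Q			0x8100	/* customer VLAN tag */
#define ETH_P_8021AD			0x88A8	/* service (802.1ad) VLAN tag */

#define MTK_HDR_XMIT_UNTAGGED		0
#define MTK_HDR_XMIT_TAGGED_TPID_8100	1
#define MTK_HDR_XMIT_TAGGED_TPID_88A8	2

static unsigned int pick_xmit_tpid(uint16_t proto)
{
	switch (proto) {
	case ETH_P_8021Q:
		return MTK_HDR_XMIT_TAGGED_TPID_8100;
	case ETH_P_8021AD:
		/* before the fix, 802.1ad frames were marked with the
		 * 0x8100 TPID code; now they get their own code */
		return MTK_HDR_XMIT_TAGGED_TPID_88A8;
	default:
		return MTK_HDR_XMIT_UNTAGGED;
	}
}

int main(void)
{
	printf("%u %u %u\n", pick_xmit_tpid(ETH_P_8021Q),
	       pick_xmit_tpid(ETH_P_8021AD), pick_xmit_tpid(0x0800));
	return 0;
}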
diff --git a/queue-5.4/series b/queue-5.4/series
index af350dad3942e9c0897150bb3871c82820eb500c..5db70c006585af30fa6b6c7f547bcbd3ccaf67c9 100644 (file)
@@ -11,3 +11,8 @@ drm-i915-gvt-fix-virtual-display-setup-for-bxt-apl.patch
 drm-i915-gvt-fix-port-number-for-bdw-on-edid-region-setup.patch
 drm-i915-gvt-fix-vfio_edid-issue-for-bxt-apl.patch
 fuse-fix-live-lock-in-fuse_iget.patch
+crypto-x86-regularize-glue-function-prototypes.patch
+crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
+crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
+net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
+net-dsa-b53-support-setting-learning-on-port.patch