From b6e527a2c313d23a5dbd58e22b7e6973e949856c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 19 Mar 2021 11:43:29 +0100
Subject: [PATCH] 5.4-stable patches

added patches:
	crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
	crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
	crypto-x86-regularize-glue-function-prototypes.patch
	net-dsa-b53-support-setting-learning-on-port.patch
	net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
---
 ...se-test-reg-reg-instead-of-cmp-0-reg.patch |  208 ++
 ...use-direct-calls-to-and-4-way-stride.patch |  274 +++
 ...-regularize-glue-function-prototypes.patch | 2056 +++++++++++++++++
 ...b53-support-setting-learning-on-port.patch |  107 +
 ...-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch |   83 +
 queue-5.4/series                              |    5 +
 6 files changed, 2733 insertions(+)
 create mode 100644 queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
 create mode 100644 queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
 create mode 100644 queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch
 create mode 100644 queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch
 create mode 100644 queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch

diff --git a/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch b/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
new file mode 100644
index 00000000000..ce0e47fafb5
--- /dev/null
+++ b/queue-5.4/crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
@@ -0,0 +1,208 @@
+From 032d049ea0f45b45c21f3f02b542aa18bc6b6428 Mon Sep 17 00:00:00 2001
+From: Uros Bizjak <ubizjak@gmail.com>
+Date: Fri, 27 Nov 2020 10:44:52 +0100
+Subject: crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 032d049ea0f45b45c21f3f02b542aa18bc6b6428 upstream.
+
+CMP $0,%reg can't set overflow flag, so we can use shorter TEST %reg,%reg
+instruction when only zero and sign flags are checked (E,L,LE,G,GE conditions).
+
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ard Biesheuvel <ardb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/aesni-intel_asm.S        |   20 ++++++++++----------
+ arch/x86/crypto/aesni-intel_avx-x86_64.S |   20 ++++++++++----------
+ 2 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -319,7 +319,7 @@ _initial_blocks_\@:
+ 
+ 	# Main loop - Encrypt/Decrypt remaining blocks
+ 
+-	cmp	$0, %r13
++	test	%r13, %r13
+ 	je	_zero_cipher_left_\@
+ 	sub	$64, %r13
+ 	je	_four_cipher_left_\@
+@@ -438,7 +438,7 @@ _multiple_of_16_bytes_\@:
+ 
+ 	mov PBlockLen(%arg2), %r12
+ 
+-	cmp $0, %r12
++	test %r12, %r12
+ 	je _partial_done\@
+ 
+ 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+@@ -475,7 +475,7 @@ _T_8_\@:
+ 	add	$8, %r10
+ 	sub	$8, %r11
+ 	psrldq	$8, %xmm0
+-	cmp	$0, %r11
++	test	%r11, %r11
+ 	je	_return_T_done_\@
+ _T_4_\@:
+ 	movd	%xmm0, %eax
+@@ -483,7 +483,7 @@ _T_4_\@:
+ 	add	$4, %r10
+ 	sub	$4, %r11
+ 	psrldq	$4, %xmm0
+-	cmp	$0, %r11
++	test	%r11, %r11
+ 	je	_return_T_done_\@
+ _T_123_\@:
+ 	movd	%xmm0, %eax
+@@ -620,7 +620,7 @@ _get_AAD_blocks\@:
+ 
+ 	/* read the last <16B of AAD */
+ _get_AAD_rest\@:
+-	cmp	   $0, %r11
++	test	   %r11, %r11
+ 	je	   _get_AAD_done\@
+ 
+ 	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+@@ -641,7 +641,7 @@ _get_AAD_done\@:
+ .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+ 	AAD_HASH operation
+ 	mov 	PBlockLen(%arg2), %r13
+-	cmp	$0, %r13
++	test	%r13, %r13
+ 	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+ 	# Read in input data without over reading
+ 	cmp	$16, \PLAIN_CYPH_LEN
+@@ -693,7 +693,7 @@ _no_extra_mask_1_\@:
+ 	PSHUFB_XMM	%xmm2, %xmm3
+ 	pxor	%xmm3, \AAD_HASH
+ 
+-	cmp	$0, %r10
++	test	%r10, %r10
+ 	jl	_partial_incomplete_1_\@
+ 
+ 	# GHASH computation for the last <16 Byte block
+@@ -728,7 +728,7 @@ _no_extra_mask_2_\@:
+ 	PSHUFB_XMM %xmm2, %xmm9
+ 	pxor	%xmm9, \AAD_HASH
+ 
+-	cmp	$0, %r10
++	test	%r10, %r10
+ 	jl	_partial_incomplete_2_\@
+ 
+ 	# GHASH computation for the last <16 Byte block
+@@ -748,7 +748,7 @@ _encode_done_\@:
+ 	PSHUFB_XMM	%xmm2, %xmm9
+ .endif
+ 	# output encrypted Bytes
+-	cmp	$0, %r10
++	test	%r10, %r10
+ 	jl	_partial_fill_\@
+ 	mov	%r13, %r12
+ 	mov	$16, %r13
+@@ -2731,7 +2731,7 @@ ENDPROC(aesni_ctr_enc)
+  */
+ ENTRY(aesni_xts_crypt8)
+ 	FRAME_BEGIN
+-	cmpb $0, %cl
++	testb %cl, %cl
+ 	movl $0, %ecx
+ 	movl $240, %r10d
+ 	leaq _aesni_enc4, %r11
+--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
++++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
+@@ -370,7 +370,7 @@ _initial_num_blocks_is_0\@:
+ 
+ 
+ _initial_blocks_encrypted\@:
+-        cmp     $0, %r13
++        test    %r13, %r13
+         je      _zero_cipher_left\@
+ 
+         sub     $128, %r13
+@@ -529,7 +529,7 @@ _multiple_of_16_bytes\@:
+         vmovdqu HashKey(arg2), %xmm13
+ 
+         mov PBlockLen(arg2), %r12
+-        cmp $0, %r12
++        test %r12, %r12
+         je _partial_done\@
+ 
+ 	#GHASH computation for the last <16 Byte block
+@@ -574,7 +574,7 @@ _T_8\@:
+         add     $8, %r10
+         sub     $8, %r11
+         vpsrldq $8, %xmm9, %xmm9
+-        cmp     $0, %r11
++        test    %r11, %r11
+         je     _return_T_done\@
+ _T_4\@:
+         vmovd   %xmm9, %eax
+@@ -582,7 +582,7 @@ _T_4\@:
+         add     $4, %r10
+         sub     $4, %r11
+         vpsrldq     $4, %xmm9, %xmm9
+-        cmp     $0, %r11
++        test    %r11, %r11
+         je     _return_T_done\@
+ _T_123\@:
+         vmovd     %xmm9, %eax
+@@ -626,7 +626,7 @@ _get_AAD_blocks\@:
+ 	cmp     $16, %r11
+ 	jge     _get_AAD_blocks\@
+ 	vmovdqu \T8, \T7
+-	cmp     $0, %r11
++	test    %r11, %r11
+ 	je      _get_AAD_done\@
+ 
+ 	vpxor   \T7, \T7, \T7
+@@ -645,7 +645,7 @@ _get_AAD_rest8\@:
+ 	vpxor   \T1, \T7, \T7
+ 	jmp     _get_AAD_rest8\@
+ _get_AAD_rest4\@:
+-	cmp     $0, %r11
++	test    %r11, %r11
+ 	jle      _get_AAD_rest0\@
+ 	mov     (%r10), %eax
+ 	movq    %rax, \T1
+@@ -750,7 +750,7 @@ _done_read_partial_block_\@:
+ .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+         AAD_HASH ENC_DEC
+         mov 	PBlockLen(arg2), %r13
+-        cmp	$0, %r13
++        test	%r13, %r13
+         je	_partial_block_done_\@	# Leave Macro if no partial blocks
+         # Read in input data without over reading
+         cmp	$16, \PLAIN_CYPH_LEN
+@@ -802,7 +802,7 @@ _no_extra_mask_1_\@:
+         vpshufb	%xmm2, %xmm3, %xmm3
+         vpxor	%xmm3, \AAD_HASH, \AAD_HASH
+ 
+-        cmp	$0, %r10
++        test	%r10, %r10
+         jl	_partial_incomplete_1_\@
+ 
+         # GHASH computation for the last <16 Byte block
+@@ -837,7 +837,7 @@ _no_extra_mask_2_\@:
+         vpshufb %xmm2, %xmm9, %xmm9
+         vpxor	%xmm9, \AAD_HASH, \AAD_HASH
+ 
+-        cmp	$0, %r10
++        test	%r10, %r10
+         jl	_partial_incomplete_2_\@
+ 
+         # GHASH computation for the last <16 Byte block
+@@ -857,7 +857,7 @@ _encode_done_\@:
+         vpshufb	%xmm2, %xmm9, %xmm9
+ .endif
+         # output encrypted Bytes
+-        cmp	$0, %r10
++        test	%r10, %r10
+         jl	_partial_fill_\@
+         mov	%r13, %r12
+         mov	$16, %r13
diff --git a/queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch b/queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
new file mode 100644
index 00000000000..81a659b3839
--- /dev/null
+++ b/queue-5.4/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
@@ -0,0 +1,274 @@
+From 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 31 Dec 2020 17:41:54 +0100
+Subject: crypto: x86/aes-ni-xts - use direct calls to and 4-way stride
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 upstream.
+
+The XTS asm helper arrangement is a bit odd: the 8-way stride helper
+consists of back-to-back calls to the 4-way core transforms, which
+are called indirectly, based on a boolean that indicates whether we
+are performing encryption or decryption.
+
+Given how costly indirect calls are on x86, let's switch to direct
+calls, and given how the 8-way stride doesn't really add anything
+substantial, use a 4-way stride instead, and make the asm core
+routine deal with any multiple of 4 blocks. Since 512 byte sectors
+or 4 KB blocks are the typical quantities XTS operates on, increase
+the stride exported to the glue helper to 512 bytes as well.
+
+As a result, the number of indirect calls is reduced from 3 per 64 bytes
+of in/output to 1 per 512 bytes of in/output, which produces a 65% speedup
+when operating on 1 KB blocks (measured on a Intel(R) Core(TM) i7-8650U CPU)
+
+Fixes: 9697fa39efd3f ("x86/retpoline/crypto: Convert crypto assembler indirect jumps")
+Tested-by: Eric Biggers <ebiggers@google.com> # x86_64
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+[ardb: rebase onto stable/linux-5.4.y]
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/crypto/aesni-intel_asm.S  |  115 ++++++++++++++++++++++---------------
+ arch/x86/crypto/aesni-intel_glue.c |   25 ++++----
+ 2 files changed, 84 insertions(+), 56 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -2726,25 +2726,18 @@ ENDPROC(aesni_ctr_enc)
+ 	pxor CTR, IV;
+ 
+ /*
+- * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
+- *			 const u8 *src, bool enc, le128 *iv)
++ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *			  const u8 *src, unsigned int len, le128 *iv)
+  */
+-ENTRY(aesni_xts_crypt8)
++ENTRY(aesni_xts_encrypt)
+ 	FRAME_BEGIN
+-	testb %cl, %cl
+-	movl $0, %ecx
+-	movl $240, %r10d
+-	leaq _aesni_enc4, %r11
+-	leaq _aesni_dec4, %rax
+-	cmovel %r10d, %ecx
+-	cmoveq %rax, %r11
+ 
+ 	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+ 	movups (IVP), IV
+ 
+ 	mov 480(KEYP), KLEN
+-	addq %rcx, KEYP
+ 
++.Lxts_enc_loop4:
+ 	movdqa IV, STATE1
+ 	movdqu 0x00(INP), INC
+ 	pxor INC, STATE1
+@@ -2768,71 +2761,103 @@ ENTRY(aesni_xts_crypt8)
+ 	pxor INC, STATE4
+ 	movdqu IV, 0x30(OUTP)
+ 
+-	CALL_NOSPEC %r11
++	call _aesni_enc4
+ 
+ 	movdqu 0x00(OUTP), INC
+ 	pxor INC, STATE1
+ 	movdqu STATE1, 0x00(OUTP)
+ 
+-	_aesni_gf128mul_x_ble()
+-	movdqa IV, STATE1
+-	movdqu 0x40(INP), INC
+-	pxor INC, STATE1
+-	movdqu IV, 0x40(OUTP)
+-
+ 	movdqu 0x10(OUTP), INC
+ 	pxor INC, STATE2
+ 	movdqu STATE2, 0x10(OUTP)
+ 
+-	_aesni_gf128mul_x_ble()
+-	movdqa IV, STATE2
+-	movdqu 0x50(INP), INC
+-	pxor INC, STATE2
+-	movdqu IV, 0x50(OUTP)
+-
+ 	movdqu 0x20(OUTP), INC
+ 	pxor INC, STATE3
+ 	movdqu STATE3, 0x20(OUTP)
+ 
+-	_aesni_gf128mul_x_ble()
+-	movdqa IV, STATE3
+-	movdqu 0x60(INP), INC
+-	pxor INC, STATE3
+-	movdqu IV, 0x60(OUTP)
+-
+ 	movdqu 0x30(OUTP), INC
+ 	pxor INC, STATE4
+ 	movdqu STATE4, 0x30(OUTP)
+ 
+ 	_aesni_gf128mul_x_ble()
+-	movdqa IV, STATE4
+-	movdqu 0x70(INP), INC
+-	pxor INC, STATE4
+-	movdqu IV, 0x70(OUTP)
+ 
+-	_aesni_gf128mul_x_ble()
++	add $64, INP
++	add $64, OUTP
++	sub $64, LEN
++	ja .Lxts_enc_loop4
++
+ 	movups IV, (IVP)
+ 
+-	CALL_NOSPEC %r11
++	FRAME_END
++	ret
++ENDPROC(aesni_xts_encrypt)
++
++/*
++ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *			  const u8 *src, unsigned int len, le128 *iv)
++ */
++ENTRY(aesni_xts_decrypt)
++	FRAME_BEGIN
++
++	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
++	movups (IVP), IV
++
++	mov 480(KEYP), KLEN
++	add $240, KEYP
++
++.Lxts_dec_loop4:
++	movdqa IV, STATE1
++	movdqu 0x00(INP), INC
++	pxor INC, STATE1
++	movdqu IV, 0x00(OUTP)
++
++	_aesni_gf128mul_x_ble()
++	movdqa IV, STATE2
++	movdqu 0x10(INP), INC
++	pxor INC, STATE2
++	movdqu IV, 0x10(OUTP)
++
++	_aesni_gf128mul_x_ble()
++	movdqa IV, STATE3
++	movdqu 0x20(INP), INC
++	pxor INC, STATE3
++	movdqu IV, 0x20(OUTP)
++
++	_aesni_gf128mul_x_ble()
++	movdqa IV, STATE4
++	movdqu 0x30(INP), INC
++	pxor INC, STATE4
++	movdqu IV, 0x30(OUTP)
++
++	call _aesni_dec4
+ 
+-	movdqu 0x40(OUTP), INC
++	movdqu 0x00(OUTP), INC
+ 	pxor INC, STATE1
+-	movdqu STATE1, 0x40(OUTP)
++	movdqu STATE1, 0x00(OUTP)
+ 
+-	movdqu 0x50(OUTP), INC
++	movdqu 0x10(OUTP), INC
+ 	pxor INC, STATE2
+-	movdqu STATE2, 0x50(OUTP)
++	movdqu STATE2, 0x10(OUTP)
+ 
+-	movdqu 0x60(OUTP), INC
++	movdqu 0x20(OUTP), INC
+ 	pxor INC, STATE3
+-	movdqu STATE3, 0x60(OUTP)
++	movdqu STATE3, 0x20(OUTP)
+ 
+-	movdqu 0x70(OUTP), INC
++	movdqu 0x30(OUTP), INC
+ 	pxor INC, STATE4
+-	movdqu STATE4, 0x70(OUTP)
++	movdqu STATE4, 0x30(OUTP)
++
++	_aesni_gf128mul_x_ble()
++
++	add $64, INP
++	add $64, OUTP
++	sub $64, LEN
++	ja .Lxts_dec_loop4
++
++	movups IV, (IVP)
+ 
+ 	FRAME_END
+ 	ret
+-ENDPROC(aesni_xts_crypt8)
++ENDPROC(aesni_xts_decrypt)
+ 
+ #endif
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -97,6 +97,12 @@ asmlinkage void aesni_cbc_dec(struct cry
+ #define AVX_GEN2_OPTSIZE 640
+ #define AVX_GEN4_OPTSIZE 4096
+ 
++asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
++				  const u8 *in, unsigned int len, u8 *iv);
++
++asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
++				  const u8 *in, unsigned int len, u8 *iv);
++
+ #ifdef CONFIG_X86_64
+ 
+ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
+@@ -104,9 +110,6 @@ static void (*aesni_ctr_enc_tfm)(struct
+ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ 			      const u8 *in, unsigned int len, u8 *iv);
+ 
+-asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
+-				 const u8 *in, bool enc, le128 *iv);
+-
+ /* asmlinkage void aesni_gcm_enc()
+  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+  * struct gcm_context_data.  May be uninitialized.
+@@ -558,14 +561,14 @@ static void aesni_xts_dec(const void *ct
+ 	glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
+ }
+ 
+-static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
++static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	aesni_xts_crypt8(ctx, dst, src, true, iv);
++	aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
+ }
+ 
+-static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
++static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	aesni_xts_crypt8(ctx, dst, src, false, iv);
++	aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv);
+ }
+ 
+ static const struct common_glue_ctx aesni_enc_xts = {
+@@ -573,8 +576,8 @@ static const struct common_glue_ctx aesn
+ 	.fpu_blocks_limit = 1,
+ 
+ 	.funcs = { {
+-		.num_blocks = 8,
+-		.fn_u = { .xts = aesni_xts_enc8 }
++		.num_blocks = 32,
++		.fn_u = { .xts = aesni_xts_enc32 }
+ 	}, {
+ 		.num_blocks = 1,
+ 		.fn_u = { .xts = aesni_xts_enc }
+@@ -586,8 +589,8 @@ static const struct common_glue_ctx aesn
+ 	.fpu_blocks_limit = 1,
+ 
+ 	.funcs = { {
+-		.num_blocks = 8,
+-		.fn_u = { .xts = aesni_xts_dec8 }
++		.num_blocks = 32,
++		.fn_u = { .xts = aesni_xts_dec32 }
+ 	}, {
+ 		.num_blocks = 1,
+ 		.fn_u = { .xts = aesni_xts_dec }
diff --git a/queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch b/queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch
new file mode 100644
index 00000000000..911381d24e3
--- /dev/null
+++ b/queue-5.4/crypto-x86-regularize-glue-function-prototypes.patch
@@ -0,0 +1,2056 @@
+From 9c1e8836edbbaf3656bc07437b59c04be034ac4e Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Tue, 26 Nov 2019 22:08:02 -0800
+Subject: crypto: x86 - Regularize glue function prototypes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 9c1e8836edbbaf3656bc07437b59c04be034ac4e upstream.
+
+The crypto glue performed function prototype casting via macros to make
+indirect calls to assembly routines. Instead of performing casts at the
+call sites (which trips Control Flow Integrity prototype checking), switch
+each prototype to a common standard set of arguments which allows the
+removal of the existing macros. In order to keep pointer math unchanged,
+internal casting between u128 pointers and u8 pointers is added.
+
+Co-developed-by: João Moreira <joao.moreira@intel.com>
+Signed-off-by: João Moreira <joao.moreira@intel.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Eric Biggers <ebiggers@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Cc: Ard Biesheuvel <ardb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/crypto/aesni-intel_asm.S          |    8 +-
+ arch/x86/crypto/aesni-intel_glue.c         |   45 ++++++----------
+ arch/x86/crypto/camellia_aesni_avx2_glue.c |   78 +++++++++++++---------------
+ arch/x86/crypto/camellia_aesni_avx_glue.c  |   72 +++++++++++---------------
+ arch/x86/crypto/camellia_glue.c            |   45 ++++++++--------
+ arch/x86/crypto/cast6_avx_glue.c           |   68 +++++++++++-------------
+ arch/x86/crypto/glue_helper.c              |   23 +++++---
+ arch/x86/crypto/serpent_avx2_glue.c        |   65 +++++++++++------------
+ arch/x86/crypto/serpent_avx_glue.c         |   63 +++++++++++------------
+ arch/x86/crypto/serpent_sse2_glue.c        |   30 ++++++-----
+ arch/x86/crypto/twofish_avx_glue.c         |   79 ++++++++++++-----------------
+ arch/x86/crypto/twofish_glue_3way.c        |   37 +++++++------
+ arch/x86/include/asm/crypto/camellia.h     |   61 ++++++++++------------
+ arch/x86/include/asm/crypto/glue_helper.h  |   18 ++----
+ arch/x86/include/asm/crypto/serpent-avx.h  |   20 +++----
+ arch/x86/include/asm/crypto/serpent-sse2.h |   28 ++++------
+ arch/x86/include/asm/crypto/twofish.h      |   19 ++----
+ crypto/cast6_generic.c                     |   18 +++---
+ crypto/serpent_generic.c                   |    6 +-
+ include/crypto/cast6.h                     |    4 -
+ include/crypto/serpent.h                   |    4 -
+ include/crypto/xts.h                       |    2 
+ 22 files changed, 377 insertions(+), 416 deletions(-)
+
+--- a/arch/x86/crypto/aesni-intel_asm.S
++++ b/arch/x86/crypto/aesni-intel_asm.S
+@@ -1946,7 +1946,7 @@ ENTRY(aesni_set_key)
+ ENDPROC(aesni_set_key)
+ 
+ /*
+- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
++ * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
+  */
+ ENTRY(aesni_enc)
+ 	FRAME_BEGIN
+@@ -2137,7 +2137,7 @@ _aesni_enc4:
+ ENDPROC(_aesni_enc4)
+ 
+ /*
+- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
++ * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
+  */
+ ENTRY(aesni_dec)
+ 	FRAME_BEGIN
+@@ -2726,8 +2726,8 @@ ENDPROC(aesni_ctr_enc)
+ 	pxor CTR, IV;
+ 
+ /*
+- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+- *			 bool enc, u8 *iv)
++ * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
++ *			 const u8 *src, bool enc, le128 *iv)
+  */
+ ENTRY(aesni_xts_crypt8)
+ 	FRAME_BEGIN
+--- a/arch/x86/crypto/aesni-intel_glue.c
++++ b/arch/x86/crypto/aesni-intel_glue.c
+@@ -83,10 +83,8 @@ struct gcm_context_data {
+ 
+ asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ 			     unsigned int key_len);
+-asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
+-			  const u8 *in);
+-asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
+-			  const u8 *in);
++asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
++asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
+ asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ 			      const u8 *in, unsigned int len);
+ asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
+@@ -106,8 +104,8 @@ static void (*aesni_ctr_enc_tfm)(struct
+ asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ 			      const u8 *in, unsigned int len, u8 *iv);
+ 
+-asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
+-				 const u8 *in, bool enc, u8 *iv);
++asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
++				 const u8 *in, bool enc, le128 *iv);
+ 
+ /* asmlinkage void aesni_gcm_enc()
+  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+@@ -550,29 +548,24 @@ static int xts_aesni_setkey(struct crypt
+ }
+ 
+ 
+-static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
++static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	aesni_enc(ctx, out, in);
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
+ }
+ 
+-static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
+ }
+ 
+-static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec));
++	aesni_xts_crypt8(ctx, dst, src, true, iv);
+ }
+ 
+-static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv);
+-}
+-
+-static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+-{
+-	aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv);
++	aesni_xts_crypt8(ctx, dst, src, false, iv);
+ }
+ 
+ static const struct common_glue_ctx aesni_enc_xts = {
+@@ -581,10 +574,10 @@ static const struct common_glue_ctx aesn
+ 
+ 	.funcs = { {
+ 		.num_blocks = 8,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) }
++		.fn_u = { .xts = aesni_xts_enc8 }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) }
++		.fn_u = { .xts = aesni_xts_enc }
+ 	} }
+ };
+ 
+@@ -594,10 +587,10 @@ static const struct common_glue_ctx aesn
+ 
+ 	.funcs = { {
+ 		.num_blocks = 8,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) }
++		.fn_u = { .xts = aesni_xts_dec8 }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) }
++		.fn_u = { .xts = aesni_xts_dec }
+ 	} }
+ };
+ 
+@@ -606,8 +599,7 @@ static int xts_encrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&aesni_enc_xts, req,
+-				   XTS_TWEAK_CAST(aesni_xts_tweak),
++	return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
+ 				   aes_ctx(ctx->raw_tweak_ctx),
+ 				   aes_ctx(ctx->raw_crypt_ctx),
+ 				   false);
+@@ -618,8 +610,7 @@ static int xts_decrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&aesni_dec_xts, req,
+-				   XTS_TWEAK_CAST(aesni_xts_tweak),
++	return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
+ 				   aes_ctx(ctx->raw_tweak_ctx),
+ 				   aes_ctx(ctx->raw_crypt_ctx),
+ 				   true);
+--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
++++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
+@@ -19,20 +19,17 @@
+ #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
+ 
+ /* 32-way AVX2/AES-NI parallel cipher functions */
+-asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-
+-asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst,
+-				   const u8 *src, le128 *iv);
+-
+-asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
+-asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
++asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
++				   le128 *iv);
++
++asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
++asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
+ 
+ static const struct common_glue_ctx camellia_enc = {
+ 	.num_funcs = 4,
+@@ -40,16 +37,16 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) }
++		.fn_u = { .ecb = camellia_ecb_enc_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
++		.fn_u = { .ecb = camellia_ecb_enc_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++		.fn_u = { .ecb = camellia_enc_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++		.fn_u = { .ecb = camellia_enc_blk }
+ 	} }
+ };
+ 
+@@ -59,16 +56,16 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) }
++		.fn_u = { .ctr = camellia_ctr_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
++		.fn_u = { .ctr = camellia_ctr_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++		.fn_u = { .ctr = camellia_crypt_ctr_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++		.fn_u = { .ctr = camellia_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -78,13 +75,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) }
++		.fn_u = { .xts = camellia_xts_enc_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
++		.fn_u = { .xts = camellia_xts_enc_16way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
++		.fn_u = { .xts = camellia_xts_enc }
+ 	} }
+ };
+ 
+@@ -94,16 +91,16 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) }
++		.fn_u = { .ecb = camellia_ecb_dec_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
++		.fn_u = { .ecb = camellia_ecb_dec_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++		.fn_u = { .ecb = camellia_dec_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .ecb = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -113,16 +110,16 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) }
++		.fn_u = { .cbc = camellia_cbc_dec_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
++		.fn_u = { .cbc = camellia_cbc_dec_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .cbc = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -132,13 +129,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) }
++		.fn_u = { .xts = camellia_xts_dec_32way }
+ 	}, {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
++		.fn_u = { .xts = camellia_xts_dec_16way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
++		.fn_u = { .xts = camellia_xts_dec }
+ 	} }
+ };
+ 
+@@ -161,8 +158,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -180,8 +176,7 @@ static int xts_encrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&camellia_enc_xts, req,
+-				   XTS_TWEAK_CAST(camellia_enc_blk),
++	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+ 
+@@ -190,8 +185,7 @@ static int xts_decrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&camellia_dec_xts, req,
+-				   XTS_TWEAK_CAST(camellia_enc_blk),
++	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+ 
+--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
++++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
+@@ -18,41 +18,36 @@
+ #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
+ 
+ /* 16-way parallel cipher functions (avx/aes-ni) */
+-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
++asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way);
+ 
+-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
++asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
+ 
+-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
++asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
+ 
+-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
+-				   const u8 *src, le128 *iv);
++asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
++				   le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_ctr_16way);
+ 
+-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
++asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
+ 
+-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
++asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
+ EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
+ 
+-void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(camellia_enc_blk));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
+ }
+ EXPORT_SYMBOL_GPL(camellia_xts_enc);
+ 
+-void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(camellia_dec_blk));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
+ }
+ EXPORT_SYMBOL_GPL(camellia_xts_dec);
+ 
+@@ -62,13 +57,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
++		.fn_u = { .ecb = camellia_ecb_enc_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++		.fn_u = { .ecb = camellia_enc_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++		.fn_u = { .ecb = camellia_enc_blk }
+ 	} }
+ };
+ 
+@@ -78,13 +73,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
++		.fn_u = { .ctr = camellia_ctr_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++		.fn_u = { .ctr = camellia_crypt_ctr_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++		.fn_u = { .ctr = camellia_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -94,10 +89,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) }
++		.fn_u = { .xts = camellia_xts_enc_16way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) }
++		.fn_u = { .xts = camellia_xts_enc }
+ 	} }
+ };
+ 
+@@ -107,13 +102,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
++		.fn_u = { .ecb = camellia_ecb_dec_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++		.fn_u = { .ecb = camellia_dec_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .ecb = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -123,13 +118,13 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
++		.fn_u = { .cbc = camellia_cbc_dec_16way }
+ 	}, {
+ 		.num_blocks = 2,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .cbc = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -139,10 +134,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) }
++		.fn_u = { .xts = camellia_xts_dec_16way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) }
++		.fn_u = { .xts = camellia_xts_dec }
+ 	} }
+ };
+ 
+@@ -165,8 +160,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -206,8 +200,7 @@ static int xts_encrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&camellia_enc_xts, req,
+-				   XTS_TWEAK_CAST(camellia_enc_blk),
++	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+ 
+@@ -216,8 +209,7 @@ static int xts_decrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&camellia_dec_xts, req,
+-				   XTS_TWEAK_CAST(camellia_enc_blk),
++	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+ 
+--- a/arch/x86/crypto/camellia_glue.c
++++ b/arch/x86/crypto/camellia_glue.c
+@@ -18,19 +18,17 @@
+ #include <asm/crypto/glue_helper.h>
+ 
+ /* regular block cipher functions */
+-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-				   const u8 *src, bool xor);
++asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
++				   bool xor);
+ EXPORT_SYMBOL_GPL(__camellia_enc_blk);
+-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+-				 const u8 *src);
++asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_dec_blk);
+ 
+ /* 2-way parallel cipher functions */
+-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-					const u8 *src, bool xor);
++asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
++					bool xor);
+ EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
+-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-				      const u8 *src);
++asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
+ EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
+ 
+ static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+@@ -1267,8 +1265,10 @@ static int camellia_setkey_skcipher(stru
+ 	return camellia_setkey(&tfm->base, key, key_len);
+ }
+ 
+-void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
++void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s)
+ {
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 	u128 iv = *src;
+ 
+ 	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+@@ -1277,9 +1277,11 @@ void camellia_decrypt_cbc_2way(void *ctx
+ }
+ EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
+ 
+-void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblk;
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	if (dst != src)
+ 		*dst = *src;
+@@ -1291,9 +1293,11 @@ void camellia_crypt_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
+ 
+-void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblks[2];
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	if (dst != src) {
+ 		dst[0] = src[0];
+@@ -1315,10 +1319,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
++		.fn_u = { .ecb = camellia_enc_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
++		.fn_u = { .ecb = camellia_enc_blk }
+ 	} }
+ };
+ 
+@@ -1328,10 +1332,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = 2,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
++		.fn_u = { .ctr = camellia_crypt_ctr_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
++		.fn_u = { .ctr = camellia_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -1341,10 +1345,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = 2,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
++		.fn_u = { .ecb = camellia_dec_blk_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .ecb = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -1354,10 +1358,10 @@ static const struct common_glue_ctx came
+ 
+ 	.funcs = { {
+ 		.num_blocks = 2,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
++		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
++		.fn_u = { .cbc = camellia_dec_blk }
+ 	} }
+ };
+ 
+@@ -1373,8 +1377,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+--- a/arch/x86/crypto/cast6_avx_glue.c
++++ b/arch/x86/crypto/cast6_avx_glue.c
+@@ -20,20 +20,17 @@
+ 
+ #define CAST6_PARALLEL_BLOCKS 8
+ 
+-asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
+-				   const u8 *src);
+-asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-				   const u8 *src);
+-
+-asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-				   const u8 *src);
+-asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
++asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
+ 			       le128 *iv);
+ 
+-asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
+-				   const u8 *src, le128 *iv);
+-asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+-				   const u8 *src, le128 *iv);
++asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
++				   le128 *iv);
++asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
++				   le128 *iv);
+ 
+ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+ 				 const u8 *key, unsigned int keylen)
+@@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct
+ 	return cast6_setkey(&tfm->base, key, keylen);
+ }
+ 
+-static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(__cast6_encrypt));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt);
+ }
+ 
+-static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(__cast6_decrypt));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt);
+ }
+ 
+-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblk;
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	le128_to_be128(&ctrblk, iv);
+ 	le128_inc(iv);
+@@ -70,10 +67,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
++		.fn_u = { .ecb = cast6_ecb_enc_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
++		.fn_u = { .ecb = __cast6_encrypt }
+ 	} }
+ };
+ 
+@@ -83,10 +80,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
++		.fn_u = { .ctr = cast6_ctr_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
++		.fn_u = { .ctr = cast6_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -96,10 +93,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
++		.fn_u = { .xts = cast6_xts_enc_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
++		.fn_u = { .xts = cast6_xts_enc }
+ 	} }
+ };
+ 
+@@ -109,10 +106,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
++		.fn_u = { .ecb = cast6_ecb_dec_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
++		.fn_u = { .ecb = __cast6_decrypt }
+ 	} }
+ };
+ 
+@@ -122,10 +119,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
++		.fn_u = { .cbc = cast6_cbc_dec_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
++		.fn_u = { .cbc = __cast6_decrypt }
+ 	} }
+ };
+ 
+@@ -135,10 +132,10 @@ static const struct common_glue_ctx cast
+ 
+ 	.funcs = { {
+ 		.num_blocks = CAST6_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
++		.fn_u = { .xts = cast6_xts_dec_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
++		.fn_u = { .xts = cast6_xts_dec }
+ 	} }
+ };
+ 
+@@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -199,8 +195,7 @@ static int xts_encrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&cast6_enc_xts, req,
+-				   XTS_TWEAK_CAST(__cast6_encrypt),
++	return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+ 
+@@ -209,8 +204,7 @@ static int xts_decrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&cast6_dec_xts, req,
+-				   XTS_TWEAK_CAST(__cast6_encrypt),
++	return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+ 
+--- a/arch/x86/crypto/glue_helper.c
++++ b/arch/x86/crypto/glue_helper.c
+@@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const st
+ 				src -= num_blocks - 1;
+ 				dst -= num_blocks - 1;
+ 
+-				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
++				gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst,
++							(const u8 *)src);
+ 
+ 				nbytes -= func_bytes;
+ 				if (nbytes < bsize)
+@@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct com
+ 
+ 			/* Process multi-block batch */
+ 			do {
+-				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
++				gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst,
++							(const u8 *)src,
++							&ctrblk);
+ 				src += num_blocks;
+ 				dst += num_blocks;
+ 				nbytes -= func_bytes;
+@@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct com
+ 
+ 		be128_to_le128(&ctrblk, (be128 *)walk.iv);
+ 		memcpy(&tmp, walk.src.virt.addr, nbytes);
+-		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
++		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp,
++							  (const u8 *)&tmp,
+ 							  &ctrblk);
+ 		memcpy(walk.dst.virt.addr, &tmp, nbytes);
+ 		le128_to_be128((be128 *)walk.iv, &ctrblk);
+@@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bi
+ 
+ 		if (nbytes >= func_bytes) {
+ 			do {
+-				gctx->funcs[i].fn_u.xts(ctx, dst, src,
++				gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst,
++							(const u8 *)src,
+ 							walk->iv);
+ 
+ 				src += num_blocks;
+@@ -354,8 +359,8 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+ 
+-void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
+-			       common_glue_func_t fn)
++void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src,
++			       le128 *iv, common_glue_func_t fn)
+ {
+ 	le128 ivblk = *iv;
+ 
+@@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx
+ 	gf128mul_x_ble(iv, &ivblk);
+ 
+ 	/* CC <- T xor C */
+-	u128_xor(dst, src, (u128 *)&ivblk);
++	u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk);
+ 
+ 	/* PP <- D(Key2,CC) */
+-	fn(ctx, (u8 *)dst, (u8 *)dst);
++	fn(ctx, dst, dst);
+ 
+ 	/* P <- T xor PP */
+-	u128_xor(dst, dst, (u128 *)&ivblk);
++	u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk);
+ }
+ EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
+ 
+--- a/arch/x86/crypto/serpent_avx2_glue.c
++++ b/arch/x86/crypto/serpent_avx2_glue.c
+@@ -19,18 +19,16 @@
+ #define SERPENT_AVX2_PARALLEL_BLOCKS 16
+ 
+ /* 16-way AVX2 parallel cipher functions */
+-asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst,
+-				      const u8 *src);
+-asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst,
+-				      const u8 *src);
+-asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
++asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ 
+-asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src,
++asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
+ 				  le128 *iv);
+-asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
+-				      const u8 *src, le128 *iv);
+-asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
+-				      const u8 *src, le128 *iv);
++asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++				      le128 *iv);
++asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++				      le128 *iv);
+ 
+ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+ 				   const u8 *key, unsigned int keylen)
+@@ -44,13 +42,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) }
++		.fn_u = { .ecb = serpent_ecb_enc_16way }
+ 	}, {
+ 		.num_blocks = 8,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
++		.fn_u = { .ecb = serpent_ecb_enc_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++		.fn_u = { .ecb = __serpent_encrypt }
+ 	} }
+ };
+ 
+@@ -60,13 +58,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) }
++		.fn_u = { .ctr = serpent_ctr_16way }
+ 	},  {
+ 		.num_blocks = 8,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
++		.fn_u = { .ctr = serpent_ctr_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
++		.fn_u = { .ctr = __serpent_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -76,13 +74,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) }
++		.fn_u = { .xts = serpent_xts_enc_16way }
+ 	}, {
+ 		.num_blocks = 8,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
++		.fn_u = { .xts = serpent_xts_enc_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
++		.fn_u = { .xts = serpent_xts_enc }
+ 	} }
+ };
+ 
+@@ -92,13 +90,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) }
++		.fn_u = { .ecb = serpent_ecb_dec_16way }
+ 	}, {
+ 		.num_blocks = 8,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
++		.fn_u = { .ecb = serpent_ecb_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .ecb = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -108,13 +106,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) }
++		.fn_u = { .cbc = serpent_cbc_dec_16way }
+ 	}, {
+ 		.num_blocks = 8,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
++		.fn_u = { .cbc = serpent_cbc_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .cbc = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -124,13 +122,13 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = 16,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) }
++		.fn_u = { .xts = serpent_xts_dec_16way }
+ 	}, {
+ 		.num_blocks = 8,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
++		.fn_u = { .xts = serpent_xts_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
++		.fn_u = { .xts = serpent_xts_dec }
+ 	} }
+ };
+ 
+@@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_r
+ 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+ 	return glue_xts_req_128bit(&serpent_enc_xts, req,
+-				   XTS_TWEAK_CAST(__serpent_encrypt),
+-				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
++				   __serpent_encrypt, &ctx->tweak_ctx,
++				   &ctx->crypt_ctx, false);
+ }
+ 
+ static int xts_decrypt(struct skcipher_request *req)
+@@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_r
+ 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+ 	return glue_xts_req_128bit(&serpent_dec_xts, req,
+-				   XTS_TWEAK_CAST(__serpent_encrypt),
+-				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
++				   __serpent_encrypt, &ctx->tweak_ctx,
++				   &ctx->crypt_ctx, true);
+ }
+ 
+ static struct skcipher_alg serpent_algs[] = {
+--- a/arch/x86/crypto/serpent_avx_glue.c
++++ b/arch/x86/crypto/serpent_avx_glue.c
+@@ -20,33 +20,35 @@
+ #include <asm/crypto/serpent-avx.h>
+ 
+ /* 8-way parallel cipher functions */
+-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx);
+ 
+-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx);
+ 
+-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+ EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
+ 
+-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+-				     const u8 *src, le128 *iv);
++asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
++				     le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
+ 
+-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src, le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
+ 
+-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src, le128 *iv);
+ EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
+ 
+-void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblk;
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	le128_to_be128(&ctrblk, iv);
+ 	le128_inc(iv);
+@@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
+ 
+-void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(__serpent_encrypt));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt);
+ }
+ EXPORT_SYMBOL_GPL(serpent_xts_enc);
+ 
+-void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(__serpent_decrypt));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt);
+ }
+ EXPORT_SYMBOL_GPL(serpent_xts_dec);
+ 
+@@ -102,10 +102,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
++		.fn_u = { .ecb = serpent_ecb_enc_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++		.fn_u = { .ecb = __serpent_encrypt }
+ 	} }
+ };
+ 
+@@ -115,10 +115,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
++		.fn_u = { .ctr = serpent_ctr_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) }
++		.fn_u = { .ctr = __serpent_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -128,10 +128,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
++		.fn_u = { .xts = serpent_xts_enc_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
++		.fn_u = { .xts = serpent_xts_enc }
+ 	} }
+ };
+ 
+@@ -141,10 +141,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
++		.fn_u = { .ecb = serpent_ecb_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .ecb = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -154,10 +154,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
++		.fn_u = { .cbc = serpent_cbc_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .cbc = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -167,10 +167,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
++		.fn_u = { .xts = serpent_xts_dec_8way_avx }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
++		.fn_u = { .xts = serpent_xts_dec }
+ 	} }
+ };
+ 
+@@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_r
+ 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+ 	return glue_xts_req_128bit(&serpent_enc_xts, req,
+-				   XTS_TWEAK_CAST(__serpent_encrypt),
+-				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
++				   __serpent_encrypt, &ctx->tweak_ctx,
++				   &ctx->crypt_ctx, false);
+ }
+ 
+ static int xts_decrypt(struct skcipher_request *req)
+@@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_r
+ 	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+ 	return glue_xts_req_128bit(&serpent_dec_xts, req,
+-				   XTS_TWEAK_CAST(__serpent_encrypt),
+-				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
++				   __serpent_encrypt, &ctx->tweak_ctx,
++				   &ctx->crypt_ctx, true);
+ }
+ 
+ static struct skcipher_alg serpent_algs[] = {
+--- a/arch/x86/crypto/serpent_sse2_glue.c
++++ b/arch/x86/crypto/serpent_sse2_glue.c
+@@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struc
+ 	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+ }
+ 
+-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
++static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s)
+ {
+ 	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 	unsigned int j;
+ 
+ 	for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+@@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(voi
+ 		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+ }
+ 
+-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblk;
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	le128_to_be128(&ctrblk, iv);
+ 	le128_inc(iv);
+@@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx,
+ 	u128_xor(dst, src, (u128 *)&ctrblk);
+ }
+ 
+-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
++static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s,
+ 				   le128 *iv)
+ {
+ 	be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 	unsigned int i;
+ 
+ 	for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+@@ -79,10 +85,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
++		.fn_u = { .ecb = serpent_enc_blk_xway }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
++		.fn_u = { .ecb = __serpent_encrypt }
+ 	} }
+ };
+ 
+@@ -92,10 +98,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
++		.fn_u = { .ctr = serpent_crypt_ctr_xway }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
++		.fn_u = { .ctr = serpent_crypt_ctr }
+ 	} }
+ };
+ 
+@@ -105,10 +111,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
++		.fn_u = { .ecb = serpent_dec_blk_xway }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .ecb = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -118,10 +124,10 @@ static const struct common_glue_ctx serp
+ 
+ 	.funcs = { {
+ 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
++		.fn_u = { .cbc = serpent_decrypt_cbc_xway }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
++		.fn_u = { .cbc = __serpent_decrypt }
+ 	} }
+ };
+ 
+@@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
++	return glue_cbc_encrypt_req_128bit(__serpent_encrypt,
+ 					   req);
+ }
+ 
+--- a/arch/x86/crypto/twofish_avx_glue.c
++++ b/arch/x86/crypto/twofish_avx_glue.c
+@@ -22,20 +22,17 @@
+ #define TWOFISH_PARALLEL_BLOCKS 8
+ 
+ /* 8-way parallel cipher functions */
+-asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src);
+-asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src);
+-
+-asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src);
+-asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
+-				 const u8 *src, le128 *iv);
+-
+-asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src, le128 *iv);
+-asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src, le128 *iv);
++asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++
++asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
++				 le128 *iv);
++
++asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
++				     le128 *iv);
++asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
++				     le128 *iv);
+ 
+ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+ 				   const u8 *key, unsigned int keylen)
+@@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struc
+ 	return twofish_setkey(&tfm->base, key, keylen);
+ }
+ 
+-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-					const u8 *src)
++static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__twofish_enc_blk_3way(ctx, dst, src, false);
+ }
+ 
+-static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(twofish_enc_blk));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk);
+ }
+ 
+-static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
+ {
+-	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+-				  GLUE_FUNC_CAST(twofish_dec_blk));
++	glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk);
+ }
+ 
+ struct twofish_xts_ctx {
+@@ -93,13 +87,13 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
++		.fn_u = { .ecb = twofish_ecb_enc_8way }
+ 	}, {
+ 		.num_blocks = 3,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
++		.fn_u = { .ecb = twofish_enc_blk_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
++		.fn_u = { .ecb = twofish_enc_blk }
+ 	} }
+ };
+ 
+@@ -109,13 +103,13 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
++		.fn_u = { .ctr = twofish_ctr_8way }
+ 	}, {
+ 		.num_blocks = 3,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
++		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
++		.fn_u = { .ctr = twofish_enc_blk_ctr }
+ 	} }
+ };
+ 
+@@ -125,10 +119,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
++		.fn_u = { .xts = twofish_xts_enc_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
++		.fn_u = { .xts = twofish_xts_enc }
+ 	} }
+ };
+ 
+@@ -138,13 +132,13 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
++		.fn_u = { .ecb = twofish_ecb_dec_8way }
+ 	}, {
+ 		.num_blocks = 3,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
++		.fn_u = { .ecb = twofish_dec_blk_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
++		.fn_u = { .ecb = twofish_dec_blk }
+ 	} }
+ };
+ 
+@@ -154,13 +148,13 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
++		.fn_u = { .cbc = twofish_cbc_dec_8way }
+ 	}, {
+ 		.num_blocks = 3,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
++		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
++		.fn_u = { .cbc = twofish_dec_blk }
+ 	} }
+ };
+ 
+@@ -170,10 +164,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
++		.fn_u = { .xts = twofish_xts_dec_8way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
++		.fn_u = { .xts = twofish_xts_dec }
+ 	} }
+ };
+ 
+@@ -189,8 +183,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+@@ -208,8 +201,7 @@ static int xts_encrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&twofish_enc_xts, req,
+-				   XTS_TWEAK_CAST(twofish_enc_blk),
++	return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
+ }
+ 
+@@ -218,8 +210,7 @@ static int xts_decrypt(struct skcipher_r
+ 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ 	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ 
+-	return glue_xts_req_128bit(&twofish_dec_xts, req,
+-				   XTS_TWEAK_CAST(twofish_enc_blk),
++	return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk,
+ 				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ }
+ 
+--- a/arch/x86/crypto/twofish_glue_3way.c
++++ b/arch/x86/crypto/twofish_glue_3way.c
+@@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struc
+ 	return twofish_setkey(&tfm->base, key, keylen);
+ }
+ 
+-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-					const u8 *src)
++static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__twofish_enc_blk_3way(ctx, dst, src, false);
+ }
+ 
+-static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
++static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
+ 					    const u8 *src)
+ {
+ 	__twofish_enc_blk_3way(ctx, dst, src, true);
+ }
+ 
+-void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
++void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
+ {
+ 	u128 ivs[2];
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	ivs[0] = src[0];
+ 	ivs[1] = src[1];
+@@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx,
+ }
+ EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
+ 
+-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
++void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblk;
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	if (dst != src)
+ 		*dst = *src;
+@@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128
+ }
+ EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
+ 
+-void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+-			      le128 *iv)
++void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
+ {
+ 	be128 ctrblks[3];
++	u128 *dst = (u128 *)d;
++	const u128 *src = (const u128 *)s;
+ 
+ 	if (dst != src) {
+ 		dst[0] = src[0];
+@@ -94,10 +98,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = 3,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
++		.fn_u = { .ecb = twofish_enc_blk_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
++		.fn_u = { .ecb = twofish_enc_blk }
+ 	} }
+ };
+ 
+@@ -107,10 +111,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = 3,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
++		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
++		.fn_u = { .ctr = twofish_enc_blk_ctr }
+ 	} }
+ };
+ 
+@@ -120,10 +124,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = 3,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
++		.fn_u = { .ecb = twofish_dec_blk_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
++		.fn_u = { .ecb = twofish_dec_blk }
+ 	} }
+ };
+ 
+@@ -133,10 +137,10 @@ static const struct common_glue_ctx twof
+ 
+ 	.funcs = { {
+ 		.num_blocks = 3,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
++		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
+ 	}, {
+ 		.num_blocks = 1,
+-		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
++		.fn_u = { .cbc = twofish_dec_blk }
+ 	} }
+ };
+ 
+@@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_r
+ 
+ static int cbc_encrypt(struct skcipher_request *req)
+ {
+-	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+-					   req);
++	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ }
+ 
+ static int cbc_decrypt(struct skcipher_request *req)
+--- a/arch/x86/include/asm/crypto/camellia.h
++++ b/arch/x86/include/asm/crypto/camellia.h
+@@ -32,65 +32,60 @@ extern int xts_camellia_setkey(struct cr
+ 			       unsigned int keylen);
+ 
+ /* regular block cipher functions */
+-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-				   const u8 *src, bool xor);
+-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+-				 const u8 *src);
++asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
++				   bool xor);
++asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ 
+ /* 2-way parallel cipher functions */
+-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-					const u8 *src, bool xor);
+-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+-				      const u8 *src);
++asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
++					bool xor);
++asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
+ 
+ /* 16-way parallel cipher functions (avx/aes-ni) */
+-asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-
+-asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src);
+-asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
+-				   const u8 *src, le128 *iv);
+-
+-asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
+-asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+-				       const u8 *src, le128 *iv);
++asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+ 
+-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+-				    const u8 *src)
++asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
++				   le128 *iv);
++
++asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
++asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
++				       le128 *iv);
++
++static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__camellia_enc_blk(ctx, dst, src, false);
+ }
+ 
+-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+-					const u8 *src)
++static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__camellia_enc_blk(ctx, dst, src, true);
+ }
+ 
+-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
++static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
+ 					 const u8 *src)
+ {
+ 	__camellia_enc_blk_2way(ctx, dst, src, false);
+ }
+ 
+-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
++static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
+ 					     const u8 *src)
+ {
+ 	__camellia_enc_blk_2way(ctx, dst, src, true);
+ }
+ 
+ /* glue helpers */
+-extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
+-extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
++extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
+ 			       le128 *iv);
+-extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
++extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src,
+ 				    le128 *iv);
+ 
+-extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+-extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
++extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src,
++			     le128 *iv);
++extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src,
++			     le128 *iv);
+ 
+ #endif /* ASM_X86_CAMELLIA_H */
+--- a/arch/x86/include/asm/crypto/glue_helper.h
++++ b/arch/x86/include/asm/crypto/glue_helper.h
+@@ -11,18 +11,13 @@
+ #include <asm/fpu/api.h>
+ #include <crypto/b128ops.h>
+ 
+-typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
+-typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
+-typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
++typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src);
++typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src);
++typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src,
+ 				       le128 *iv);
+-typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
++typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src,
+ 				       le128 *iv);
+ 
+-#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
+-#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
+-#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+-#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
+-
+ struct common_glue_func_entry {
+ 	unsigned int num_blocks; /* number of blocks that @fn will process */
+ 	union {
+@@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const str
+ 			       common_glue_func_t tweak_fn, void *tweak_ctx,
+ 			       void *crypt_ctx, bool decrypt);
+ 
+-extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
+-				      le128 *iv, common_glue_func_t fn);
++extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst,
++				      const u8 *src, le128 *iv,
++				      common_glue_func_t fn);
+ 
+ #endif /* _CRYPTO_GLUE_HELPER_H */
+--- a/arch/x86/include/asm/crypto/serpent-avx.h
++++ b/arch/x86/include/asm/crypto/serpent-avx.h
+@@ -15,26 +15,26 @@ struct serpent_xts_ctx {
+ 	struct serpent_ctx crypt_ctx;
+ };
+ 
+-asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+-asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+ 
+-asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src);
+-asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+-				     const u8 *src, le128 *iv);
++asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
++				     le128 *iv);
+ 
+-asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src, le128 *iv);
+-asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
+ 					 const u8 *src, le128 *iv);
+ 
+-extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src,
+ 				le128 *iv);
+ 
+-extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
+-extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
++extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
++extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv);
+ 
+ extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ 			      unsigned int keylen);
+--- a/arch/x86/include/asm/crypto/serpent-sse2.h
++++ b/arch/x86/include/asm/crypto/serpent-sse2.h
+@@ -9,25 +9,23 @@
+ 
+ #define SERPENT_PARALLEL_BLOCKS 4
+ 
+-asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+ 				       const u8 *src, bool xor);
+-asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+ 				     const u8 *src);
+ 
+-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-					const u8 *src)
++static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__serpent_enc_blk_4way(ctx, dst, src, false);
+ }
+ 
+-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+-					    const u8 *src)
++static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
++					    u8 *dst, const u8 *src)
+ {
+ 	__serpent_enc_blk_4way(ctx, dst, src, true);
+ }
+ 
+-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-					const u8 *src)
++static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	serpent_dec_blk_4way(ctx, dst, src);
+ }
+@@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(
+ 
+ #define SERPENT_PARALLEL_BLOCKS 8
+ 
+-asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+ 				       const u8 *src, bool xor);
+-asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
++asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+ 				     const u8 *src);
+ 
+-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-				   const u8 *src)
++static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	__serpent_enc_blk_8way(ctx, dst, src, false);
+ }
+ 
+-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+-				       const u8 *src)
++static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
++					    u8 *dst, const u8 *src)
+ {
+ 	__serpent_enc_blk_8way(ctx, dst, src, true);
+ }
+ 
+-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+-				   const u8 *src)
++static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+ {
+ 	serpent_dec_blk_8way(ctx, dst, src);
+ }
+--- a/arch/x86/include/asm/crypto/twofish.h
++++ b/arch/x86/include/asm/crypto/twofish.h
+@@ -7,22 +7,19 @@
+ #include <crypto/b128ops.h>
+ 
+ /* regular block cipher functions from twofish_x86_64 module */
+-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+-				const u8 *src);
+-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+-				const u8 *src);
++asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
++asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+ 
+ /* 3-way parallel cipher functions */
+-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-				       const u8 *src, bool xor);
+-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+-				     const u8 *src);
++asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
++				       bool xor);
++asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
+ 
+ /* helpers from twofish_x86_64-3way module */
+-extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
+-extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
++extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
++extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src,
+ 				le128 *iv);
+-extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
++extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src,
+ 				     le128 *iv);
+ 
+ #endif /* ASM_X86_TWOFISH_H */
+--- a/crypto/cast6_generic.c
++++ b/crypto/cast6_generic.c
+@@ -154,7 +154,7 @@ int cast6_setkey(struct crypto_tfm *tfm,
+ EXPORT_SYMBOL_GPL(cast6_setkey);
+ 
+ /*forward quad round*/
+-static inline void Q(u32 *block, u8 *Kr, u32 *Km)
++static inline void Q(u32 *block, const u8 *Kr, const u32 *Km)
+ {
+ 	u32 I;
+ 	block[2] ^= F1(block[3], Kr[0], Km[0]);
+@@ -164,7 +164,7 @@ static inline void Q(u32 *block, u8 *Kr,
+ }
+ 
+ /*reverse quad round*/
+-static inline void QBAR(u32 *block, u8 *Kr, u32 *Km)
++static inline void QBAR(u32 *block, const u8 *Kr, const u32 *Km)
+ {
+ 	u32 I;
+ 	block[3] ^= F1(block[0], Kr[3], Km[3]);
+@@ -173,13 +173,14 @@ static inline void QBAR(u32 *block, u8 *
+ 	block[2] ^= F1(block[3], Kr[0], Km[0]);
+ }
+ 
+-void __cast6_encrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
++void __cast6_encrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
+ {
++	const struct cast6_ctx *c = ctx;
+ 	const __be32 *src = (const __be32 *)inbuf;
+ 	__be32 *dst = (__be32 *)outbuf;
+ 	u32 block[4];
+-	u32 *Km;
+-	u8 *Kr;
++	const u32 *Km;
++	const u8 *Kr;
+ 
+ 	block[0] = be32_to_cpu(src[0]);
+ 	block[1] = be32_to_cpu(src[1]);
+@@ -211,13 +212,14 @@ static void cast6_encrypt(struct crypto_
+ 	__cast6_encrypt(crypto_tfm_ctx(tfm), outbuf, inbuf);
+ }
+ 
+-void __cast6_decrypt(struct cast6_ctx *c, u8 *outbuf, const u8 *inbuf)
++void __cast6_decrypt(const void *ctx, u8 *outbuf, const u8 *inbuf)
+ {
++	const struct cast6_ctx *c = ctx;
+ 	const __be32 *src = (const __be32 *)inbuf;
+ 	__be32 *dst = (__be32 *)outbuf;
+ 	u32 block[4];
+-	u32 *Km;
+-	u8 *Kr;
++	const u32 *Km;
++	const u8 *Kr;
+ 
+ 	block[0] = be32_to_cpu(src[0]);
+ 	block[1] = be32_to_cpu(src[1]);
+--- a/crypto/serpent_generic.c
++++ b/crypto/serpent_generic.c
+@@ -449,8 +449,9 @@ int serpent_setkey(struct crypto_tfm *tf
+ }
+ EXPORT_SYMBOL_GPL(serpent_setkey);
+ 
+-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
++void __serpent_encrypt(const void *c, u8 *dst, const u8 *src)
+ {
++	const struct serpent_ctx *ctx = c;
+ 	const u32 *k = ctx->expkey;
+ 	const __le32 *s = (const __le32 *)src;
+ 	__le32	*d = (__le32 *)dst;
+@@ -514,8 +515,9 @@ static void serpent_encrypt(struct crypt
+ 	__serpent_encrypt(ctx, dst, src);
+ }
+ 
+-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)
++void __serpent_decrypt(const void *c, u8 *dst, const u8 *src)
+ {
++	const struct serpent_ctx *ctx = c;
+ 	const u32 *k = ctx->expkey;
+ 	const __le32 *s = (const __le32 *)src;
+ 	__le32	*d = (__le32 *)dst;
+--- a/include/crypto/cast6.h
++++ b/include/crypto/cast6.h
+@@ -19,7 +19,7 @@ int __cast6_setkey(struct cast6_ctx *ctx
+ 		   unsigned int keylen, u32 *flags);
+ int cast6_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+ 
+-void __cast6_encrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
+-void __cast6_decrypt(struct cast6_ctx *ctx, u8 *dst, const u8 *src);
++void __cast6_encrypt(const void *ctx, u8 *dst, const u8 *src);
++void __cast6_decrypt(const void *ctx, u8 *dst, const u8 *src);
+ 
+ #endif
+--- a/include/crypto/serpent.h
++++ b/include/crypto/serpent.h
+@@ -22,7 +22,7 @@ int __serpent_setkey(struct serpent_ctx
+ 		     unsigned int keylen);
+ int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen);
+ 
+-void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
+-void __serpent_decrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src);
++void __serpent_encrypt(const void *ctx, u8 *dst, const u8 *src);
++void __serpent_decrypt(const void *ctx, u8 *dst, const u8 *src);
+ 
+ #endif
+--- a/include/crypto/xts.h
++++ b/include/crypto/xts.h
+@@ -8,8 +8,6 @@
+ 
+ #define XTS_BLOCK_SIZE 16
+ 
+-#define XTS_TWEAK_CAST(x) ((void (*)(void *, u8*, const u8*))(x))
+-
+ static inline int xts_check_key(struct crypto_tfm *tfm,
+ 				const u8 *key, unsigned int keylen)
+ {
diff --git a/queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch b/queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch
new file mode 100644
index 00000000000..25fde3a928b
--- /dev/null
+++ b/queue-5.4/net-dsa-b53-support-setting-learning-on-port.patch
@@ -0,0 +1,107 @@
+From f9b3827ee66cfcf297d0acd6ecf33653a5f297ef Mon Sep 17 00:00:00 2001
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 22 Feb 2021 14:30:10 -0800
+Subject: net: dsa: b53: Support setting learning on port
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+commit f9b3827ee66cfcf297d0acd6ecf33653a5f297ef upstream.
+
+Add support for setting the learning attribute on a port, and
+make sure that the standalone ports start up with learning disabled.
+
+We can remove the code in bcm_sf2 that configured the ports' learning
+attribute because we want the standalone ports to have learning disabled
+by default and port 7 cannot be bridged, so its learning attribute will
+not change past its initial configuration.
+
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/dsa/b53/b53_common.c |   18 ++++++++++++++++++
+ drivers/net/dsa/b53/b53_regs.h   |    1 +
+ drivers/net/dsa/bcm_sf2.c        |    5 -----
+ 3 files changed, 19 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -514,6 +514,19 @@ void b53_imp_vlan_setup(struct dsa_switc
+ }
+ EXPORT_SYMBOL(b53_imp_vlan_setup);
+ 
++static void b53_port_set_learning(struct b53_device *dev, int port,
++				  bool learning)
++{
++	u16 reg;
++
++	b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, &reg);
++	if (learning)
++		reg &= ~BIT(port);
++	else
++		reg |= BIT(port);
++	b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg);
++}
++
+ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
+ {
+ 	struct b53_device *dev = ds->priv;
+@@ -527,6 +540,7 @@ int b53_enable_port(struct dsa_switch *d
+ 	cpu_port = ds->ports[port].cpu_dp->index;
+ 
+ 	b53_br_egress_floods(ds, port, true, true);
++	b53_port_set_learning(dev, port, false);
+ 
+ 	if (dev->ops->irq_enable)
+ 		ret = dev->ops->irq_enable(dev, port);
+@@ -645,6 +659,7 @@ static void b53_enable_cpu_port(struct b
+ 	b53_brcm_hdr_setup(dev->ds, port);
+ 
+ 	b53_br_egress_floods(dev->ds, port, true, true);
++	b53_port_set_learning(dev, port, false);
+ }
+ 
+ static void b53_enable_mib(struct b53_device *dev)
+@@ -1704,6 +1719,8 @@ int b53_br_join(struct dsa_switch *ds, i
+ 	b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan);
+ 	dev->ports[port].vlan_ctl_mask = pvlan;
+ 
++	b53_port_set_learning(dev, port, true);
++
+ 	return 0;
+ }
+ EXPORT_SYMBOL(b53_br_join);
+@@ -1751,6 +1768,7 @@ void b53_br_leave(struct dsa_switch *ds,
+ 		vl->untag |= BIT(port) | BIT(cpu_port);
+ 		b53_set_vlan_entry(dev, pvid, vl);
+ 	}
++	b53_port_set_learning(dev, port, false);
+ }
+ EXPORT_SYMBOL(b53_br_leave);
+ 
+--- a/drivers/net/dsa/b53/b53_regs.h
++++ b/drivers/net/dsa/b53/b53_regs.h
+@@ -115,6 +115,7 @@
+ #define B53_UC_FLOOD_MASK		0x32
+ #define B53_MC_FLOOD_MASK		0x34
+ #define B53_IPMC_FLOOD_MASK		0x36
++#define B53_DIS_LEARNING		0x3c
+ 
+ /*
+  * Override Ports 0-7 State on devices with xMII interfaces (8 bit)
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -172,11 +172,6 @@ static int bcm_sf2_port_setup(struct dsa
+ 	reg &= ~P_TXQ_PSM_VDD(port);
+ 	core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
+ 
+-	/* Enable learning */
+-	reg = core_readl(priv, CORE_DIS_LEARN);
+-	reg &= ~BIT(port);
+-	core_writel(priv, reg, CORE_DIS_LEARN);
+-
+ 	/* Enable Broadcom tags for that port if requested */
+ 	if (priv->brcm_tag_mask & BIT(port))
+ 		b53_brcm_hdr_setup(ds, port);
diff --git a/queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch b/queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
new file mode 100644
index 00000000000..18c1937fd6f
--- /dev/null
+++ b/queue-5.4/net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
@@ -0,0 +1,83 @@
+From 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 Mon Sep 17 00:00:00 2001
+From: DENG Qingfang <dqfext@gmail.com>
+Date: Tue, 2 Mar 2021 00:01:59 +0800
+Subject: net: dsa: tag_mtk: fix 802.1ad VLAN egress
+
+From: DENG Qingfang <dqfext@gmail.com>
+
+commit 9200f515c41f4cbaeffd8fdd1d8b6373a18b1b67 upstream.
+
+A different TPID bit is used for 802.1ad VLAN frames.
+
+Reported-by: Ilario Gelmetti <iochesonome@gmail.com>
+Fixes: f0af34317f4b ("net: dsa: mediatek: combine MediaTek tag with VLAN tag")
+Signed-off-by: DENG Qingfang <dqfext@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ net/dsa/tag_mtk.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/net/dsa/tag_mtk.c
++++ b/net/dsa/tag_mtk.c
+@@ -13,6 +13,7 @@
+ #define MTK_HDR_LEN		4
+ #define MTK_HDR_XMIT_UNTAGGED		0
+ #define MTK_HDR_XMIT_TAGGED_TPID_8100	1
++#define MTK_HDR_XMIT_TAGGED_TPID_88A8	2
+ #define MTK_HDR_RECV_SOURCE_PORT_MASK	GENMASK(2, 0)
+ #define MTK_HDR_XMIT_DP_BIT_MASK	GENMASK(5, 0)
+ #define MTK_HDR_XMIT_SA_DIS		BIT(6)
+@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(stru
+ 				    struct net_device *dev)
+ {
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
++	u8 xmit_tpid;
+ 	u8 *mtk_tag;
+-	bool is_vlan_skb = true;
+ 	unsigned char *dest = eth_hdr(skb)->h_dest;
+ 	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+ 				!is_broadcast_ether_addr(dest);
+@@ -33,13 +34,20 @@ static struct sk_buff *mtk_tag_xmit(stru
+ 	 * the both special and VLAN tag at the same time and then look up VLAN
+ 	 * table with VID.
+ 	 */
+-	if (!skb_vlan_tagged(skb)) {
++	switch (skb->protocol) {
++	case htons(ETH_P_8021Q):
++		xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
++		break;
++	case htons(ETH_P_8021AD):
++		xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
++		break;
++	default:
+ 		if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ 			return NULL;
+ 
++		xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
+ 		skb_push(skb, MTK_HDR_LEN);
+ 		memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+-		is_vlan_skb = false;
+ 	}
+ 
+ 	mtk_tag = skb->data + 2 * ETH_ALEN;
+@@ -47,8 +55,7 @@ static struct sk_buff *mtk_tag_xmit(stru
+ 	/* Mark tag attribute on special tag insertion to notify hardware
+ 	 * whether that's a combined special tag with 802.1Q header.
+ 	 */
+-	mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
+-		     MTK_HDR_XMIT_UNTAGGED;
++	mtk_tag[0] = xmit_tpid;
+ 	mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ 
+ 	/* Disable SA learning for multicast frames */
+@@ -56,7 +63,7 @@ static struct sk_buff *mtk_tag_xmit(stru
+ 		mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
+ 
+ 	/* Tag control information is kept for 802.1Q */
+-	if (!is_vlan_skb) {
++	if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
+ 		mtk_tag[2] = 0;
+ 		mtk_tag[3] = 0;
+ 	}
diff --git a/queue-5.4/series b/queue-5.4/series
index af350dad394..5db70c00658 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -11,3 +11,8 @@ drm-i915-gvt-fix-virtual-display-setup-for-bxt-apl.patch
 drm-i915-gvt-fix-port-number-for-bdw-on-edid-region-setup.patch
 drm-i915-gvt-fix-vfio_edid-issue-for-bxt-apl.patch
 fuse-fix-live-lock-in-fuse_iget.patch
+crypto-x86-regularize-glue-function-prototypes.patch
+crypto-aesni-use-test-reg-reg-instead-of-cmp-0-reg.patch
+crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch
+net-dsa-tag_mtk-fix-802.1ad-vlan-egress.patch
+net-dsa-b53-support-setting-learning-on-port.patch
-- 
2.47.3