#ifndef _XOR_H
#define _XOR_H
-#define MAX_XOR_BLOCKS 4
-
-extern void xor_blocks(unsigned int count, unsigned int bytes,
- void *dest, void **srcs);
-
void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes);
#endif /* _XOR_H */
.end xor_alpha_prefetch_5 \n\
");
+DO_XOR_BLOCKS(alpha, xor_alpha_2, xor_alpha_3, xor_alpha_4, xor_alpha_5);
+
struct xor_block_template xor_block_alpha = {
- .name = "alpha",
- .do_2 = xor_alpha_2,
- .do_3 = xor_alpha_3,
- .do_4 = xor_alpha_4,
- .do_5 = xor_alpha_5,
+ .name = "alpha",
+ .xor_gen = xor_gen_alpha,
};
+DO_XOR_BLOCKS(alpha_prefetch, xor_alpha_prefetch_2, xor_alpha_prefetch_3,
+ xor_alpha_prefetch_4, xor_alpha_prefetch_5);
+
struct xor_block_template xor_block_alpha_prefetch = {
- .name = "alpha prefetch",
- .do_2 = xor_alpha_prefetch_2,
- .do_3 = xor_alpha_prefetch_3,
- .do_4 = xor_alpha_prefetch_4,
- .do_5 = xor_alpha_prefetch_5,
+ .name = "alpha prefetch",
+ .xor_gen = xor_gen_alpha_prefetch,
};
#include "xor_impl.h"
#include "xor_arch.h"
-extern struct xor_block_template const xor_block_neon_inner;
-
-static void
-xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- kernel_neon_begin();
- xor_block_neon_inner.do_2(bytes, p1, p2);
- kernel_neon_end();
-}
-
-static void
-xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- kernel_neon_begin();
- xor_block_neon_inner.do_3(bytes, p1, p2, p3);
- kernel_neon_end();
-}
-
-static void
-xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- kernel_neon_begin();
- xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
- kernel_neon_end();
-}
-
-static void
-xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
+static void xor_gen_neon(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
{
kernel_neon_begin();
- xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
+ xor_gen_neon_inner(dest, srcs, src_cnt, bytes);
kernel_neon_end();
}
struct xor_block_template xor_block_neon = {
- .name = "neon",
- .do_2 = xor_neon_2,
- .do_3 = xor_neon_3,
- .do_4 = xor_neon_4,
- .do_5 = xor_neon_5
+ .name = "neon",
+ .xor_gen = xor_gen_neon,
};
*/
#include "xor_impl.h"
+#include "xor_arch.h"
#ifndef __ARM_NEON__
#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
#define NO_TEMPLATE
#include "../xor-8regs.c"
-struct xor_block_template const xor_block_neon_inner = {
- .name = "__inner_neon__",
- .do_2 = xor_8regs_2,
- .do_3 = xor_8regs_3,
- .do_4 = xor_8regs_4,
- .do_5 = xor_8regs_5,
-};
+__DO_XOR_BLOCKS(neon_inner, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5);
} while (--lines);
}
+DO_XOR_BLOCKS(arm4regs, xor_arm4regs_2, xor_arm4regs_3, xor_arm4regs_4,
+ xor_arm4regs_5);
+
struct xor_block_template xor_block_arm4regs = {
- .name = "arm4regs",
- .do_2 = xor_arm4regs_2,
- .do_3 = xor_arm4regs_3,
- .do_4 = xor_arm4regs_4,
- .do_5 = xor_arm4regs_5,
+ .name = "arm4regs",
+ .xor_gen = xor_gen_arm4regs,
};
extern struct xor_block_template xor_block_arm4regs;
extern struct xor_block_template xor_block_neon;
+void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes);
+
static __always_inline void __init arch_xor_init(void)
{
xor_register(&xor_block_arm4regs);
#include "xor-neon.h"
#define XOR_TEMPLATE(_name) \
-static void \
-xor_##_name##_2(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2) \
+static void xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt, \
+ unsigned int bytes) \
{ \
scoped_ksimd() \
- __xor_##_name##_2(bytes, p1, p2); \
-} \
- \
-static void \
-xor_##_name##_3(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3) \
-{ \
- scoped_ksimd() \
- __xor_##_name##_3(bytes, p1, p2, p3); \
-} \
- \
-static void \
-xor_##_name##_4(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4) \
-{ \
- scoped_ksimd() \
- __xor_##_name##_4(bytes, p1, p2, p3, p4); \
-} \
- \
-static void \
-xor_##_name##_5(unsigned long bytes, unsigned long * __restrict p1, \
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4, \
- const unsigned long * __restrict p5) \
-{ \
- scoped_ksimd() \
- __xor_##_name##_5(bytes, p1, p2, p3, p4, p5); \
+ xor_gen_##_name##_inner(dest, srcs, src_cnt, bytes); \
} \
\
struct xor_block_template xor_block_##_name = { \
- .name = __stringify(_name), \
- .do_2 = xor_##_name##_2, \
- .do_3 = xor_##_name##_3, \
- .do_4 = xor_##_name##_4, \
- .do_5 = xor_##_name##_5 \
+ .name = __stringify(_name), \
+ .xor_gen = xor_gen_##_name, \
};
XOR_TEMPLATE(neon);
#include "xor_arch.h"
#include "xor-neon.h"
-void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
uint64_t *dp1 = (uint64_t *)p1;
} while (--lines > 0);
}
-void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
} while (--lines > 0);
}
-void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
} while (--lines > 0);
}
-void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4,
} while (--lines > 0);
}
+__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
+ __xor_neon_5);
+
static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
{
uint64x2_t res;
return res;
}
-void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
} while (--lines > 0);
}
-void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
} while (--lines > 0);
}
-void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
+static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4,
dp5 += 8;
} while (--lines > 0);
}
+
+__DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4,
+ __xor_eor3_5);
/* SPDX-License-Identifier: GPL-2.0-only */
-void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3);
-void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5);
-
-#define __xor_eor3_2 __xor_neon_2
-void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3);
-void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5);
+void xor_gen_neon_inner(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes);
+void xor_gen_eor3_inner(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes);
#include "xor_arch.h"
#include "xor_simd.h"
-#define MAKE_XOR_GLUE_2(flavor) \
-static void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\
- const unsigned long * __restrict p2) \
+#define MAKE_XOR_GLUES(flavor) \
+DO_XOR_BLOCKS(flavor##_inner, __xor_##flavor##_2, __xor_##flavor##_3, \
+ __xor_##flavor##_4, __xor_##flavor##_5); \
+ \
+static void xor_gen_##flavor(void *dest, void **srcs, unsigned int src_cnt, \
+ unsigned int bytes) \
{ \
kernel_fpu_begin(); \
- __xor_##flavor##_2(bytes, p1, p2); \
+ xor_gen_##flavor##_inner(dest, srcs, src_cnt, bytes); \
kernel_fpu_end(); \
} \
-
-#define MAKE_XOR_GLUE_3(flavor) \
-static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_3(bytes, p1, p2, p3); \
- kernel_fpu_end(); \
-} \
-
-#define MAKE_XOR_GLUE_4(flavor) \
-static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_4(bytes, p1, p2, p3, p4); \
- kernel_fpu_end(); \
-} \
-
-#define MAKE_XOR_GLUE_5(flavor) \
-static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\
- const unsigned long * __restrict p2, \
- const unsigned long * __restrict p3, \
- const unsigned long * __restrict p4, \
- const unsigned long * __restrict p5) \
-{ \
- kernel_fpu_begin(); \
- __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \
- kernel_fpu_end(); \
-} \
-
-#define MAKE_XOR_GLUES(flavor) \
- MAKE_XOR_GLUE_2(flavor); \
- MAKE_XOR_GLUE_3(flavor); \
- MAKE_XOR_GLUE_4(flavor); \
- MAKE_XOR_GLUE_5(flavor); \
- \
-struct xor_block_template xor_block_##flavor = { \
- .name = __stringify(flavor), \
- .do_2 = xor_##flavor##_2, \
- .do_3 = xor_##flavor##_3, \
- .do_4 = xor_##flavor##_4, \
- .do_5 = xor_##flavor##_5, \
+ \
+struct xor_block_template xor_block_##flavor = { \
+ .name = __stringify(flavor), \
+ .xor_gen = xor_gen_##flavor \
}
-
#ifdef CONFIG_CPU_HAS_LSX
MAKE_XOR_GLUES(lsx);
#endif /* CONFIG_CPU_HAS_LSX */
* Sparse (as at v0.5.0) gets very, very confused by this file.
* Make it a bit simpler for it.
*/
+#include "xor_impl.h"
#if !defined(__CHECKER__)
#include <altivec.h>
#else
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void __xor_altivec_2(unsigned long bytes,
- unsigned long * __restrict v1_in,
- const unsigned long * __restrict v2_in)
+static void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
{
DEFINE(v1);
DEFINE(v2);
} while (--lines > 0);
}
-void __xor_altivec_3(unsigned long bytes,
- unsigned long * __restrict v1_in,
- const unsigned long * __restrict v2_in,
- const unsigned long * __restrict v3_in)
+static void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
{
DEFINE(v1);
DEFINE(v2);
} while (--lines > 0);
}
-void __xor_altivec_4(unsigned long bytes,
- unsigned long * __restrict v1_in,
- const unsigned long * __restrict v2_in,
- const unsigned long * __restrict v3_in,
- const unsigned long * __restrict v4_in)
+static void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
{
DEFINE(v1);
DEFINE(v2);
} while (--lines > 0);
}
-void __xor_altivec_5(unsigned long bytes,
- unsigned long * __restrict v1_in,
- const unsigned long * __restrict v2_in,
- const unsigned long * __restrict v3_in,
- const unsigned long * __restrict v4_in,
- const unsigned long * __restrict v5_in)
+static void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
{
DEFINE(v1);
DEFINE(v2);
v5 += 4;
} while (--lines > 0);
}
+
+__DO_XOR_BLOCKS(altivec_inner, __xor_altivec_2, __xor_altivec_3,
+ __xor_altivec_4, __xor_altivec_5);
* outside of the enable/disable altivec block.
*/
-void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2);
-void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3);
-void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4);
-void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5);
+void xor_gen_altivec_inner(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes);
#include "xor_arch.h"
#include "xor_vmx.h"
-static void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
+static void xor_gen_altivec(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
{
preempt_disable();
enable_kernel_altivec();
- __xor_altivec_2(bytes, p1, p2);
- disable_kernel_altivec();
- preempt_enable();
-}
-
-static void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- preempt_disable();
- enable_kernel_altivec();
- __xor_altivec_3(bytes, p1, p2, p3);
- disable_kernel_altivec();
- preempt_enable();
-}
-
-static void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- preempt_disable();
- enable_kernel_altivec();
- __xor_altivec_4(bytes, p1, p2, p3, p4);
- disable_kernel_altivec();
- preempt_enable();
-}
-
-static void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- preempt_disable();
- enable_kernel_altivec();
- __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
+ xor_gen_altivec_inner(dest, srcs, src_cnt, bytes);
disable_kernel_altivec();
preempt_enable();
}
struct xor_block_template xor_block_altivec = {
- .name = "altivec",
- .do_2 = xor_altivec_2,
- .do_3 = xor_altivec_3,
- .do_4 = xor_altivec_4,
- .do_5 = xor_altivec_5,
+ .name = "altivec",
+ .xor_gen = xor_gen_altivec,
};
#include "xor_impl.h"
#include "xor_arch.h"
-static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
- const unsigned long *__restrict p2)
-{
- kernel_vector_begin();
- xor_regs_2_(bytes, p1, p2);
- kernel_vector_end();
-}
-
-static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
- const unsigned long *__restrict p2,
- const unsigned long *__restrict p3)
-{
- kernel_vector_begin();
- xor_regs_3_(bytes, p1, p2, p3);
- kernel_vector_end();
-}
-
-static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
- const unsigned long *__restrict p2,
- const unsigned long *__restrict p3,
- const unsigned long *__restrict p4)
-{
- kernel_vector_begin();
- xor_regs_4_(bytes, p1, p2, p3, p4);
- kernel_vector_end();
-}
+DO_XOR_BLOCKS(vector_inner, xor_regs_2_, xor_regs_3_, xor_regs_4_, xor_regs_5_);
-static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
- const unsigned long *__restrict p2,
- const unsigned long *__restrict p3,
- const unsigned long *__restrict p4,
- const unsigned long *__restrict p5)
+static void xor_gen_vector(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
{
kernel_vector_begin();
- xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+ xor_gen_vector_inner(dest, srcs, src_cnt, bytes);
kernel_vector_end();
}
struct xor_block_template xor_block_rvv = {
- .name = "rvv",
- .do_2 = xor_vector_2,
- .do_3 = xor_vector_3,
- .do_4 = xor_vector_4,
- .do_5 = xor_vector_5
+ .name = "rvv",
+ .xor_gen = xor_gen_vector,
};
: : "0", "cc", "memory");
}
+DO_XOR_BLOCKS(xc, xor_xc_2, xor_xc_3, xor_xc_4, xor_xc_5);
+
struct xor_block_template xor_block_xc = {
- .name = "xc",
- .do_2 = xor_xc_2,
- .do_3 = xor_xc_3,
- .do_4 = xor_xc_4,
- .do_5 = xor_xc_5,
+ .name = "xc",
+ .xor_gen = xor_gen_xc,
};
} while (--lines > 0);
}
+DO_XOR_BLOCKS(sparc32, sparc_2, sparc_3, sparc_4, sparc_5);
+
struct xor_block_template xor_block_SPARC = {
- .name = "SPARC",
- .do_2 = sparc_2,
- .do_3 = sparc_3,
- .do_4 = sparc_4,
- .do_5 = sparc_5,
+ .name = "SPARC",
+ .xor_gen = xor_gen_sparc32,
};
/* XXX Ugh, write cheetah versions... -DaveM */
+DO_XOR_BLOCKS(vis, xor_vis_2, xor_vis_3, xor_vis_4, xor_vis_5);
+
struct xor_block_template xor_block_VIS = {
- .name = "VIS",
- .do_2 = xor_vis_2,
- .do_3 = xor_vis_3,
- .do_4 = xor_vis_4,
- .do_5 = xor_vis_5,
+ .name = "VIS",
+ .xor_gen = xor_gen_vis,
};
void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4,
const unsigned long * __restrict p5);
+DO_XOR_BLOCKS(niagara, xor_niagara_2, xor_niagara_3, xor_niagara_4,
+ xor_niagara_5);
+
struct xor_block_template xor_block_niagara = {
- .name = "Niagara",
- .do_2 = xor_niagara_2,
- .do_3 = xor_niagara_3,
- .do_4 = xor_niagara_4,
- .do_5 = xor_niagara_5,
+ .name = "Niagara",
+ .xor_gen = xor_gen_niagara,
};
{
unsigned long lines = bytes >> 9;
- kernel_fpu_begin();
-
while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
p0 = (unsigned long *)((uintptr_t)p0 + 512);
p1 = (unsigned long *)((uintptr_t)p1 + 512);
}
-
- kernel_fpu_end();
}
static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
{
unsigned long lines = bytes >> 9;
- kernel_fpu_begin();
-
while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
p1 = (unsigned long *)((uintptr_t)p1 + 512);
p2 = (unsigned long *)((uintptr_t)p2 + 512);
}
-
- kernel_fpu_end();
}
static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
{
unsigned long lines = bytes >> 9;
- kernel_fpu_begin();
-
while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
p2 = (unsigned long *)((uintptr_t)p2 + 512);
p3 = (unsigned long *)((uintptr_t)p3 + 512);
}
-
- kernel_fpu_end();
}
static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
{
unsigned long lines = bytes >> 9;
- kernel_fpu_begin();
-
while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
p3 = (unsigned long *)((uintptr_t)p3 + 512);
p4 = (unsigned long *)((uintptr_t)p4 + 512);
}
+}
+
+DO_XOR_BLOCKS(avx_inner, xor_avx_2, xor_avx_3, xor_avx_4, xor_avx_5);
+static void xor_gen_avx(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
+{
+ kernel_fpu_begin();
+ xor_gen_avx_inner(dest, srcs, src_cnt, bytes);
kernel_fpu_end();
}
struct xor_block_template xor_block_avx = {
- .name = "avx",
- .do_2 = xor_avx_2,
- .do_3 = xor_avx_3,
- .do_4 = xor_avx_4,
- .do_5 = xor_avx_5,
+ .name = "avx",
+ .xor_gen = xor_gen_avx,
};
{
unsigned long lines = bytes >> 7;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
"+r" (p1), "+r" (p2)
:
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 7;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
"+r" (p1), "+r" (p2), "+r" (p3)
:
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 7;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
"+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
:
: "memory");
-
- kernel_fpu_end();
}
{
unsigned long lines = bytes >> 7;
- kernel_fpu_begin();
-
/* Make sure GCC forgets anything it knows about p4 or p5,
such that it won't pass to the asm volatile below a
register that is shared with any other variable. That's
Clobber them just to be sure nobody does something stupid
like assuming they have some legal value. */
asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
}
#undef LD
{
unsigned long lines = bytes >> 6;
- kernel_fpu_begin();
-
asm volatile(
" .align 32 ;\n"
" 1: ;\n"
"+r" (p1), "+r" (p2)
:
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 6;
- kernel_fpu_begin();
-
asm volatile(
" .align 32,0x90 ;\n"
" 1: ;\n"
"+r" (p1), "+r" (p2), "+r" (p3)
:
: "memory" );
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 6;
- kernel_fpu_begin();
-
asm volatile(
" .align 32,0x90 ;\n"
" 1: ;\n"
"+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
:
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 6;
- kernel_fpu_begin();
-
/* Make sure GCC forgets anything it knows about p4 or p5,
such that it won't pass to the asm volatile below a
register that is shared with any other variable. That's
Clobber them just to be sure nobody does something stupid
like assuming they have some legal value. */
asm("" : "=r" (p4), "=r" (p5));
+}
+
+DO_XOR_BLOCKS(pII_mmx_inner, xor_pII_mmx_2, xor_pII_mmx_3, xor_pII_mmx_4,
+ xor_pII_mmx_5);
+static void xor_gen_pII_mmx(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
+{
+ kernel_fpu_begin();
+ xor_gen_pII_mmx_inner(dest, srcs, src_cnt, bytes);
kernel_fpu_end();
}
struct xor_block_template xor_block_pII_mmx = {
- .name = "pII_mmx",
- .do_2 = xor_pII_mmx_2,
- .do_3 = xor_pII_mmx_3,
- .do_4 = xor_pII_mmx_4,
- .do_5 = xor_pII_mmx_5,
+ .name = "pII_mmx",
+ .xor_gen = xor_gen_pII_mmx,
};
+/* Build the static chunked dispatcher xor_gen_p5_mmx_inner() from the
+ * raw 2..5-source p5 MMX XOR routines above. */
+DO_XOR_BLOCKS(p5_mmx_inner, xor_p5_mmx_2, xor_p5_mmx_3, xor_p5_mmx_4,
+	      xor_p5_mmx_5);
+
+/* Glue: run the MMX core with the FPU state saved and restored once per
+ * call instead of once per 2..5-way pass. */
+static void xor_gen_p5_mmx(void *dest, void **srcs, unsigned int src_cnt,
+			   unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_p5_mmx_inner(dest, srcs, src_cnt, bytes);
+	kernel_fpu_end();
+}
+
struct xor_block_template xor_block_p5_mmx = {
- .name = "p5_mmx",
- .do_2 = xor_p5_mmx_2,
- .do_3 = xor_p5_mmx_3,
- .do_4 = xor_p5_mmx_4,
- .do_5 = xor_p5_mmx_5,
+ .name = "p5_mmx",
+ .xor_gen = xor_gen_p5_mmx,
};
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p1] "+r" (p1), [p2] "+r" (p2)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p1] "+r" (p1), [p2] "+r" (p2)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
-
- kernel_fpu_end();
}
static void
{
unsigned long lines = bytes >> 8;
- kernel_fpu_begin();
-
asm volatile(
#undef BLOCK
#define BLOCK(i) \
[p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
: [inc] XOR_CONSTANT_CONSTRAINT (256UL)
: "memory");
+}
+
+DO_XOR_BLOCKS(sse_inner, xor_sse_2, xor_sse_3, xor_sse_4, xor_sse_5);
+static void xor_gen_sse(void *dest, void **srcs, unsigned int src_cnt,
+ unsigned int bytes)
+{
+ kernel_fpu_begin();
+ xor_gen_sse_inner(dest, srcs, src_cnt, bytes);
kernel_fpu_end();
}
struct xor_block_template xor_block_sse = {
- .name = "sse",
- .do_2 = xor_sse_2,
- .do_3 = xor_sse_3,
- .do_4 = xor_sse_4,
- .do_5 = xor_sse_5,
+ .name = "sse",
+ .xor_gen = xor_gen_sse,
};
+/* Build the static chunked dispatcher xor_gen_sse_pf64_inner() from the
+ * prefetch64 SSE 2..5-source XOR routines. */
+DO_XOR_BLOCKS(sse_pf64_inner, xor_sse_2_pf64, xor_sse_3_pf64, xor_sse_4_pf64,
+	      xor_sse_5_pf64);
+
+/* Glue: run the SSE core with the FPU state saved and restored once per
+ * call instead of once per 2..5-way pass. */
+static void xor_gen_sse_pf64(void *dest, void **srcs, unsigned int src_cnt,
+			     unsigned int bytes)
+{
+	kernel_fpu_begin();
+	xor_gen_sse_pf64_inner(dest, srcs, src_cnt, bytes);
+	kernel_fpu_end();
+}
+
struct xor_block_template xor_block_sse_pf64 = {
- .name = "prefetch64-sse",
- .do_2 = xor_sse_2_pf64,
- .do_3 = xor_sse_3_pf64,
- .do_4 = xor_sse_4_pf64,
- .do_5 = xor_sse_5_pf64,
+ .name = "prefetch64-sse",
+ .xor_gen = xor_gen_sse_pf64,
};
goto once_more;
}
+DO_XOR_BLOCKS(32regs_p, xor_32regs_p_2, xor_32regs_p_3, xor_32regs_p_4,
+ xor_32regs_p_5);
+
struct xor_block_template xor_block_32regs_p = {
- .name = "32regs_prefetch",
- .do_2 = xor_32regs_p_2,
- .do_3 = xor_32regs_p_3,
- .do_4 = xor_32regs_p_4,
- .do_5 = xor_32regs_p_5,
+ .name = "32regs_prefetch",
+ .xor_gen = xor_gen_32regs_p,
};
} while (--lines > 0);
}
+DO_XOR_BLOCKS(32regs, xor_32regs_2, xor_32regs_3, xor_32regs_4, xor_32regs_5);
+
struct xor_block_template xor_block_32regs = {
- .name = "32regs",
- .do_2 = xor_32regs_2,
- .do_3 = xor_32regs_3,
- .do_4 = xor_32regs_4,
- .do_5 = xor_32regs_5,
+ .name = "32regs",
+ .xor_gen = xor_gen_32regs,
};
goto once_more;
}
+
+DO_XOR_BLOCKS(8regs_p, xor_8regs_p_2, xor_8regs_p_3, xor_8regs_p_4,
+ xor_8regs_p_5);
+
struct xor_block_template xor_block_8regs_p = {
- .name = "8regs_prefetch",
- .do_2 = xor_8regs_p_2,
- .do_3 = xor_8regs_p_3,
- .do_4 = xor_8regs_p_4,
- .do_5 = xor_8regs_p_5,
+ .name = "8regs_prefetch",
+ .xor_gen = xor_gen_8regs_p,
};
}
#ifndef NO_TEMPLATE
+DO_XOR_BLOCKS(8regs, xor_8regs_2, xor_8regs_3, xor_8regs_4, xor_8regs_5);
+
struct xor_block_template xor_block_8regs = {
- .name = "8regs",
- .do_2 = xor_8regs_2,
- .do_3 = xor_8regs_3,
- .do_4 = xor_8regs_4,
- .do_5 = xor_8regs_5,
+ .name = "8regs",
+ .xor_gen = xor_gen_8regs,
};
#endif /* NO_TEMPLATE */
#include <linux/preempt.h>
#include "xor_impl.h"
-/* The xor routines to use. */
+/* The xor routine to use. */
static struct xor_block_template *active_template;
-void
-xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
-{
- unsigned long *p1, *p2, *p3, *p4;
-
- WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
-
- p1 = (unsigned long *) srcs[0];
- if (src_count == 1) {
- active_template->do_2(bytes, dest, p1);
- return;
- }
-
- p2 = (unsigned long *) srcs[1];
- if (src_count == 2) {
- active_template->do_3(bytes, dest, p1, p2);
- return;
- }
-
- p3 = (unsigned long *) srcs[2];
- if (src_count == 3) {
- active_template->do_4(bytes, dest, p1, p2, p3);
- return;
- }
-
- p4 = (unsigned long *) srcs[3];
- active_template->do_5(bytes, dest, p1, p2, p3, p4);
-}
-EXPORT_SYMBOL(xor_blocks);
-
/**
* xor_gen - generate RAID-style XOR information
* @dest: destination vector
*/
void xor_gen(void *dest, void **srcs, unsigned int src_cnt, unsigned int bytes)
{
-	unsigned int src_off = 0;
-
-	WARN_ON_ONCE(in_interrupt());
+	/* Templates may enable FPU/SIMD, so require plain task context. */
+	WARN_ON_ONCE(!in_task() || irqs_disabled() || softirq_count());
	WARN_ON_ONCE(bytes == 0);
	WARN_ON_ONCE(bytes & 511);
-	while (src_cnt > 0) {
-		unsigned int this_cnt = min(src_cnt, MAX_XOR_BLOCKS);
-
-		xor_blocks(this_cnt, bytes, dest, srcs + src_off);
-
-		src_cnt -= this_cnt;
-		src_off += this_cnt;
-	}
+	/* Chunking over MAX_XOR_BLOCKS now happens inside the template. */
+	active_template->xor_gen(dest, srcs, src_cnt, bytes);
}
EXPORT_SYMBOL(xor_gen);
int speed;
unsigned long reps;
ktime_t min, start, t0;
+ void *srcs[1] = { b2 };
preempt_disable();
cpu_relax();
do {
mb(); /* prevent loop optimization */
- tmpl->do_2(BENCH_SIZE, b1, b2);
+ tmpl->xor_gen(b1, srcs, 1, BENCH_SIZE);
mb();
} while (reps++ < REPS || (t0 = ktime_get()) == start);
min = ktime_sub(t0, start);
#define _XOR_IMPL_H
#include <linux/init.h>
+#include <linux/minmax.h>
struct xor_block_template {
	struct xor_block_template *next;
	const char *name;
	int speed;
-	void (*do_2)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_3)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_4)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
-	void (*do_5)(unsigned long, unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict,
-		     const unsigned long * __restrict);
+	/* XOR @src_cnt source buffers into @dest, @bytes per buffer;
+	 * replaces the old fixed-arity do_2..do_5 hooks. */
+	void (*xor_gen)(void *dest, void **srcs, unsigned int src_cnt,
+			unsigned int bytes);
};
+/*
+ * Expand to a xor_gen_<name>() implementation that XORs @src_cnt source
+ * buffers into @dest (@bytes each) by dispatching to the 1..4-source
+ * handlers, looping in chunks of at most four sources (the limit that
+ * used to be MAX_XOR_BLOCKS).
+ */
+#define __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)	\
+void									\
+xor_gen_##_name(void *dest, void **srcs, unsigned int src_cnt,		\
+		unsigned int bytes)					\
+{									\
+	unsigned int src_off = 0;					\
+									\
+	while (src_cnt > 0) {						\
+		/* 4U keeps the type-checked min() happy vs. unsigned */\
+		unsigned int this_cnt = min(src_cnt, 4U);		\
+									\
+		if (this_cnt == 1)					\
+			_handle1(bytes, dest, srcs[src_off]);		\
+		else if (this_cnt == 2)					\
+			_handle2(bytes, dest, srcs[src_off],		\
+				 srcs[src_off + 1]);			\
+		else if (this_cnt == 3)					\
+			_handle3(bytes, dest, srcs[src_off],		\
+				 srcs[src_off + 1], srcs[src_off + 2]);	\
+		else							\
+			_handle4(bytes, dest, srcs[src_off],		\
+				 srcs[src_off + 1], srcs[src_off + 2],	\
+				 srcs[src_off + 3]);			\
+									\
+		src_cnt -= this_cnt;					\
+		src_off += this_cnt;					\
+	}								\
+}
+
+/* Static variant for dispatchers that are only used within one file. */
+#define DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)	\
+	static __DO_XOR_BLOCKS(_name, _handle1, _handle2, _handle3, _handle4)
+
/* generic implementations */
extern struct xor_block_template xor_block_8regs;
extern struct xor_block_template xor_block_32regs;