From: Julian Seward Date: Wed, 3 Aug 2016 11:44:02 +0000 (+0000) Subject: Add test cases for v8 crypto instructions in 32-bit mode. Is not yet connected X-Git-Tag: svn/VALGRIND_3_12_0~101 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b8490fbbb91f264b99d1308f01bbf61d1eb022ad;p=thirdparty%2Fvalgrind.git Add test cases for v8 crypto instructions in 32-bit mode. Is not yet connected to the build/test system. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@15922 --- diff --git a/none/tests/arm/v8crypto.c b/none/tests/arm/v8crypto.c new file mode 100644 index 0000000000..17e79b38a6 --- /dev/null +++ b/none/tests/arm/v8crypto.c @@ -0,0 +1,250 @@ + +/* +gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8 +gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8 +*/ + +#include +#include +#include // memalign +#include // memset +#include "tests/malloc.h" +#include // isnormal + +typedef unsigned char UChar; +typedef unsigned short int UShort; +typedef unsigned int UInt; +typedef signed int Int; +typedef unsigned char UChar; +typedef unsigned long long int ULong; +typedef signed long long int Long; +typedef double Double; +typedef float Float; + +typedef unsigned char Bool; +#define False ((Bool)0) +#define True ((Bool)1) + + +#define ITERS 1 + +typedef + enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE } + LaneTy; + +union _V128 { + UChar u8[16]; + UShort u16[8]; + UInt u32[4]; + ULong u64[2]; + Float f32[4]; + Double f64[2]; +}; +typedef union _V128 V128; + +static inline UChar randUChar ( void ) +{ + static UInt seed = 80021; + seed = 1103515245 * seed + 12345; + return (seed >> 17) & 0xFF; +} + +static ULong randULong ( LaneTy ty ) +{ + Int i; + ULong r = 0; + for (i = 0; i < 8; i++) { + r = (r << 8) | (ULong)(0xFF & randUChar()); + } + return r; +} + +/* Generates a random V128. Ensures that that it contains normalised + FP numbers when viewed as either F32x4 or F64x2, so that it is + reasonable to use in FP test cases. */ +static void randV128 ( /*OUT*/V128* v, LaneTy ty ) +{ + static UInt nCalls = 0, nIters = 0; + Int i; + nCalls++; + while (1) { + nIters++; + for (i = 0; i < 16; i++) { + v->u8[i] = randUChar(); + } + if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2]) + && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1])) + break; + } + if (0 == (nCalls & 0xFF)) + printf("randV128: %u calls, %u iters\n", nCalls, nIters); +} + +static void showV128 ( V128* v ) +{ + Int i; + for (i = 15; i >= 0; i--) + printf("%02x", (Int)v->u8[i]); +} + +static void showBlock ( const char* msg, V128* block, Int nBlock ) +{ + Int i; + printf("%s\n", msg); + for (i = 0; i < nBlock; i++) { + printf(" "); + showV128(&block[i]); + printf("\n"); + } +} + + +/* ---------------------------------------------------------------- */ +/* -- Parameterisable test macros -- */ +/* ---------------------------------------------------------------- */ + +#define DO50(_action) \ + do { \ + Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \ + } while (0) + + +/* Generate a test that involves two vector regs, + with no bias as towards which is input or output. + It's OK to use r8 as scratch.*/ +#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \ + __attribute__((noinline)) \ + static void test_##TESTNAME ( LaneTy ty ) { \ + Int i; \ + for (i = 0; i < ITERS; i++) { \ + V128 block[4+1]; \ + memset(block, 0x55, sizeof(block)); \ + randV128(&block[0], ty); \ + randV128(&block[1], ty); \ + randV128(&block[2], ty); \ + randV128(&block[3], ty); \ + __asm__ __volatile__( \ + "mov r9, #0 ; vmsr fpscr, r9 ; " \ + "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \ + "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \ + INSN " ; " \ + "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \ + "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \ + "vmrs r9, fpscr ; str r9, [%0, #64] " \ + : : "r"(&block[0]) \ + : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \ + ); \ + printf(INSN " "); \ + UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \ + showV128(&block[0]); printf(" "); \ + showV128(&block[1]); printf(" "); \ + showV128(&block[2]); printf(" "); \ + showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \ + } \ + } + + +/* Generate a test that involves three vector regs, + with no bias as towards which is input or output. It's also OK + to use r8 scratch. */ +#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \ + __attribute__((noinline)) \ + static void test_##TESTNAME ( LaneTy ty ) { \ + Int i; \ + for (i = 0; i < ITERS; i++) { \ + V128 block[6+1]; \ + memset(block, 0x55, sizeof(block)); \ + randV128(&block[0], ty); \ + randV128(&block[1], ty); \ + randV128(&block[2], ty); \ + randV128(&block[3], ty); \ + randV128(&block[4], ty); \ + randV128(&block[5], ty); \ + __asm__ __volatile__( \ + "mov r9, #0 ; vmsr fpscr, r9 ; " \ + "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \ + "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \ + "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \ + INSN " ; " \ + "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \ + "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \ + "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \ + "vmrs r9, fpscr ; str r9, [%0, #96] " \ + : : "r"(&block[0]) \ + : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \ + "r8", "r9" \ + ); \ + printf(INSN " "); \ + UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \ + showV128(&block[0]); printf(" "); \ + showV128(&block[1]); printf(" "); \ + showV128(&block[2]); printf(" "); \ + showV128(&block[3]); printf(" "); \ + showV128(&block[4]); printf(" "); \ + showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \ + } \ + } + +// ======================== CRYPTO ======================== + +GEN_TWOVEC_TEST(aesd_q_q, "aesd.8 q3, q4", 3, 4) +GEN_TWOVEC_TEST(aese_q_q, "aese.8 q12, q13", 12, 13) +GEN_TWOVEC_TEST(aesimc_q_q, "aesimc.8 q15, q0", 15, 0) +GEN_TWOVEC_TEST(aesmc_q_q, "aesmc.8 q1, q9", 1, 9) + +GEN_THREEVEC_TEST(sha1c_q_q_q, "sha1c.32 q11, q10, q2", 11, 10, 2) +GEN_TWOVEC_TEST(sha1h_q_q, "sha1h.32 q6, q7", 6, 7) +GEN_THREEVEC_TEST(sha1m_q_q_q, "sha1m.32 q2, q8, q13", 2, 8, 13) +GEN_THREEVEC_TEST(sha1p_q_q_q, "sha1p.32 q3, q9, q14", 3, 9, 14) +GEN_THREEVEC_TEST(sha1su0_q_q_q, "sha1su0.32 q4, q10, q15", 4, 10, 15) +GEN_TWOVEC_TEST(sha1su1_q_q, "sha1su1.32 q11, q2", 11, 2) + +GEN_THREEVEC_TEST(sha256h2_q_q_q, "sha256h2.32 q9, q8, q7", 9, 8, 7) +GEN_THREEVEC_TEST(sha256h_q_q_q, "sha256h.32 q10, q9, q8", 10, 9, 8) +GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10) +GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10) + +// This is a bit complex. +//GEN_THREEVEC_TEST(pmull_q_d_d, 1q, 1d, 1d) + +int main ( void ) +{ + // ======================== CRYPTO ======================== + + // aesd.8 q_q (aes single round decryption) + // aese.8 q_q (aes single round encryption) + // aesimc.8 q_q (aes inverse mix columns) + // aesmc.8 q_q (aes mix columns) + if (1) DO50( test_aesd_q_q(TyNONE) ); + if (1) DO50( test_aese_q_q(TyNONE) ); + if (1) DO50( test_aesimc_q_q(TyNONE) ); + if (1) DO50( test_aesmc_q_q(TyNONE) ); + +#if 0 + // sha1c.32 q_q_q + // sha1h.32 q_q + // sha1m.32 q_q_q + // sha1p.32 q_q_q + // sha1su0.32 q_q_q + // sha1su1.32 q_q + if (1) DO50( test_sha1c_q_q_q(TyNONE) ); + if (1) DO50( test_sha1h_q_q(TyNONE) ); + if (1) DO50( test_sha1m_q_q_q(TyNONE) ); + if (1) DO50( test_sha1p_q_q_q(TyNONE) ); + if (1) DO50( test_sha1su0_q_q_q(TyNONE) ); + if (1) DO50( test_sha1su1_q_q(TyNONE) ); + + // sha256h2.32 q_q_q + // sha256h.32 q_q_q + // sha256su0.32 q_q + // sha256su1.32 q_q_q + if (1) DO50( test_sha256h2_q_q_q(TyNONE) ); + if (1) DO50( test_sha256h_q_q_q(TyNONE) ); + if (1) DO50( test_sha256su0_q_q(TyNONE) ); + if (1) DO50( test_sha256su1_q_q_q(TyNONE) ); + + // vmull.64 q_d_d + if (1) test_pmull_q_d_d(TyD); +#endif + return 0; +}