From ad98db97d2bec1670145f73fab9c484d71495dc2 Mon Sep 17 00:00:00 2001
From: Mariam Arutunian
Date: Fri, 3 Mar 2023 19:36:12 +0400
Subject: [PATCH] Changes in CRC code generation v4:

- Added table generation part.
- Added clmul, clmulr, clmulh insns.
- Removed UNSPEC_CRC16 from define_expand crc*.
- Modified gf2n_poly_long_div_quotient function.
---
Review notes (not part of the commit message):
 - clmulh<mode>3 matched (plus:X ...), so with Zbc enabled an ordinary
   addition could be emitted as clmulh and gen_rtx_CLMULH matched no
   insn.  It now matches (clmulh:X ...).
 - The clmul insn names carry <mode>, which the "clmul$a3" style optab
   names added by this patch expect, and operand 2 uses
   register_operand to agree with its "r" constraint.
 - define_expand "crcqihi4" keeps an operand vector; the RTL template
   of a define_expand cannot be omitted.
 - The CRC table label now encodes the polynomial, and the table is
   written to the read-only data section with a .half directive.
 - Still open (marked FIXME in the code): the Zbc instruction sequence
   and the table lookup, which does not index the table yet.
 - Indentation and blank context lines of this file were reconstructed
   from a whitespace-collapsed copy.  Regenerate the context and the
   index hashes against the tree before applying.

 gcc/config/riscv/bitmanip.md    | 72 +++++++++++++++++++++++++++----------
 gcc/config/riscv/riscv-protos.h |  2 ++
 gcc/config/riscv/riscv.cc       | 69 +++++++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md       |  2 --
 gcc/gimple-crc-optimization.cc  | 18 +++++-----
 gcc/optabs.def                  |  3 ++
 gcc/rtl.def                     |  5 +++
 7 files changed, 141 insertions(+), 30 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 2419b5a35cf8..c0e637fe9fe4 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -656,30 +656,64 @@
   operands[9] = GEN_INT (clearbit);
 })
 
+;; Zbc carry-less multiply patterns (clmul, clmulh, clmulr).
+(define_insn "clmul<mode>3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+        (clmul:X (match_operand:X 1 "register_operand" "r")
+                 (match_operand:X 2 "register_operand" "r")))]
+  "TARGET_ZBC"
+  "clmul\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
+
+(define_insn "clmulh<mode>3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+        (clmulh:X (match_operand:X 1 "register_operand" "r")
+                  (match_operand:X 2 "register_operand" "r")))]
+  "TARGET_ZBC"
+  "clmulh\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
+
+(define_insn "clmulr<mode>3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+        (clmulr:X (match_operand:X 1 "register_operand" "r")
+                  (match_operand:X 2 "register_operand" "r")))]
+  "TARGET_ZBC"
+  "clmulr\t%0,%1,%2"
+  [(set_attr "type" "bitmanip")])
+
 
 (define_expand "crcqihi4"
-[(set (match_operand:HI 0 "register_operand" "=r")
-(unspec:HI [(match_operand:HI 1) (match_operand:QI 2) (match_operand:HI 3)]
-UNSPEC_CRC16))]
+  ;; Operand numbers and modes are those of the removed unspec template.
+  [(match_operand:HI 0 "register_operand")
+   (match_operand:HI 1)
+   (match_operand:QI 2)
+   (match_operand:HI 3)]
 ;;
 ""
 {
-// FIXME: Note correct instruction sequence.
-rtx data = force_reg (SImode, gen_rtx_ASHIFT (SImode, operands[1],
-                                              GEN_INT (32)));
-
-rtx op3 = simplify_gen_subreg (SImode, operands[3], HImode, 0);
-rtx t2 = force_reg (SImode, gen_rtx_PLUS (SImode, data, op3)); // Must be CLMULH
-
-t2 = force_reg (SImode, gen_rtx_ASHIFT (SImode, t2, GEN_INT (16+1)));
-
-t2 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, t2, GEN_INT (48-1)));
-
-t2 = force_reg (SImode, gen_rtx_PLUS (SImode, data, t2)); // Must be CLMULH
-
-rtx tgt = simplify_gen_subreg (SImode, operands[0], HImode, 0);
-rtx crc = simplify_gen_subreg (SImode, operands[2], QImode, 0);
-emit_move_insn (tgt, gen_rtx_XOR (SImode, crc, t2));
+if (TARGET_ZBC)
+  {
+    // FIXME: Not the correct instruction sequence yet.  operands[1] is
+    // HImode but is used directly inside SImode rtxes, and the SImode
+    // shift count of 32 is not smaller than the width of the mode.
+    rtx data = force_reg (SImode, gen_rtx_ASHIFT (SImode, operands[1],
+                                                  GEN_INT (32)));
+
+    rtx op3 = simplify_gen_subreg (SImode, operands[3], HImode, 0);
+    rtx t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, op3));
+    t2 = force_reg (SImode, gen_rtx_ASHIFT (SImode, t2, GEN_INT (16+1)));
+    t2 = force_reg (SImode, gen_rtx_LSHIFTRT (SImode, t2, GEN_INT (48-1)));
+    t2 = force_reg (SImode, gen_rtx_CLMULH (SImode, data, t2));
+    rtx tgt = simplify_gen_subreg (SImode, operands[0], HImode, 0);
+    rtx crc = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+    emit_move_insn (tgt, gen_rtx_XOR (SImode, crc, t2));
+  }
+else
+  {
+    expand_crc_table_based (operands);
+  }
 
 DONE;
 })
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 88a6bf5442f6..fba6e5344d0d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -78,6 +78,8 @@ extern void riscv_reinit (void);
 extern poly_uint64 riscv_regmode_natural_size (machine_mode);
 extern bool riscv_v_ext_vector_mode_p (machine_mode);
 extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
+extern void expand_crc_table_based (rtx *operands);
+extern rtx generate_crc16_table (uint16_t);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index befb9b498b78..761bdddffa7d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6902,6 +6902,75 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
   return shamt == ctz_hwi (mask);
 }
 
+/* Run the 16 shift-and-conditional-XOR steps of a bitwise CRC-16, most
+   significant bit first, on CRC with POLYNOMIAL and return the
+   resulting 16-bit value.  */
+static uint16_t
+generate_crc (uint16_t crc, uint16_t polynomial)
+{
+  for (int bits = 16; bits > 0; --bits)
+    {
+      /* Remember the bit that is about to be shifted out.  */
+      bool msb_set = (crc & 0x8000) != 0;
+      crc <<= 1;
+      if (msb_set)
+        crc ^= polynomial;
+    }
+
+  return crc;
+}
+
+/* Return a SYMBOL_REF for the 256-entry lookup table of the 16-bit CRC
+   with polynomial POLYNOM, writing the table to the assembly output the
+   first time it is requested.  The label name encodes the polynomial so
+   that CRCs with different polynomials do not share one table.  */
+rtx
+generate_crc16_table (uint16_t polynom)
+{
+  FILE *out = asm_out_file;
+  char buf[sizeof "crc_table_for_ffff"];
+  snprintf (buf, sizeof buf, "crc_table_for_%04x", (unsigned) polynom);
+  tree id = maybe_get_identifier (buf);
+  if (id)
+    return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+  id = get_identifier (buf);
+  rtx lab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+
+  /* Emit the table as read-only data, aligned for 16-bit loads.  */
+  switch_to_section (readonly_data_section);
+  ASM_OUTPUT_ALIGN (out, 1);
+  ASM_OUTPUT_LABEL (out, buf);
+  const unsigned table_size = 256;
+  for (unsigned i = 0; i < table_size; i++)
+    {
+      unsigned HOST_WIDE_INT crc = generate_crc (i, polynom);
+      /* Eight entries per row; every row starts with its own .half.  */
+      if (i % 8 == 0)
+        fputs ("\t.half\t", out);
+      fprintf (out, HOST_WIDE_INT_PRINT_HEX, crc);
+      fputs (i % 8 != 7 ? ", " : "\n", out);
+    }
+  return lab;
+}
+
+/* Expand a CRC through the lookup table for the polynomial given in
+   OPERANDS[3]; OPERANDS[0] receives the result.
+   FIXME: The table is not indexed yet: the code below only XORs the
+   first table entry with OPERANDS[1].  */
+void
+expand_crc_table_based (rtx *operands)
+{
+  /* The table is computed at compile time, so the polynomial must be a
+     constant.  */
+  gcc_assert (CONST_INT_P (operands[3]));
+  rtx tab = generate_crc16_table (INTVAL (operands[3]));
+
+  machine_mode mode = GET_MODE (operands[0]);
+  tab = gen_rtx_MEM (mode, tab);
+  rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, operands[1]));
+  riscv_emit_move (operands[0], crc);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f9a62b5f88c8..6c3176042fbd 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -65,8 +65,6 @@
 
   ;; OR-COMBINE
   UNSPEC_ORC_B
-
-  UNSPEC_CRC16
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/gimple-crc-optimization.cc b/gcc/gimple-crc-optimization.cc
index c9bfd214cc38..56487a173b0a 100644
--- a/gcc/gimple-crc-optimization.cc
+++ b/gcc/gimple-crc-optimization.cc
@@ -1112,9 +1112,9 @@ unsigned HOST_WIDE_INT
 gf2n_poly_long_div_quotient (value *polynomial, bool is_left_shift)
 {
   vec x2n, pol, q;
-  size_t n = (*polynomial).length () * 2 + 1, m = (*polynomial).length () + 1;
-  x2n.create (n);
-  pol.create (m);
+  size_t n = (*polynomial).length () + 1;
+  x2n.create ((*polynomial).length () * 2 + 1);
+  pol.create (n);
 
   for (size_t i = 0; i < (*polynomial).length (); i++)
     {
@@ -1128,20 +1128,20 @@ gf2n_poly_long_div_quotient (value *polynomial, bool is_left_shift)
 
   pol.quick_push (1);
 
-  for (size_t i = 0; i < n - 1; i++)
+  for (size_t i = 0; i < (*polynomial).length () * 2; i++)
     x2n.quick_push (0);
   x2n.quick_push (1);
 
-  q.create (n - m + 1);
-  for (size_t i = 0; i < n - m + 1; i++)
+  q.create (n);
+  for (size_t i = 0; i < n; i++)
     q.quick_push (0);
 
-  for (int i = n - m; i >= 0; i--)
+  for (int i = n - 1; i >= 0; i--)
     {
-      int d = x2n[i + m - 1];
+      int d = x2n[i + n - 1];
       if (d == 0)
         continue;
-      for (int j = i + m - 1; j >= i; j--)
+      for (int j = i + n - 1; j >= i; j--)
         x2n[j] = x2n[j] ^ (pol[j - i] * d);
       q[i] = d;
     }
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 849607b6d116..4bbadaa53f67 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -101,6 +101,9 @@ OPTAB_CD(vec_init_optab, "vec_init$a$b")
 
 OPTAB_CD (while_ult_optab, "while_ult$a$b")
 
+OPTAB_NL(clmul_optab, "clmul$a3", CLMUL, "clmul", '3', NULL)
+OPTAB_NL(clmulh_optab, "clmulh$a3", CLMULH, "clmulh", '3', NULL)
+OPTAB_NL(clmulr_optab, "clmulr$a3", CLMULR, "clmulr", '3', NULL)
 OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
 OPTAB_NX(add_optab, "add$F$a3")
 OPTAB_NX(add_optab, "add$Q$a3")
diff --git a/gcc/rtl.def b/gcc/rtl.def
index 6ddbce380dcd..bf0476aa71f2 100644
--- a/gcc/rtl.def
+++ b/gcc/rtl.def
@@ -472,6 +472,11 @@ DEF_RTL_EXPR(SMUL_HIGHPART, "smul_highpart", "ee", RTX_COMM_ARITH)
 /* Unsigned high-part multiplication.  */
 DEF_RTL_EXPR(UMUL_HIGHPART, "umul_highpart", "ee", RTX_COMM_ARITH)
 
+/* Carry-less multiplication.  */
+DEF_RTL_EXPR(CLMUL, "clmul", "ee", RTX_BIN_ARITH)
+DEF_RTL_EXPR(CLMULH, "clmulh", "ee", RTX_BIN_ARITH)
+DEF_RTL_EXPR(CLMULR, "clmulr", "ee", RTX_BIN_ARITH)
+
 /* Operand 0 divided by operand 1.  */
 DEF_RTL_EXPR(DIV, "div", "ee", RTX_BIN_ARITH)
 /* Division with signed saturation */
-- 
2.47.2