Many more comments. Use a vec<bool> when we only care about 0/1.

author Jeff Law <jlaw@ventanamicro>

Wed, 22 Mar 2023 04:45:51 +0000 (22:45 -0600)

committer Jeff Law <jlaw@ventanamicro>

Wed, 22 Mar 2023 04:46:50 +0000 (22:46 -0600)
author Jeff Law <jlaw@ventanamicro>
Wed, 22 Mar 2023 04:45:51 +0000 (22:45 -0600)
committer Jeff Law <jlaw@ventanamicro>
Wed, 22 Mar 2023 04:46:50 +0000 (22:46 -0600)
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md

index bcd5302f740df4a2ffe495b7804043564d887922..fffd8c6ca50464af8b15b7fb365f1555869c36d1 100644 (file)
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -702,7 +702,19 @@
        rtx t1 = force_reg (word_mode, operands[3]);
        a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1));
        a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg));
+
+      /* XXX By adjusting Q we may be able to eliminate this shift.  The size
+         of this shift seems to be dependent on the size of the CRC
+         output (aka N in N-bit CRC).  */
        a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16)));
+
+      /* CCC By adjusting operands[3] (which should be a constant) we may
+         be able to utilize CLMULH to get the bits in the right place and
+         avoid the shifts to extract the bitfield.   If that is not possible
+         the shifts will still be needed and are dependent on input/output
+         sizes as well.   Does adjusting the constant and shift counts
+         result in a constant that is more likely to bt synthesized in a
+         single instruction?  */
        a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1));
        a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24)));
        a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24)));
@@ -718,7 +730,13 @@
    else
      {
        /* If we do not have the ZBC extension (ie, no clmul), then
-         use a table based algorithm to implement the CRC.  */
+         use a table based algorithm to implement the CRC. 
+
+         XXX What is the size of each element in this table and
+         how many entries are in the table?  Does the element
+         size or number of elements vary based on the input or
+         output types for the CRC function?   If so, do we need
+         to restrict it to only be used for certain modes?  */
        expand_crc_table_based (operands, QImode);
      }
  
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc

index 842fe3f7902e79063702c64cc5a1fa39e59d744b..3fc6b9c54be76507a36f7dcf6e8133060ef25eb3 100644 (file)
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6902,19 +6902,21 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
    return shamt == ctz_hwi (mask);
  }
  
-/* Calculate the quotient of polynomial long division of x^2n by the polynomial
+/* Return the quotient of polynomial long division of x^2n by POLYNOMIAL
     in GF (2^n).  */
  
  unsigned HOST_WIDE_INT
  gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
  {
-  vec<short> x2n, pol, q;
+  vec<bool>pol;
+  vec<short> x2n, q;
+
    // Create vector of bits, for the polynomial.
    pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
    size_t n = 1;
    for ( ; polynomial; n++)
      {
-      pol.quick_push (polynomial&1);
+      pol.quick_push (polynomial & 1);
        polynomial >>= 1;
      }
    pol.quick_push (1);
@@ -6923,6 +6925,8 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
    x2n.create (2 * n - 1);
    for (size_t i = 0; i < 2 * (n - 1); i++)
      x2n.safe_push (0);
+
+  /* Is this the implicit bit on at the top of the poly?  */
    x2n.safe_push (1);
  
    q.create (n);
@@ -6952,6 +6956,9 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
  }
  
  /* Calculates reciprocal CRC for initial CRC and given polynomial.  */
+/* XXX Is this needed?  It's not referenced anywhere. 
+   If it is needed, it needs to be generalized rather than only
+   working on uint16_t.  */
  static uint16_t
  generate_crc_reciprocal (uint16_t crc,
                          uint16_t polynomial)
@@ -6967,6 +6974,8 @@ generate_crc_reciprocal (uint16_t crc,
  }
  
  /* Calculates CRC for initial CRC and given polynomial.  */
+/* XXX This needs to be generalized rather than only working
+   on uint16_t.  */
  static uint16_t
  generate_crc (uint16_t crc,
               uint16_t polynomial)
@@ -6983,6 +6992,19 @@ generate_crc (uint16_t crc,
  }
  
  /* Generates 16-bit CRC table.  */
+/* XXX This needs to be generalized rather than only working
+   on uint16_t.
+
+   This looks like it tries to share tables which is good, but
+   don't we have to verify that the polynomial and sizes are the
+   same for sharing to be safe?  Doesn't that in turn argue that
+   the polynomial and size should be encoded into the table
+   name? 
+
+   Presumably the table should be going into a readonly data
+   section.  It doesn't look like we make any attempt to switch
+   sections.  Mixing code and data is exceedingly bad on
+   modern processors.  */
  rtx
  generate_crc16_table (uint16_t polynom)
  {
@@ -7016,27 +7038,43 @@ generate_crc16_table (uint16_t polynom)
    return lab;
  }
  
-void reflect (rtx op1, machine_mode mode)
+/* XXX Is this needed?  It's not referenced anywhere.   */
+void
+reflect (rtx op1, machine_mode mode)
  {
    // Reflect the bits
    op1 = gen_rtx_BSWAP (mode, op1);
  
-// Adjust the position of the reflected bits
+  // Adjust the position of the reflected bits
+  /* XXX I don't understand the comment.  Under what
+     conditions does mode != Pmode?  */
    if (mode != Pmode)
      op1 = gen_rtx_SUBREG (Pmode, op1, 0);
  
-// Shift the reflected bits to the least significant end
+  // Shift the reflected bits to the least significant end
+  /* XXX This seems to assume we're always dealing with
+     a 16bit quantity.  */
    rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8);
    op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt);
+
+  /* XXX This routine is going to have no impact if it was
+     ever used.  Changing OP1 above isn't reflected into
+     the caller.  */
  }
  
  /* Generate table based CRC code.  */
+/* XXX This doesn't seem to be used.  Is it the case that we're
+   eventually going to need to distinguish between a bit-reflected
+   CRC and a normal CRC for table based variants?  If so, doesn't
+   that need to be in the operands for the .CRC IFN?  */
  void
-expand_crc_table_based_reflected (rtx *operands,  machine_mode data_mode)
+expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
  {
    machine_mode mode = GET_MODE (operands[0]);
    rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
    rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+  /* XXX Under what conditions will mode != Pmode be true?  Is this an
+     artifact of having the modes wrong for the crc expander?  */
    if (mode != Pmode)
      ix = gen_rtx_SUBREG (Pmode, ix, 0);
    ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@@ -7048,6 +7086,15 @@ expand_crc_table_based_reflected (rtx *operands,  machine_mode data_mode)
    rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
                                GEN_INT (data_mode));
    rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+  /* XXX In general we prefer to avoid SUBREGs, especially
+     paradoxical subregs (outer mode is wider than inner mode).
+
+     It should be possible to replace a paradoxical subreg with
+     a sign or zero extension.
+
+     If this is a narrowing subreg, then gen_lowpart might be
+     better.  */
    riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
  }
  
@@ -7060,6 +7107,8 @@ expand_crc_table_based (rtx *operands,  machine_mode data_mode)
                                GEN_INT (8));
    rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2]));
    rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+  /* XXX Under what conditions will mode != Pmode be true?  Is this an
+     artifact of having the modes wrong for the crc expander?  */
    if (mode != Pmode)
      ix = gen_rtx_SUBREG (Pmode, ix, 0);
    ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@@ -7072,6 +7121,10 @@ expand_crc_table_based (rtx *operands,  machine_mode data_mode)
                                GEN_INT (8));
    high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535)));
    rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+  /* Why is this different than the reflected version above?  Doesn't
+     it have the same potential concers WRT mismatched modes between
+     these two objects?  */
    riscv_emit_move (operands[0], crc);
  }
author	Jeff Law <jlaw@ventanamicro>
	Wed, 22 Mar 2023 04:45:51 +0000 (22:45 -0600)
committer	Jeff Law <jlaw@ventanamicro>
	Wed, 22 Mar 2023 04:46:50 +0000 (22:46 -0600)
gcc/config/riscv/bitmanip.md		patch \| blob \| blame \| history
gcc/config/riscv/riscv.cc		patch \| blob \| blame \| history