libxfs: use crc32c slice-by-8 variant by default

author Darrick J. Wong <darrick.wong@oracle.com>

Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)

committer Eric Sandeen <sandeen@redhat.com>

Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)
author Darrick J. Wong <darrick.wong@oracle.com>
Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)
committer Eric Sandeen <sandeen@redhat.com>
Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)
diff --git a/libxfs/Makefile b/libxfs/Makefile

index baba02f037805c62bf6644fc1d0c771db9c75e6b..d248c1fc65aec7f4175425d2c69d24a973438274 100644 (file)
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -122,7 +122,7 @@ LDIRT = gen_crc32table crc32table.h crc32selftest
  
  default: crc32selftest ltdepend $(LTLIBRARY)
  
-crc32table.h: gen_crc32table.c
+crc32table.h: gen_crc32table.c crc32defs.h
         @echo "    [CC]     gen_crc32table"
         $(Q) $(BUILD_CC) $(BUILD_CFLAGS) -o gen_crc32table $<
         @echo "    [GENERATE] $@"
@@ -133,7 +133,7 @@ crc32table.h: gen_crc32table.c
  # systems/architectures. Hence we make sure that xfsprogs will never use a
  # busted CRC calculation at build time and hence avoid putting bad CRCs down on
  # disk.
-crc32selftest: gen_crc32table.c crc32table.h crc32.c
+crc32selftest: gen_crc32table.c crc32table.h crc32.c crc32defs.h
         @echo "    [TEST]    CRC32"
         $(Q) $(BUILD_CC) $(BUILD_CFLAGS) -D CRC32_SELFTEST=1 crc32.c -o $@
         $(Q) ./$@
diff --git a/libxfs/crc32defs.h b/libxfs/crc32defs.h

index 64cba2c3c7008fa1341cc28129c2c0796b935d25..2999782e27d0dcdbe34cdc08714b335a9ab94a3d 100644 (file)
--- a/libxfs/crc32defs.h
+++ b/libxfs/crc32defs.h
@@ -1,3 +1,37 @@
+/*
+ * Use slice-by-8, which is the fastest variant.
+ *
+ * Calculate checksum 8 bytes at a time with a clever slicing algorithm.
+ * This is the fastest algorithm, but comes with an 8KiB lookup table.
+ * Most modern processors have enough cache to hold this table without
+ * thrashing the cache.
+ *
+ * The Linux kernel uses this as the default implementation "unless you
+ * have a good reason not to".  Kconfig urges you to pick SLICEBY8
+ * because, when people challenged the assertion that we should always
+ * use slice by 8, Darrick wrote a crc microbenchmark utility and ran it
+ * on as many machines as he could get his hands on, and the results
+ * showed that sb8 (slice by 8) was the fastest.
+ *
+ * Every 64-bit machine (and most of the 32-bit ones too) saw the best
+ * results with sb8.  Any machine with more than 4K of cache saw better
+ * results.  The spreadsheet still exists today[1]; note that
+ * 'crc32-kern-le' corresponds to the slice by 4 algorithm which is the
+ * default unless CRC_LE_BITS is defined explicitly.
+ *
+ * FWIW, there are a handful of board defconfigs in the kernel that
+ * don't pick sliceby8.  These are all embedded 32-bit mips/ppc systems
+ * with very small cache sizes which experience cache thrashing with the
+ * slice by 8 algorithm, and therefore chose to pick defaults that are
+ * saner for their particular board configuration.  For nearly all of
+ * XFS' perceived userbase (which we assume are 32 and 64-bit machines
+ * with sufficiently large CPU cache and largeish storage devices) slice
+ * by 8 is the right choice.
+ *
+ * [1] https://goo.gl/0LSzsG ("crc32c_bench")
+ */
+#define CRC_LE_BITS 64
+
  /*
   * There are multiple 16-bit CRC polynomials in common use, but this is
   * *the* standard CRC-32 polynomial, first popularized by Ethernet.
author	Darrick J. Wong <darrick.wong@oracle.com>
	Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)
committer	Eric Sandeen <sandeen@redhat.com>
	Wed, 21 Jun 2017 22:14:30 +0000 (17:14 -0500)
libxfs/Makefile		patch \| blob \| blame \| history
libxfs/crc32defs.h		patch \| blob \| blame \| history