return 1;
}
+/* ZSTD_selectAddr:
+ * @return a >= b ? trueAddr : falseAddr,
+ * tries to force branchless codegen. */
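+/* Example (with hypothetical pointers pA/pB):
+ * ZSTD_selectAddr(2, 1, pA, pB) == pA, while ZSTD_selectAddr(1, 2, pA, pB) == pB. */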
+MEM_STATIC const BYTE* ZSTD_selectAddr(U32 a, U32 b, const BYTE* trueAddr, const BYTE* falseAddr) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ (
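+ /* cmp sets flags from (b - a); cmova then overwrites trueAddr with
+ * falseAddr when b > a (unsigned), so trueAddr is returned iff a >= b */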
+ "cmp %1, %2\n"
+ "cmova %3, %0\n"
+ : "+r"(trueAddr)
+ : "r"(a), "r"(b), "r"(falseAddr)
+ );
+ return trueAddr;
+#else
+ return a >= b ? trueAddr : falseAddr;
+#endif
+}
+
/* ZSTD_noCompressBlock() :
* Writes uncompressed block to dst buffer from given src.
* Returns the size of the block */
U32 idxl1; /* the long match index for ip1 */
const BYTE* matchl0; /* the long match for ip */
+ const BYTE* matchl0_safe; /* matchl0 or safe address */
const BYTE* matchs0; /* the short match for ip */
const BYTE* matchl1; /* the long match for ip1 */
+ const BYTE* matchs0_safe; /* matchs0 or safe address */
const BYTE* ip = istart; /* the current position */
const BYTE* ip1; /* the next position */
+ /* Array of ~random data; it should have a low probability of matching
+ * actual input. We load from here instead of from the tables when
+ * matchl0/matchl1 are invalid indices, to avoid unpredictable branches.
+ * 10 bytes covers the widest load below (MEM_read64). */
+ const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
- if (idxl0 > prefixLowestIndex) {
- /* check prefix long match */
- if (MEM_read64(matchl0) == MEM_read64(ip)) {
- mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
- offset = (U32)(ip-matchl0);
- while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
- goto _match_found;
- }
+ /* idxl0 > prefixLowestIndex is a (somewhat) unpredictable branch.
+ * However, the expression below compiles into a conditional move.
+ * Since a match is unlikely, and we only *branch* on idxl0 > prefixLowestIndex
+ * when there is a match, all branches become predictable. */
+ matchl0_safe = ZSTD_selectAddr(prefixLowestIndex, idxl0, &dummy[0], matchl0);
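+ /* i.e. matchl0_safe = (idxl0 > prefixLowestIndex) ? matchl0 : &dummy[0] */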
+
+ /* check prefix long match */
+ if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
+ mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
+ offset = (U32)(ip-matchl0);
+ while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
+ goto _match_found;
}
idxl1 = hashLong[hl1];
matchl1 = base + idxl1;
- if (idxs0 > prefixLowestIndex) {
- /* check prefix short match */
- if (MEM_read32(matchs0) == MEM_read32(ip)) {
- goto _search_next_long;
- }
+ /* Same optimization as for matchl0 above */
+ matchs0_safe = ZSTD_selectAddr(prefixLowestIndex, idxs0, &dummy[0], matchs0);
+
+ /* check prefix short match */
+ if (MEM_read32(matchs0_safe) == MEM_read32(ip) && matchs0_safe == matchs0) {
+ goto _search_next_long;
}
if (ip1 >= nextStep) {
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
+ /* Array of ~random data; it should have a low probability of matching
+ * actual input. We load from here instead of from the tables when the
+ * index is invalid, to avoid unpredictable branches. */
+ const BYTE dummy[] = {0x12,0x34,0x56,0x78,0x9a,0xbc,0xde,0xf0,0xe2,0xb4};
+ const BYTE* mvalAddr; /* either base + idx, or dummy if idx is invalid */
const BYTE* anchor = istart;
const BYTE* ip0 = istart;
goto _match;
}
+ /* idx >= prefixStartIndex is a (somewhat) unpredictable branch.
+ * However, the expression below compiles into a conditional move.
+ * Since a match is unlikely, and we only *branch* on idx >= prefixStartIndex
+ * when there is a match, all branches become predictable. */
+ mvalAddr = base + idx;
+ mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, mvalAddr, &dummy[0]);
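+ /* i.e. mvalAddr = (idx >= prefixStartIndex) ? base + idx : &dummy[0] */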
+
/* load match for ip[0] */
- if (idx >= prefixStartIndex) {
- mval = MEM_read32(base + idx);
- } else {
- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
- }
+ mval = MEM_read32(mvalAddr);
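+ /* mval may have been read from the dummy array; it can still match the
+ * input by chance, so the check below re-validates idx before accepting */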
/* check match at ip[0] */
- if (MEM_read32(ip0) == mval) {
+ if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
/* found a match! */
/* First write next hash table entry; we've already calculated it. */
current0 = (U32)(ip0 - base);
hashTable[hash0] = current0;
+ mvalAddr = base + idx;
+ mvalAddr = ZSTD_selectAddr(idx, prefixStartIndex, mvalAddr, &dummy[0]);
+
/* load match for ip[0] */
- if (idx >= prefixStartIndex) {
- mval = MEM_read32(base + idx);
- } else {
- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
- }
+ mval = MEM_read32(mvalAddr);
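+ /* as above: idx is re-validated below to reject chance matches on dummy data */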
+
/* check match at ip[0] */
- if (MEM_read32(ip0) == mval) {
+ if (MEM_read32(ip0) == mval && idx >= prefixStartIndex) {
/* found a match! */
/* first write next hash table entry; we've already calculated it */