[Minor] Backport fixes from t1ha
author    Vsevolod Stakhov <vsevolod@highsecure.ru>
          Tue, 22 May 2018 13:15:47 +0000 (14:15 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
          Tue, 22 May 2018 13:15:47 +0000 (14:15 +0100)
contrib/t1ha/t1ha1.c
contrib/t1ha/t1ha2.c
contrib/t1ha/t1ha_bits.h

diff --git a/contrib/t1ha/t1ha1.c b/contrib/t1ha/t1ha1.c
index 956f7e24e8a08f604a53169d26f71f2f297fa870..6e25d37f4b9678fc04cdb0900e8f5ab878fa7188 100644
--- a/contrib/t1ha/t1ha1.c
+++ b/contrib/t1ha/t1ha1.c
@@ -58,21 +58,21 @@ static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) {
   return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0);
 }
 
-/* TODO C++ template in the next version */
-#define T1HA1_BODY(ENDIANNES, ALIGNESS, DOCOPY)                                \
+/* TODO: C++ template in the next version */
+#define T1HA1_BODY(ENDIANNES, ALIGNESS)                                        \
+  const uint64_t *v = (const uint64_t *)data;                                  \
   if (unlikely(len > 32)) {                                                    \
     uint64_t c = rot64(len, 17) + seed;                                        \
     uint64_t d = len ^ rot64(seed, 17);                                        \
-    const void *detent = (const uint8_t *)data + len - 31;                     \
+    const uint64_t *detent =                                                   \
+        (const uint64_t *)((const uint8_t *)data + len - 31);                  \
     do {                                                                       \
-      const uint64_t *v = (const uint64_t *)data;                              \
-      if (DOCOPY)                                                              \
-        memcpy((void *)(v = align), data, 32);                                 \
-                                                                               \
       const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0);             \
       const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1);             \
       const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2);             \
       const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3);             \
+      v += 4;                                                                  \
+      prefetch(v);                                                             \
                                                                                \
       const uint64_t d02 = w0 ^ rot64(w2 + d, 17);                             \
       const uint64_t c13 = w1 ^ rot64(w3 + c, 17);                             \
@@ -80,18 +80,13 @@ static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) {
       d -= b ^ rot64(w1, 31);                                                  \
       a ^= prime_1 * (d02 + w3);                                               \
       b ^= prime_0 * (c13 + w2);                                               \
-      data = (const uint64_t *)data + 4;                                       \
-    } while (likely(data < detent));                                           \
+    } while (likely(v < detent));                                              \
                                                                                \
     a ^= prime_6 * (rot64(c, 17) + d);                                         \
     b ^= prime_5 * (c + rot64(d, 17));                                         \
     len &= 31;                                                                 \
   }                                                                            \
                                                                                \
-  const uint64_t *v = (const uint64_t *)data;                                  \
-  if (unlikely(need_copy4align) && len > 8)                                    \
-    memcpy((void *)(v = align), data, len);                                    \
-                                                                               \
   switch (len) {                                                               \
   default:                                                                     \
     b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4);                \
@@ -134,26 +129,30 @@ uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) {
   uint64_t a = seed;
   uint64_t b = len;
 
-  const bool need_copy4align =
-      (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
-  uint64_t align[4];
-  if (need_copy4align) {
-    T1HA1_BODY(le, aligned, true);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+  T1HA1_BODY(le, unaligned);
+#else
+  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+  if (misaligned) {
+    T1HA1_BODY(le, unaligned);
   } else {
-    T1HA1_BODY(le, unaligned, false);
+    T1HA1_BODY(le, aligned);
   }
+#endif
 }
 
 uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) {
   uint64_t a = seed;
   uint64_t b = len;
 
-  const bool need_copy4align =
-      (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
-  uint64_t align[4];
-  if (need_copy4align) {
-    T1HA1_BODY(be, aligned, true);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+  T1HA1_BODY(be, unaligned);
+#else
+  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+  if (misaligned) {
+    T1HA1_BODY(be, unaligned);
   } else {
-    T1HA1_BODY(be, unaligned, false);
+    T1HA1_BODY(be, aligned);
   }
-}
\ No newline at end of file
+#endif
+}
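
The pattern above recurs in every entry point touched by this backport: the stack buffer `align[4]` and its per-block memcpy are gone, the loop walks a `const uint64_t *v` cursor and prefetches the next block, and alignment is resolved once per call via the three-level T1HA_CONFIG_UNALIGNED_ACCESS knob defined in t1ha_bits.h below. A minimal standalone sketch of that dispatch, with hypothetical body_aligned()/body_unaligned() standing in for the T1HA1_BODY macro expansions (not the library's exact code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Same level values as introduced in t1ha_bits.h in this commit. */
#define T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE 0
#define T1HA_CONFIG_UNALIGNED_ACCESS__SLOW 1
#define T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT 2
#ifndef T1HA_CONFIG_UNALIGNED_ACCESS
#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__SLOW /* default for this sketch only */
#endif

/* Hypothetical stand-ins for the aligned/unaligned T1HA1_BODY expansions;
 * definitions would come from the real macro. */
uint64_t body_aligned(const void *data, size_t len, uint64_t seed);
uint64_t body_unaligned(const void *data, size_t len, uint64_t seed);

uint64_t hash_dispatch(const void *data, size_t len, uint64_t seed) {
#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
  /* Unaligned loads cost the same as aligned ones: skip the check entirely. */
  return body_unaligned(data, len, seed);
#else
  /* One branch per call replaces the old per-block memcpy into align[4]. */
  const bool misaligned = ((uintptr_t)data & (sizeof(uint64_t) - 1)) != 0;
  return misaligned ? body_unaligned(data, len, seed)
                    : body_aligned(data, len, seed);
#endif
}
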
diff --git a/contrib/t1ha/t1ha2.c b/contrib/t1ha/t1ha2.c
index 95f646da40efe76d2d3802daed4989a17a942b1a..4cb5281ee79564c29c921e4e1a550b18249ceefe 100644
--- a/contrib/t1ha/t1ha2.c
+++ b/contrib/t1ha/t1ha2.c
@@ -56,7 +56,7 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
   s->n.d = ~y + rot64(x, 19);
 }
 
-/* TODO C++ template in the next version */
+/* TODO: C++ template in the next version */
 #define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v)                            \
   do {                                                                         \
     t1ha_state256_t *const s = state;                                          \
@@ -67,8 +67,8 @@ static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
                                                                                \
     const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56);                          \
     const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19);                          \
-    s->n.d ^= s->n.b + rot64(w1, 38);                                          \
     s->n.c ^= s->n.a + rot64(w0, 57);                                          \
+    s->n.d ^= s->n.b + rot64(w1, 38);                                          \
     s->n.b ^= prime_6 * (c13 + w2);                                            \
     s->n.a ^= prime_5 * (d02 + w3);                                            \
   } while (0)
@@ -78,26 +78,23 @@ static __always_inline void squash(t1ha_state256_t *s) {
   s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
 }
 
-/* TODO C++ template in the next version */
-#define T1HA2_LOOP(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len)         \
+/* TODO: C++ template in the next version */
+#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len)                      \
   do {                                                                         \
     const void *detent = (const uint8_t *)data + len - 31;                     \
     do {                                                                       \
       const uint64_t *v = (const uint64_t *)data;                              \
-      if (BUFFER4COPY != NULL)                                                 \
-        memcpy((void *)(v = BUFFER4COPY), data, 32);                           \
-      T1HA2_UPDATE(le, unaligned, state, v);                                   \
       data = (const uint64_t *)data + 4;                                       \
+      prefetch(data);                                                          \
+      T1HA2_UPDATE(le, ALIGNESS, state, v);                                    \
     } while (likely(data < detent));                                           \
   } while (0)
 
-/* TODO C++ template in the next version */
-#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len)      \
+/* TODO: C++ template in the next version */
+#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len)                   \
   do {                                                                         \
     t1ha_state256_t *const s = state;                                          \
     const uint64_t *v = (const uint64_t *)data;                                \
-    if (BUFFER4COPY != NULL)                                                   \
-      memcpy((void *)(v = BUFFER4COPY), data, len);                            \
     switch (len) {                                                             \
     default:                                                                   \
       mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
@@ -141,13 +138,11 @@ static __always_inline void squash(t1ha_state256_t *s) {
     }                                                                          \
   } while (0)
 
-/* TODO C++ template in the next version */
-#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, BUFFER4COPY, state, data, len)    \
+/* TODO: C++ template in the next version */
+#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len)                 \
   do {                                                                         \
     t1ha_state256_t *const s = state;                                          \
     const uint64_t *v = (const uint64_t *)data;                                \
-    if (BUFFER4COPY != NULL)                                                   \
-      memcpy((void *)(v = BUFFER4COPY), data, len);                            \
     switch (len) {                                                             \
     default:                                                                   \
       mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++),         \
@@ -207,26 +202,34 @@ uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
   t1ha_state256_t state;
   init_ab(&state, seed, length);
 
-  const bool need_copy4align =
-      (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
-  if (need_copy4align) {
-    uint64_t buffer4align[4];
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+  if (unlikely(length > 32)) {
+    init_cd(&state, seed, length);
+    T1HA2_LOOP(le, unaligned, &state, data, length);
+    squash(&state);
+    length &= 31;
+  }
+  T1HA2_TAIL_AB(le, unaligned, &state, data, length);
+#else
+  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+  if (misaligned) {
     if (unlikely(length > 32)) {
       init_cd(&state, seed, length);
-      T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+      T1HA2_LOOP(le, unaligned, &state, data, length);
       squash(&state);
       length &= 31;
     }
-    T1HA2_TAIL_AB(le, aligned, buffer4align, &state, data, length);
+    T1HA2_TAIL_AB(le, unaligned, &state, data, length);
   } else {
     if (unlikely(length > 32)) {
       init_cd(&state, seed, length);
-      T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+      T1HA2_LOOP(le, aligned, &state, data, length);
       squash(&state);
       length &= 31;
     }
-    T1HA2_TAIL_AB(le, unaligned, NULL, &state, data, length);
+    T1HA2_TAIL_AB(le, aligned, &state, data, length);
   }
+#endif
 }
 
 uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
@@ -236,22 +239,28 @@ uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
   init_ab(&state, seed, length);
   init_cd(&state, seed, length);
 
-  const bool need_copy4align =
-      (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
-  if (need_copy4align) {
-    uint64_t buffer4align[4];
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+  if (unlikely(length > 32)) {
+    T1HA2_LOOP(le, unaligned, &state, data, length);
+    length &= 31;
+  }
+  T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
+#else
+  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+  if (misaligned) {
     if (unlikely(length > 32)) {
-      T1HA2_LOOP(le, aligned, buffer4align, &state, data, length);
+      T1HA2_LOOP(le, unaligned, &state, data, length);
       length &= 31;
     }
-    T1HA2_TAIL_ABCD(le, aligned, buffer4align, &state, data, length);
+    T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
   } else {
     if (unlikely(length > 32)) {
-      T1HA2_LOOP(le, unaligned, NULL, &state, data, length);
+      T1HA2_LOOP(le, aligned, &state, data, length);
       length &= 31;
     }
-    T1HA2_TAIL_ABCD(le, unaligned, NULL, &state, data, length);
+    T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
   }
+#endif
 }
 
 //------------------------------------------------------------------------------
@@ -283,13 +292,16 @@ void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
   }
 
   if (length >= 32) {
-    const bool need_copy4align =
-        (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0 && !UNALIGNED_OK;
-    if (need_copy4align) {
-      T1HA2_LOOP(le, aligned, ctx->buffer.u64, &ctx->state, data, length);
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+    T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
+#else
+    const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
+    if (misaligned) {
+      T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
     } else {
-      T1HA2_LOOP(le, unaligned, NULL, &ctx->state, data, length);
+      T1HA2_LOOP(le, aligned, &ctx->state, data, length);
     }
+#endif
     length &= 31;
   }
 
@@ -307,13 +319,8 @@ uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
 
   if (likely(!extra_result)) {
     squash(&ctx->state);
-    T1HA2_TAIL_AB(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
-                  ctx->partial);
-    return final64(ctx->state.n.a, ctx->state.n.b);
+    T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
   }
 
-  T1HA2_TAIL_ABCD(le, aligned, NULL, &ctx->state, ctx->buffer.u64,
-                  ctx->partial);
-  return final128(ctx->state.n.a, ctx->state.n.b, ctx->state.n.c,
-                  ctx->state.n.d, extra_result);
-}
\ No newline at end of file
+  T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
+}
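
For the streaming variant the same split applies: t1ha2_update() picks the aligned or unaligned loop per call, while t1ha2_final() always reads the internal ctx->buffer.u64, which is aligned by construction, so it keeps the `aligned` flavour. A usage sketch of that API follows; the t1ha2_init() prototype is not part of this diff and is assumed from t1ha.h:

#include <stdint.h>
#include "t1ha.h" /* t1ha_context_t, t1ha2_init/update/final */

/* Hash two buffers as one logical stream (sketch; seed values are arbitrary). */
uint64_t hash_two_chunks(const void *a, size_t alen, const void *b, size_t blen) {
  t1ha_context_t ctx;
  t1ha2_init(&ctx, /* seed_x */ 42, /* seed_y */ 0);
  t1ha2_update(&ctx, a, alen); /* may run the aligned or the unaligned loop */
  t1ha2_update(&ctx, b, blen); /* a partial tail stays buffered in the context */
  return t1ha2_final(&ctx, NULL); /* NULL extra_result -> 64-bit result */
}
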
diff --git a/contrib/t1ha/t1ha_bits.h b/contrib/t1ha/t1ha_bits.h
index e3815a4e77cf952051584d67f74ff7cf247d01cb..454e43aed6d87d02bdd4b78ee99adf2e6d5ca155 100644
--- a/contrib/t1ha/t1ha_bits.h
+++ b/contrib/t1ha/t1ha_bits.h
 #error Unsupported byte order.
 #endif
 
-#if !defined(UNALIGNED_OK)
-#if (defined(__ia32__) || defined(__e2k__) ||                                  \
-     defined(__ARM_FEATURE_UNALIGNED)) &&                                      \
-    !defined(__ALIGNED__)
-#define UNALIGNED_OK 1
+#define T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE 0
+#define T1HA_CONFIG_UNALIGNED_ACCESS__SLOW 1
+#define T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT 2
+
+#ifndef T1HA_CONFIG_UNALIGNED_ACCESS
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__ia32__)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
+#elif defined(__e2k__)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__SLOW
+#elif defined(__ARM_FEATURE_UNALIGNED)
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT
 #else
-#define UNALIGNED_OK 0
+#define T1HA_CONFIG_UNALIGNED_ACCESS T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
 #endif
-#endif /* UNALIGNED_OK */
-
-#if UNALIGNED_OK && !defined(PAGESIZE)
-#define PAGESIZE 4096
-#endif /* PAGESIZE */
+#endif /* T1HA_CONFIG_UNALIGNED_ACCESS */
 
 #define ALIGNMENT_16 2
 #define ALIGNMENT_32 4
 #define ALIGNMENT_64 4
 #endif
 
+#ifndef PAGESIZE
+#define PAGESIZE 4096
+#endif /* PAGESIZE */
+
 /***************************************************************************/
 
 #ifndef __has_builtin
 
 #if __GNUC_PREREQ(4, 4) || defined(__clang__)
 
+#if defined(__ia32__) || defined(__e2k__)
+#include <x86intrin.h>
+#endif
+
+#if defined(__ia32__) && !defined(__cpuid_count)
+#include <cpuid.h>
+#endif
 
 #if defined(__e2k__)
 #include <e2kbuiltin.h>
@@ -393,10 +408,10 @@ typedef struct {
 #endif /* read_unaligned */
 
 #ifndef read_aligned
-#if __has_builtin(assume_aligned)
+#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned)
 #define read_aligned(ptr, bits)                                                \
   (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits))
-#elif __has_attribute(aligned) && !defined(__clang__)
+#elif (__GNUC_PREREQ(3, 3) || __has_attribute(aligned)) && !defined(__clang__)
 #define read_aligned(ptr, bits)                                                \
   (*(const uint##bits##_t __attribute__((aligned(ALIGNMENT_##bits))) *)(ptr))
 #elif __has_attribute(assume_aligned)
@@ -427,6 +442,20 @@ static __always_inline const
 #endif
 #endif /* read_aligned */
 
+#ifndef prefetch
+#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) &&              \
+    !defined(__ia32__)
+#define prefetch(ptr) __builtin_prefetch(ptr)
+#elif defined(_M_ARM64) || defined(_M_ARM)
+#define prefetch(ptr) __prefetch(ptr)
+#else
+#define prefetch(ptr)                                                          \
+  do {                                                                         \
+    (void)(ptr);                                                               \
+  } while (0)
+#endif
+#endif /* prefetch */
+
 #if __has_warning("-Wconstant-logical-operand")
 #if defined(__clang__)
 #pragma clang diagnostic ignored "-Wconstant-logical-operand"
@@ -451,28 +480,33 @@ static __always_inline const
 
 /*---------------------------------------------------------- Little Endian */
 
-#ifndef fetch64_le_aligned
-static __always_inline uint64_t fetch64_le_aligned(const void *v) {
+#ifndef fetch16_le_aligned
+static __always_inline uint16_t fetch16_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-  return read_aligned(v, 64);
+  return read_aligned(v, 16);
 #else
-  return bswap64(read_aligned(v, 64));
+  return bswap16(read_aligned(v, 16));
 #endif
 }
-#endif /* fetch64_le_aligned */
+#endif /* fetch16_le_aligned */
 
-#ifndef fetch64_le_unaligned
-static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-  return read_unaligned(v, 64);
+#ifndef fetch16_le_unaligned
+static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  const uint8_t *p = (const uint8_t *)v;
+  return p[0] | (uint16_t)p[1] << 8;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return read_unaligned(v, 16);
 #else
-  return bswap64(read_unaligned(v, 64));
+  return bswap16(read_unaligned(v, 16));
 #endif
 }
-#endif /* fetch64_le_unaligned */
+#endif /* fetch16_le_unaligned */
 
 #ifndef fetch32_le_aligned
 static __always_inline uint32_t fetch32_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
   return read_aligned(v, 32);
 #else
@@ -483,7 +517,10 @@ static __always_inline uint32_t fetch32_le_aligned(const void *v) {
 
 #ifndef fetch32_le_unaligned
 static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  return fetch16_le_unaligned(v) |
+         (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
   return read_unaligned(v, 32);
 #else
   return bswap32(read_unaligned(v, 32));
@@ -491,25 +528,29 @@ static __always_inline uint32_t fetch32_le_unaligned(const void *v) {
 }
 #endif /* fetch32_le_unaligned */
 
-#ifndef fetch16_le_aligned
-static __always_inline uint16_t fetch16_le_aligned(const void *v) {
+#ifndef fetch64_le_aligned
+static __always_inline uint64_t fetch64_le_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-  return read_aligned(v, 16);
+  return read_aligned(v, 64);
 #else
-  return bswap16(read_aligned(v, 16));
+  return bswap64(read_aligned(v, 64));
 #endif
 }
-#endif /* fetch16_le_aligned */
+#endif /* fetch64_le_aligned */
 
-#ifndef fetch16_le_unaligned
-static __always_inline uint16_t fetch16_le_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-  return read_unaligned(v, 16);
+#ifndef fetch64_le_unaligned
+static __always_inline uint64_t fetch64_le_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  return fetch32_le_unaligned(v) |
+         (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  return read_unaligned(v, 64);
 #else
-  return bswap16(read_unaligned(v, 16));
+  return bswap64(read_unaligned(v, 64));
 #endif
 }
-#endif /* fetch16_le_unaligned */
+#endif /* fetch64_le_unaligned */
 
 static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
   const uint8_t *const p = (const uint8_t *)v;
@@ -517,10 +558,12 @@ static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
   /* We can perform a 'oneshot' read, which is little bit faster. */
   const unsigned shift = ((8 - tail) & 7) << 3;
   return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift);
-#endif /* 'oneshot' read */
-
+#else
   uint64_t r = 0;
   switch (tail & 7) {
+  default:
+    unreachable();
+/* fall through */
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
   /* For most CPUs this code is better when not needed byte reordering. */
   case 0:
@@ -577,14 +620,15 @@ static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) {
     return r + p[0];
 #endif
   }
-  unreachable();
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
 }
 
-#if T1HA_USE_FAST_ONESHOT_READ && UNALIGNED_OK && defined(PAGESIZE) &&         \
-    !defined(__SANITIZE_ADDRESS__) && !defined(__sun)
+#if T1HA_USE_FAST_ONESHOT_READ &&                                              \
+    T1HA_CONFIG_UNALIGNED_ACCESS != T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE &&    \
+    defined(PAGESIZE) && !defined(__sun) && !defined(__SANITIZE_ADDRESS__)
 #define can_read_underside(ptr, size)                                          \
   ((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0)
-#endif /* can_fast_read */
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
 
 static __always_inline uint64_t tail64_le_unaligned(const void *v,
                                                     size_t tail) {
@@ -600,11 +644,14 @@ static __always_inline uint64_t tail64_le_unaligned(const void *v,
     return fetch64_le_unaligned(p) >> shift;
   }
   return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift);
-#endif /* 'oneshot' read */
-
+#else
   uint64_t r = 0;
   switch (tail & 7) {
-#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  default:
+    unreachable();
+/* fall through */
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT && \
+    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
   /* For most CPUs this code is better when not needed
    * copying for alignment or byte reordering. */
   case 0:
@@ -663,36 +710,41 @@ static __always_inline uint64_t tail64_le_unaligned(const void *v,
     return r + p[0];
 #endif
   }
-  unreachable();
+#endif /* can_read_underside */
 }
 
 /*------------------------------------------------------------- Big Endian */
 
-#ifndef fetch64_be_aligned
-static __maybe_unused __always_inline uint64_t
-fetch64_be_aligned(const void *v) {
+#ifndef fetch16_be_aligned
+static __maybe_unused __always_inline uint16_t
+fetch16_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_16 == 0);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  return read_aligned(v, 64);
+  return read_aligned(v, 16);
 #else
-  return bswap64(read_aligned(v, 64));
+  return bswap16(read_aligned(v, 16));
 #endif
 }
-#endif /* fetch64_be_aligned */
+#endif /* fetch16_be_aligned */
 
-#ifndef fetch64_be_unaligned
-static __maybe_unused __always_inline uint64_t
-fetch64_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  return read_unaligned(v, 64);
+#ifndef fetch16_be_unaligned
+static __maybe_unused __always_inline uint16_t
+fetch16_be_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  const uint8_t *p = (const uint8_t *)v;
+  return (uint16_t)p[0] << 8 | p[1];
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return read_unaligned(v, 16);
 #else
-  return bswap64(read_unaligned(v, 64));
+  return bswap16(read_unaligned(v, 16));
 #endif
 }
-#endif /* fetch64_be_unaligned */
+#endif /* fetch16_be_unaligned */
 
 #ifndef fetch32_be_aligned
 static __maybe_unused __always_inline uint32_t
 fetch32_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_32 == 0);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   return read_aligned(v, 32);
 #else
@@ -704,7 +756,10 @@ fetch32_be_aligned(const void *v) {
 #ifndef fetch32_be_unaligned
 static __maybe_unused __always_inline uint32_t
 fetch32_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  return (uint32_t)fetch16_be_unaligned(v) << 16 |
+         fetch16_be_unaligned((const uint8_t *)v + 2);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   return read_unaligned(v, 32);
 #else
   return bswap32(read_unaligned(v, 32));
@@ -712,27 +767,31 @@ fetch32_be_unaligned(const void *v) {
 }
 #endif /* fetch32_be_unaligned */
 
-#ifndef fetch16_be_aligned
-static __maybe_unused __always_inline uint16_t
-fetch16_be_aligned(const void *v) {
+#ifndef fetch64_be_aligned
+static __maybe_unused __always_inline uint64_t
+fetch64_be_aligned(const void *v) {
+  assert(((uintptr_t)v) % ALIGNMENT_64 == 0);
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  return read_aligned(v, 16);
+  return read_aligned(v, 64);
 #else
-  return bswap16(read_aligned(v, 16));
+  return bswap64(read_aligned(v, 64));
 #endif
 }
-#endif /* fetch16_be_aligned */
+#endif /* fetch64_be_aligned */
 
-#ifndef fetch16_be_unaligned
-static __maybe_unused __always_inline uint16_t
-fetch16_be_unaligned(const void *v) {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-  return read_unaligned(v, 16);
+#ifndef fetch64_be_unaligned
+static __maybe_unused __always_inline uint64_t
+fetch64_be_unaligned(const void *v) {
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__UNABLE
+  return (uint64_t)fetch32_be_unaligned(v) << 32 |
+         fetch32_be_unaligned((const uint8_t *)v + 4);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return read_unaligned(v, 64);
 #else
-  return bswap16(read_unaligned(v, 16));
+  return bswap64(read_unaligned(v, 64));
 #endif
 }
-#endif /* fetch16_be_unaligned */
+#endif /* fetch64_be_unaligned */
 
 static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
                                                                  size_t tail) {
@@ -741,9 +800,11 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
   /* We can perform a 'oneshot' read, which is little bit faster. */
   const unsigned shift = ((8 - tail) & 7) << 3;
   return fetch64_be_aligned(p) >> shift;
-#endif /* 'oneshot' read */
-
+#else
   switch (tail & 7) {
+  default:
+    unreachable();
+/* fall through */
 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   /* For most CPUs this code is better when not byte reordering. */
   case 1:
@@ -762,7 +823,7 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
     return (uint64_t)fetch32_be_aligned(p) << 24 |
            (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6];
   case 0:
-    return fetch64_be(p);
+    return fetch64_be_aligned(p);
 #else
   case 1:
     return p[0];
@@ -789,7 +850,7 @@ static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v,
            (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
 #endif
   }
-  unreachable();
+#endif /* T1HA_USE_FAST_ONESHOT_READ */
 }
 
 static __maybe_unused __always_inline uint64_t
@@ -806,10 +867,13 @@ tail64_be_unaligned(const void *v, size_t tail) {
     return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift);
   }
   return fetch64_be_unaligned(p) >> shift;
-#endif /* 'oneshot' read */
-
+#else
   switch (tail & 7) {
-#if UNALIGNED_OK && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  default:
+    unreachable();
+/* fall through */
+#if T1HA_CONFIG_UNALIGNED_ACCESS == T1HA_CONFIG_UNALIGNED_ACCESS__EFFICIENT && \
+    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
   /* For most CPUs this code is better when not needed
    * copying for alignment or byte reordering. */
   case 1:
@@ -858,7 +922,7 @@ tail64_be_unaligned(const void *v, size_t tail) {
            (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56;
 #endif
   }
-  unreachable();
+#endif /* can_read_underside */
 }
 
 /***************************************************************************/
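
On targets where T1HA_CONFIG_UNALIGNED_ACCESS is __UNABLE, the new fetchNN_*_unaligned helpers above assemble each word from narrower unaligned fetches, bottoming out in byte loads, instead of copying into an aligned buffer. A standalone sketch of the same technique for a little-endian 64-bit read (not the library's exact code):

#include <stdint.h>

/* Byte-wise little-endian 64-bit load: endianness-independent and free of
 * unaligned accesses, so it is safe on strict-alignment CPUs. Compilers on
 * targets with fast unaligned loads typically fold this into a single load. */
static inline uint64_t load64_le_bytewise(const void *v) {
  const uint8_t *p = (const uint8_t *)v;
  return (uint64_t)p[0] | (uint64_t)p[1] << 8 | (uint64_t)p[2] << 16 |
         (uint64_t)p[3] << 24 | (uint64_t)p[4] << 32 | (uint64_t)p[5] << 40 |
         (uint64_t)p[6] << 48 | (uint64_t)p[7] << 56;
}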