libitm/config/x86/cacheline.h

   1 /* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
   2    Contributed by Richard Henderson <rth@redhat.com>.
   3
   4    This file is part of the GNU Transactional Memory Library (libitm).
   5
   6    Libitm is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
  12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14    more details.
  15
  16    Under Section 7 of GPL version 3, you are granted additional
  17    permissions described in the GCC Runtime Library Exception, version
  18    3.1, as published by the Free Software Foundation.
  19
  20    You should have received a copy of the GNU General Public License and
  21    a copy of the GCC Runtime Library Exception along with this program;
  22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23    <http://www.gnu.org/licenses/>.  */
  24
  25 #ifndef LIBITM_CACHELINE_H
  26 #define LIBITM_CACHELINE_H 1
  27
  28 // Minimum cacheline size is 32, due to both complex long double and __m256.
  29 // There's no requirement that 64-bit use a 64-byte cacheline size, but do
  30 // so for now to make sure everything is parameterized properly.
  31 #ifdef __x86_64__
  32 # define CACHELINE_SIZE 64
  33 #else
  34 # define CACHELINE_SIZE 32
  35 #endif
  36
  37 namespace GTM HIDDEN {
  38
  39 // A gtm_cacheline_mask stores a modified bit for every modified byte
  40 // in the cacheline with which it is associated.
  41 typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
  42
  43 union gtm_cacheline
  44 {
  45   // Byte access to the cacheline.
  46   unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE)));
  47
  48   // Larger sized access to the cacheline.
  49   uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)];
  50   uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
  51   uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
  52   gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
  53
  54 #ifdef __MMX__
  55   __m64 m64[CACHELINE_SIZE / sizeof(__m64)];
  56 #endif
  57 #ifdef __SSE__
  58   __m128 m128[CACHELINE_SIZE / sizeof(__m128)];
  59 #endif
  60 #ifdef __SSE2__
  61   __m128i m128i[CACHELINE_SIZE / sizeof(__m128i)];
  62 #endif
  63 #ifdef __AVX__
  64   __m256 m256[CACHELINE_SIZE / sizeof(__m256)];
  65   __m256i m256i[CACHELINE_SIZE / sizeof(__m256i)];
  66 #endif
  67
  68 #if defined(__SSE__) || defined(__AVX__)
  69   // Copy S to D; only bother defining if we can do this more efficiently
  70   // than the compiler-generated default implementation.
  71   gtm_cacheline& operator= (const gtm_cacheline &s);
  72 #endif // SSE, AVX
  73 };
  74
  75 #if defined(__SSE__) || defined(__AVX__)
  76 inline gtm_cacheline& ALWAYS_INLINE
  77 gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
  78 {
  79 #ifdef __AVX__
  80 # define CP     m256
  81 # define TYPE   __m256
  82 #else
  83 # define CP     m128
  84 # define TYPE   __m128
  85 #endif
  86
  87   TYPE w, x, y, z;
  88
  89   // ??? Wouldn't it be nice to have a pragma to tell the compiler
  90   // to completely unroll a given loop?
  91   switch (CACHELINE_SIZE / sizeof(TYPE))
  92     {
  93     case 1:
  94       this->CP[0] = s.CP[0];
  95       break;
  96     case 2:
  97       x = s.CP[0];
  98       y = s.CP[1];
  99       this->CP[0] = x;
 100       this->CP[1] = y;
 101       break;
 102     case 4:
 103       w = s.CP[0];
 104       x = s.CP[1];
 105       y = s.CP[2];
 106       z = s.CP[3];
 107       this->CP[0] = w;
 108       this->CP[1] = x;
 109       this->CP[2] = y;
 110       this->CP[3] = z;
 111       break;
 112     default:
 113       __builtin_trap ();
 114     }
 115
 116   return *this;
 117
 118 #undef CP
 119 #undef TYPE
 120 }
 121 #endif
 122
 123 } // namespace GTM
 124
 125 #endif // LIBITM_CACHELINE_H