arch/sh/include/asm/unaligned-sh4a.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef __ASM_SH_UNALIGNED_SH4A_H
   3 #define __ASM_SH_UNALIGNED_SH4A_H
   4
   5 /*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed
 * struct.
  11  *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
  15  *
  16  * NOTE: Due to the fact we require r0 encoding, care should be taken to
  17  * avoid mixing these heavily with other r0 consumers, such as the atomic
  18  * ops. Failure to adhere to this can result in the compiler running out
  19  * of spill registers and blowing up when building at low optimization
  20  * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
  21  */
  22 #include <linux/unaligned/packed_struct.h>
  23 #include <linux/types.h>
  24 #include <asm/byteorder.h>
  25
  26 static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
  27 {
  28 #ifdef __LITTLE_ENDIAN
  29         return p[0] | p[1] << 8;
  30 #else
  31         return p[0] << 8 | p[1];
  32 #endif
  33 }
  34
  35 static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
  36 {
  37         unsigned long unaligned;
  38
  39         __asm__ __volatile__ (
  40                 "movua.l        @%1, %0\n\t"
  41                  : "=z" (unaligned)
  42                  : "r" (p)
  43         );
  44
  45         return unaligned;
  46 }
  47
  48 /*
  49  * Even though movua.l supports auto-increment on the read side, it can
  50  * only store to r0 due to instruction encoding constraints, so just let
  51  * the compiler sort it out on its own.
  52  */
  53 static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
  54 {
  55 #ifdef __LITTLE_ENDIAN
  56         return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
  57                     sh4a_get_unaligned_cpu32(p);
  58 #else
  59         return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
  60                     sh4a_get_unaligned_cpu32(p + 4);
  61 #endif
  62 }
  63
  64 static inline u16 get_unaligned_le16(const void *p)
  65 {
  66         return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
  67 }
  68
  69 static inline u32 get_unaligned_le32(const void *p)
  70 {
  71         return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
  72 }
  73
  74 static inline u64 get_unaligned_le64(const void *p)
  75 {
  76         return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
  77 }
  78
  79 static inline u16 get_unaligned_be16(const void *p)
  80 {
  81         return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
  82 }
  83
  84 static inline u32 get_unaligned_be32(const void *p)
  85 {
  86         return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
  87 }
  88
  89 static inline u64 get_unaligned_be64(const void *p)
  90 {
  91         return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
  92 }
  93
  94 static inline void nonnative_put_le16(u16 val, u8 *p)
  95 {
  96         *p++ = val;
  97         *p++ = val >> 8;
  98 }
  99
 100 static inline void nonnative_put_le32(u32 val, u8 *p)
 101 {
 102         nonnative_put_le16(val, p);
 103         nonnative_put_le16(val >> 16, p + 2);
 104 }
 105
 106 static inline void nonnative_put_le64(u64 val, u8 *p)
 107 {
 108         nonnative_put_le32(val, p);
 109         nonnative_put_le32(val >> 32, p + 4);
 110 }
 111
 112 static inline void nonnative_put_be16(u16 val, u8 *p)
 113 {
 114         *p++ = val >> 8;
 115         *p++ = val;
 116 }
 117
 118 static inline void nonnative_put_be32(u32 val, u8 *p)
 119 {
 120         nonnative_put_be16(val >> 16, p);
 121         nonnative_put_be16(val, p + 2);
 122 }
 123
 124 static inline void nonnative_put_be64(u64 val, u8 *p)
 125 {
 126         nonnative_put_be32(val >> 32, p);
 127         nonnative_put_be32(val, p + 4);
 128 }
 129
 130 static inline void put_unaligned_le16(u16 val, void *p)
 131 {
 132 #ifdef __LITTLE_ENDIAN
 133         __put_unaligned_cpu16(val, p);
 134 #else
 135         nonnative_put_le16(val, p);
 136 #endif
 137 }
 138
 139 static inline void put_unaligned_le32(u32 val, void *p)
 140 {
 141 #ifdef __LITTLE_ENDIAN
 142         __put_unaligned_cpu32(val, p);
 143 #else
 144         nonnative_put_le32(val, p);
 145 #endif
 146 }
 147
 148 static inline void put_unaligned_le64(u64 val, void *p)
 149 {
 150 #ifdef __LITTLE_ENDIAN
 151         __put_unaligned_cpu64(val, p);
 152 #else
 153         nonnative_put_le64(val, p);
 154 #endif
 155 }
 156
 157 static inline void put_unaligned_be16(u16 val, void *p)
 158 {
 159 #ifdef __BIG_ENDIAN
 160         __put_unaligned_cpu16(val, p);
 161 #else
 162         nonnative_put_be16(val, p);
 163 #endif
 164 }
 165
 166 static inline void put_unaligned_be32(u32 val, void *p)
 167 {
 168 #ifdef __BIG_ENDIAN
 169         __put_unaligned_cpu32(val, p);
 170 #else
 171         nonnative_put_be32(val, p);
 172 #endif
 173 }
 174
 175 static inline void put_unaligned_be64(u64 val, void *p)
 176 {
 177 #ifdef __BIG_ENDIAN
 178         __put_unaligned_cpu64(val, p);
 179 #else
 180         nonnative_put_be64(val, p);
 181 #endif
 182 }
 183
 184 /*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants as provided above.
 188  */
 189 #include <linux/unaligned/generic.h>
 190
 191 #ifdef __LITTLE_ENDIAN
 192 # define get_unaligned __get_unaligned_le
 193 # define put_unaligned __put_unaligned_le
 194 #else
 195 # define get_unaligned __get_unaligned_be
 196 # define put_unaligned __put_unaligned_be
 197 #endif
 198
 199 #endif /* __ASM_SH_UNALIGNED_SH4A_H */