#endif
}
+/*
+ * Return a vector whose low 64-bit lane is the low 64-bit lane of `in` and
+ * whose high 64-bit lane is zero.
+ *
+ * The casts mirror add128()/and128(): they reinterpret the 128 bits without
+ * changing them, so the NEON u64 intrinsics receive the vector type they are
+ * declared with instead of depending on implicit vector conversions.
+ */
+static really_inline m128 low64from128(const m128 in) {
+    return (m128) vcombine_u64(vget_low_u64((uint64x2_t)in), vdup_n_u64(0));
+}
+
+/*
+ * Return a vector whose low 64-bit lane is the high 64-bit lane of `in` and
+ * whose high 64-bit lane is zero (the upper half is moved down, not kept in
+ * place).
+ *
+ * The casts mirror add128()/and128(): they reinterpret the 128 bits without
+ * changing them, so the NEON u64 intrinsics receive the vector type they are
+ * declared with instead of depending on implicit vector conversions.
+ */
+static really_inline m128 high64from128(const m128 in) {
+    return (m128) vcombine_u64(vget_high_u64((uint64x2_t)in), vdup_n_u64(0));
+}
+
+/*
+ * Add `a` and `b` as two independent unsigned 64-bit lanes (vaddq_u64).
+ * Each lane wraps on its own; no carry moves from the low lane into the
+ * high lane.
+ */
+static really_inline m128 add128(m128 a, m128 b) {
+    const uint64x2_t lhs = (uint64x2_t)a;
+    const uint64x2_t rhs = (uint64x2_t)b;
+    return (m128) vaddq_u64(lhs, rhs);
+}
+
+/* Bitwise AND of `a` and `b`, computed with vandq_s8 on their int8x16_t views. */
static really_inline m128 and128(m128 a, m128 b) {
return (m128) vandq_s8((int8x16_t)a, (int8x16_t)b);
}
static really_inline
m128 set4x32(u32 x3, u32 x2, u32 x1, u32 x0) {
+ /* Stage the values in memory order x0..x3; vld1q_u32 then loads x0 into lane 0 and x3 into lane 3. */
+ /* NOTE(review): ALIGN_ATTR(16) is presumably the project's spelling of a 16-byte alignment attribute - confirm. */
- uint32_t __attribute__((aligned(16))) data[4] = { x0, x1, x2, x3 };
+ uint32_t ALIGN_ATTR(16) data[4] = { x0, x1, x2, x3 };
return (m128) vld1q_u32((uint32_t *) data);
}
static really_inline
m128 set2x64(u64a hi, u64a lo) {
+ /* Stage `lo` first and `hi` second; vld1q_u64 then loads `lo` into lane 0 and `hi` into lane 1. */
+ /* NOTE(review): ALIGN_ATTR(16) is presumably the project's spelling of a 16-byte alignment attribute - confirm. */
- uint64_t __attribute__((aligned(16))) data[2] = { lo, hi };
+ uint64_t ALIGN_ATTR(16) data[2] = { lo, hi };
return (m128) vld1q_u64((uint64_t *) data);
}
#ifdef DEBUG
static inline void print_m128_16x8(char *label, m128 vector) {
- uint8_t __attribute__((aligned(16))) data[16];
+ uint8_t ALIGN_ATTR(16) data[16];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 16; i++)
}
static inline void print_m128_8x16(char *label, m128 vector) {
- uint16_t __attribute__((aligned(16))) data[8];
+ uint16_t ALIGN_ATTR(16) data[8];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 8; i++)
}
static inline void print_m128_4x32(char *label, m128 vector) {
- uint32_t __attribute__((aligned(16))) data[4];
+ uint32_t ALIGN_ATTR(16) data[4];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 4; i++)
}
static inline void print_m128_2x64(char *label, m128 vector) {
- uint64_t __attribute__((aligned(16))) data[2];
+ uint64_t ALIGN_ATTR(16) data[2];
store128(data, vector);
DEBUG_PRINTF("%s: ", label);
for(int i=0; i < 2; i++)
return rv;
}
+/*
+ * Add two m256 values half by half: the `lo` and `hi` members are each
+ * summed with add128(), independently of one another.
+ */
+static really_inline m256 add256(m256 a, m256 b) {
+    m256 sum;
+    sum.hi = add128(a.hi, b.hi);
+    sum.lo = add128(a.lo, b.lo);
+    return sum;
+}
+
static really_inline m256 and256(m256 a, m256 b) {
m256 rv;
rv.lo = and128(a.lo, b.lo);
return rv;
}
+/*
+ * Add two m512 values half by half: the `lo` and `hi` members are each
+ * summed with add256(), independently of one another.
+ */
+static really_inline
+m512 add512(m512 a, m512 b) {
+    m512 sum;
+    sum.hi = add256(a.hi, b.hi);
+    sum.lo = add256(a.lo, b.lo);
+    return sum;
+}
static really_inline
m512 and512(m512 a, m512 b) {