/**
* Shift left a 128-bit register by b bytes (zero-filling from the right).
*/
-# define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (uint8x16_t) a, 16 - (b)))
+/* b must be a compile-time constant in [1, 15]: vextq_u8 takes an immediate lane index (16 - b). */
+# define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u64(a), 16 - (b)))
/**
- * Reorder 32-bit lanes in a 128-bit register according to the indices (a, b, c, d).
+ * Broadcast 32-bit lane 3 across the 128-bit register.
*/
-# define SHUFFLE32x4(x, a, b, c, d) \
- vreinterpretq_u64_u32(__builtin_shufflevector( \
- vreinterpretq_u32_u64(x), vreinterpretq_u32_u64(x), (a), (b), (c), (d)))
+/* NOTE(review): vdupq_laneq_u32 is AArch64 (A64)-only -- confirm this path never targets 32-bit ARM. */
+# define SHUFFLE32x4_3333(x) vreinterpretq_u64_u32(vdupq_laneq_u32(vreinterpretq_u32_u64(x), 3))
/**
* Invert an AES round key for decryption.
*/
static inline BlockVec
SHL1_128(const BlockVec a)
{
- const BlockVec shl = vshlq_n_u8(a, 1);
- const BlockVec msb = vshrq_n_u8(a, 7);
- const BlockVec zero = vdupq_n_u8(0);
- const BlockVec carries = vextq_u8(msb, zero, 1);
- return vorrq_u8(shl, carries);
+ /* Shift every byte left by 1, then feed each byte's lost MSB into the LSB of the
+ * next lower-indexed byte: result[i] = (a[i] << 1) | (a[i+1] >> 7), with byte 15
+ * getting a 0 carry. This is a 1-bit left shift of the 128-bit value treating
+ * byte 0 as most significant (big-endian byte order). */
+ const uint8x16_t shl = vshlq_n_u8(vreinterpretq_u8_u64(a), 1);
+ const uint8x16_t msb = vshrq_n_u8(vreinterpretq_u8_u64(a), 7);
+ const uint8x16_t zero = vdupq_n_u8(0);
+ const uint8x16_t carries = vextq_u8(msb, zero, 1);
+ return vreinterpretq_u64_u8(vorrq_u8(shl, carries));
}
/**
// This extracts the needed transformation for generating a new round key.
uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0));
// Shuffle for the key expansion rotation.
- const uint8x16_t b =
- __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12);
+ /* Same byte permutation as the old __builtin_shufflevector call, expressed as a
+ * TBL lookup (vqtbl1q_u8) so it no longer relies on a compiler builtin. */
+ static const uint8_t aes_keygen_shuffle[16] = {
+ 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12,
+ };
+ const BlockVec b = vreinterpretq_u64_u8(vqtbl1q_u8(a, vld1q_u8(aes_keygen_shuffle)));
// Combine with round constant.
const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32);
return XOR128(b, c);
/**
- * Reorder 32-bit lanes in a 128-bit block.
+ * Broadcast 32-bit lane 3 across the 128-bit register.
*/
-# define SHUFFLE32x4(x, a, b, c, d) _mm_shuffle_epi32((x), _MM_SHUFFLE((d), (c), (b), (a)))
+# define SHUFFLE32x4_3333(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(3, 3, 3, 3))
/**
* Invert an AES round key for decryption.
*/
* st: the AesState structure to be populated.
* key: a 16-byte AES key.
*/
-static void __vectorcall expand_key(KeySchedule rkeys, const unsigned char key[IPCRYPT_KEYBYTES])
+/* NOTE(review): expects rkeys to hold all 11 AES-128 round keys -- confirm KeySchedule's capacity. */
+static void __vectorcall
+expand_key(KeySchedule rkeys, const unsigned char key[IPCRYPT_KEYBYTES])
{
BlockVec t, s;
size_t i = 0;
s = AES_KEYGEN(t, RC); \
t = XOR128(t, BYTESHL128(t, 4)); \
t = XOR128(t, BYTESHL128(t, 8)); \
- t = XOR128(t, SHUFFLE32x4(s, 3, 3, 3, 3));
+ t = XOR128(t, SHUFFLE32x4_3333(s));
// Load the initial 128-bit key from memory.
t = LOAD128(key);
static void
ipcrypt_pfx_set_bit(uint8_t ip16[16], const unsigned int bit_index, const uint8_t bit_value)
{
- if (bit_value) {
- ip16[15 - bit_index / 8] |= (1 << (bit_index % 8));
- } else {
- ip16[15 - bit_index / 8] &= ~(1 << (bit_index % 8));
- }
+ /* Branchless rewrite: mask is 0xFF when bit_value's low bit is set, 0x00 otherwise,
+ * so the same store runs regardless of bit_value (presumably to avoid a
+ * value-dependent branch -- confirm constant-time is a requirement here).
+ * bit_index counts from the end of the array (ip16[15] holds bits 0..7);
+ * assumes bit_index < 128 -- TODO confirm callers guarantee this. */
+ const size_t byte_index = 15 - bit_index / 8;
+ const uint8_t bit_mask = (uint8_t) (1 << (bit_index % 8));
+ uint8_t mask = (uint8_t) -((bit_value & 1));
+
+#if defined(__GNUC__) || defined(__clang__)
+ /* Empty asm makes `mask` opaque to the optimizer so it cannot re-introduce a branch. */
+ __asm__ __volatile__("" : "+r"(mask) :);
+#endif
+ ip16[byte_index] = (ip16[byte_index] & ~bit_mask) | (bit_mask & mask);
}
static void