dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
arat tpr_shadow vnmi flexpriority ept vpid
- MINUS aes (see below)
bogomips : 6957.57
clflush size : 64
cache_alignment : 64
SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
break;
case 0x00000001:
- // & ~(1<<25): don't claim to support AES insns. See
- // bug 249991.
- SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
- 0xbfebfbff);
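+ // ECX bit 25 is the AES feature flag; it is left set here, so
+ // guests now see AES-NI advertised by CPUID leaf 1.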
+ SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff, 0xbfebfbff);
break;
case 0x00000002:
SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
}
}
+/*---------------------------------------------------------------*/
+/*--- AES primitives and helpers ---*/
+/*---------------------------------------------------------------*/
+/* a 16 x 16 matrix */
+static const UChar sbox[256] = { // row nr
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, // 1
+ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, // 2
+ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, // 3
+ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, // 4
+ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, // 5
+ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, // 6
+ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, // 7
+ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, // 8
+ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, // 9
+ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, //10
+ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, //11
+ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, //12
+ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, //13
+ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, //14
+ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, //15
+ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, //16
+ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+};
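+/* SubBytes: replace each byte of the state with its image under
+   the forward S-box above. */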
+static void SubBytes (V128* v)
+{
+ V128 r;
+ UInt i;
+ for (i = 0; i < 16; i++)
+ r.w8[i] = sbox[v->w8[i]];
+ *v = r;
+}
+
+/* a 16 x 16 matrix */
+static const UChar invsbox[256] = { // row nr
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, // 1
+ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, // 2
+ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, // 3
+ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, // 4
+ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, // 5
+ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, // 6
+ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, // 7
+ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, // 8
+ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, // 9
+ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, //10
+ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, //11
+ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, //12
+ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, //13
+ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, //14
+ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, //15
+ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, //16
+ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+};
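+/* InvSubBytes: replace each byte of the state with its image under
+   the inverse S-box above. */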
+static void InvSubBytes (V128* v)
+{
+ V128 r;
+ UInt i;
+ for (i = 0; i < 16; i++)
+ r.w8[i] = invsbox[v->w8[i]];
+ *v = r;
+}
+
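+/* Byte permutation for the ShiftRows step (row r of the AES state
+   is rotated left by r byte positions).  The table is read back to
+   front: result byte i is source byte ShiftRows_op[15-i]. */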
+static const UChar ShiftRows_op[16] =
+ {11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5, 0};
+static void ShiftRows (V128* v)
+{
+ V128 r;
+ UInt i;
+ for (i = 0; i < 16; i++)
+ r.w8[i] = v->w8[ShiftRows_op[15-i]];
+ *v = r;
+}
+
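+/* Byte permutation for the InvShiftRows step (row r of the AES state
+   is rotated right by r byte positions); read back to front as above. */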
+static const UChar InvShiftRows_op[16] =
+ {3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0};
+static void InvShiftRows (V128* v)
+{
+ V128 r;
+ UInt i;
+ for (i = 0; i < 16; i++)
+ r.w8[i] = v->w8[InvShiftRows_op[15-i]];
+ *v = r;
+}
+
+/* Multiplication of finite-field elements of AES.
+   See "A Specification for The AES Algorithm Rijndael
+   (by Joan Daemen & Vincent Rijmen)",
+   Dr. Brian Gladman, v3.1, 3rd March 2001. */
+/* N values such that (hex) xy = 0x03^N.
+   0x00 has no such N; we store 0xff for that entry. */
+/* a 16 x 16 matrix */
+static const UChar Nxy[256] = { // row nr
+ 0xff, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, // 1
+ 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, // 2
+ 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, // 3
+ 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, // 4
+ 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, // 5
+ 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, // 6
+ 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, // 7
+ 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, // 8
+ 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, // 9
+ 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, //10
+ 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, //11
+ 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, //12
+ 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, //13
+ 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, //14
+ 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, //15
+ 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, //16
+ 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07
+};
+
+/* E values so that E = 0x03^xy. */
+static const UChar Exy[256] = { // row nr
+ 0x01, 0x03, 0x05, 0x0f, 0x11, 0x33, 0x55, 0xff, // 1
+ 0x1a, 0x2e, 0x72, 0x96, 0xa1, 0xf8, 0x13, 0x35,
+ 0x5f, 0xe1, 0x38, 0x48, 0xd8, 0x73, 0x95, 0xa4, // 2
+ 0xf7, 0x02, 0x06, 0x0a, 0x1e, 0x22, 0x66, 0xaa,
+ 0xe5, 0x34, 0x5c, 0xe4, 0x37, 0x59, 0xeb, 0x26, // 3
+ 0x6a, 0xbe, 0xd9, 0x70, 0x90, 0xab, 0xe6, 0x31,
+ 0x53, 0xf5, 0x04, 0x0c, 0x14, 0x3c, 0x44, 0xcc, // 4
+ 0x4f, 0xd1, 0x68, 0xb8, 0xd3, 0x6e, 0xb2, 0xcd,
+ 0x4c, 0xd4, 0x67, 0xa9, 0xe0, 0x3b, 0x4d, 0xd7, // 5
+ 0x62, 0xa6, 0xf1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9e, 0xb9, 0xd0, 0x6b, 0xbd, 0xdc, 0x7f, // 6
+ 0x81, 0x98, 0xb3, 0xce, 0x49, 0xdb, 0x76, 0x9a,
+ 0xb5, 0xc4, 0x57, 0xf9, 0x10, 0x30, 0x50, 0xf0, // 7
+ 0x0b, 0x1d, 0x27, 0x69, 0xbb, 0xd6, 0x61, 0xa3,
+ 0xfe, 0x19, 0x2b, 0x7d, 0x87, 0x92, 0xad, 0xec, // 8
+ 0x2f, 0x71, 0x93, 0xae, 0xe9, 0x20, 0x60, 0xa0,
+ 0xfb, 0x16, 0x3a, 0x4e, 0xd2, 0x6d, 0xb7, 0xc2, // 9
+ 0x5d, 0xe7, 0x32, 0x56, 0xfa, 0x15, 0x3f, 0x41,
+ 0xc3, 0x5e, 0xe2, 0x3d, 0x47, 0xc9, 0x40, 0xc0, //10
+ 0x5b, 0xed, 0x2c, 0x74, 0x9c, 0xbf, 0xda, 0x75,
+ 0x9f, 0xba, 0xd5, 0x64, 0xac, 0xef, 0x2a, 0x7e, //11
+ 0x82, 0x9d, 0xbc, 0xdf, 0x7a, 0x8e, 0x89, 0x80,
+ 0x9b, 0xb6, 0xc1, 0x58, 0xe8, 0x23, 0x65, 0xaf, //12
+ 0xea, 0x25, 0x6f, 0xb1, 0xc8, 0x43, 0xc5, 0x54,
+ 0xfc, 0x1f, 0x21, 0x63, 0xa5, 0xf4, 0x07, 0x09, //13
+ 0x1b, 0x2d, 0x77, 0x99, 0xb0, 0xcb, 0x46, 0xca,
+ 0x45, 0xcf, 0x4a, 0xde, 0x79, 0x8b, 0x86, 0x91, //14
+ 0xa8, 0xe3, 0x3e, 0x42, 0xc6, 0x51, 0xf3, 0x0e,
+ 0x12, 0x36, 0x5a, 0xee, 0x29, 0x7b, 0x8d, 0x8c, //15
+ 0x8f, 0x8a, 0x85, 0x94, 0xa7, 0xf2, 0x0d, 0x17,
+ 0x39, 0x4b, 0xdd, 0x7c, 0x84, 0x97, 0xa2, 0xfd, //16
+ 0x1c, 0x24, 0x6c, 0xb4, 0xc7, 0x52, 0xf6, 0x01};
+
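+/* GF(2^8) multiplication via the log/antilog tables above: for
+   nonzero a, b we have a*b = Exy[(Nxy[a] + Nxy[b]) mod 255], since
+   Nxy is the discrete log to base 0x03 and Exy is its inverse.
+   E.g. 0x02 * 0x03 = Exy[0x19 + 0x01] = Exy[0x1a] = 0x06.  A zero
+   operand forces a zero result. */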
+static inline UChar ff_mul(UChar u1, UChar u2)
+{
+ if ((u1 > 0) && (u2 > 0)) {
+ UInt ui = Nxy[u1] + Nxy[u2];
+ if (ui >= 255)
+ ui = ui - 255;
+ return Exy[ui];
+ } else {
+ return 0;
+ }
+}
+
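+/* MixColumns: multiply each 4-byte group of the state (one AES
+   column) by the fixed matrix
+   {02 03 01 01; 01 02 03 01; 01 01 02 03; 03 01 01 02}
+   over GF(2^8). */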
+static void MixColumns (V128* v)
+{
+ V128 r;
+ Int j;
+#define P(x,row,col) (x)->w8[((row)*4+(col))]
+ for (j = 0; j < 4; j++) {
+ P(&r,j,0) = ff_mul(0x02, P(v,j,0)) ^ ff_mul(0x03, P(v,j,1))
+ ^ P(v,j,2) ^ P(v,j,3);
+ P(&r,j,1) = P(v,j,0) ^ ff_mul( 0x02, P(v,j,1) )
+ ^ ff_mul(0x03, P(v,j,2) ) ^ P(v,j,3);
+ P(&r,j,2) = P(v,j,0) ^ P(v,j,1) ^ ff_mul( 0x02, P(v,j,2) )
+ ^ ff_mul(0x03, P(v,j,3) );
+ P(&r,j,3) = ff_mul(0x03, P(v,j,0) ) ^ P(v,j,1) ^ P(v,j,2)
+ ^ ff_mul( 0x02, P(v,j,3) );
+ }
+ *v = r;
+#undef P
+}
+
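+/* InvMixColumns: multiply each 4-byte group of the state by the
+   inverse matrix
+   {0e 0b 0d 09; 09 0e 0b 0d; 0d 09 0e 0b; 0b 0d 09 0e}
+   over GF(2^8). */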
+static void InvMixColumns (V128* v)
+{
+ V128 r;
+ Int j;
+#define P(x,row,col) (x)->w8[((row)*4+(col))]
+ for (j = 0; j < 4; j++) {
+ P(&r,j,0) = ff_mul(0x0e, P(v,j,0) ) ^ ff_mul(0x0b, P(v,j,1) )
+ ^ ff_mul(0x0d,P(v,j,2) ) ^ ff_mul(0x09, P(v,j,3) );
+ P(&r,j,1) = ff_mul(0x09, P(v,j,0) ) ^ ff_mul(0x0e, P(v,j,1) )
+ ^ ff_mul(0x0b,P(v,j,2) ) ^ ff_mul(0x0d, P(v,j,3) );
+ P(&r,j,2) = ff_mul(0x0d, P(v,j,0) ) ^ ff_mul(0x09, P(v,j,1) )
+ ^ ff_mul(0x0e,P(v,j,2) ) ^ ff_mul(0x0b, P(v,j,3) );
+ P(&r,j,3) = ff_mul(0x0b, P(v,j,0) ) ^ ff_mul(0x0d, P(v,j,1) )
+ ^ ff_mul(0x09,P(v,j,2) ) ^ ff_mul(0x0e, P(v,j,3) );
+ }
+ *v = r;
+#undef P
+}
+
+/* For description, see definition in guest_amd64_defs.h */
+void amd64g_dirtyhelper_AES (
+ VexGuestAMD64State* gst,
+ HWord opc4,
+ HWord gstOffL, HWord gstOffR
+ )
+{
+ // Locate the two V128 arguments in the guest state.
+ V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
+ V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
+
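+ /* opc4 is the final opcode byte of the insn being emulated:
+    DC = AESENC, DD = AESENCLAST, DE = AESDEC, DF = AESDECLAST,
+    DB = AESIMC.  argR holds the state to be transformed; for the
+    round insns argL supplies the round key, XORed in at the end.
+    The ...LAST variants omit the (Inv)MixColumns step, as the final
+    AES round does. */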
+ switch (opc4) {
+ case 0xDC: /* AESENC */
+ case 0xDD: /* AESENCLAST */
+ ShiftRows (argR);
+ SubBytes (argR);
+ if (opc4 == 0xDC)
+ MixColumns (argR);
+ argR->w64[0] = argR->w64[0] ^ argL->w64[0];
+ argR->w64[1] = argR->w64[1] ^ argL->w64[1];
+ break;
+
+ case 0xDE: /* AESDEC */
+ case 0xDF: /* AESDECLAST */
+ InvShiftRows (argR);
+ InvSubBytes (argR);
+ if (opc4 == 0xDE)
+ InvMixColumns (argR);
+ argR->w64[0] = argR->w64[0] ^ argL->w64[0];
+ argR->w64[1] = argR->w64[1] ^ argL->w64[1];
+ break;
+
+ case 0xDB: /* AESIMC */
+ *argR = *argL;
+ InvMixColumns (argR);
+ break;
+ default: vassert(0);
+ }
+}
+
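+/* RotWord: rotate the 32-bit word right by 8 bits; on this
+   little-endian guest that maps bytes [b0,b1,b2,b3] to
+   [b1,b2,b3,b0], as in AES key expansion. */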
+static inline UInt RotWord (UInt w32)
+{
+ return ((w32 >> 8) | (w32 << 24));
+}
+
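+/* SubWord: apply the forward S-box to each of the word's 4 bytes. */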
+static inline UInt SubWord (UInt w32)
+{
+ UChar *w8;
+ UChar *r8;
+ UInt res;
+ w8 = (UChar*) &w32;
+ r8 = (UChar*) &res;
+ r8[0] = sbox[w8[0]];
+ r8[1] = sbox[w8[1]];
+ r8[2] = sbox[w8[2]];
+ r8[3] = sbox[w8[3]];
+ return res;
+}
+
+/* For description, see definition in guest_amd64_defs.h */
+void amd64g_dirtyhelper_AESKEYGENASSIST (
+ VexGuestAMD64State* gst,
+ HWord imm8,
+ HWord gstOffL, HWord gstOffR
+ )
+{
+ // Locate the two V128 arguments in the guest state.
+ V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
+ V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
+
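+ /* With X1 = src dword 1, X3 = src dword 3 and RCON = imm8, the
+    result, from high dword to low, is:
+    RotWord(SubWord(X3)) ^ RCON, SubWord(X3),
+    RotWord(SubWord(X1)) ^ RCON, SubWord(X1). */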
+ argR->w32[3] = RotWord (SubWord (argL->w32[3])) ^ imm8;
+ argR->w32[2] = SubWord (argL->w32[3]);
+ argR->w32[1] = RotWord (SubWord (argL->w32[1])) ^ imm8;
+ argR->w32[0] = SubWord (argL->w32[1]);
+}
+
+
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing, ---*/