"fldz\n\t"
"fldz\n\t"
"finit\n");
+#ifndef VGP_amd64_darwin
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm0");
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm1");
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm2");
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm13");
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm14");
asm __volatile__("movups " VG_SYM(vecZ) ", %xmm15");
+#else
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm0");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm1");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm2");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm3");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm4");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm5");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm6");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm7");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm8");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm9");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm10");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm11");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm12");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm13");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm14");
+ asm __volatile__("movups " VG_SYM(vecZ) "(%rip), %xmm15");
+#endif
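// Note: Mach-O amd64 code is position-independent, so data symbols cannot be
// addressed with the absolute "sym" form used in the non-Darwin branches; the
// Darwin variants therefore reference symbols %rip-relatively ("sym(%rip)")
// throughout this file.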
asm __volatile__(
"pushq $0\n\t"
"ldmxcsr 0(%rsp)\n\t"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %bl,%cl \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %bl,%cl \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %bx,%cx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %bx,%cx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %ebx,%ecx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %ebx,%ecx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %rbx,%rcx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
"\tpush %rcx\n"
"\tpush %rdx\n"
"\txor %rax, %rax\n" // get eflags in a known state
+#ifndef VGP_amd64_darwin
"\tmov " VG_SYM(rax) ",%rax\n"
"\tmov " VG_SYM(rbx) ",%rbx\n"
"\tmov " VG_SYM(rcx) ",%rcx\n"
"\tmov " VG_SYM(rdx) ",%rdx\n"
+#else
+ "\tmov " VG_SYM(rax) "(%rip),%rax\n"
+ "\tmov " VG_SYM(rbx) "(%rip),%rbx\n"
+ "\tmov " VG_SYM(rcx) "(%rip),%rcx\n"
+ "\tmov " VG_SYM(rdx) "(%rip),%rdx\n"
+#endif
"\tcmpxchg %rbx,%rcx \n"
+#ifndef VGP_amd64_darwin
"\tmov %rax," VG_SYM(rax_out) "\n"
"\tmov %rbx," VG_SYM(rbx_out) "\n"
"\tmov %rcx," VG_SYM(rcx_out) "\n"
+#else
+ "\tmov %rax," VG_SYM(rax_out) "(%rip)\n"
+ "\tmov %rbx," VG_SYM(rbx_out) "(%rip)\n"
+ "\tmov %rcx," VG_SYM(rcx_out) "(%rip)\n"
+#endif
"\tpop %rdx\n"
"\tpop %rcx\n"
"\tpop %rbx\n"
asm("\n"
VG_SYM(sbb_ib_al) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
+
"\tclc\n"
"\tsbbb $5, %al\n"
+#ifndef VGP_amd64_darwin
"\tmovb %al, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb %al, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tstc\n"
"\tsbbb $5, %al\n"
+#ifndef VGP_amd64_darwin
"\tmovb %al, " VG_SYM(out_b2) "\n"
+#else
+"\tmovb %al," VG_SYM(out_b2) "(%rip) \n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(sbb_iw_ax) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovw " VG_SYM(in_w) ", %ax\n"
+#else
+"\tmovw " VG_SYM(in_w) "(%rip), %ax\n"
+#endif
"\tclc\n"
"\tsbbw $555, %ax\n"
+#ifndef VGP_amd64_darwin
"\tmovw %ax, " VG_SYM(out_w1) "\n"
"\tmovw " VG_SYM(in_w) ", %ax\n"
+#else
+"\tmovw %ax, " VG_SYM(out_w1) "(%rip)\n"
+
+"\tmovw " VG_SYM(in_w) "(%rip), %ax\n"
+#endif
"\tstc\n"
"\tsbbw $555, %ax\n"
+#ifndef VGP_amd64_darwin
"\tmovw %ax, " VG_SYM(out_w2) "\n"
+#else
+"\tmovw %ax, " VG_SYM(out_w2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(sbb_il_eax) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovl " VG_SYM(in_l) ", %eax\n"
+#else
+"\tmovl " VG_SYM(in_l) "(%rip), %eax\n"
+#endif
"\tclc\n"
"\tsbbl $555666, %eax\n"
+#ifndef VGP_amd64_darwin
"\tmovl %eax, " VG_SYM(out_l1) "\n"
"\tmovl " VG_SYM(in_l) ", %eax\n"
+#else
+"\tmovl %eax, " VG_SYM(out_l1) "(%rip)\n"
+
+"\tmovl " VG_SYM(in_l) "(%rip), %eax\n"
+#endif
"\tstc\n"
"\tsbbl $555666, %eax\n"
+#ifndef VGP_amd64_darwin
"\tmovl %eax, " VG_SYM(out_l2) "\n"
+#else
+"\tmovl %eax, " VG_SYM(out_l2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(sbb_eb_gb) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tclc\n"
+#ifndef VGP_amd64_darwin
"\tsbbb " VG_SYM(in_b2) ", %al\n"
"\tmovb %al, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tsbbb " VG_SYM(in_b2) "(%rip), %al\n"
+"\tmovb %al, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tstc\n"
+#ifndef VGP_amd64_darwin
"\tsbbb " VG_SYM(in_b2) ", %al\n"
"\tmovb %al, " VG_SYM(out_b2) "\n"
+#else
+"\tsbbb " VG_SYM(in_b2) "(%rip), %al\n"
+"\tmovb %al, " VG_SYM(out_b2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(sbb_eb_gb_2) ":\n"
"\tpushq %rcx\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %cl\n"
"\tmovb " VG_SYM(in_b2) ", %dh\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %cl\n"
+"\tmovb " VG_SYM(in_b2) "(%rip), %dh\n"
+#endif
"\tclc\n"
"\tsbbb %dh,%cl\n"
+#ifndef VGP_amd64_darwin
"\tmovb %cl, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %cl\n"
"\tmovb " VG_SYM(in_b2) ", %dh\n"
+#else
+"\tmovb %cl, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %cl\n"
+"\tmovb " VG_SYM(in_b2) "(%rip), %dh\n"
+#endif
"\tstc\n"
"\tsbbb %dh,%cl\n"
+#ifndef VGP_amd64_darwin
"\tmovb %cl, " VG_SYM(out_b2) "\n"
+#else
+"\tmovb %cl, " VG_SYM(out_b2) "(%rip)\n"
+#endif
"\tpopq %rcx\n"
"\tretq\n"
asm("\n"
VG_SYM(adc_eb_gb) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tclc\n"
+#ifndef VGP_amd64_darwin
"\tadcb " VG_SYM(in_b2) ", %al\n"
"\tmovb %al, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tadcb " VG_SYM(in_b2) "(%rip), %al\n"
+"\tmovb %al, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tstc\n"
+#ifndef VGP_amd64_darwin
"\tadcb " VG_SYM(in_b2) ", %al\n"
"\tmovb %al, " VG_SYM(out_b2) "\n"
+#else
+"\tadcb " VG_SYM(in_b2) "(%rip), %al\n"
+"\tmovb %al, " VG_SYM(out_b2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(adc_eb_gb_2) ":\n"
"\tpushq %rcx\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %cl\n"
"\tmovb " VG_SYM(in_b2) ", %dh\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %cl\n"
+"\tmovb " VG_SYM(in_b2) "(%rip), %dh\n"
+#endif
"\tclc\n"
"\tadcb %dh,%cl\n"
+#ifndef VGP_amd64_darwin
"\tmovb %cl, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %cl\n"
"\tmovb " VG_SYM(in_b2) ", %dh\n"
+#else
+"\tmovb %cl, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %cl\n"
+"\tmovb " VG_SYM(in_b2) "(%rip), %dh\n"
+#endif
"\tstc\n"
"\tadcb %dh,%cl\n"
+#ifndef VGP_amd64_darwin
"\tmovb %cl, " VG_SYM(out_b2) "\n"
+#else
+"\tmovb %cl, " VG_SYM(out_b2) "(%rip)\n"
+#endif
"\tpopq %rcx\n"
"\tretq\n"
asm("\n"
VG_SYM(adc_ib_al) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tclc\n"
"\tadcb $5, %al\n"
+#ifndef VGP_amd64_darwin
"\tmovb %al, " VG_SYM(out_b1) "\n"
"\tmovb " VG_SYM(in_b) ", %al\n"
+#else
+"\tmovb %al, " VG_SYM(out_b1) "(%rip)\n"
+
+"\tmovb " VG_SYM(in_b) "(%rip), %al\n"
+#endif
"\tstc\n"
"\tadcb $5, %al\n"
+#ifndef VGP_amd64_darwin
"\tmovb %al, " VG_SYM(out_b2) "\n"
+#else
+"\tmovb %al, " VG_SYM(out_b2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(adc_iw_ax) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovw " VG_SYM(in_w) ", %ax\n"
+#else
+"\tmovw " VG_SYM(in_w) "(%rip), %ax\n"
+#endif
"\tclc\n"
"\tadcw $555, %ax\n"
+#ifndef VGP_amd64_darwin
"\tmovw %ax, " VG_SYM(out_w1) "\n"
"\tmovw " VG_SYM(in_w) ", %ax\n"
+#else
+"\tmovw %ax, " VG_SYM(out_w1) "(%rip)\n"
+
+"\tmovw " VG_SYM(in_w) "(%rip), %ax\n"
+#endif
"\tstc\n"
"\tadcw $555, %ax\n"
+#ifndef VGP_amd64_darwin
"\tmovw %ax, " VG_SYM(out_w2) "\n"
+#else
+"\tmovw %ax, " VG_SYM(out_w2) "(%rip)\n"
+#endif
"\tretq\n"
);
asm("\n"
VG_SYM(adc_il_eax) ":\n"
+#ifndef VGP_amd64_darwin
"\tmovl " VG_SYM(in_l) ", %eax\n"
+#else
+"\tmovl " VG_SYM(in_l) "(%rip), %eax\n"
+#endif
"\tclc\n"
"\tadcl $555666, %eax\n"
+#ifndef VGP_amd64_darwin
"\tmovl %eax, " VG_SYM(out_l1) "\n"
"\tmovl " VG_SYM(in_l) ", %eax\n"
+#else
+"\tmovl %eax, " VG_SYM(out_l1) "(%rip)\n"
+
+"\tmovl " VG_SYM(in_l) "(%rip), %eax\n"
+#endif
"\tstc\n"
"\tadcl $555666, %eax\n"
+#ifndef VGP_amd64_darwin
"\tmovl %eax, " VG_SYM(out_l2) "\n"
+#else
+"\tmovl %eax, " VG_SYM(out_l2) "(%rip)\n"
+#endif
"\tretq\n"
);
// rmme when reintegrated
// Allocates a 16-aligned block. Asserts if the allocation fails.
+#ifdef VGO_darwin
+#include <stdlib.h>
+#else
#include <malloc.h>
+#endif
__attribute__((unused))
static void* memalign16(size_t szB)
{
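   // Likely shape of the body (a minimal sketch, assuming the Darwin side uses
   // posix_memalign(), which is why <stdlib.h> replaces <malloc.h> above;
   // memalign() itself is not available on Darwin):
   //
   //    void* p = NULL;
   // #if defined(VGO_darwin)
   //    assert(0 == posix_memalign(&p, 16, szB));
   // #else
   //    p = memalign(16, szB);
   // #endif
   //    assert(p != NULL && 0 == (15 & (unsigned long)p));
   //    return p;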
"movq %0, %%rax" "\n\t"
"movq 0(%%rax), %%rdi" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntq %%rdi, %%r11" "\n\t"
+#else
+ "popcnt %%rdi, %%r11" "\n\t"
+#endif
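   // Note: the Darwin branches drop the explicit size suffix (popcntq/popcntl/
   // popcntw -> popcnt) and let the operand size be inferred from the register
   // operands, presumably because the Darwin assembler rejects the suffixed
   // mnemonics.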
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"
__asm__ __volatile__(
"movq %0, %%rax" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntq 0(%%rax), %%r11" "\n\t"
+#else
+ "popcnt 0(%%rax), %%r11" "\n\t"
+#endif
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"
"movq %0, %%rax" "\n\t"
"movq 0(%%rax), %%rdi" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntl %%edi, %%r11d" "\n\t"
+#else
+ "popcnt %%edi, %%r11d" "\n\t"
+#endif
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"
__asm__ __volatile__(
"movq %0, %%rax" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntl 0(%%rax), %%r11d" "\n\t"
+#else
+ "popcnt 0(%%rax), %%r11d" "\n\t"
+#endif
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"
"movq %0, %%rax" "\n\t"
"movq 0(%%rax), %%rdi" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntw %%di, %%r11w" "\n\t"
+#else
+ "popcnt %%di, %%r11w" "\n\t"
+#endif
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"
__asm__ __volatile__(
"movq %0, %%rax" "\n\t"
"movq 8(%%rax), %%r11" "\n\t"
+#ifndef VGP_amd64_darwin
"popcntw 0(%%rax), %%r11w" "\n\t"
+#else
+ "popcnt 0(%%rax), %%r11w" "\n\t"
+#endif
"movq %%r11, 16(%%rax)" "\n\t"
"pushfq" "\n\t"
"popq %%r12" "\n\t"