From 00ed5424b1d4dcccfa187f55205521826794898c Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Wed, 29 May 2024 11:13:55 +0800 Subject: [PATCH] Adjust generic loop alignment from 16:11:8 to 16 for Intel processors Previously, we used 16:11:8 as the loop alignment in the generic tuning for Intel processors, which led to loops crossing cache lines and resulted in random performance penalties, varying from commit to commit, in benchmarks with small loops. Always aligning loops to 16 bytes mitigates the issue. gcc/ChangeLog: * config/i386/x86-tune-costs.h (generic_cost): Change from 16:11:8 to 16. --- gcc/config/i386/x86-tune-costs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 65d7d1f7e429..d3aaaa4b5cc2 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -3758,7 +3758,7 @@ struct processor_costs generic_cost = { generic_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ - "16:11:8", /* Loop alignment. */ + "16", /* Loop alignment. */ "16:11:8", /* Jump alignment. */ "0:0:8", /* Label alignment. */ "16", /* Func alignment. */ -- 2.47.3