/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
   Contributed by Ulrich Drepper <drepper@redhat.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
#include <atomic.h>
#include <cpuid.h>
#include "init-arch.h"

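/* Global feature data.  It is filled in by __init_cpu_features and read
   through __get_cpu_features below.  */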
struct cpu_features __cpu_features attribute_hidden;

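/* Read CPUID leaf 1 into the cached cpuid slot and return the base
   family and model fields.  */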
static void
get_common_indeces (unsigned int *family, unsigned int *model)
{
  __cpuid (1, __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax,
           __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
           __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
           __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);

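  /* In CPUID leaf 1 EAX, bits 11:8 hold the base family and bits 7:4 the
     base model.  The extended family/model fields (bits 27:20 and 19:16)
     are folded in by __init_cpu_features where needed.  */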
  unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
  *family = (eax >> 8) & 0x0f;
  *model = (eax >> 4) & 0x0f;
}

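/* Determine the CPU vendor, cache the CPUID data, and set the feature
   bits and family/model information for this processor.  */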
void
__init_cpu_features (void)
{
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  unsigned int family = 0;
  unsigned int model = 0;
  enum cpu_features_kind kind;

  __cpuid (0, __cpu_features.max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
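  /* CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order
     ("Genu", "ineI", "ntel"), which is what this comparison and the
     "AuthenticAMD" comparison below check.  */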
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      kind = arch_kind_intel;

      get_common_indeces (&family, &model);

      /* Intel processors prefer SSE instructions for memory/string
         routines if they are available.  */
      __cpu_features.feature[index_Prefer_SSE_for_memop]
        |= bit_Prefer_SSE_for_memop;

      unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      unsigned int extended_family = (eax >> 20) & 0xff;
      unsigned int extended_model = (eax >> 12) & 0xf0;
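      /* The 0xf0 mask keeps the extended model field already shifted left
         by four, so MODEL += EXTENDED_MODEL yields the display model.  The
         extended family is only added in when the base family is 0x0f.  */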
      if (family == 0x0f)
        {
          family += extended_family;
          model += extended_model;
        }
      else if (family == 0x06)
        {
          ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one of
                 the Core i3/i5/i7 processors if AVX is available.  */
              if ((ecx & bit_AVX) == 0)
                break;
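              /* Fall through: with AVX available, treat unknown models
                 like the Core i3/i5/i7 models below.  */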

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, copy backward and unaligned loads
                 are fast on Intel Core i3, i5 and i7.  */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward
#endif
#if index_Fast_Rep_String != index_Fast_Unaligned_Load
# error index_Fast_Rep_String != index_Fast_Unaligned_Load
#endif
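              /* The checks above guarantee that all three bits live in the
                 same feature word, so the single OR below sets them
                 together.  */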
              __cpu_features.feature[index_Fast_Rep_String]
                |= (bit_Fast_Rep_String
                    | bit_Fast_Copy_Backward
                    | bit_Fast_Unaligned_Load);
              break;
            }
        }
    }
  /* This spells out "AuthenticAMD".  */
  else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
    {
      kind = arch_kind_amd;

      get_common_indeces (&family, &model);

      unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;

      /* AMD processors prefer SSE instructions for memory/string routines
         if they are available, otherwise they prefer integer instructions.  */
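      /* CPUID leaf 1 ECX bit 9 (0x200) is the SSSE3 feature flag.  */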
      if ((ecx & 0x200))
        __cpu_features.feature[index_Prefer_SSE_for_memop]
          |= bit_Prefer_SSE_for_memop;
    }
  else
    kind = arch_kind_other;

  __cpu_features.family = family;
  __cpu_features.model = model;
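  /* Publish the data above before KIND is set; __get_cpu_features treats
     any value other than arch_kind_unknown as meaning the structure is
     fully initialized.  */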
  atomic_write_barrier ();
  __cpu_features.kind = kind;
}

#undef __get_cpu_features

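/* Return a pointer to the feature data, initializing it on first use.  */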
const struct cpu_features *
__get_cpu_features (void)
{
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();

  return &__cpu_features;
}
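
/* Usage sketch (illustrative only): multiarch IFUNC selectors consult
   these bits, typically through the convenience macros in init-arch.h.
   Assuming init-arch.h provides a HAS_SSSE3 macro and that SSE2 and
   SSSE3 memcpy variants exist under the names used here, a selector
   would look roughly like:

       extern __typeof (memcpy) __memcpy_sse2, __memcpy_ssse3;

       __typeof (memcpy) *impl
         = HAS_SSSE3 ? __memcpy_ssse3 : __memcpy_sse2;

   The exact macro spelling and implementation names are assumptions for
   illustration; the real selectors live in the other files of this
   multiarch directory.  */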