]>
Commit | Line | Data |
---|---|---|
af2728a4 JL |
1 | ;; Pentium Pro/PII Scheduling |
2 | ;; Copyright (C) 2002 Free Software Foundation, Inc. | |
3 | ;; | |
4 | ;; This file is part of GNU CC. | |
5 | ;; | |
6 | ;; GNU CC is free software; you can redistribute it and/or modify | |
7 | ;; it under the terms of the GNU General Public License as published by | |
8 | ;; the Free Software Foundation; either version 2, or (at your option) | |
9 | ;; any later version. | |
10 | ;; | |
11 | ;; GNU CC is distributed in the hope that it will be useful, | |
12 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;; GNU General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
17 | ;; along with GNU CC; see the file COPYING. If not, write to | |
18 | ;; the Free Software Foundation, 59 Temple Place - Suite 330, | |
19 | ;; Boston, MA 02111-1307, USA. | |
20 | ||
21 | ;; Categorize how many uops an ia32 instruction evaluates to: | |
22 | ;; one -- an instruction with 1 uop can be decoded by any of the | |
23 | ;; three decoders. | |
24 | ;; few -- an instruction with 1 to 4 uops can be decoded only by | |
25 | ;; decoder 0. | |
26 | ;; many -- a complex instruction may take an unspecified number of | |
27 | ;; cycles to decode in decoder 0. | |
28 | ||
;; Decoder class of an insn on the PPro: "one", "few" or "many" uops.
;; The clauses of the cond are tested in order and the first one whose
;; test is true supplies the value; "one" is the default.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         ;; "str" is already classified as "many" by the clause above, so
         ;; it is not listed again here (it could never match).
         (eq_attr "type" "icmov,fcmov,cld,leave")
           (const_string "few")
         ;; An integer move is "few" only when it writes memory.
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         ;; Every remaining insn with a memory operand.
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))
42 | ||
43 | ;; | |
44 | ;; The PPro has an out-of-order core, but the instruction decoders are | |
45 | ;; naturally in-order and asymmetric. We get best performance by scheduling | |
46 | ;; for the decoders, for in doing so we give the oo execution unit the | |
47 | ;; most choices. | |
48 | ;; | |
49 | ;; Rough readiness numbers. Fine tuning happens in i386.c. | |
50 | ;; | |
51 | ;; p0 describes port 0. | |
52 | ;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. | |
53 | ;; p2 describes port 2 for loads. | |
54 | ;; p34 describes ports 3 and 4 for stores. | |
55 | ;; fpu describes the fpu accessed via port 0. | |
56 | ;; ??? It is less than clear if there are separate fadd and fmul units | |
57 | ;; that could operate in parallel. | |
58 | ;; | |
59 | ;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. | |
60 | ||
;; Function unit "ppro_p0" (multiplicity 1, simultaneity 0).
;; In every entry the trailing pair of numbers is the ready delay
;; followed by the issue delay.

;; Shifts, rotates, lea, branches and cld.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "ishift,rotate,ishift1,rotate1,lea,ibr,cld")
       (eq_attr "cpu" "pentiumpro"))
  1 1)

;; Integer multiply.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "imul")
       (eq_attr "cpu" "pentiumpro"))
  4 1)

;; Integer divide.  The issue delay equals the ready delay, i.e. the
;; unit is modelled as busy for the whole operation.
;; ??? Does the divider lock out the pipe while it works,
;; or is there a disconnected unit?
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "idiv")
       (eq_attr "cpu" "pentiumpro"))
  17 17)

;; Floating point arithmetic, sign operations and fistp.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fop,fsgn,fistp")
       (eq_attr "cpu" "pentiumpro"))
  3 1)

;; Floating point conditional move.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fcmov")
       (eq_attr "cpu" "pentiumpro"))
  2 1)

;; Floating point compares and moves share the same delays.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fcmp,fmov")
       (eq_attr "cpu" "pentiumpro"))
  1 1)

;; Floating point multiply.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fmul")
       (eq_attr "cpu" "pentiumpro"))
  5 1)

;; Floating point divide and the fpspc class.
(define_function_unit "ppro_p0" 1 0
  (and (eq_attr "type" "fdiv,fpspc")
       (eq_attr "cpu" "pentiumpro"))
  56 1)
107 | ||
;; Function unit "ppro_p01" (multiplicity 2, simultaneity 0).
;; Used by every insn that is not an imov/fmov, and by imov/fmov insns
;; that have no memory operand.  Ready delay 1, issue delay 1.
(define_function_unit "ppro_p01" 2 0
  (and (ior (eq_attr "type" "!imov,fmov")
            (eq_attr "memory" "none"))
       (eq_attr "cpu" "pentiumpro"))
  1 1)
118 | ||
;; Function unit "ppro_p2" (multiplicity 1, simultaneity 0).
;; Used by insns whose memory attribute is "load" or "both", and by
;; pop and leave.  Ready delay 3, issue delay 1.
(define_function_unit "ppro_p2" 1 0
  (and (ior (eq_attr "memory" "load,both")
            (eq_attr "type" "pop,leave"))
       (eq_attr "cpu" "pentiumpro"))
  3 1)
124 | ||
;; Function unit "ppro_p34" (multiplicity 1, simultaneity 0).
;; Used by insns whose memory attribute is "store" or "both", and by
;; push.  Ready delay 1, issue delay 1.
(define_function_unit "ppro_p34" 1 0
  (and (ior (eq_attr "memory" "store,both")
            (eq_attr "type" "push"))
       (eq_attr "cpu" "pentiumpro"))
  1 1)
130 | ||
;; Function unit "fpu" (multiplicity 1, simultaneity 0).
;; In every entry the trailing pair of numbers is the ready delay
;; followed by the issue delay.

;; Simple floating point operations.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp")
       (eq_attr "cpu" "pentiumpro"))
  1 1)

;; Floating point multiply.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fmul")
       (eq_attr "cpu" "pentiumpro"))
  5 2)

;; Floating point divide and the fpspc class; the issue delay equals
;; the ready delay.
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "fdiv,fpspc")
       (eq_attr "cpu" "pentiumpro"))
  56 56)

;; imul uses the fpu.  ??? does it have the same throughput as fmul?
(define_function_unit "fpu" 1 0
  (and (eq_attr "type" "imul")
       (eq_attr "cpu" "pentiumpro"))
  4 1)