]>
Commit | Line | Data |
---|---|---|
800eeca4 JW |
1 | // |
2 | // Detect WAW (write-after-write) dependency violations (DVs). Cases taken from DV tables. | |
3 | // Each section is headed by the resource under test; pairs marked "no DV here" are the non-conflicting cases. | |
4 | .text | |
5 | .explicit | |
6 | // AR[BSP] (two explicit writes, no stop between them) | |
7 | mov ar.bsp = r0 | |
8 | mov ar.bsp = r1 | |
9 | ;; | |
10 | // AR[BSPSTORE] (two explicit writes, no stop between them) | |
11 | mov ar.bspstore = r2 | |
12 | mov ar.bspstore = r3 | |
13 | ;; | |
14 | ||
15 | // AR[CCV] (same source register r4 in both writes) | |
16 | mov ar.ccv = r4 | |
17 | mov ar.ccv = r4 | |
18 | ;; | |
19 | ||
20 | // AR[EC] (br.wtop paired with an explicit write to ar.ec) | |
21 | br.wtop.sptk L | |
22 | mov ar.ec = r0 | |
23 | ;; | |
24 | ||
25 | // AR[FPSR].sf0.controls (write of the whole ar.fpsr, then fsetc.s0) | |
26 | mov ar.fpsr = r0 | |
27 | fsetc.s0 0x7f, 0x0f | |
28 | ;; | |
29 | ||
30 | // AR[FPSR].sf1.controls (write of the whole ar.fpsr, then fsetc.s1) | |
31 | mov ar.fpsr = r0 | |
32 | fsetc.s1 0x7f, 0x0f | |
33 | ;; | |
34 | ||
35 | // AR[FPSR].sf2.controls (write of the whole ar.fpsr, then fsetc.s2) | |
36 | mov ar.fpsr = r0 | |
37 | fsetc.s2 0x7f, 0x0f | |
38 | ;; | |
39 | ||
40 | // AR[FPSR].sf3.controls (write of the whole ar.fpsr, then fsetc.s3) | |
41 | mov ar.fpsr = r0 | |
42 | fsetc.s3 0x7f, 0x0f | |
43 | ;; | |
44 | ||
45 | // AR[FPSR].sf0.flags (fcmp.s0 + fcmp.s0 is marked as no DV; fcmp.s0 + fclrf.s0 is the case under test) | |
46 | fcmp.eq.s0 p1, p2 = f3, f4 | |
47 | fcmp.eq.s0 p3, p4 = f3, f4 // no DV here | |
48 | ;; | |
49 | fcmp.eq.s0 p1, p2 = f3, f4 | |
50 | fclrf.s0 | |
51 | ;; | |
52 | ||
53 | // AR[FPSR].sf1.flags (same two groups as sf0, using the .s1 forms) | |
54 | fcmp.eq.s1 p1, p2 = f3, f4 | |
55 | fcmp.eq.s1 p3, p4 = f3, f4 // no DV here | |
56 | ;; | |
57 | fcmp.eq.s1 p1, p2 = f3, f4 | |
58 | fclrf.s1 | |
59 | ;; | |
60 | ||
61 | // AR[FPSR].sf2.flags (same two groups as sf0, using the .s2 forms) | |
62 | fcmp.eq.s2 p1, p2 = f3, f4 | |
63 | fcmp.eq.s2 p3, p4 = f3, f4 // no DV here | |
64 | ;; | |
65 | fcmp.eq.s2 p1, p2 = f3, f4 | |
66 | fclrf.s2 | |
67 | ;; | |
68 | ||
69 | // AR[FPSR].sf3.flags (same two groups as sf0, using the .s3 forms) | |
70 | fcmp.eq.s3 p1, p2 = f3, f4 | |
71 | fcmp.eq.s3 p3, p4 = f3, f4 // no DV here | |
72 | ;; | |
73 | fcmp.eq.s3 p1, p2 = f3, f4 | |
74 | fclrf.s3 | |
75 | ;; | |
76 | ||
77 | // AR[FPSR].traps/rv plus all controls/flags (two writes of the whole ar.fpsr) | |
78 | mov ar.fpsr = r0 | |
79 | mov ar.fpsr = r0 | |
80 | ;; | |
81 | ||
82 | // AR[ITC] (two explicit writes, no stop between them) | |
83 | mov ar.itc = r1 | |
84 | mov ar.itc = r1 | |
85 | ;; | |
86 | ||
4f8631b1 L |
87 | // AR[RUC] (two explicit writes, no stop between them) |
88 | mov ar.ruc = r1 | |
89 | mov ar.ruc = r1 | |
90 | ;; | |
91 | ||
800eeca4 JW |
92 | // AR[K] (ar.k2 written twice) |
93 | mov ar.k2 = r3 | |
94 | mov ar.k2 = r3 | |
95 | ;; | |
96 | ||
97 | // AR[LC] (br.cloop paired with an explicit write to ar.lc) | |
98 | br.cloop.sptk L | |
99 | mov ar.lc = r0 | |
100 | ;; | |
101 | ||
102 | // AR[PFS] (explicit write to ar.pfs followed by br.call) | |
103 | mov ar.pfs = r0 | |
104 | br.call.sptk b0 = L | |
105 | ;; | |
106 | ||
107 | // AR[RNAT] (see also AR[BSPSTORE]) | |
108 | mov ar.rnat = r8 | |
109 | mov ar.rnat = r8 | |
110 | ;; | |
111 | ||
112 | // AR[RSC] (two explicit writes, no stop between them) | |
113 | mov ar.rsc = r10 | |
114 | mov ar.rsc = r10 | |
115 | ;; | |
116 | ||
117 | // AR[UNAT] (explicit write to ar.unat followed by st8.spill) | |
118 | mov ar.unat = r12 | |
119 | st8.spill [r0] = r1 | |
120 | ;; | |
121 | ||
122 | // AR% (application register addressed by number: ar48 written twice) | |
123 | mov ar48 = r0 | |
124 | mov ar48 = r0 | |
125 | ;; | |
126 | ||
127 | // BR% (b1 written twice) | |
128 | mov b1 = r0 | |
129 | mov b1 = r1 | |
130 | ;; | |
131 | ||
132 | // CFM (and others) -- two br.wtop in one group | |
133 | br.wtop.sptk L | |
134 | br.wtop.sptk L | |
135 | ;; | |
136 | ||
137 | // CR[CMCV] (two explicit writes, no stop between them) | |
138 | mov cr.cmcv = r1 | |
139 | mov cr.cmcv = r2 | |
140 | ;; | |
141 | ||
142 | // CR[DCR] (two explicit writes, no stop between them) | |
143 | mov cr.dcr = r3 | |
144 | mov cr.dcr = r3 | |
145 | ;; | |
146 | ||
147 | // CR[EOI] (and InService) -- group is followed by srlz.d | |
148 | mov cr.eoi = r0 | |
149 | mov cr.eoi = r0 | |
150 | ;; | |
151 | srlz.d | |
152 | ||
153 | // CR[GPTA] (two explicit writes, no stop between them) | |
154 | mov cr.gpta = r6 | |
155 | mov cr.gpta = r7 | |
156 | ;; | |
157 | ||
158 | // CR[IFA] (two explicit writes, no stop between them) | |
159 | mov cr.ifa = r9 | |
160 | mov cr.ifa = r10 | |
161 | ;; | |
162 | ||
163 | // CR[IFS] (explicit write to cr.ifs followed by cover) | |
164 | mov cr.ifs = r11 | |
165 | cover | |
166 | ;; | |
167 | ||
168 | // CR[IHA] (two explicit writes, no stop between them) | |
169 | mov cr.iha = r13 | |
170 | mov cr.iha = r14 | |
171 | ;; | |
172 | ||
1ca35711 L |
173 | // CR[IIB%] (cr.iib0 written twice, then cr.iib1 written twice) |
174 | mov cr.iib0 = r15 | |
175 | mov cr.iib0 = r16 | |
176 | ;; | |
177 | ||
178 | mov cr.iib1 = r15 | |
179 | mov cr.iib1 = r16 | |
180 | ;; | |
181 | ||
800eeca4 JW |
182 | // CR[IIM] (two explicit writes, no stop between them) |
183 | mov cr.iim = r15 | |
184 | mov cr.iim = r16 | |
185 | ;; | |
186 | ||
187 | // CR[IIP] (two explicit writes, no stop between them) | |
188 | mov cr.iip = r17 | |
189 | mov cr.iip = r17 | |
190 | ;; | |
191 | ||
192 | // CR[IPA] is not a section here; this is CR[IIPA] (two explicit writes, no stop between them) | |
193 | mov cr.iipa = r19 | |
194 | mov cr.iipa = r20 | |
195 | ;; | |
196 | ||
197 | // CR[IPSR] (two explicit writes, no stop between them) | |
198 | mov cr.ipsr = r21 | |
199 | mov cr.ipsr = r22 | |
200 | ;; | |
201 | ||
202 | // CR[IRR%] (and others) -- exercised through two reads of cr.ivr in one group | |
4b09e828 JB |
203 | mov r2 = cr.ivr |
204 | mov r3 = cr.ivr | |
800eeca4 JW |
205 | ;; |
206 | ||
207 | // CR[ISR] (two explicit writes, no stop between them) | |
208 | mov cr.isr = r24 | |
209 | mov cr.isr = r25 | |
210 | ;; | |
211 | ||
212 | // CR[ITIR] (two explicit writes, no stop between them) | |
213 | mov cr.itir = r26 | |
214 | mov cr.itir = r27 | |
215 | ;; | |
216 | ||
217 | // CR[ITM] (two explicit writes, no stop between them) | |
218 | mov cr.itm = r28 | |
219 | mov cr.itm = r29 | |
220 | ;; | |
221 | ||
222 | // CR[ITV] (two explicit writes, no stop between them) | |
223 | mov cr.itv = r0 | |
224 | mov cr.itv = r1 | |
225 | ;; | |
226 | ||
227 | // CR[IVA] (two explicit writes, no stop between them) | |
228 | mov cr.iva = r0 | |
229 | mov cr.iva = r1 | |
230 | ;; | |
231 | ||
232 | // CR[IVR] (no explicit writers, so there is no test case for it) | |
233 | ||
234 | // CR[LID] (two explicit writes, no stop between them) | |
235 | mov cr.lid = r0 | |
236 | mov cr.lid = r1 | |
237 | ;; | |
238 | ||
239 | // CR[LRR%] (lrr0 + lrr1 is marked as no DV; lrr0 written twice is the case under test) | |
240 | mov cr.lrr0 = r0 | |
241 | mov cr.lrr1 = r0 // no DV here | |
242 | ;; | |
243 | mov cr.lrr0 = r0 | |
244 | mov cr.lrr0 = r0 | |
245 | ;; | |
246 | ||
247 | // CR[PMV] (two explicit writes, no stop between them) | |
248 | mov cr.pmv = r0 | |
249 | mov cr.pmv = r1 | |
250 | ;; | |
251 | ||
252 | // CR[PTA] (two explicit writes, no stop between them) | |
253 | mov cr.pta = r0 | |
254 | mov cr.pta = r1 | |
255 | ;; | |
256 | ||
257 | // CR[TPR] (two explicit writes, no stop between them) | |
258 | mov cr.tpr = r0 | |
259 | mov cr.tpr = r1 | |
260 | ;; | |
261 | ||
262 | // DBR# (same index register r1 in both writes; group is followed by srlz.d) | |
263 | mov dbr[r1] = r1 | |
264 | mov dbr[r1] = r2 | |
265 | ;; | |
266 | srlz.d | |
267 | ||
268 | // DTC (ptc.e + ptc.e is marked as no DV; ptc.e + itc.i is the case under test) | |
269 | ptc.e r0 | |
270 | ptc.e r1 // no DVs here | |
271 | ;; | |
272 | ptc.e r0 // (and others) | |
273 | itc.i r0 | |
274 | ;; | |
275 | srlz.d | |
276 | ||
277 | // DTC_LIMIT (ptc.g followed by ptc.ga; see the note on the instructions below) | |
278 | ptc.g r0, r1 // NOTE: GAS automatically emits stops after | |
279 | ptc.ga r2, r3 // ptc.g/ptc.ga, so this conflict is no | |
280 | ;; // longer possible in GAS-generated assembly | |
281 | srlz.d | |
282 | ||
283 | // DTR (itr.d followed by ptr.d) | |
284 | itr.d dtr[r0] = r1 // (and others) | |
285 | ptr.d r2, r3 | |
286 | ;; | |
287 | srlz.d | |
288 | ||
289 | // FR% (mov and ldfs.c.clr both target f3) | |
290 | mov f3 = f2 | |
291 | ldfs.c.clr f3 = [r1] | |
292 | ;; | |
293 | ||
294 | // GR% (mov and ld8.c.clr both target r2) | |
295 | mov r2 = r0 | |
296 | ld8.c.clr r2 = [r1] | |
297 | ;; | |
298 | ||
299 | // IBR# (index registers r0 and r1) | |
300 | mov ibr[r0] = r2 | |
301 | mov ibr[r1] = r2 | |
302 | ;; | |
303 | ||
304 | // InService (write to cr.eoi paired with a read of cr.ivr) | |
305 | mov cr.eoi = r0 | |
306 | mov r1 = cr.ivr | |
307 | ;; | |
308 | srlz.d | |
309 | ||
310 | // ITC (ptc.e followed by itc.i; srlz.i comes after the stop) | |
311 | ptc.e r0 | |
312 | itc.i r1 | |
313 | ;; | |
314 | srlz.i | |
315 | ;; | |
316 | ||
317 | // ITR (itr.i followed by ptr.i; srlz.i comes after the stop) | |
318 | itr.i itr[r0] = r1 | |
319 | ptr.i r2, r3 | |
320 | ;; | |
321 | srlz.i | |
322 | ;; | |
323 | ||
324 | // PKR# (.reg.val declares values for r1 and r2 -- presumably so pkr[r1] and pkr[r2] are known to differ; TODO confirm) | |
325 | .reg.val r1, 0x1 | |
326 | .reg.val r2, ~0x1 | |
327 | mov pkr[r1] = r1 | |
328 | mov pkr[r2] = r1 // no DV here | |
329 | ;; | |
330 | mov pkr[r1] = r1 | |
331 | mov pkr[r1] = r1 | |
332 | ;; | |
333 | ||
334 | // PMC# (index registers r3 and r4; NOTE(review): no .reg.val is given for them, unlike PKR# -- confirm that is intended) | |
335 | mov pmc[r3] = r1 | |
336 | mov pmc[r4] = r1 | |
337 | ;; | |
338 | ||
339 | // PMD# (index registers r3 and r4, as for PMC#) | |
340 | mov pmd[r3] = r1 | |
341 | mov pmd[r4] = r1 | |
342 | ;; | |
343 | ||
139368c9 | 344 | // PR%, 1 - 15 (all cases write p1) |
800eeca4 JW |
345 | cmp.eq p1, p0 = r0, r1 |
346 | cmp.eq p1, p0 = r2, r3 | |
347 | ;; | |
348 | fcmp.eq p1, p2 = f2, f3 | |
349 | fcmp.eq p1, p3 = f2, f3 | |
350 | ;; | |
351 | cmp.eq.and p1, p2 = r0, r1 | |
352 | cmp.eq.or p1, p3 = r2, r3 | |
353 | ;; | |
354 | cmp.eq.or p1, p3 = r2, r3 | |
355 | cmp.eq.and p1, p2 = r0, r1 | |
356 | ;; | |
357 | cmp.eq.and p1, p2 = r0, r1 | |
358 | cmp.eq.and p1, p3 = r2, r3 // no DV here (both compares are .and) | |
359 | ;; | |
360 | cmp.eq.or p1, p2 = r0, r1 | |
361 | cmp.eq.or p1, p3 = r2, r3 // no DV here (both compares are .or) | |
362 | ;; | |
363 | ||
364 | // PR63 (br.wtop pair first, then the same compare cases as PR% 1 - 15 with p63 in place of p1) | |
365 | br.wtop.sptk L | |
366 | br.wtop.sptk L | |
367 | ;; | |
368 | cmp.eq p63, p0 = r0, r1 | |
369 | cmp.eq p63, p0 = r2, r3 | |
370 | ;; | |
371 | fcmp.eq p63, p2 = f2, f3 | |
372 | fcmp.eq p63, p3 = f2, f3 | |
373 | ;; | |
374 | cmp.eq.and p63, p2 = r0, r1 | |
375 | cmp.eq.or p63, p3 = r2, r3 | |
376 | ;; | |
377 | cmp.eq.or p63, p3 = r2, r3 | |
378 | cmp.eq.and p63, p2 = r0, r1 | |
379 | ;; | |
380 | cmp.eq.and p63, p2 = r0, r1 | |
381 | cmp.eq.and p63, p3 = r2, r3 // no DV here (both compares are .and) | |
382 | ;; | |
383 | cmp.eq.or p63, p2 = r0, r1 | |
384 | cmp.eq.or p63, p3 = r2, r3 // no DV here (both compares are .or) | |
385 | ;; | |
386 | ||
387 | // PSR.ac (rum of mask bit 3, twice) | |
388 | rum (1<<3) | |
389 | rum (1<<3) | |
390 | ;; | |
391 | ||
392 | // PSR.be (rum of mask bit 1, twice) | |
393 | rum (1<<1) | |
394 | rum (1<<1) | |
395 | ;; | |
396 | ||
397 | // PSR.bn (two bsw.0; see the note on the instructions below) | |
398 | bsw.0 // GAS automatically emits a stop after bsw.n | |
399 | bsw.0 // so this conflict is avoided | |
400 | ;; | |
401 | ||
402 | // PSR.cpl (epc followed by br.ret) | |
403 | epc | |
404 | br.ret.sptk b0 | |
405 | ;; | |
406 | ||
407 | // PSR.da (rfi is the only writer) | |
408 | // PSR.db (and others) -- two writes of psr.l; PSR.lp, PSR.rt and PSR.tb refer back to this case | |
409 | mov psr.l = r0 | |
410 | mov psr.l = r1 | |
411 | ;; | |
412 | srlz.d | |
413 | ||
414 | // PSR.dd (rfi is the only writer) | |
415 | ||
416 | // PSR.dfh (ssm of mask bit 19, twice) | |
417 | ssm (1<<19) | |
418 | ssm (1<<19) | |
419 | ;; | |
420 | srlz.d | |
421 | ||
422 | // PSR.dfl (ssm of mask bit 18, twice) | |
423 | ssm (1<<18) | |
424 | ssm (1<<18) | |
425 | ;; | |
426 | srlz.d | |
427 | ||
428 | // PSR.di (rsm of mask bit 22, twice) | |
429 | rsm (1<<22) | |
430 | rsm (1<<22) | |
431 | ;; | |
432 | ||
433 | // PSR.dt (rsm of mask bit 17, twice) | |
434 | rsm (1<<17) | |
435 | rsm (1<<17) | |
436 | ;; | |
437 | ||
438 | // PSR.ed (rfi is the only writer) | |
439 | // PSR.i (ssm of mask bit 14, twice) | |
440 | ssm (1<<14) | |
441 | ssm (1<<14) | |
442 | ;; | |
443 | ||
444 | // PSR.ia (no DV semantics) | |
445 | // PSR.ic (ssm of mask bit 13, twice) | |
446 | ssm (1<<13) | |
447 | ssm (1<<13) | |
448 | ;; | |
449 | ||
450 | // PSR.id (rfi is the only writer) | |
451 | // PSR.is (br.ia and rfi are the only writers) | |
452 | // PSR.it (rfi is the only writer) | |
453 | // PSR.lp (see PSR.db) | |
454 | ||
455 | // PSR.mc (rfi is the only writer) | |
456 | // PSR.mfh (cases use f32 and above, and mask bit 5 via ssm/rum/psr.um; the final f32/f34 pair is marked as no DV) | |
457 | mov f32 = f33 | |
4b09e828 | 458 | mov r10 = psr |
800eeca4 JW |
459 | ;; |
460 | ssm (1<<5) | |
461 | ssm (1<<5) | |
462 | ;; | |
463 | ssm (1<<5) | |
4b09e828 | 464 | mov psr.um = r10 |
800eeca4 JW |
465 | ;; |
466 | rum (1<<5) | |
467 | rum (1<<5) | |
468 | ;; | |
469 | mov f32 = f33 | |
470 | mov f34 = f35 // no DV here | |
471 | ;; | |
472 | ||
473 | // PSR.mfl (same groups as PSR.mfh, using f2-f5 and mask bit 4) | |
474 | mov f2 = f3 | |
4b09e828 | 475 | mov r10 = psr |
800eeca4 JW |
476 | ;; |
477 | ssm (1<<4) | |
478 | ssm (1<<4) | |
479 | ;; | |
480 | ssm (1<<4) | |
4b09e828 | 481 | mov psr.um = r10 |
800eeca4 JW |
482 | ;; |
483 | rum (1<<4) | |
484 | rum (1<<4) | |
485 | ;; | |
486 | mov f2 = f3 | |
487 | mov f4 = f5 // no DV here | |
488 | ;; | |
489 | ||
490 | // PSR.pk (rsm of mask bit 15, twice) | |
491 | rsm (1<<15) | |
492 | rsm (1<<15) | |
493 | ;; | |
494 | ||
495 | // PSR.pp (rsm of mask bit 21, twice) | |
496 | rsm (1<<21) | |
497 | rsm (1<<21) | |
498 | ;; | |
499 | ||
500 | // PSR.ri (no DV semantics) | |
501 | // PSR.rt (see PSR.db) | |
502 | ||
503 | // PSR.si (rsm then ssm of mask bit 23) | |
504 | rsm (1<<23) | |
505 | ssm (1<<23) | |
506 | ;; | |
507 | ||
508 | // PSR.sp (ssm then rsm of mask bit 20; group is followed by srlz.d) | |
509 | ssm (1<<20) | |
510 | rsm (1<<20) | |
511 | ;; | |
512 | srlz.d | |
513 | ||
514 | // PSR.ss (rfi is the only writer) | |
515 | // PSR.tb (see PSR.db) | |
516 | ||
517 | // PSR.up (rsm of mask bit 2 twice; then rum of bit 2 paired with a write of psr.um) | |
518 | rsm (1<<2) | |
519 | rsm (1<<2) | |
520 | ;; | |
521 | rum (1<<2) | |
522 | mov psr.um = r0 | |
523 | ;; | |
524 | ||
525 | // RR# (same index register r2 in both writes) | |
526 | mov rr[r2] = r1 | |
527 | mov rr[r2] = r3 | |
528 | ;; | |
139368c9 | 529 | |
7484b8e6 TW |
530 | // PR, additional cases (or.andcm and and.orcm interaction): six same-type pairs marked as no DV, then three mixed or.andcm/and.orcm pairs |
531 | cmp.eq.or.andcm p6, p7 = 1, r32 | |
532 | cmp.eq.or.andcm p6, p7 = 5, r36 // no DV here | |
533 | ;; | |
534 | cmp.eq.and.orcm p6, p7 = 1, r32 | |
535 | cmp.eq.and.orcm p6, p7 = 5, r36 // no DV here | |
536 | ;; | |
537 | cmp.eq.or.andcm p63, p7 = 1, r32 | |
538 | cmp.eq.or.andcm p63, p7 = 5, r36 // no DV here | |
539 | ;; | |
540 | cmp.eq.or.andcm p6, p63 = 1, r32 | |
541 | cmp.eq.or.andcm p6, p63 = 5, r36 // no DV here | |
542 | ;; | |
543 | cmp.eq.and.orcm p63, p7 = 1, r32 | |
544 | cmp.eq.and.orcm p63, p7 = 5, r36 // no DV here | |
545 | ;; | |
546 | cmp.eq.and.orcm p6, p63 = 1, r32 | |
547 | cmp.eq.and.orcm p6, p63 = 5, r36 // no DV here | |
548 | ;; | |
549 | cmp.eq.or.andcm p6, p7 = 1, r32 | |
550 | cmp.eq.and.orcm p6, p7 = 5, r36 | |
551 | ;; | |
552 | cmp.eq.or.andcm p63, p7 = 1, r32 | |
553 | cmp.eq.and.orcm p63, p7 = 5, r36 | |
554 | ;; | |
555 | cmp.eq.or.andcm p6, p63 = 1, r32 | |
556 | cmp.eq.and.orcm p6, p63 = 5, r36 | |
557 | ;; | |
139368c9 JW |
558 | |
559 | // PR%, 16 - 62 (same compare cases as PR% 1 - 15, writing p21) | |
560 | cmp.eq p21, p0 = r0, r1 | |
561 | cmp.eq p21, p0 = r2, r3 | |
562 | ;; | |
563 | fcmp.eq p21, p22 = f2, f3 | |
564 | fcmp.eq p21, p23 = f2, f3 | |
565 | ;; | |
566 | cmp.eq.and p21, p22 = r0, r1 | |
567 | cmp.eq.or p21, p23 = r2, r3 | |
568 | ;; | |
569 | cmp.eq.or p21, p23 = r2, r3 | |
570 | cmp.eq.and p21, p22 = r0, r1 | |
571 | ;; | |
572 | cmp.eq.and p21, p22 = r0, r1 | |
573 | cmp.eq.and p21, p23 = r2, r3 // no DV here (both compares are .and) | |
574 | ;; | |
575 | cmp.eq.or p21, p22 = r0, r1 | |
576 | cmp.eq.or p21, p23 = r2, r3 // no DV here (both compares are .or) | |
577 | ;; | |
578 | ||
579 | // RSE (no test instructions follow this header) | |
580 | ||
581 | L: // target of the br.wtop / br.cloop / br.call instructions above