# Altivec instruction set, for PSIM, the PowerPC simulator.

# Copyright 2003-2013 Free Software Foundation, Inc.

# Contributed by Red Hat Inc; developed under contract from Motorola.
# Written by matthew green <mrg@redhat.com>.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


#
# Motorola AltiVec instructions.
#

:cache:av:::VS:VS:
:cache:av::vreg *:vS:VS:(cpu_registers(processor)->altivec.vr + VS)
:cache:av::unsigned32:VS_BITMASK:VS:(1 << VS)
:cache:av:::VA:VA:
:cache:av::vreg *:vA:VA:(cpu_registers(processor)->altivec.vr + VA)
:cache:av::unsigned32:VA_BITMASK:VA:(1 << VA)
:cache:av:::VB:VB:
:cache:av::vreg *:vB:VB:(cpu_registers(processor)->altivec.vr + VB)
:cache:av::unsigned32:VB_BITMASK:VB:(1 << VB)
:cache:av:::VC:VC:
:cache:av::vreg *:vC:VC:(cpu_registers(processor)->altivec.vr + VC)
:cache:av::unsigned32:VC_BITMASK:VC:(1 << VC)

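# The :cache: entries above precompute operand accessors for the decoded
# instruction fields.  As a rough sketch (an assumption about the generated
# semantics code, not verified against igen's actual output), each
# instruction body effectively sees:
#
#   vreg *vS = cpu_registers(processor)->altivec.vr + VS;  /* operand pointer */
#   unsigned32 VS_BITMASK = 1 << VS;   /* one-hot mask for the timing model */
#
# so (*vS).w[i] reads word element i of the target vector register, and the
# *_BITMASK values feed the register-busy bookkeeping defined below.
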
# Flags for model.h
::model-macro:::
  #define PPC_INSN_INT_VR(OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_int_vr(MY_INDEX, cpu_model(processor), OUT_MASK, IN_MASK, OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_VR(OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_VR_CR(OUT_VMASK, IN_VMASK, CR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr_cr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK, CR_MASK); \
  } while (0)

  #define PPC_INSN_VR_VSCR(OUT_VMASK, IN_VMASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_vr_vscr(MY_INDEX, cpu_model(processor), OUT_VMASK, IN_VMASK); \
  } while (0)

  #define PPC_INSN_FROM_VSCR(VR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_from_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \
  } while (0)

  #define PPC_INSN_TO_VSCR(VR_MASK) \
  do { \
    if (CURRENT_MODEL_ISSUE > 0) \
      ppc_insn_to_vscr(MY_INDEX, cpu_model(processor), VR_MASK); \
  } while (0)

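# Each instruction body below ends with one of these macros so the timing
# model can track register dependencies.  A typical call, taken from the
# semantics further down, looks like:
#
#   PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
#
# When instruction scheduling is modelled (CURRENT_MODEL_ISSUE > 0) this
# expands to ppc_insn_vr(MY_INDEX, cpu_model(processor), out, in);
# otherwise the guarded do/while body does nothing.
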
# Trace waiting for AltiVec registers to become available
void::model-static::model_trace_altivec_busy_p:model_data *model_ptr, unsigned32 vr_busy
  int i;
  if (vr_busy) {
    vr_busy &= model_ptr->vr_busy;
    for (i = 0; i < 32; i++) {
      if (((1 << i) & vr_busy) != 0) {
        TRACE(trace_model, ("Waiting for register v%d.\n", i));
      }
    }
  }
  if (model_ptr->vscr_busy)
    TRACE(trace_model, ("Waiting for VSCR\n"));

# Trace making AltiVec registers busy
void::model-static::model_trace_altivec_make_busy:model_data *model_ptr, unsigned32 vr_mask, unsigned32 cr_mask
  int i;
  if (vr_mask) {
    for (i = 0; i < 32; i++) {
      if (((1 << i) & vr_mask) != 0) {
        TRACE(trace_model, ("Register v%d is now busy.\n", i));
      }
    }
  }
  if (cr_mask) {
    for (i = 0; i < 8; i++) {
      if (((1 << i) & cr_mask) != 0) {
        TRACE(trace_model, ("Register cr%d is now busy.\n", i));
      }
    }
  }

# Schedule an AltiVec instruction that takes integer input registers and produces output registers
void::model-function::ppc_insn_int_vr:itable_index index, model_data *model_ptr, const unsigned32 out_mask, const unsigned32 in_mask, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 int_mask = out_mask | in_mask;
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;

  if ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */

    while ((model_ptr->int_busy & int_mask) != 0 || (model_ptr->vr_busy & vr_mask)) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_busy_p(model_ptr, int_mask, 0, 0, PPC_NO_SPR);
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }

      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }

  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->int_busy |= out_mask;
  busy_ptr->int_busy |= out_mask;
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;

  if (out_mask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_mask)) ? 1 : 2;

  if (out_vmask)
    busy_ptr->nr_writebacks += (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;

  if (WITH_TRACE && ppc_trace[trace_model]) {
    model_trace_make_busy(model_ptr, out_mask, 0, 0);
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);
  }

# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers
void::model-function::ppc_insn_vr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;

  if (model_ptr->vr_busy & vr_mask) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */

    while (model_ptr->vr_busy & vr_mask) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }

      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }

  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  if (out_vmask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;

  if (WITH_TRACE && ppc_trace[trace_model]) {
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);
  }

# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches CR
void::model-function::ppc_insn_vr_cr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask, const unsigned32 cr_mask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;

  if ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */

    while ((model_ptr->vr_busy & vr_mask) || (model_ptr->cr_fpscr_busy & cr_mask)) {
      if (WITH_TRACE && ppc_trace[trace_model]) {
        model_trace_busy_p(model_ptr, 0, 0, cr_mask, PPC_NO_SPR);
        model_trace_altivec_busy_p(model_ptr, vr_mask);
      }

      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }

  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->cr_fpscr_busy |= cr_mask;
  busy_ptr->cr_fpscr_busy |= cr_mask;
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;

  if (out_vmask)
    busy_ptr->nr_writebacks = (PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2;

  if (cr_mask)
    busy_ptr->nr_writebacks++;

  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, cr_mask);

# Schedule an AltiVec instruction that takes vector input registers and produces vector output registers, touches VSCR
void::model-function::ppc_insn_vr_vscr:itable_index index, model_data *model_ptr, const unsigned32 out_vmask, const unsigned32 in_vmask
  const unsigned32 vr_mask = out_vmask | in_vmask;
  model_busy *busy_ptr;

  if ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    model_new_cycle(model_ptr); /* don't count first dependency as a stall */

    while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
      if (WITH_TRACE && ppc_trace[trace_model])
        model_trace_altivec_busy_p(model_ptr, vr_mask);

      model_ptr->nr_stalls_data++;
      model_new_cycle(model_ptr);
    }
  }

  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  model_ptr->vr_busy |= out_vmask;
  busy_ptr->vr_busy |= out_vmask;
  model_ptr->vscr_busy = 1;
  busy_ptr->vscr_busy = 1;

  if (out_vmask)
    busy_ptr->nr_writebacks = 1 + ((PPC_ONE_BIT_SET_P(out_vmask)) ? 1 : 2);

  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);

# Schedule an MFVSCR instruction that takes the VSCR as input and produces an AltiVec output register
void::model-function::ppc_insn_from_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask
  model_busy *busy_ptr;

  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    if (WITH_TRACE && ppc_trace[trace_model])
      model_trace_altivec_busy_p(model_ptr, vr_mask);

    model_ptr->nr_stalls_data++;
    model_new_cycle(model_ptr);
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  /* mark the target vector register busy (vr_mask is a VR mask, so it
     belongs in vr_busy, not in the CR/FPSCR bookkeeping) */
  model_ptr->vr_busy |= vr_mask;
  busy_ptr->vr_busy |= vr_mask;

  if (vr_mask)
    busy_ptr->nr_writebacks = 1;

  if (WITH_TRACE && ppc_trace[trace_model])
    model_trace_altivec_make_busy(model_ptr, vr_mask, 0);

# Schedule an MTVSCR instruction that takes one AltiVec input register and produces a VSCR output
void::model-function::ppc_insn_to_vscr:itable_index index, model_data *model_ptr, const unsigned32 vr_mask
  model_busy *busy_ptr;

  while ((model_ptr->vr_busy & vr_mask) != 0 || model_ptr->vscr_busy != 0) {
    if (WITH_TRACE && ppc_trace[trace_model])
      model_trace_altivec_busy_p(model_ptr, vr_mask);

    model_ptr->nr_stalls_data++;
    model_new_cycle(model_ptr);
  }
  busy_ptr = model_wait_for_unit(index, model_ptr, &model_ptr->timing[index]);
  busy_ptr->vscr_busy = 1;
  model_ptr->vscr_busy = 1;
  busy_ptr->nr_writebacks = 1;

  TRACE(trace_model,("Making VSCR busy.\n"));

# The following are the AltiVec saturate helpers.

signed8::model-function::altivec_signed_saturate_8:signed16 val, int *sat
  signed8 rv;
  if (val > 127) {
    rv = 127;
    *sat = 1;
  } else if (val < -128) {
    rv = -128;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

signed16::model-function::altivec_signed_saturate_16:signed32 val, int *sat
  signed16 rv;
  if (val > 32767) {
    rv = 32767;
    *sat = 1;
  } else if (val < -32768) {
    rv = -32768;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

signed32::model-function::altivec_signed_saturate_32:signed64 val, int *sat
  signed32 rv;
  if (val > 2147483647) {
    rv = 2147483647;
    *sat = 1;
  } else if (val < -2147483648LL) {
    rv = -2147483648LL;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned8::model-function::altivec_unsigned_saturate_8:signed16 val, int *sat
  unsigned8 rv;
  if (val > 255) {
    rv = 255;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned16::model-function::altivec_unsigned_saturate_16:signed32 val, int *sat
  unsigned16 rv;
  if (val > 65535) {
    rv = 65535;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

unsigned32::model-function::altivec_unsigned_saturate_32:signed64 val, int *sat
  unsigned32 rv;
  if (val > 4294967295LL) {
    rv = 4294967295LL;
    *sat = 1;
  } else if (val < 0) {
    rv = 0;
    *sat = 1;
  } else {
    rv = val;
    *sat = 0;
  }
  return rv;

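# Worked example of the helpers above: for vaddsbs, 100 + 50 produces the
# intermediate signed16 value 150, so
#
#   altivec_signed_saturate_8(150, &tempsat)   /* returns 127, tempsat = 1 */
#   altivec_signed_saturate_8(-30, &tempsat)   /* returns -30, tempsat = 0 */
#
# The per-element tempsat values are ORed into `sat', which the instruction
# bodies hand to ALTIVEC_SET_SAT to record sticky saturation in the VSCR.
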
#
# Load instructions, 6-14 ... 6-22.
#

0.31,6.VS,11.RA,16.RB,21.7,31.0:X:av:lvebx %VD, %RA, %RB:Load Vector Element Byte Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = b + *rB;
  eb = EA & 0xf;
  (*vS).b[AV_BINDEX(eb)] = MEM(unsigned, EA, 1);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.39,31.0:X:av:lvehx %VD, %RA, %RB:Load Vector Element Half Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~1;
  eb = EA & 0xf;
  (*vS).h[AV_HINDEX(eb/2)] = MEM(unsigned, EA, 2);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.71,31.0:X:av:lvewx %VD, %RA, %RB:Load Vector Element Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~3;
  eb = EA & 0xf;
  (*vS).w[eb/4] = MEM(unsigned, EA, 4);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);


0.31,6.VS,11.RA,16.RB,21.6,31.0:X:av:lvsl %VD, %RA, %RB:Load Vector for Shift Left
  unsigned_word b;
  unsigned_word addr;
  int i, j;
  if (RA_is_0) b = 0;
  else b = *rA;
  addr = b + *rB;
  j = addr & 0xf;
  for (i = 0; i < 16; i++)
    if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
      (*vS).b[AV_BINDEX(i)] = j++;
    else
      (*vS).b[AV_BINDEX(15 - i)] = j++;
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.38,31.0:X:av:lvsr %VD, %RA, %RB:Load Vector for Shift Right
  unsigned_word b;
  unsigned_word addr;
  int i, j;
  if (RA_is_0) b = 0;
  else b = *rA;
  addr = b + *rB;
  j = 0x10 - (addr & 0xf);
  for (i = 0; i < 16; i++)
    if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
      (*vS).b[AV_BINDEX(i)] = j++;
    else
      (*vS).b[AV_BINDEX(15 - i)] = j++;
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

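# lvsl/lvsr produce the permute-control vector used with vperm for unaligned
# access.  A sketch of the classic unaligned-load idiom (mnemonics per the
# AltiVec PEM; the register allocation here is illustrative only):
#
#   lvx   v1,0,r3      # quadword containing the start address in r3
#   lvx   v2,r4,r3     # quadword containing the last byte (r4 = 15)
#   lvsl  v3,0,r3      # control bytes: (r3 & 0xf), (r3 & 0xf)+1, ...
#   vperm v0,v1,v2,v3  # v0 = the 16 unaligned bytes at r3
#
# lvsl fills the control vector with consecutive byte indices starting at
# addr & 0xf, which is exactly what the loop above implements.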

0.31,6.VS,11.RA,16.RB,21.103,31.0:X:av:lvx %VD, %RA, %RB:Load Vector Indexed
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    (*vS).w[0] = MEM(unsigned, EA + 0, 4);
    (*vS).w[1] = MEM(unsigned, EA + 4, 4);
    (*vS).w[2] = MEM(unsigned, EA + 8, 4);
    (*vS).w[3] = MEM(unsigned, EA + 12, 4);
  } else {
    (*vS).w[0] = MEM(unsigned, EA + 12, 4);
    (*vS).w[1] = MEM(unsigned, EA + 8, 4);
    (*vS).w[2] = MEM(unsigned, EA + 4, 4);
    (*vS).w[3] = MEM(unsigned, EA + 0, 4);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.359,31.0:X:av:lvxl %VD, %RA, %RB:Load Vector Indexed LRU
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    (*vS).w[0] = MEM(unsigned, EA + 0, 4);
    (*vS).w[1] = MEM(unsigned, EA + 4, 4);
    (*vS).w[2] = MEM(unsigned, EA + 8, 4);
    (*vS).w[3] = MEM(unsigned, EA + 12, 4);
  } else {
    (*vS).w[0] = MEM(unsigned, EA + 12, 4);
    (*vS).w[1] = MEM(unsigned, EA + 8, 4);
    (*vS).w[2] = MEM(unsigned, EA + 4, 4);
    (*vS).w[3] = MEM(unsigned, EA + 0, 4);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

#
# Move to/from VSCR instructions, 6-23 & 6-24.
#

0.4,6.VS,11.0,16.0,21.1540:VX:av:mfvscr %VS:Move from Vector Status and Control Register
  (*vS).w[0] = 0;
  (*vS).w[1] = 0;
  (*vS).w[2] = 0;
  (*vS).w[3] = VSCR;
  PPC_INSN_FROM_VSCR(VS_BITMASK);

0.4,6.0,11.0,16.VB,21.1604:VX:av:mtvscr %VB:Move to Vector Status and Control Register
  VSCR = (*vB).w[3];
  PPC_INSN_TO_VSCR(VB_BITMASK);

#
# Store instructions, 6-25 ... 6-29.
#

0.31,6.VS,11.RA,16.RB,21.135,31.0:X:av:stvebx %VD, %RA, %RB:Store Vector Element Byte Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = b + *rB;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 1, (*vS).b[eb]);
  else
    STORE(EA, 1, (*vS).b[15-eb]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.167,31.0:X:av:stvehx %VD, %RA, %RB:Store Vector Element Half Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~1;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 2, (*vS).h[eb/2]);
  else
    STORE(EA, 2, (*vS).h[7-(eb/2)]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.199,31.0:X:av:stvewx %VD, %RA, %RB:Store Vector Element Word Indexed
  unsigned_word b;
  unsigned_word EA;
  unsigned_word eb;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~3;
  eb = EA & 0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
    STORE(EA, 4, (*vS).w[eb/4]);
  else
    STORE(EA, 4, (*vS).w[3-(eb/4)]);
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.231,31.0:X:av:stvx %VD, %RA, %RB:Store Vector Indexed
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    STORE(EA + 0, 4, (*vS).w[0]);
    STORE(EA + 4, 4, (*vS).w[1]);
    STORE(EA + 8, 4, (*vS).w[2]);
    STORE(EA + 12, 4, (*vS).w[3]);
  } else {
    STORE(EA + 12, 4, (*vS).w[0]);
    STORE(EA + 8, 4, (*vS).w[1]);
    STORE(EA + 4, 4, (*vS).w[2]);
    STORE(EA + 0, 4, (*vS).w[3]);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

0.31,6.VS,11.RA,16.RB,21.487,31.0:X:av:stvxl %VD, %RA, %RB:Store Vector Indexed LRU
  unsigned_word b;
  unsigned_word EA;
  if (RA_is_0) b = 0;
  else b = *rA;
  EA = (b + *rB) & ~0xf;
  if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN) {
    STORE(EA + 0, 4, (*vS).w[0]);
    STORE(EA + 4, 4, (*vS).w[1]);
    STORE(EA + 8, 4, (*vS).w[2]);
    STORE(EA + 12, 4, (*vS).w[3]);
  } else {
    STORE(EA + 12, 4, (*vS).w[0]);
    STORE(EA + 8, 4, (*vS).w[1]);
    STORE(EA + 4, 4, (*vS).w[2]);
    STORE(EA + 0, 4, (*vS).w[3]);
  }
  PPC_INSN_INT_VR(0, RA_BITMASK | RB_BITMASK, VS_BITMASK, 0);

#
# Vector Add instructions, 6-30 ... 6-40.
#

0.4,6.VS,11.VA,16.VB,21.384:VX:av:vaddcuw %VD, %VA, %VB:Vector Add Carryout Unsigned Word
  unsigned64 temp;
  int i;
  for (i = 0; i < 4; i++) {
    temp = (unsigned64)(*vA).w[i] + (unsigned64)(*vB).w[i];
    (*vS).w[i] = temp >> 32;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

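# Example: with (*vA).w[i] = 0xffffffff and (*vB).w[i] = 1, the 64-bit sum
# is 0x100000000, so temp >> 32 leaves a carryout of 1 in that result
# element; any sum below 2^32 leaves 0.
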
0.4,6.VS,11.VA,16.VB,21.10:VX:av:vaddfp %VD, %VA, %VB:Vector Add Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_add (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.768:VX:av:vaddsbs %VD, %VA, %VB:Vector Add Signed Byte Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    temp = (signed16)(signed8)(*vA).b[i] + (signed16)(signed8)(*vB).b[i];
    (*vS).b[i] = altivec_signed_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.832:VX:av:vaddshs %VD, %VA, %VB:Vector Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed32 temp, a, b;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (signed32)(signed16)(*vA).h[i];
    b = (signed32)(signed16)(*vB).h[i];
    temp = a + b;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.896:VX:av:vaddsws %VD, %VA, %VB:Vector Add Signed Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(signed32)(*vA).w[i] + (signed64)(signed32)(*vB).w[i];
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.0:VX:av:vaddubm %VD, %VA, %VB:Vector Add Unsigned Byte Modulo
  int i;
  for (i = 0; i < 16; i++)
    (*vS).b[i] = ((*vA).b[i] + (*vB).b[i]) & 0xff;
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.512:VX:av:vaddubs %VD, %VA, %VB:Vector Add Unsigned Byte Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    temp = (signed16)(unsigned8)(*vA).b[i] + (signed16)(unsigned8)(*vB).b[i];
    (*vS).b[i] = altivec_unsigned_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.64:VX:av:vadduhm %VD, %VA, %VB:Vector Add Unsigned Half Word Modulo
  int i;
  for (i = 0; i < 8; i++)
    (*vS).h[i] = ((*vA).h[i] + (*vB).h[i]) & 0xffff;
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.576:VX:av:vadduhs %VD, %VA, %VB:Vector Add Unsigned Half Word Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    temp = (signed32)(unsigned16)(*vA).h[i] + (signed32)(unsigned16)(*vB).h[i];
    (*vS).h[i] = altivec_unsigned_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.128:VX:av:vadduwm %VD, %VA, %VB:Vector Add Unsigned Word Modulo
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] + (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.640:VX:av:vadduws %VD, %VA, %VB:Vector Add Unsigned Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(unsigned32)(*vA).w[i] + (signed64)(unsigned32)(*vB).w[i];
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector AND instructions, 6-41, 6-42
#

0.4,6.VS,11.VA,16.VB,21.1028:VX:av:vand %VD, %VA, %VB:Vector Logical AND
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] & (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1092:VX:av:vandc %VD, %VA, %VB:Vector Logical AND with Complement
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] & ~((*vB).w[i]);
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Average instructions, 6-43 ... 6-48
#

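# These averages compute (a + b + 1) >> 1 in a wider type, i.e. rounding
# half upward: vavgub on 4 and 5 gives (4 + 5 + 1) >> 1 = 5, while 4 and 4
# gives 4.  The widening keeps the +1 from overflowing the element type.
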
0.4,6.VS,11.VA,16.VB,21.1282:VX:av:vavgsb %VD, %VA, %VB:Vector Average Signed Byte
  int i;
  signed16 temp, a, b;
  for (i = 0; i < 16; i++) {
    a = (signed16)(signed8)(*vA).b[i];
    b = (signed16)(signed8)(*vB).b[i];
    temp = a + b + 1;
    (*vS).b[i] = (temp >> 1) & 0xff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1346:VX:av:vavgsh %VD, %VA, %VB:Vector Average Signed Half Word
  int i;
  signed32 temp, a, b;
  for (i = 0; i < 8; i++) {
    a = (signed32)(signed16)(*vA).h[i];
    b = (signed32)(signed16)(*vB).h[i];
    temp = a + b + 1;
    (*vS).h[i] = (temp >> 1) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1410:VX:av:vavgsw %VD, %VA, %VB:Vector Average Signed Word
  int i;
  signed64 temp, a, b;
  for (i = 0; i < 4; i++) {
    a = (signed64)(signed32)(*vA).w[i];
    b = (signed64)(signed32)(*vB).w[i];
    temp = a + b + 1;
    (*vS).w[i] = (temp >> 1) & 0xffffffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1026:VX:av:vavgub %VD, %VA, %VB:Vector Average Unsigned Byte
  int i;
  unsigned16 temp, a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    temp = a + b + 1;
    (*vS).b[i] = (temp >> 1) & 0xff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1090:VX:av:vavguh %VD, %VA, %VB:Vector Average Unsigned Half Word
  int i;
  unsigned32 temp, a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    temp = a + b + 1;
    (*vS).h[i] = (temp >> 1) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1154:VX:av:vavguw %VD, %VA, %VB:Vector Average Unsigned Word
  int i;
  unsigned64 temp, a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    temp = a + b + 1;
    (*vS).w[i] = (temp >> 1) & 0xffffffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

#
# Vector Fixed Point Convert instructions, 6-49, 6-50
#

0.4,6.VS,11.UIMM,16.VB,21.842:VX:av:vcfsx %VD, %VB, %UIMM:Vector Convert From Signed Fixed-Point Word
  int i;
  unsigned32 f;
  sim_fpu b, div, d;
  for (i = 0; i < 4; i++) {
    /* convert the signed fixed-point value, then scale by 2^UIMM */
    sim_fpu_i32to (&b, (signed32)(*vB).w[i], sim_fpu_round_default);
    sim_fpu_u32to (&div, 1 << UIMM, sim_fpu_round_default);
    sim_fpu_div (&d, &b, &div);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.778:VX:av:vcfux %VD, %VB, %UIMM:Vector Convert From Unsigned Fixed-Point Word
  int i;
  unsigned32 f;
  sim_fpu b, d, div;
  for (i = 0; i < 4; i++) {
    /* convert the unsigned fixed-point value, then scale by 2^UIMM */
    sim_fpu_u32to (&b, (*vB).w[i], sim_fpu_round_default);
    sim_fpu_u32to (&div, 1 << UIMM, sim_fpu_round_default);
    sim_fpu_div (&d, &b, &div);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

#
# Vector Compare instructions, 6-51 ... 6-64
#

0.4,6.VS,11.VA,16.VB,21.RC,22.966:VXR:av:vcmpbfpx %VD, %VA, %VB:Vector Compare Bounds Floating Point
  int i, le, ge;
  sim_fpu a, b, nb;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_neg (&nb, &b);
    le = sim_fpu_is_le(&a, &b);   /* within the upper bound? */
    ge = sim_fpu_is_ge(&a, &nb);  /* within the lower bound, -(vB)? */
    (*vS).w[i] = (le ? 0 : 1 << 31) | (ge ? 0 : 1 << 30);
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 0);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.198:VXR:av:vcmpeqfpx %VD, %VA, %VB:Vector Compare Equal-to Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_eq(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.6:VXR:av:vcmpequbx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Byte
  int i;
  for (i = 0; i < 16; i++)
    if ((*vA).b[i] == (*vB).b[i])
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.70:VXR:av:vcmpequhx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Half Word
  int i;
  for (i = 0; i < 8; i++)
    if ((*vA).h[i] == (*vB).h[i])
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.134:VXR:av:vcmpequwx %VD, %VA, %VB:Vector Compare Equal-to Unsigned Word
  int i;
  for (i = 0; i < 4; i++)
    if ((*vA).w[i] == (*vB).w[i])
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.454:VXR:av:vcmpgefpx %VD, %VA, %VB:Vector Compare Greater-Than-or-Equal-to Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_ge(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.710:VXR:av:vcmpgtfpx %VD, %VA, %VB:Vector Compare Greater-Than Floating Point
  int i;
  sim_fpu a, b;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    if (sim_fpu_is_gt(&a, &b))
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.774:VXR:av:vcmpgtsbx %VD, %VA, %VB:Vector Compare Greater-Than Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.838:VXR:av:vcmpgtshx %VD, %VA, %VB:Vector Compare Greater-Than Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.902:VXR:av:vcmpgtswx %VD, %VA, %VB:Vector Compare Greater-Than Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.518:VXR:av:vcmpgtubx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = 0xff;
    else
      (*vS).b[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.582:VXR:av:vcmpgtuhx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = 0xffff;
    else
      (*vS).h[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

0.4,6.VS,11.VA,16.VB,21.RC,22.646:VXR:av:vcmpgtuwx %VD, %VA, %VB:Vector Compare Greater-Than Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = 0xffffffff;
    else
      (*vS).w[i] = 0;
  }
  if (RC)
    ALTIVEC_SET_CR6(vS, 1);
  PPC_INSN_VR_CR(VS_BITMASK, VA_BITMASK | VB_BITMASK, RC ? 0x000000f0 : 0);

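# For the Rc=1 (dot) forms above, ALTIVEC_SET_CR6 records the aggregate
# outcome in CR field 6.  Architecturally (per the AltiVec PEM; the macro
# itself lives in altivec_registers.h), CR6 becomes 0b1000 when the
# relation held for every element and 0b0010 when it held for none, so
# code can branch on "all true" or "all false" after a dot-form compare:
#
#   vcmpequb. v0,v1,v2
#   blt cr6,all_equal    # taken when every byte of v1 equalled v2
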
#
# Vector Convert instructions, 6-65, 6-66.
#

0.4,6.VS,11.UIMM,16.VB,21.970:VX:av:vctsxs %VD, %VB, %UIMM:Vector Convert to Signed Fixed-Point Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sim_fpu a, b, m;
  sat = 0;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&m, 1 << UIMM, sim_fpu_round_default);  /* scale by 2^UIMM */
    sim_fpu_mul (&a, &b, &m);
    sim_fpu_to64i (&temp, &a, sim_fpu_round_default);
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.UIMM,16.VB,21.906:VX:av:vctuxs %VD, %VB, %UIMM:Vector Convert to Unsigned Fixed-Point Word Saturate
  int i, sat, tempsat;
  signed64 temp;
  sim_fpu a, b, m;
  sat = 0;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_u32to (&m, 1 << UIMM, sim_fpu_round_default);  /* scale by 2^UIMM */
    sim_fpu_mul (&a, &b, &m);
    sim_fpu_to64u (&temp, &a, sim_fpu_round_default);
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Estimate instructions, 6-67 ... 6-70.
#

0.4,6.VS,11.0,16.VB,21.394:VX:av:vexptefp %VD, %VB:Vector 2 Raised to the Exponent Estimate Floating Point
  int i;
  unsigned32 f;
  signed32 bi;
  sim_fpu b, d;
  for (i = 0; i < 4; i++) {
    /* HACK: estimate 2^x only for small, non-negative integral x */
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_to32i (&bi, &b, sim_fpu_round_default);
    bi = 1 << bi;
    sim_fpu_i32to (&d, bi, sim_fpu_round_default);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.458:VX:av:vlogefp %VD, %VB:Vector Log2 Estimate Floating Point
  int i;
  unsigned32 c, u, f;
  sim_fpu b, d;
  for (i = 0; i < 4; i++) {
    /* HACK: floor(log2(x)) via repeated halving of the integer value */
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_to32u (&u, &b, sim_fpu_round_default);
    for (c = 0; (u /= 2) > 0; c++)
      ;
    sim_fpu_u32to (&d, c, sim_fpu_round_default);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR_VSCR(VS_BITMASK, VB_BITMASK);

#
# Vector Multiply Add instruction, 6-71
#

0.4,6.VS,11.VA,16.VB,21.VC,26.46:VAX:av:vmaddfp %VD, %VA, %VB, %VC:Vector Multiply Add Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, c, d, e;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_32to (&c, (*vC).w[i]);
    sim_fpu_mul (&e, &a, &c);
    sim_fpu_add (&d, &e, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Maximum instructions, 6-72 ... 6-78.
#

0.4,6.VS,11.VA,16.VB,21.1034:VX:av:vmaxfp %VD, %VA, %VB:Vector Maximum Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_max (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.258:VX:av:vmaxsb %VD, %VA, %VB:Vector Maximum Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.322:VX:av:vmaxsh %VD, %VA, %VB:Vector Maximum Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.386:VX:av:vmaxsw %VD, %VA, %VB:Vector Maximum Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.2:VX:av:vmaxub %VD, %VA, %VB:Vector Maximum Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a > b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.66:VX:av:vmaxuh %VD, %VA, %VB:Vector Maximum Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a > b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.130:VX:av:vmaxuw %VD, %VA, %VB:Vector Maximum Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a > b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Multiply High instructions, 6-79, 6-80.
#

0.4,6.VS,11.VA,16.VB,21.VC,26.32:VAX:av:vmhaddshs %VD, %VA, %VB, %VC:Vector Multiply High and Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed16 a, b;
  signed32 prod, temp, c;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (signed32)(signed16)(*vC).h[i];
    prod = (signed32)a * (signed32)b;
    temp = (prod >> 15) + c;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.33:VAX:av:vmhraddshs %VD, %VA, %VB, %VC:Vector Multiply High Round and Add Signed Half Word Saturate
  int i, sat, tempsat;
  signed16 a, b;
  signed32 prod, temp, c;
  sat = 0;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (signed32)(signed16)(*vC).h[i];
    prod = (signed32)a * (signed32)b;
    prod += 0x4000;
    temp = (prod >> 15) + c;
    (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Minimum instructions, 6-81 ... 6-87
#

0.4,6.VS,11.VA,16.VB,21.1098:VX:av:vminfp %VD, %VA, %VB:Vector Minimum Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_min (&d, &a, &b);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.770:VX:av:vminsb %VD, %VA, %VB:Vector Minimum Signed Byte
  int i;
  signed8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a < b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.834:VX:av:vminsh %VD, %VA, %VB:Vector Minimum Signed Half Word
  int i;
  signed16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a < b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.898:VX:av:vminsw %VD, %VA, %VB:Vector Minimum Signed Word
  int i;
  signed32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a < b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.514:VX:av:vminub %VD, %VA, %VB:Vector Minimum Unsigned Byte
  int i;
  unsigned8 a, b;
  for (i = 0; i < 16; i++) {
    a = (*vA).b[i];
    b = (*vB).b[i];
    if (a < b)
      (*vS).b[i] = a;
    else
      (*vS).b[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.578:VX:av:vminuh %VD, %VA, %VB:Vector Minimum Unsigned Half Word
  int i;
  unsigned16 a, b;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    if (a < b)
      (*vS).h[i] = a;
    else
      (*vS).h[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.642:VX:av:vminuw %VD, %VA, %VB:Vector Minimum Unsigned Word
  int i;
  unsigned32 a, b;
  for (i = 0; i < 4; i++) {
    a = (*vA).w[i];
    b = (*vB).w[i];
    if (a < b)
      (*vS).w[i] = a;
    else
      (*vS).w[i] = b;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Multiply Low instruction, 6-88
#

0.4,6.VS,11.VA,16.VB,21.VC,26.34:VAX:av:vmladduhm %VD, %VA, %VB, %VC:Vector Multiply Low and Add Unsigned Half Word Modulo
  int i;
  unsigned16 a, b, c;
  unsigned32 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).h[i];
    b = (*vB).h[i];
    c = (*vC).h[i];
    prod = (unsigned32)a * (unsigned32)b;
    (*vS).h[i] = (prod + c) & 0xffff;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Merge instructions, 6-89 ... 6-94
#

0.4,6.VS,11.VA,16.VB,21.12:VX:av:vmrghb %VD, %VA, %VB:Vector Merge High Byte
  int i;
  for (i = 0; i < 16; i += 2) {
    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i/2)];
    (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX(i/2)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.76:VX:av:vmrghh %VD, %VA, %VB:Vector Merge High Half Word
  int i;
  for (i = 0; i < 8; i += 2) {
    (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX(i/2)];
    (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX(i/2)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.140:VX:av:vmrghw %VD, %VA, %VB:Vector Merge High Word
  int i;
  for (i = 0; i < 4; i += 2) {
    (*vS).w[i] = (*vA).w[i/2];
    (*vS).w[i+1] = (*vB).w[i/2];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.268:VX:av:vmrglb %VD, %VA, %VB:Vector Merge Low Byte
  int i;
  for (i = 0; i < 16; i += 2) {
    (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX((i/2) + 8)];
    (*vS).b[AV_BINDEX(i+1)] = (*vB).b[AV_BINDEX((i/2) + 8)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.332:VX:av:vmrglh %VD, %VA, %VB:Vector Merge Low Half Word
  int i;
  for (i = 0; i < 8; i += 2) {
    (*vS).h[AV_HINDEX(i)] = (*vA).h[AV_HINDEX((i/2) + 4)];
    (*vS).h[AV_HINDEX(i+1)] = (*vB).h[AV_HINDEX((i/2) + 4)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.396:VX:av:vmrglw %VD, %VA, %VB:Vector Merge Low Word
  int i;
  for (i = 0; i < 4; i += 2) {
    (*vS).w[i] = (*vA).w[(i/2) + 2];
    (*vS).w[i+1] = (*vB).w[(i/2) + 2];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Multiply Sum instructions, 6-95 ... 6-100
#

0.4,6.VS,11.VA,16.VB,21.VC,26.37:VAX:av:vmsummbm %VD, %VA, %VB, %VC:Vector Multiply Sum Mixed-Sign Byte Modulo
  int i, j;
  signed32 temp;
  signed16 prod, a;
  unsigned16 b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 4; j++) {
      a = (signed16)(signed8)(*vA).b[i*4+j];
      b = (*vB).b[i*4+j];
      prod = a * b;
      temp += (signed32)prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.40:VAX:av:vmsumshm %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Modulo
  int i, j;
  signed32 temp, prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (signed32)(signed16)(*vA).h[i*2+j];
      b = (signed32)(signed16)(*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.41:VAX:av:vmsumshs %VD, %VA, %VB, %VC:Vector Multiply Sum Signed Half Word Saturate
  int i, j, sat, tempsat;
  signed64 temp;
  signed32 prod, a, b;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (signed64)(signed32)(*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (signed32)(signed16)(*vA).h[i*2+j];
      b = (signed32)(signed16)(*vB).h[i*2+j];
      prod = a * b;
      temp += (signed64)prod;
    }
    (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.36:VAX:av:vmsumubm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Byte Modulo
  int i, j;
  unsigned32 temp;
  unsigned16 prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 4; j++) {
      a = (*vA).b[i*4+j];
      b = (*vB).b[i*4+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.38:VAX:av:vmsumuhm %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Modulo
  int i, j;
  unsigned32 temp, prod, a, b;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (*vA).h[i*2+j];
      b = (*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = temp;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

0.4,6.VS,11.VA,16.VB,21.VC,26.39:VAX:av:vmsumuhs %VD, %VA, %VB, %VC:Vector Multiply Sum Unsigned Half Word Saturate
  int i, j, sat, tempsat;
  unsigned32 temp, prod, a, b;
  sat = 0;
  for (i = 0; i < 4; i++) {
    temp = (*vC).w[i];
    for (j = 0; j < 2; j++) {
      a = (*vA).h[i*2+j];
      b = (*vB).h[i*2+j];
      prod = a * b;
      temp += prod;
    }
    (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Multiply Even/Odd instructions, 6-101 ... 6-108
#

0.4,6.VS,11.VA,16.VB,21.776:VX:av:vmulesb %VD, %VA, %VB:Vector Multiply Even Signed Byte
  int i;
  signed8 a, b;
  signed16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX(i*2)];
    b = (*vB).b[AV_BINDEX(i*2)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.840:VX:av:vmulesh %VD, %VA, %VB:Vector Multiply Even Signed Half Word
  int i;
  signed16 a, b;
  signed32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX(i*2)];
    b = (*vB).h[AV_HINDEX(i*2)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.520:VX:av:vmuleub %VD, %VA, %VB:Vector Multiply Even Unsigned Byte
  int i;
  unsigned8 a, b;
  unsigned16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX(i*2)];
    b = (*vB).b[AV_BINDEX(i*2)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.584:VX:av:vmuleuh %VD, %VA, %VB:Vector Multiply Even Unsigned Half Word
  int i;
  unsigned16 a, b;
  unsigned32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX(i*2)];
    b = (*vB).h[AV_HINDEX(i*2)];
    prod = (unsigned32)a * b;  /* widen before multiplying */
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.264:VX:av:vmulosb %VD, %VA, %VB:Vector Multiply Odd Signed Byte
  int i;
  signed8 a, b;
  signed16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX((i*2)+1)];
    b = (*vB).b[AV_BINDEX((i*2)+1)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.328:VX:av:vmulosh %VD, %VA, %VB:Vector Multiply Odd Signed Half Word
  int i;
  signed16 a, b;
  signed32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX((i*2)+1)];
    b = (*vB).h[AV_HINDEX((i*2)+1)];
    prod = a * b;
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.8:VX:av:vmuloub %VD, %VA, %VB:Vector Multiply Odd Unsigned Byte
  int i;
  unsigned8 a, b;
  unsigned16 prod;
  for (i = 0; i < 8; i++) {
    a = (*vA).b[AV_BINDEX((i*2)+1)];
    b = (*vB).b[AV_BINDEX((i*2)+1)];
    prod = a * b;
    (*vS).h[AV_HINDEX(i)] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.72:VX:av:vmulouh %VD, %VA, %VB:Vector Multiply Odd Unsigned Half Word
  int i;
  unsigned16 a, b;
  unsigned32 prod;
  for (i = 0; i < 4; i++) {
    a = (*vA).h[AV_HINDEX((i*2)+1)];
    b = (*vB).h[AV_HINDEX((i*2)+1)];
    prod = (unsigned32)a * b;  /* widen before multiplying */
    (*vS).w[i] = prod;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Negative Multiply-Subtract instruction, 6-109
#

0.4,6.VS,11.VA,16.VB,21.VC,26.47:VX:av:vnmsubfp %VD, %VA, %VB, %VC:Vector Negative Multiply-Subtract Floating Point
  int i;
  unsigned32 f;
  sim_fpu a, b, c, d, i1, i2;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&a, (*vA).w[i]);
    sim_fpu_32to (&b, (*vB).w[i]);
    sim_fpu_32to (&c, (*vC).w[i]);
    sim_fpu_mul (&i1, &a, &c);
    sim_fpu_sub (&i2, &i1, &b);
    sim_fpu_neg (&d, &i2);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);


#
# Vector Logical OR instructions, 6-110, 6-111, 6-177
#

0.4,6.VS,11.VA,16.VB,21.1284:VX:av:vnor %VD, %VA, %VB:Vector Logical NOR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = ~((*vA).w[i] | (*vB).w[i]);
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1156:VX:av:vor %VD, %VA, %VB:Vector Logical OR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] | (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.1220:VX:av:vxor %VD, %VA, %VB:Vector Logical XOR
  int i;
  for (i = 0; i < 4; i++)
    (*vS).w[i] = (*vA).w[i] ^ (*vB).w[i];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Permute instruction, 6-112
#

0.4,6.VS,11.VA,16.VB,21.VC,26.43:VX:av:vperm %VD, %VA, %VB, %VC:Vector Permute
  int i, who;
  /* The permutation vector might have us read into the source vectors
     back at positions before the iteration index, so we must latch the
     sources to prevent early-clobbering in case the destination vector
     is the same as one of them.  */
  vreg myvA = (*vA), myvB = (*vB);
  for (i = 0; i < 16; i++) {
    who = (*vC).b[AV_BINDEX(i)] & 0x1f;
    if (who & 0x10)
      (*vS).b[AV_BINDEX(i)] = myvB.b[AV_BINDEX(who & 0xf)];
    else
      (*vS).b[AV_BINDEX(i)] = myvA.b[AV_BINDEX(who & 0xf)];
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);

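# Each control byte selects one source byte: values 0x00-0x0f pick that
# byte of vA, 0x10-0x1f the corresponding byte of vB.  For example, a
# control byte of 0x13 writes byte 3 of vB into that result position; a
# control vector of 0x00 0x11 0x02 0x13 ... interleaves the two sources.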

#
# Vector Pack instructions, 6-113 ... 6-121
#

0.4,6.VS,11.VA,16.VB,21.782:VX:av:vpkpx %VD, %VA, %VB:Vector Pack Pixel32
  int i;
  for (i = 0; i < 4; i++) {
    (*vS).h[AV_HINDEX(i+4)] = ((((*vB).w[i]) >> 9) & 0xfc00)
                            | ((((*vB).w[i]) >> 6) & 0x03e0)
                            | ((((*vB).w[i]) >> 3) & 0x001f);
    (*vS).h[AV_HINDEX(i)] = ((((*vA).w[i]) >> 9) & 0xfc00)
                          | ((((*vA).w[i]) >> 6) & 0x03e0)
                          | ((((*vA).w[i]) >> 3) & 0x001f);
  }
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.398:VX:av:vpkshss %VD, %VA, %VB:Vector Pack Signed Half Word Signed Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    if (i < 8)
      temp = (*vA).h[AV_HINDEX(i)];
    else
      temp = (*vB).h[AV_HINDEX(i-8)];
    (*vS).b[AV_BINDEX(i)] = altivec_signed_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.270:VX:av:vpkshus %VD, %VA, %VB:Vector Pack Signed Half Word Unsigned Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    if (i < 8)
      temp = (*vA).h[AV_HINDEX(i)];
    else
      temp = (*vB).h[AV_HINDEX(i-8)];
    (*vS).b[AV_BINDEX(i)] = altivec_unsigned_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.462:VX:av:vpkswss %VD, %VA, %VB:Vector Pack Signed Word Signed Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    if (i < 4)
      temp = (*vA).w[i];
    else
      temp = (*vB).w[i-4];
    (*vS).h[AV_HINDEX(i)] = altivec_signed_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.334:VX:av:vpkswus %VD, %VA, %VB:Vector Pack Signed Word Unsigned Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    if (i < 4)
      temp = (*vA).w[i];
    else
      temp = (*vB).w[i-4];
    (*vS).h[AV_HINDEX(i)] = altivec_unsigned_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.14:VX:av:vpkuhum %VD, %VA, %VB:Vector Pack Unsigned Half Word Unsigned Modulo
  int i;
  for (i = 0; i < 16; i++)
    if (i < 8)
      (*vS).b[AV_BINDEX(i)] = (*vA).h[AV_HINDEX(i)];
    else
      (*vS).b[AV_BINDEX(i)] = (*vB).h[AV_HINDEX(i-8)];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.142:VX:av:vpkuhus %VD, %VA, %VB:Vector Pack Unsigned Half Word Unsigned Saturate
  int i, sat, tempsat;
  signed16 temp;
  sat = 0;
  for (i = 0; i < 16; i++) {
    if (i < 8)
      temp = (*vA).h[AV_HINDEX(i)];
    else
      temp = (*vB).h[AV_HINDEX(i-8)];
    /* force positive in signed16, ok as we'll toss the bit away anyway */
    temp &= ~0x8000;
    (*vS).b[AV_BINDEX(i)] = altivec_unsigned_saturate_8(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.78:VX:av:vpkuwum %VD, %VA, %VB:Vector Pack Unsigned Word Unsigned Modulo
  int i;
  for (i = 0; i < 8; i++)
    if (i < 4)
      (*vS).h[AV_HINDEX(i)] = (*vA).w[i];
    else
      (*vS).h[AV_HINDEX(i)] = (*vB).w[i-4];
  PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);

0.4,6.VS,11.VA,16.VB,21.206:VX:av:vpkuwus %VD, %VA, %VB:Vector Pack Unsigned Word Unsigned Saturate
  int i, sat, tempsat;
  signed32 temp;
  sat = 0;
  for (i = 0; i < 8; i++) {
    if (i < 4)
      temp = (*vA).w[i];
    else
      temp = (*vB).w[i-4];
    /* force positive in signed32, ok as we'll toss the bit away anyway */
    temp &= ~0x80000000;
    (*vS).h[AV_HINDEX(i)] = altivec_unsigned_saturate_16(temp, &tempsat);
    sat |= tempsat;
  }
  ALTIVEC_SET_SAT(sat);
  PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);


#
# Vector Reciprocal instructions, 6-122, 6-123, 6-131
#

0.4,6.VS,11.0,16.VB,21.266:VX:av:vrefp %VD, %VB:Vector Reciprocal Estimate Floating Point
  int i;
  unsigned32 f;
  sim_fpu op, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&op, (*vB).w[i]);
    sim_fpu_div (&d, &sim_fpu_one, &op);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);

0.4,6.VS,11.0,16.VB,21.330:VX:av:vrsqrtefp %VD, %VB:Vector Reciprocal Square Root Estimate Floating Point
  int i;
  unsigned32 f;
  sim_fpu op, i1, d;
  for (i = 0; i < 4; i++) {
    sim_fpu_32to (&op, (*vB).w[i]);
    sim_fpu_sqrt (&i1, &op);
    sim_fpu_div (&d, &sim_fpu_one, &i1);
    sim_fpu_to32 (&f, &d);
    (*vS).w[i] = f;
  }
  PPC_INSN_VR(VS_BITMASK, VB_BITMASK);


1811 #
1812 # Vector Round instructions, 6-124 ... 6-127
1813 #
1814
1815 0.4,6.VS,11.0,16.VB,21.714:VX:av:vrfim %VD, %VB:Vector Round to Floating-Point Integer towards Minus Infinity
1816 int i;
1817 unsigned32 f;
1818 sim_fpu op;
1819 for (i = 0; i < 4; i++) {
1820 sim_fpu_32to (&op, (*vB).w[i]);
1821 sim_fpu_round_32(&op, sim_fpu_round_down, sim_fpu_denorm_default);
1822 sim_fpu_to32 (&f, &op);
1823 (*vS).w[i] = f;
1824 }
1825 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1826
1827 0.4,6.VS,11.0,16.VB,21.522:VX:av:vrfin %VD, %VB:Vector Round to Floating-Point Integer Nearest
1828 int i;
1829 unsigned32 f;
1830 sim_fpu op;
1831 for (i = 0; i < 4; i++) {
1832 sim_fpu_32to (&op, (*vB).w[i]);
1833 sim_fpu_round_32(&op, sim_fpu_round_near, sim_fpu_denorm_default);
1834 sim_fpu_to32 (&f, &op);
1835 (*vS).w[i] = f;
1836 }
1837 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1838
1839 0.4,6.VS,11.0,16.VB,21.650:VX:av:vrfip %VD, %VB:Vector Round to Floating-Point Integer towards Plus Infinity
1840 int i;
1841 unsigned32 f;
1842 sim_fpu op;
1843 for (i = 0; i < 4; i++) {
1844 sim_fpu_32to (&op, (*vB).w[i]);
1845 sim_fpu_round_32(&op, sim_fpu_round_up, sim_fpu_denorm_default);
1846 sim_fpu_to32 (&f, &op);
1847 (*vS).w[i] = f;
1848 }
1849 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1850
1851 0.4,6.VS,11.0,16.VB,21.586:VX:av:vrfiz %VD, %VB:Vector Round to Floating-Point Integer towards Zero
1852 int i;
1853 unsigned32 f;
1854 sim_fpu op;
1855 for (i = 0; i < 4; i++) {
1856 sim_fpu_32to (&op, (*vB).w[i]);
1857 sim_fpu_round_32(&op, sim_fpu_round_zero, sim_fpu_denorm_default);
1858 sim_fpu_to32 (&f, &op);
1859 (*vS).w[i] = f;
1860 }
1861 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1862
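# Architecturally the four vrfi* forms round each element to an integral
# floating-point value in the named direction: 2.7 becomes 2.0 under vrfim
# and vrfiz but 3.0 under vrfin and vrfip, while -2.7 becomes -3.0 under
# vrfim and vrfin but -2.0 under vrfip and vrfiz.  Note that the bodies
# above call sim_fpu_round_32, which re-rounds to single *precision* rather
# than to an integer, so an element that is already a valid 32-bit float
# passes through unchanged.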
1863
1864 #
1865 # Vector Rotate Left instructions, 6-128 ... 6-130
1866 #
1867
1868 0.4,6.VS,11.VA,16.VB,21.4:VX:av:vrlb %VD, %VA, %VB:Vector Rotate Left Integer Byte
1869 int i;
1870 unsigned16 temp;
1871 for (i = 0; i < 16; i++) {
1872 temp = (unsigned16)(*vA).b[i] << (((*vB).b[i]) & 7);
1873 (*vS).b[i] = (temp & 0xff) | ((temp >> 8) & 0xff);
1874 }
1875 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1876
1877 0.4,6.VS,11.VA,16.VB,21.68:VX:av:vrlh %VD, %VA, %VB:Vector Rotate Left Integer Half Word
1878 int i;
1879 unsigned32 temp;
1880 for (i = 0; i < 8; i++) {
1881 temp = (unsigned32)(*vA).h[i] << (((*vB).h[i]) & 0xf);
1882 (*vS).h[i] = (temp & 0xffff) | ((temp >> 16) & 0xffff);
1883 }
1884 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1885
1886 0.4,6.VS,11.VA,16.VB,21.132:VX:av:vrlw %VD, %VA, %VB:Vector Rotate Left Integer Word
1887 int i;
1888 unsigned64 temp;
1889 for (i = 0; i < 4; i++) {
1890 temp = (unsigned64)(*vA).w[i] << (((*vB).w[i]) & 0x1f);
1891 (*vS).w[i] = (temp & 0xffffffff) | ((temp >> 32) & 0xffffffff);
1892 }
1893 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1894
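# Each rotate widens the element, shifts, and folds the bits that fell off
# the top back into the bottom.  A standalone equivalent of the byte case
# (plain C with stdint.h types; illustrative only):
#
#   static uint8_t rotl8 (uint8_t v, unsigned n)
#   {
#     n &= 7;
#     return (uint8_t) ((v << n) | (v >> ((8 - n) & 7)));
#   }
#
# e.g. rotl8 (0x81, 1) == 0x03.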
1895
1896 #
1897 # Vector Conditional Select instruction, 6-133
1898 #
1899
1900 0.4,6.VS,11.VA,16.VB,21.VC,26.42:VAX:av:vsel %VD, %VA, %VB, %VC:Vector Conditional Select
1901 int i;
1902 unsigned32 c;
1903 for (i = 0; i < 4; i++) {
1904 c = (*vC).w[i];
1905 (*vS).w[i] = ((*vB).w[i] & c) | ((*vA).w[i] & ~c);
1906 }
1907 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK | VC_BITMASK);
1908
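# vsel is a pure bitwise multiplex: each result bit comes from vB where the
# corresponding vC bit is 1 and from vA where it is 0, so doing it a word
# at a time is safe.  E.g. with vA all zeros, vB all ones and a vC word of
# 0x0f0f0f0f, the result word is 0x0f0f0f0f.
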
1909 #
1910 # Vector Shift Left instructions, 6-134 ... 6-139
1911 #
1912
1913 0.4,6.VS,11.VA,16.VB,21.452:VX:av:vsl %VD, %VA, %VB:Vector Shift Left
1914 int sh, i, j, carry, new_carry;
1915 sh = (*vB).b[0] & 7; /* result undefined unless all bytes of vB agree; checking b[0] is enough */
1916 carry = 0;
1917 for (j = 3; j >= 0; j--) {
1918 if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
1919 i = j;
1920 else
1921 i = (j + 2) % 4;
1922 new_carry = sh ? (*vA).w[i] >> (32 - sh) : 0; /* avoid undefined shift by 32 when sh == 0 */
1923 (*vS).w[i] = ((*vA).w[i] << sh) | carry;
1924 carry = new_carry;
1925 }
1926 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1927
1928 0.4,6.VS,11.VA,16.VB,21.260:VX:av:vslb %VD, %VA, %VB:Vector Shift Left Integer Byte
1929 int i, sh;
1930 for (i = 0; i < 16; i++) {
1931 sh = ((*vB).b[i]) & 7;
1932 (*vS).b[i] = (*vA).b[i] << sh;
1933 }
1934 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1935
1936 0.4,6.VS,11.VA,16.VB,21.0,22.SH,26.44:VX:av:vsldoi %VD, %VA, %VB, %SH:Vector Shift Left Double by Octet Immediate
1937 int i, j;
1938 for (j = 0, i = SH; i < 16; i++)
1939 (*vS).b[j++] = (*vA).b[i];
1940 for (i = 0; i < SH; i++)
1941 (*vS).b[j++] = (*vB).b[i];
1942 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1943
1944 0.4,6.VS,11.VA,16.VB,21.324:VX:av:vslh %VD, %VA, %VB:Vector Shift Left Half Word
1945 int i, sh;
1946 for (i = 0; i < 8; i++) {
1947 sh = ((*vB).h[i]) & 0xf;
1948 (*vS).h[i] = (*vA).h[i] << sh;
1949 }
1950 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1951
1952 0.4,6.VS,11.VA,16.VB,21.1036:VX:av:vslo %VD, %VA, %VB:Vector Shift Left by Octet
1953 int i, sh;
1954 if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
1955 sh = ((*vB).b[AV_BINDEX(15)] >> 3) & 0xf;
1956 else
1957 sh = ((*vB).b[AV_BINDEX(0)] >> 3) & 0xf;
1958 for (i = 0; i < 16; i++) {
1959 if (15 - i >= sh)
1960 (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i + sh)];
1961 else
1962 (*vS).b[AV_BINDEX(i)] = 0;
1963 }
1964 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1965
1966 0.4,6.VS,11.VA,16.VB,21.388:VX:av:vslw %VD, %VA, %VB:Vector Shift Left Integer Word
1967 int i, sh;
1968 for (i = 0; i < 4; i++) {
1969 sh = ((*vB).w[i]) & 0x1f;
1970 (*vS).w[i] = (*vA).w[i] << sh;
1971 }
1972 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
1973
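# vsldoi is a byte-granular funnel shift: the result is bytes SH..15 of vA
# followed by bytes 0..SH-1 of vB.  vsl above is the bit-granular version,
# propagating each word's high bits into the next more significant word.
# A standalone sketch ignoring the simulator's endian remapping (plain C,
# words held most significant first; illustrative only):
#
#   #include <stdint.h>
#   static void vsl128 (uint32_t w[4], unsigned sh)  /* sh in 0..7 */
#   {
#     uint32_t carry = 0;
#     int i;
#     for (i = 3; i >= 0; i--) {
#       uint32_t next = sh ? w[i] >> (32 - sh) : 0;
#       w[i] = (w[i] << sh) | carry;
#       carry = next;
#     }
#   }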
1974
1975 #
1976 # Vector Splat instructions, 6-140 ... 6-145
1977 #
1978
1979 0.4,6.VS,11.UIMM,16.VB,21.524:VX:av:vspltb %VD, %VB, %UIMM:Vector Splat Byte
1980 int i;
1981 unsigned8 b;
1982 b = (*vB).b[AV_BINDEX(UIMM & 0xf)];
1983 for (i = 0; i < 16; i++)
1984 (*vS).b[i] = b;
1985 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1986
1987 0.4,6.VS,11.UIMM,16.VB,21.588:VX:av:vsplth %VD, %VB, %UIMM:Vector Splat Half Word
1988 int i;
1989 unsigned16 h;
1990 h = (*vB).h[AV_HINDEX(UIMM & 0x7)];
1991 for (i = 0; i < 8; i++)
1992 (*vS).h[i] = h;
1993 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
1994
1995 0.4,6.VS,11.SIMM,16.0,21.780:VX:av:vspltisb %VD, %SIMM:Vector Splat Immediate Signed Byte
1996 int i;
1997 signed8 b = SIMM;
1998 /* manual 5-bit sign extension */
1999 if (b & 0x10)
2000 b -= 0x20;
2001 for (i = 0; i < 16; i++)
2002 (*vS).b[i] = b;
2003 PPC_INSN_VR(VS_BITMASK, 0);
2004
2005 0.4,6.VS,11.SIMM,16.0,21.844:VX:av:vspltish %VD, %SIMM:Vector Splat Immediate Signed Half Word
2006 int i;
2007 signed16 h = SIMM;
2008 /* manual 5-bit sign extension */
2009 if (h & 0x10)
2010 h -= 0x20;
2011 for (i = 0; i < 8; i++)
2012 (*vS).h[i] = h;
2013 PPC_INSN_VR(VS_BITMASK, 0);
2014
2015 0.4,6.VS,11.SIMM,16.0,21.908:VX:av:vspltisw %VD, %SIMM:Vector Splat Immediate Signed Word
2016 int i;
2017 signed32 w = SIMM;
2018 /* manual 5-bit sign extension */
2019 if (w & 0x10)
2020 w -= 0x20;
2021 for (i = 0; i < 4; i++)
2022 (*vS).w[i] = w;
2023 PPC_INSN_VR(VS_BITMASK, 0);
2024
2025 0.4,6.VS,11.UIMM,16.VB,21.652:VX:av:vspltw %VD, %VB, %UIMM:Vector Splat Word
2026 int i;
2027 unsigned32 w;
2028 w = (*vB).w[UIMM & 0x3];
2029 for (i = 0; i < 4; i++)
2030 (*vS).w[i] = w;
2031 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2032
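# The vspltis* immediates are only five bits wide, hence the manual sign
# extension above: encodings 16..31 stand for -16..-1.  E.g. SIMM = 0x1f
# gives 0x1f - 0x20 = -1, so "vspltisb vD, -1" sets every byte of vD to
# 0xff.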
2033
2034 #
2035 # Vector Shift Right instructions, 6-146 ... 6-154
2036 #
2037
2038 0.4,6.VS,11.VA,16.VB,21.708:VX:av:vsr %VD, %VA, %VB:Vector Shift Right
2039 int sh, i, j, carry, new_carry;
2040 sh = (*vB).b[0] & 7; /* result undefined unless all bytes of vB agree; checking b[0] is enough */
2041 carry = 0;
2042 for (j = 0; j < 4; j++) {
2043 if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
2044 i = j;
2045 else
2046 i = (j + 2) % 4;
2047 new_carry = sh ? (*vA).w[i] << (32 - sh) : 0; /* avoid undefined shift by 32 when sh == 0 */
2048 (*vS).w[i] = ((*vA).w[i] >> sh) | carry;
2049 carry = new_carry;
2050 }
2051 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2052
2053 0.4,6.VS,11.VA,16.VB,21.772:VX:av:vsrab %VD, %VA, %VB:Vector Shift Right Algebraic Byte
2054 int i, sh;
2055 signed16 a;
2056 for (i = 0; i < 16; i++) {
2057 sh = ((*vB).b[i]) & 7;
2058 a = (signed16)(signed8)(*vA).b[i];
2059 (*vS).b[i] = (a >> sh) & 0xff;
2060 }
2061 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2062
2063 0.4,6.VS,11.VA,16.VB,21.836:VX:av:vsrah %VD, %VA, %VB:Vector Shift Right Algebraic Half Word
2064 int i, sh;
2065 signed32 a;
2066 for (i = 0; i < 8; i++) {
2067 sh = ((*vB).h[i]) & 0xf;
2068 a = (signed32)(signed16)(*vA).h[i];
2069 (*vS).h[i] = (a >> sh) & 0xffff;
2070 }
2071 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2072
2073 0.4,6.VS,11.VA,16.VB,21.900:VX:av:vsraw %VD, %VA, %VB:Vector Shift Right Algebraic Word
2074 int i, sh;
2075 signed64 a;
2076 for (i = 0; i < 4; i++) {
2077 sh = ((*vB).w[i]) & 0x1f;
2078 a = (signed64)(signed32)(*vA).w[i];
2079 (*vS).w[i] = (a >> sh) & 0xffffffff;
2080 }
2081 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2082
2083 0.4,6.VS,11.VA,16.VB,21.516:VX:av:vsrb %VD, %VA, %VB:Vector Shift Right Byte
2084 int i, sh;
2085 for (i = 0; i < 16; i++) {
2086 sh = ((*vB).b[i]) & 7;
2087 (*vS).b[i] = (*vA).b[i] >> sh;
2088 }
2089 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2090
2091 0.4,6.VS,11.VA,16.VB,21.580:VX:av:vsrh %VD, %VA, %VB:Vector Shift Right Half Word
2092 int i, sh;
2093 for (i = 0; i < 8; i++) {
2094 sh = ((*vB).h[i]) & 0xf;
2095 (*vS).h[i] = (*vA).h[i] >> sh;
2096 }
2097 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2098
2099 0.4,6.VS,11.VA,16.VB,21.1100:VX:av:vsro %VD, %VA, %VB:Vector Shift Right Octet
2100 int i, sh;
2101 if (CURRENT_TARGET_BYTE_ORDER == BIG_ENDIAN)
2102 sh = ((*vB).b[AV_BINDEX(15)] >> 3) & 0xf;
2103 else
2104 sh = ((*vB).b[AV_BINDEX(0)] >> 3) & 0xf;
2105 for (i = 0; i < 16; i++) {
2106 if (i < sh)
2107 (*vS).b[AV_BINDEX(i)] = 0;
2108 else
2109 (*vS).b[AV_BINDEX(i)] = (*vA).b[AV_BINDEX(i - sh)];
2110 }
2111 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2112
2113 0.4,6.VS,11.VA,16.VB,21.644:VX:av:vsrw %VD, %VA, %VB:Vector Shift Right Word
2114 int i, sh;
2115 for (i = 0; i < 4; i++) {
2116 sh = ((*vB).w[i]) & 0x1f;
2117 (*vS).w[i] = (*vA).w[i] >> sh;
2118 }
2119 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2120
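# The algebraic forms shift a sign-extended copy so the sign bit is
# replicated, while vsrb/vsrh/vsrw shift in zeros: the byte 0x80 shifted
# right by one gives 0xc0 under vsrab but 0x40 under vsrb.  (The bodies
# rely on the host compiler implementing >> of a negative signed value as
# an arithmetic shift, which C leaves implementation-defined.)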
2121
2122 #
2123 # Vector Subtract instructions, 6-155 ... 6-165
2124 #
2125
2126 0.4,6.VS,11.VA,16.VB,21.1408:VX:av:vsubcuw %VD, %VA, %VB:Vector Subtract Carryout Unsigned Word
2127 int i;
2128 signed64 temp, a, b;
2129 for (i = 0; i < 4; i++) {
2130 a = (signed64)(unsigned32)(*vA).w[i];
2131 b = (signed64)(unsigned32)(*vB).w[i];
2132 temp = a - b;
2133 (*vS).w[i] = ~(temp >> 32) & 1;
2134 }
2135 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2136
2137 0.4,6.VS,11.VA,16.VB,21.74:VX:av:vsubfp %VD, %VA, %VB:Vector Subtract Floating Point
2138 int i;
2139 unsigned32 f;
2140 sim_fpu a, b, d;
2141 for (i = 0; i < 4; i++) {
2142 sim_fpu_32to (&a, (*vA).w[i]);
2143 sim_fpu_32to (&b, (*vB).w[i]);
2144 sim_fpu_sub (&d, &a, &b);
2145 sim_fpu_to32 (&f, &d);
2146 (*vS).w[i] = f;
2147 }
2148 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2149
2150 0.4,6.VS,11.VA,16.VB,21.1792:VX:av:vsubsbs %VD, %VA, %VB:Vector Subtract Signed Byte Saturate
2151 int i, sat, tempsat;
2152 signed16 temp;
2153 sat = 0;
2154 for (i = 0; i < 16; i++) {
2155 temp = (signed16)(signed8)(*vA).b[i] - (signed16)(signed8)(*vB).b[i];
2156 (*vS).b[i] = altivec_signed_saturate_8(temp, &tempsat);
2157 sat |= tempsat;
2158 }
2159 ALTIVEC_SET_SAT(sat);
2160 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2161
2162 0.4,6.VS,11.VA,16.VB,21.1856:VX:av:vsubshs %VD, %VA, %VB:Vector Subtract Signed Half Word Saturate
2163 int i, sat, tempsat;
2164 signed32 temp;
2165 sat = 0;
2166 for (i = 0; i < 8; i++) {
2167 temp = (signed32)(signed16)(*vA).h[i] - (signed32)(signed16)(*vB).h[i];
2168 (*vS).h[i] = altivec_signed_saturate_16(temp, &tempsat);
2169 sat |= tempsat;
2170 }
2171 ALTIVEC_SET_SAT(sat);
2172 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2173
2174 0.4,6.VS,11.VA,16.VB,21.1920:VX:av:vsubsws %VD, %VA, %VB:Vector Subtract Signed Word Saturate
2175 int i, sat, tempsat;
2176 signed64 temp;
2177 sat = 0;
2178 for (i = 0; i < 4; i++) {
2179 temp = (signed64)(signed32)(*vA).w[i] - (signed64)(signed32)(*vB).w[i];
2180 (*vS).w[i] = altivec_signed_saturate_32(temp, &tempsat);
2181 sat |= tempsat;
2182 }
2183 ALTIVEC_SET_SAT(sat);
2184 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2185
2186 0.4,6.VS,11.VA,16.VB,21.1024:VX:av:vsububm %VD, %VA, %VB:Vector Subtract Unsigned Byte Modulo
2187 int i;
2188 for (i = 0; i < 16; i++)
2189 (*vS).b[i] = (*vA).b[i] - (*vB).b[i];
2190 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2191
2192 0.4,6.VS,11.VA,16.VB,21.1536:VX:av:vsububs %VD, %VA, %VB:Vector Subtract Unsigned Byte Saturate
2193 int i, sat, tempsat;
2194 signed16 temp;
2195 sat = 0;
2196 for (i = 0; i < 16; i++) {
2197 temp = (signed16)(unsigned8)(*vA).b[i] - (signed16)(unsigned8)(*vB).b[i];
2198 (*vS).b[i] = altivec_unsigned_saturate_8(temp, &tempsat);
2199 sat |= tempsat;
2200 }
2201 ALTIVEC_SET_SAT(sat);
2202 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2203
2204 0.4,6.VS,11.VA,16.VB,21.1088:VX:av:vsubuhm %VD, %VA, %VB:Vector Subtract Unsigned Half Word Modulo
2205 int i;
2206 for (i = 0; i < 8; i++)
2207 (*vS).h[i] = ((*vA).h[i] - (*vB).h[i]) & 0xffff;
2208 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2209
2210 0.4,6.VS,11.VA,16.VB,21.1600:VX:av:vsubuhs %VD, %VA, %VB:Vector Subtract Unsigned Half Word Saturate
2211 int i, sat = 0, tempsat;
2212 signed32 temp;
2213 for (i = 0; i < 8; i++) {
2214 temp = (signed32)(unsigned16)(*vA).h[i] - (signed32)(unsigned16)(*vB).h[i];
2215 (*vS).h[i] = altivec_unsigned_saturate_16(temp, &tempsat);
2216 sat |= tempsat;
2217 }
2218 ALTIVEC_SET_SAT(sat);
2219 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2220
2221 0.4,6.VS,11.VA,16.VB,21.1152:VX:av:vsubuwm %VD, %VA, %VB:Vector Subtract Unsigned Word Modulo
2222 int i;
2223 for (i = 0; i < 4; i++)
2224 (*vS).w[i] = (*vA).w[i] - (*vB).w[i];
2225 PPC_INSN_VR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2226
2227 0.4,6.VS,11.VA,16.VB,21.1664:VX:av:vsubuws %VD, %VA, %VB:Vector Subtract Unsigned Word Saturate
2228 int i, sat = 0, tempsat;
2229 signed64 temp;
2230 for (i = 0; i < 4; i++) {
2231 temp = (signed64)(unsigned32)(*vA).w[i] - (signed64)(unsigned32)(*vB).w[i];
2232 (*vS).w[i] = altivec_unsigned_saturate_32(temp, &tempsat);
2233 sat |= tempsat;
2234 }
2235 ALTIVEC_SET_SAT(sat);
2236 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2237
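# vsubcuw computes the carry-out (that is, NOT borrow) of each unsigned
# 32-bit subtraction: bit 32 of the 64-bit difference is set exactly when
# vA < vB, so its complement is 1 when vA >= vB.  E.g. 5 - 3 yields carry
# 1, while 3 - 5 yields carry 0.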
2238
2239 #
2240 # Vector Sum instructions, 6-166 ... 6-170
2241 #
2242
2243 0.4,6.VS,11.VA,16.VB,21.1928:VX:av:vsumsws %VD, %VA, %VB:Vector Sum Across Signed Word Saturate
2244 int i, sat;
2245 signed64 temp;
2246 temp = (signed64)(signed32)(*vB).w[3];
2247 for (i = 0; i < 4; i++)
2248 temp += (signed64)(signed32)(*vA).w[i];
2249 (*vS).w[3] = altivec_signed_saturate_32(temp, &sat);
2250 (*vS).w[0] = (*vS).w[1] = (*vS).w[2] = 0;
2251 ALTIVEC_SET_SAT(sat);
2252 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2253
2254 0.4,6.VS,11.VA,16.VB,21.1672:VX:av:vsum2sws %VD, %VA, %VB:Vector Sum Across Partial (1/2) Signed Word Saturate
2255 int j, sat = 0, tempsat;
2256 signed64 temp;
2257 for (j = 0; j < 4; j += 2) {
2258 temp = (signed64)(signed32)(*vB).w[j+1];
2259 temp += (signed64)(signed32)(*vA).w[j] + (signed64)(signed32)(*vA).w[j+1];
2260 (*vS).w[j+1] = altivec_signed_saturate_32(temp, &tempsat);
2261 sat |= tempsat;
2262 }
2263 (*vS).w[0] = (*vS).w[2] = 0;
2264 ALTIVEC_SET_SAT(sat);
2265 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2266
2267 0.4,6.VS,11.VA,16.VB,21.1800:VX:av:vsum4sbs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Signed Byte Saturate
2268 int i, j, sat = 0, tempsat;
2269 signed64 temp;
2270 for (j = 0; j < 4; j++) {
2271 temp = (signed64)(signed32)(*vB).w[j];
2272 for (i = 0; i < 4; i++)
2273 temp += (signed64)(signed8)(*vA).b[i+(j*4)];
2274 (*vS).w[j] = altivec_signed_saturate_32(temp, &tempsat);
2275 sat |= tempsat;
2276 }
2277 ALTIVEC_SET_SAT(sat);
2278 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2279
2280 0.4,6.VS,11.VA,16.VB,21.1608:VX:av:vsum4shs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Signed Half Word Saturate
2281 int i, j, sat = 0, tempsat;
2282 signed64 temp;
2283 for (j = 0; j < 4; j++) {
2284 temp = (signed64)(signed32)(*vB).w[j];
2285 for (i = 0; i < 2; i++)
2286 temp += (signed64)(signed16)(*vA).h[i+(j*2)];
2287 (*vS).w[j] = altivec_signed_saturate_32(temp, &tempsat);
2288 sat |= tempsat;
2289 }
2290 ALTIVEC_SET_SAT(sat);
2291 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2292
2293 0.4,6.VS,11.VA,16.VB,21.1544:VX:av:vsum4ubs %VD, %VA, %VB:Vector Sum Across Partial (1/4) Unsigned Byte Saturate
2294 int i, j, sat = 0, tempsat;
2295 signed64 utemp;
2296 signed64 temp;
2297 for (j = 0; j < 4; j++) {
2298 utemp = (signed64)(unsigned32)(*vB).w[j];
2299 for (i = 0; i < 4; i++)
2300 utemp += (signed64)(unsigned16)(*vA).b[i+(j*4)];
2301 temp = utemp;
2302 (*vS).w[j] = altivec_unsigned_saturate_32(temp, &tempsat);
2303 sat |= tempsat;
2304 }
2305 ALTIVEC_SET_SAT(sat);
2306 PPC_INSN_VR_VSCR(VS_BITMASK, VA_BITMASK | VB_BITMASK);
2307
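# The vsum* family accumulates each group of elements into a 64-bit
# intermediate and saturates to 32 bits only once at the end; the wide
# temporary is what keeps the partial sums from overflowing beforehand.
# E.g. vsum4sbs with bytes {1, 2, 3, 4} of vA and the value 10 in the
# corresponding word of vB produces 1 + 2 + 3 + 4 + 10 = 20.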
2308
2309 #
2310 # Vector Unpack instructions, 6-171 ... 6-176
2311 #
2312
2313 0.4,6.VS,11.0,16.VB,21.846:VX:av:vupkhpx %VD, %VB:Vector Unpack High Pixel16
2314 int i;
2315 unsigned16 h;
2316 for (i = 0; i < 4; i++) {
2317 h = (*vB).h[AV_HINDEX(i)];
2318 (*vS).w[i] = ((h & 0x8000) ? 0xff000000 : 0)
2319 | ((h & 0x7c00) << 6)
2320 | ((h & 0x03e0) << 3)
2321 | ((h & 0x001f));
2322 }
2323 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2324
2325 0.4,6.VS,11.0,16.VB,21.526:VX:av:vupkhsb %VD, %VB:Vector Unpack High Signed Byte
2326 int i;
2327 for (i = 0; i < 8; i++)
2328 (*vS).h[AV_HINDEX(i)] = (signed16)(signed8)(*vB).b[AV_BINDEX(i)];
2329 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2330
2331 0.4,6.VS,11.0,16.VB,21.590:VX:av:vupkhsh %VD, %VB:Vector Unpack High Signed Half Word
2332 int i;
2333 for (i = 0; i < 4; i++)
2334 (*vS).w[i] = (signed32)(signed16)(*vB).h[AV_HINDEX(i)];
2335 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2336
2337 0.4,6.VS,11.0,16.VB,21.974:VX:av:vupklpx %VD, %VB:Vector Unpack Low Pixel16
2338 int i;
2339 unsigned16 h;
2340 for (i = 0; i < 4; i++) {
2341 h = (*vB).h[AV_HINDEX(i + 4)];
2342 (*vS).w[i] = ((h & 0x8000) ? 0xff000000 : 0)
2343 | ((h & 0x7c00) << 6)
2344 | ((h & 0x03e0) << 3)
2345 | ((h & 0x001f));
2346 }
2347 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2348
2349 0.4,6.VS,11.0,16.VB,21.654:VX:av:vupklsb %VD, %VB:Vector Unpack Low Signed Byte
2350 int i;
2351 for (i = 0; i < 8; i++)
2352 (*vS).h[AV_HINDEX(i)] = (signed16)(signed8)(*vB).b[AV_BINDEX(i + 8)];
2353 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
2354
2355 0.4,6.VS,11.0,16.VB,21.718:VX:av:vupklsh %VD, %VB:Vector Unpack Low Signed Half Word
2356 int i;
2357 for (i = 0; i < 4; i++)
2358 (*vS).w[i] = (signed32)(signed16)(*vB).h[AV_HINDEX(i + 4)];
2359 PPC_INSN_VR(VS_BITMASK, VB_BITMASK);
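# The pixel unpacks expand each 1:5:5:5 halfword into an 8:8:8:8 word,
# placing every 5-bit channel in the low bits of its byte and stretching
# the single alpha bit to 0x00 or 0xff: the halfword 0xfc00 (alpha 1,
# red 0x1f, green 0, blue 0) unpacks to 0xff1f0000.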