2 Copyright 1988-2022 Free Software Foundation, Inc.
3 This is part of the GCC manual.
4 For copying conditions, see the copyright.rst file.
6 .. _mips-loongson-built-in-functions:
8 MIPS Loongson Built-in Functions
9 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11 GCC provides intrinsics to access the SIMD instructions provided by the
12 ST Microelectronics Loongson-2E and -2F processors. These intrinsics,
13 available after inclusion of the ``loongson.h`` header file,
14 operate on the following 64-bit vector types:
16 * ``uint8x8_t``, a vector of eight unsigned 8-bit integers;
18 * ``uint16x4_t``, a vector of four unsigned 16-bit integers;
20 * ``uint32x2_t``, a vector of two unsigned 32-bit integers;
22 * ``int8x8_t``, a vector of eight signed 8-bit integers;
24 * ``int16x4_t``, a vector of four signed 16-bit integers;
26 * ``int32x2_t``, a vector of two signed 32-bit integers.
28 The intrinsics provided are listed below; each is named after the
29 machine instruction to which it corresponds, with suffixes added as
30 appropriate to distinguish intrinsics that expand to the same machine
31 instruction yet have different argument types. Refer to the architecture
32 documentation for a description of the functionality of each instruction.
37 int16x4_t packsswh (int32x2_t s, int32x2_t t);
38 int8x8_t packsshb (int16x4_t s, int16x4_t t);
39 uint8x8_t packushb (uint16x4_t s, uint16x4_t t);
40 uint32x2_t paddw_u (uint32x2_t s, uint32x2_t t);
41 uint16x4_t paddh_u (uint16x4_t s, uint16x4_t t);
42 uint8x8_t paddb_u (uint8x8_t s, uint8x8_t t);
43 int32x2_t paddw_s (int32x2_t s, int32x2_t t);
44 int16x4_t paddh_s (int16x4_t s, int16x4_t t);
45 int8x8_t paddb_s (int8x8_t s, int8x8_t t);
46 uint64_t paddd_u (uint64_t s, uint64_t t);
47 int64_t paddd_s (int64_t s, int64_t t);
48 int16x4_t paddsh (int16x4_t s, int16x4_t t);
49 int8x8_t paddsb (int8x8_t s, int8x8_t t);
50 uint16x4_t paddush (uint16x4_t s, uint16x4_t t);
51 uint8x8_t paddusb (uint8x8_t s, uint8x8_t t);
52 uint64_t pandn_ud (uint64_t s, uint64_t t);
53 uint32x2_t pandn_uw (uint32x2_t s, uint32x2_t t);
54 uint16x4_t pandn_uh (uint16x4_t s, uint16x4_t t);
55 uint8x8_t pandn_ub (uint8x8_t s, uint8x8_t t);
56 int64_t pandn_sd (int64_t s, int64_t t);
57 int32x2_t pandn_sw (int32x2_t s, int32x2_t t);
58 int16x4_t pandn_sh (int16x4_t s, int16x4_t t);
59 int8x8_t pandn_sb (int8x8_t s, int8x8_t t);
60 uint16x4_t pavgh (uint16x4_t s, uint16x4_t t);
61 uint8x8_t pavgb (uint8x8_t s, uint8x8_t t);
62 uint32x2_t pcmpeqw_u (uint32x2_t s, uint32x2_t t);
63 uint16x4_t pcmpeqh_u (uint16x4_t s, uint16x4_t t);
64 uint8x8_t pcmpeqb_u (uint8x8_t s, uint8x8_t t);
65 int32x2_t pcmpeqw_s (int32x2_t s, int32x2_t t);
66 int16x4_t pcmpeqh_s (int16x4_t s, int16x4_t t);
67 int8x8_t pcmpeqb_s (int8x8_t s, int8x8_t t);
68 uint32x2_t pcmpgtw_u (uint32x2_t s, uint32x2_t t);
69 uint16x4_t pcmpgth_u (uint16x4_t s, uint16x4_t t);
70 uint8x8_t pcmpgtb_u (uint8x8_t s, uint8x8_t t);
71 int32x2_t pcmpgtw_s (int32x2_t s, int32x2_t t);
72 int16x4_t pcmpgth_s (int16x4_t s, int16x4_t t);
73 int8x8_t pcmpgtb_s (int8x8_t s, int8x8_t t);
74 uint16x4_t pextrh_u (uint16x4_t s, int field);
75 int16x4_t pextrh_s (int16x4_t s, int field);
76 uint16x4_t pinsrh_0_u (uint16x4_t s, uint16x4_t t);
77 uint16x4_t pinsrh_1_u (uint16x4_t s, uint16x4_t t);
78 uint16x4_t pinsrh_2_u (uint16x4_t s, uint16x4_t t);
79 uint16x4_t pinsrh_3_u (uint16x4_t s, uint16x4_t t);
80 int16x4_t pinsrh_0_s (int16x4_t s, int16x4_t t);
81 int16x4_t pinsrh_1_s (int16x4_t s, int16x4_t t);
82 int16x4_t pinsrh_2_s (int16x4_t s, int16x4_t t);
83 int16x4_t pinsrh_3_s (int16x4_t s, int16x4_t t);
84 int32x2_t pmaddhw (int16x4_t s, int16x4_t t);
85 int16x4_t pmaxsh (int16x4_t s, int16x4_t t);
86 uint8x8_t pmaxub (uint8x8_t s, uint8x8_t t);
87 int16x4_t pminsh (int16x4_t s, int16x4_t t);
88 uint8x8_t pminub (uint8x8_t s, uint8x8_t t);
89 uint8x8_t pmovmskb_u (uint8x8_t s);
90 int8x8_t pmovmskb_s (int8x8_t s);
91 uint16x4_t pmulhuh (uint16x4_t s, uint16x4_t t);
92 int16x4_t pmulhh (int16x4_t s, int16x4_t t);
93 int16x4_t pmullh (int16x4_t s, int16x4_t t);
94 int64_t pmuluw (uint32x2_t s, uint32x2_t t);
95 uint8x8_t pasubub (uint8x8_t s, uint8x8_t t);
96 uint16x4_t biadd (uint8x8_t s);
97 uint16x4_t psadbh (uint8x8_t s, uint8x8_t t);
98 uint16x4_t pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order);
99 int16x4_t pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order);
100 uint16x4_t psllh_u (uint16x4_t s, uint8_t amount);
101 int16x4_t psllh_s (int16x4_t s, uint8_t amount);
102 uint32x2_t psllw_u (uint32x2_t s, uint8_t amount);
103 int32x2_t psllw_s (int32x2_t s, uint8_t amount);
104 uint16x4_t psrlh_u (uint16x4_t s, uint8_t amount);
105 int16x4_t psrlh_s (int16x4_t s, uint8_t amount);
106 uint32x2_t psrlw_u (uint32x2_t s, uint8_t amount);
107 int32x2_t psrlw_s (int32x2_t s, uint8_t amount);
108 uint16x4_t psrah_u (uint16x4_t s, uint8_t amount);
109 int16x4_t psrah_s (int16x4_t s, uint8_t amount);
110 uint32x2_t psraw_u (uint32x2_t s, uint8_t amount);
111 int32x2_t psraw_s (int32x2_t s, uint8_t amount);
112 uint32x2_t psubw_u (uint32x2_t s, uint32x2_t t);
113 uint16x4_t psubh_u (uint16x4_t s, uint16x4_t t);
114 uint8x8_t psubb_u (uint8x8_t s, uint8x8_t t);
115 int32x2_t psubw_s (int32x2_t s, int32x2_t t);
116 int16x4_t psubh_s (int16x4_t s, int16x4_t t);
117 int8x8_t psubb_s (int8x8_t s, int8x8_t t);
118 uint64_t psubd_u (uint64_t s, uint64_t t);
119 int64_t psubd_s (int64_t s, int64_t t);
120 int16x4_t psubsh (int16x4_t s, int16x4_t t);
121 int8x8_t psubsb (int8x8_t s, int8x8_t t);
122 uint16x4_t psubush (uint16x4_t s, uint16x4_t t);
123 uint8x8_t psubusb (uint8x8_t s, uint8x8_t t);
124 uint32x2_t punpckhwd_u (uint32x2_t s, uint32x2_t t);
125 uint16x4_t punpckhhw_u (uint16x4_t s, uint16x4_t t);
126 uint8x8_t punpckhbh_u (uint8x8_t s, uint8x8_t t);
127 int32x2_t punpckhwd_s (int32x2_t s, int32x2_t t);
128 int16x4_t punpckhhw_s (int16x4_t s, int16x4_t t);
129 int8x8_t punpckhbh_s (int8x8_t s, int8x8_t t);
130 uint32x2_t punpcklwd_u (uint32x2_t s, uint32x2_t t);
131 uint16x4_t punpcklhw_u (uint16x4_t s, uint16x4_t t);
132 uint8x8_t punpcklbh_u (uint8x8_t s, uint8x8_t t);
133 int32x2_t punpcklwd_s (int32x2_t s, int32x2_t t);
134 int16x4_t punpcklhw_s (int16x4_t s, int16x4_t t);
135 int8x8_t punpcklbh_s (int8x8_t s, int8x8_t t);
141 .. _paired-single-arithmetic:
143 Paired-Single Arithmetic
144 ~~~~~~~~~~~~~~~~~~~~~~~~
146 The table below lists the ``v2sf`` operations for which hardware
147 support exists. ``a``, ``b`` and ``c`` are ``v2sf``
148 values and ``x`` is an integral value.
173 - ``movn.ps`` / ``movz.ps``
175 Note that the multiply-accumulate instructions can be disabled
176 using the command-line option ``-mno-fused-madd``.
178 .. _paired-single-built-in-functions:
180 Paired-Single Built-in Functions
181 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183 The following paired-single functions map directly to a particular
184 MIPS instruction. Please refer to the architecture specification
185 for details on what each instruction does.
187 .. function:: v2sf __builtin_mips_pll_ps (v2sf, v2sf)
189 Pair lower lower (``pll.ps``).
191 .. function:: v2sf __builtin_mips_pul_ps (v2sf, v2sf)
193 Pair upper lower (``pul.ps``).
195 .. function:: v2sf __builtin_mips_plu_ps (v2sf, v2sf)
197 Pair lower upper (``plu.ps``).
199 .. function:: v2sf __builtin_mips_puu_ps (v2sf, v2sf)
201 Pair upper upper (``puu.ps``).
203 .. function:: v2sf __builtin_mips_cvt_ps_s (float, float)
205 Convert pair to paired single (``cvt.ps.s``).
207 .. function:: float __builtin_mips_cvt_s_pl (v2sf)
209 Convert pair lower to single (``cvt.s.pl``).
211 .. function:: float __builtin_mips_cvt_s_pu (v2sf)
213 Convert pair upper to single (``cvt.s.pu``).
215 .. function:: v2sf __builtin_mips_abs_ps (v2sf)
217 Absolute value (``abs.ps``).
219 .. function:: v2sf __builtin_mips_alnv_ps (v2sf, v2sf, int)
221 Align variable (``alnv.ps``).
225 The value of the third parameter must be 0 or 4
226 modulo 8, otherwise the result is unpredictable. Please read the
227 instruction description for details.
229 The following multi-instruction functions are also available.
230 In each case, :samp:`{cond}` can be any of the 16 floating-point conditions:
231 ``f``, ``un``, ``eq``, ``ueq``, ``olt``, ``ult``,
232 ``ole``, ``ule``, ``sf``, ``ngle``, ``seq``, ``ngl``,
233 ``lt``, ``nge``, ``le`` or ``ngt``.
235 .. function:: v2sf __builtin_mips_movt_c_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
236 .. function:: v2sf __builtin_mips_movf_c_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
238 Conditional move based on floating-point comparison (``c.cond.ps``,
239 ``movt.ps`` / ``movf.ps``).
241 The ``movt`` functions return the value :samp:`{x}` computed by:
249 The ``movf`` functions are similar but use ``movf.ps`` instead of ``movt.ps``.
252 .. function:: int __builtin_mips_upper_c_cond_ps (v2sf a, v2sf b)
253 .. function:: int __builtin_mips_lower_c_cond_ps (v2sf a, v2sf b)
255 Comparison of two paired-single values (``c.cond.ps``,
256 ``bc1t`` / ``bc1f``).
258 These functions compare :samp:`{a}` and :samp:`{b}` using ``c.cond.ps``
259 and return either the upper or lower half of the result. For example:
264 if (__builtin_mips_upper_c_eq_ps (a, b))
265 upper_halves_are_equal ();
266 else
267 upper_halves_are_unequal ();
269 if (__builtin_mips_lower_c_eq_ps (a, b))
270 lower_halves_are_equal ();
271 else
272 lower_halves_are_unequal ();
274 .. _mips-3d-built-in-functions:
276 MIPS-3D Built-in Functions
277 ~~~~~~~~~~~~~~~~~~~~~~~~~~
279 The MIPS-3D Application-Specific Extension (ASE) includes additional
280 paired-single instructions that are designed to improve the performance
281 of 3D graphics operations. Support for these instructions is controlled
282 by the :option:`-mips3d` command-line option.
284 The functions listed below map directly to a particular MIPS-3D
285 instruction. Please refer to the architecture specification for
286 more details on what each instruction does.
288 .. function:: v2sf __builtin_mips_addr_ps (v2sf, v2sf)
290 Reduction add (``addr.ps``).
292 .. function:: v2sf __builtin_mips_mulr_ps (v2sf, v2sf)
294 Reduction multiply (``mulr.ps``).
296 .. function:: v2sf __builtin_mips_cvt_pw_ps (v2sf)
298 Convert paired single to paired word (``cvt.pw.ps``).
300 .. function:: v2sf __builtin_mips_cvt_ps_pw (v2sf)
302 Convert paired word to paired single (``cvt.ps.pw``).
304 .. function:: float __builtin_mips_recip1_s (float)
305 .. function:: double __builtin_mips_recip1_d (double)
306 .. function:: v2sf __builtin_mips_recip1_ps (v2sf)
308 Reduced-precision reciprocal (sequence step 1) (``recip1.fmt``).
310 .. function:: float __builtin_mips_recip2_s (float, float)
311 .. function:: double __builtin_mips_recip2_d (double, double)
312 .. function:: v2sf __builtin_mips_recip2_ps (v2sf, v2sf)
314 Reduced-precision reciprocal (sequence step 2) (``recip2.fmt``).
316 .. function:: float __builtin_mips_rsqrt1_s (float)
317 .. function:: double __builtin_mips_rsqrt1_d (double)
318 .. function:: v2sf __builtin_mips_rsqrt1_ps (v2sf)
320 Reduced-precision reciprocal square root (sequence step 1) (``rsqrt1.fmt``).
323 .. function:: float __builtin_mips_rsqrt2_s (float, float)
324 .. function:: double __builtin_mips_rsqrt2_d (double, double)
325 .. function:: v2sf __builtin_mips_rsqrt2_ps (v2sf, v2sf)
327 Reduced-precision reciprocal square root (sequence step 2) (``rsqrt2.fmt``).
330 The following multi-instruction functions are also available.
331 In each case, :samp:`{cond}` can be any of the 16 floating-point conditions:
333 ``f``, ``un``, ``eq``, ``ueq``, ``olt``, ``ult``,
334 ``ole``, ``ule``, ``sf``, ``ngle``, ``seq``,
335 ``ngl``, ``lt``, ``nge``, ``le`` or ``ngt``.
337 .. function:: int __builtin_mips_cabs_cond_s (float a, float b)
338 .. function:: int __builtin_mips_cabs_cond_d (double a, double b)
340 Absolute comparison of two scalar values (``cabs.cond.fmt``,
341 ``bc1t`` / ``bc1f``).
343 These functions compare :samp:`{a}` and :samp:`{b}` using ``cabs.cond.s``
344 or ``cabs.cond.d`` and return the result as a boolean value.
350 if (__builtin_mips_cabs_eq_s (a, b))
355 .. function:: int __builtin_mips_upper_cabs_cond_ps (v2sf a, v2sf b)
356 .. function:: int __builtin_mips_lower_cabs_cond_ps (v2sf a, v2sf b)
358 Absolute comparison of two paired-single values (``cabs.cond.ps``,
359 ``bc1t`` / ``bc1f``).
361 These functions compare :samp:`{a}` and :samp:`{b}` using ``cabs.cond.ps``
362 and return either the upper or lower half of the result. For example:
367 if (__builtin_mips_upper_cabs_eq_ps (a, b))
368 upper_halves_are_equal ();
369 else
370 upper_halves_are_unequal ();
372 if (__builtin_mips_lower_cabs_eq_ps (a, b))
373 lower_halves_are_equal ();
374 else
375 lower_halves_are_unequal ();
377 .. function:: v2sf __builtin_mips_movt_cabs_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
378 .. function:: v2sf __builtin_mips_movf_cabs_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
380 Conditional move based on absolute comparison (``cabs.cond.ps``,
381 ``movt.ps`` / ``movf.ps``).
383 The ``movt`` functions return the value :samp:`{x}` computed by:
391 The ``movf`` functions are similar but use ``movf.ps`` instead of ``movt.ps``.
394 .. function:: int __builtin_mips_any_c_cond_ps (v2sf a, v2sf b)
395 .. function:: int __builtin_mips_all_c_cond_ps (v2sf a, v2sf b)
396 .. function:: int __builtin_mips_any_cabs_cond_ps (v2sf a, v2sf b)
397 .. function:: int __builtin_mips_all_cabs_cond_ps (v2sf a, v2sf b)
399 Comparison of two paired-single values
400 (``c.cond.ps`` / ``cabs.cond.ps``,
401 ``bc1any2t`` / ``bc1any2f``).
403 These functions compare :samp:`{a}` and :samp:`{b}` using ``c.cond.ps``
404 or ``cabs.cond.ps``. The ``any`` forms return ``true`` if either
405 result is ``true`` and the ``all`` forms return ``true`` if both results are ``true``.
411 if (__builtin_mips_any_c_eq_ps (a, b))
416 if (__builtin_mips_all_c_eq_ps (a, b))
421 .. function:: int __builtin_mips_any_c_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
422 .. function:: int __builtin_mips_all_c_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
423 .. function:: int __builtin_mips_any_cabs_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
424 .. function:: int __builtin_mips_all_cabs_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
426 Comparison of four paired-single values
427 (``c.cond.ps`` / ``cabs.cond.ps``,
428 ``bc1any4t`` / ``bc1any4f``).
430 These functions use ``c.cond.ps`` or ``cabs.cond.ps``
431 to compare :samp:`{a}` with :samp:`{b}` and to compare :samp:`{c}` with :samp:`{d}`.
432 The ``any`` forms return ``true`` if any of the four results are ``true``
433 and the ``all`` forms return ``true`` if all four results are ``true``.
439 if (__builtin_mips_any_c_eq_4s (a, b, c, d))
444 if (__builtin_mips_all_c_eq_4s (a, b, c, d))