gcc/doc/gcc/extensions-to-the-c-language-family/target-builtins/mips-loongson-built-in-functions.rst

   1 ..
   2   Copyright 1988-2022 Free Software Foundation, Inc.
   3   This is part of the GCC manual.
   4   For copying conditions, see the copyright.rst file.
   5
   6 .. _mips-loongson-built-in-functions:
   7
   8 MIPS Loongson Built-in Functions
   9 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  10
  11 GCC provides intrinsics to access the SIMD instructions provided by the
  12 STMicroelectronics Loongson-2E and -2F processors.  These intrinsics,
  13 available after inclusion of the ``loongson.h`` header file,
  14 operate on the following 64-bit vector types:
  15
  16 * ``uint8x8_t``, a vector of eight unsigned 8-bit integers;
  17
  18 * ``uint16x4_t``, a vector of four unsigned 16-bit integers;
  19
  20 * ``uint32x2_t``, a vector of two unsigned 32-bit integers;
  21
  22 * ``int8x8_t``, a vector of eight signed 8-bit integers;
  23
  24 * ``int16x4_t``, a vector of four signed 16-bit integers;
  25
  26 * ``int32x2_t``, a vector of two signed 32-bit integers.
  27
  28 The intrinsics provided are listed below; each is named after the
  29 machine instruction to which it corresponds, with suffixes added as
  30 appropriate to distinguish intrinsics that expand to the same machine
  31 instruction yet have different argument types.  Refer to the architecture
  32 documentation for a description of the functionality of each
  33 instruction.
  34
  35 .. code-block:: c++
  36
  37   int16x4_t packsswh (int32x2_t s, int32x2_t t);
  38   int8x8_t packsshb (int16x4_t s, int16x4_t t);
  39   uint8x8_t packushb (uint16x4_t s, uint16x4_t t);
  40   uint32x2_t paddw_u (uint32x2_t s, uint32x2_t t);
  41   uint16x4_t paddh_u (uint16x4_t s, uint16x4_t t);
  42   uint8x8_t paddb_u (uint8x8_t s, uint8x8_t t);
  43   int32x2_t paddw_s (int32x2_t s, int32x2_t t);
  44   int16x4_t paddh_s (int16x4_t s, int16x4_t t);
  45   int8x8_t paddb_s (int8x8_t s, int8x8_t t);
  46   uint64_t paddd_u (uint64_t s, uint64_t t);
  47   int64_t paddd_s (int64_t s, int64_t t);
  48   int16x4_t paddsh (int16x4_t s, int16x4_t t);
  49   int8x8_t paddsb (int8x8_t s, int8x8_t t);
  50   uint16x4_t paddush (uint16x4_t s, uint16x4_t t);
  51   uint8x8_t paddusb (uint8x8_t s, uint8x8_t t);
  52   uint64_t pandn_ud (uint64_t s, uint64_t t);
  53   uint32x2_t pandn_uw (uint32x2_t s, uint32x2_t t);
  54   uint16x4_t pandn_uh (uint16x4_t s, uint16x4_t t);
  55   uint8x8_t pandn_ub (uint8x8_t s, uint8x8_t t);
  56   int64_t pandn_sd (int64_t s, int64_t t);
  57   int32x2_t pandn_sw (int32x2_t s, int32x2_t t);
  58   int16x4_t pandn_sh (int16x4_t s, int16x4_t t);
  59   int8x8_t pandn_sb (int8x8_t s, int8x8_t t);
  60   uint16x4_t pavgh (uint16x4_t s, uint16x4_t t);
  61   uint8x8_t pavgb (uint8x8_t s, uint8x8_t t);
  62   uint32x2_t pcmpeqw_u (uint32x2_t s, uint32x2_t t);
  63   uint16x4_t pcmpeqh_u (uint16x4_t s, uint16x4_t t);
  64   uint8x8_t pcmpeqb_u (uint8x8_t s, uint8x8_t t);
  65   int32x2_t pcmpeqw_s (int32x2_t s, int32x2_t t);
  66   int16x4_t pcmpeqh_s (int16x4_t s, int16x4_t t);
  67   int8x8_t pcmpeqb_s (int8x8_t s, int8x8_t t);
  68   uint32x2_t pcmpgtw_u (uint32x2_t s, uint32x2_t t);
  69   uint16x4_t pcmpgth_u (uint16x4_t s, uint16x4_t t);
  70   uint8x8_t pcmpgtb_u (uint8x8_t s, uint8x8_t t);
  71   int32x2_t pcmpgtw_s (int32x2_t s, int32x2_t t);
  72   int16x4_t pcmpgth_s (int16x4_t s, int16x4_t t);
  73   int8x8_t pcmpgtb_s (int8x8_t s, int8x8_t t);
  74   uint16x4_t pextrh_u (uint16x4_t s, int field);
  75   int16x4_t pextrh_s (int16x4_t s, int field);
  76   uint16x4_t pinsrh_0_u (uint16x4_t s, uint16x4_t t);
  77   uint16x4_t pinsrh_1_u (uint16x4_t s, uint16x4_t t);
  78   uint16x4_t pinsrh_2_u (uint16x4_t s, uint16x4_t t);
  79   uint16x4_t pinsrh_3_u (uint16x4_t s, uint16x4_t t);
  80   int16x4_t pinsrh_0_s (int16x4_t s, int16x4_t t);
  81   int16x4_t pinsrh_1_s (int16x4_t s, int16x4_t t);
  82   int16x4_t pinsrh_2_s (int16x4_t s, int16x4_t t);
  83   int16x4_t pinsrh_3_s (int16x4_t s, int16x4_t t);
  84   int32x2_t pmaddhw (int16x4_t s, int16x4_t t);
  85   int16x4_t pmaxsh (int16x4_t s, int16x4_t t);
  86   uint8x8_t pmaxub (uint8x8_t s, uint8x8_t t);
  87   int16x4_t pminsh (int16x4_t s, int16x4_t t);
  88   uint8x8_t pminub (uint8x8_t s, uint8x8_t t);
  89   uint8x8_t pmovmskb_u (uint8x8_t s);
  90   int8x8_t pmovmskb_s (int8x8_t s);
  91   uint16x4_t pmulhuh (uint16x4_t s, uint16x4_t t);
  92   int16x4_t pmulhh (int16x4_t s, int16x4_t t);
  93   int16x4_t pmullh (int16x4_t s, int16x4_t t);
  94   int64_t pmuluw (uint32x2_t s, uint32x2_t t);
  95   uint8x8_t pasubub (uint8x8_t s, uint8x8_t t);
  96   uint16x4_t biadd (uint8x8_t s);
  97   uint16x4_t psadbh (uint8x8_t s, uint8x8_t t);
  98   uint16x4_t pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order);
  99   int16x4_t pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order);
 100   uint16x4_t psllh_u (uint16x4_t s, uint8_t amount);
 101   int16x4_t psllh_s (int16x4_t s, uint8_t amount);
 102   uint32x2_t psllw_u (uint32x2_t s, uint8_t amount);
 103   int32x2_t psllw_s (int32x2_t s, uint8_t amount);
 104   uint16x4_t psrlh_u (uint16x4_t s, uint8_t amount);
 105   int16x4_t psrlh_s (int16x4_t s, uint8_t amount);
 106   uint32x2_t psrlw_u (uint32x2_t s, uint8_t amount);
 107   int32x2_t psrlw_s (int32x2_t s, uint8_t amount);
 108   uint16x4_t psrah_u (uint16x4_t s, uint8_t amount);
 109   int16x4_t psrah_s (int16x4_t s, uint8_t amount);
 110   uint32x2_t psraw_u (uint32x2_t s, uint8_t amount);
 111   int32x2_t psraw_s (int32x2_t s, uint8_t amount);
 112   uint32x2_t psubw_u (uint32x2_t s, uint32x2_t t);
 113   uint16x4_t psubh_u (uint16x4_t s, uint16x4_t t);
 114   uint8x8_t psubb_u (uint8x8_t s, uint8x8_t t);
 115   int32x2_t psubw_s (int32x2_t s, int32x2_t t);
 116   int16x4_t psubh_s (int16x4_t s, int16x4_t t);
 117   int8x8_t psubb_s (int8x8_t s, int8x8_t t);
 118   uint64_t psubd_u (uint64_t s, uint64_t t);
 119   int64_t psubd_s (int64_t s, int64_t t);
 120   int16x4_t psubsh (int16x4_t s, int16x4_t t);
 121   int8x8_t psubsb (int8x8_t s, int8x8_t t);
 122   uint16x4_t psubush (uint16x4_t s, uint16x4_t t);
 123   uint8x8_t psubusb (uint8x8_t s, uint8x8_t t);
 124   uint32x2_t punpckhwd_u (uint32x2_t s, uint32x2_t t);
 125   uint16x4_t punpckhhw_u (uint16x4_t s, uint16x4_t t);
 126   uint8x8_t punpckhbh_u (uint8x8_t s, uint8x8_t t);
 127   int32x2_t punpckhwd_s (int32x2_t s, int32x2_t t);
 128   int16x4_t punpckhhw_s (int16x4_t s, int16x4_t t);
 129   int8x8_t punpckhbh_s (int8x8_t s, int8x8_t t);
 130   uint32x2_t punpcklwd_u (uint32x2_t s, uint32x2_t t);
 131   uint16x4_t punpcklhw_u (uint16x4_t s, uint16x4_t t);
 132   uint8x8_t punpcklbh_u (uint8x8_t s, uint8x8_t t);
 133   int32x2_t punpcklwd_s (int32x2_t s, int32x2_t t);
 134   int16x4_t punpcklhw_s (int16x4_t s, int16x4_t t);
 135   int8x8_t punpcklbh_s (int8x8_t s, int8x8_t t);
 136
 137 .. toctree::
 138   :maxdepth: 2
 139
 140
 141 .. _paired-single-arithmetic:
 142
 143 Paired-Single Arithmetic
 144 ~~~~~~~~~~~~~~~~~~~~~~~~
 145
 146 The table below lists the ``v2sf`` operations for which hardware
 147 support exists.  ``a``, ``b`` and ``c`` are ``v2sf``
 148 values and ``x`` is an integral value.
 149
 150 .. list-table::
 151    :header-rows: 1
 152
 153    * - C code
 154      - MIPS instruction
 155
 156    * - ``a + b``
 157      - ``add.ps``
 158    * - ``a - b``
 159      - ``sub.ps``
 160    * - ``-a``
 161      - ``neg.ps``
 162    * - ``a * b``
 163      - ``mul.ps``
 164    * - ``a * b + c``
 165      - ``madd.ps``
 166    * - ``a * b - c``
 167      - ``msub.ps``
 168    * - ``-(a * b + c)``
 169      - ``nmadd.ps``
 170    * - ``-(a * b - c)``
 171      - ``nmsub.ps``
 172    * - ``x ? a : b``
 173      - ``movn.ps`` / ``movz.ps``
 174
 175 Note that the multiply-accumulate instructions can be disabled
 176 using the command-line option ``-mno-fused-madd``.
 177
 178 .. _paired-single-built-in-functions:
 179
 180 Paired-Single Built-in Functions
 181 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 182
 183 The following paired-single functions map directly to a particular
 184 MIPS instruction.  Please refer to the architecture specification
 185 for details on what each instruction does.
 186
 187 .. function:: v2sf __builtin_mips_pll_ps (v2sf, v2sf)
 188
 189   Pair lower lower (``pll.ps``).
 190
 191 .. function:: v2sf __builtin_mips_pul_ps (v2sf, v2sf)
 192
 193   Pair upper lower (``pul.ps``).
 194
 195 .. function:: v2sf __builtin_mips_plu_ps (v2sf, v2sf)
 196
 197   Pair lower upper (``plu.ps``).
 198
 199 .. function:: v2sf __builtin_mips_puu_ps (v2sf, v2sf)
 200
 201   Pair upper upper (``puu.ps``).
 202
 203 .. function:: v2sf __builtin_mips_cvt_ps_s (float, float)
 204
 205   Convert pair to paired single (``cvt.ps.s``).
 206
 207 .. function:: float __builtin_mips_cvt_s_pl (v2sf)
 208
 209   Convert pair lower to single (``cvt.s.pl``).
 210
 211 .. function:: float __builtin_mips_cvt_s_pu (v2sf)
 212
 213   Convert pair upper to single (``cvt.s.pu``).
 214
 215 .. function:: v2sf __builtin_mips_abs_ps (v2sf)
 216
 217   Absolute value (``abs.ps``).
 218
 219 .. function:: v2sf __builtin_mips_alnv_ps (v2sf, v2sf, int)
 220
 221   Align variable (``alnv.ps``).
 222
 223   .. note::
 224
 225     The value of the third parameter must be 0 or 4
 226     modulo 8; otherwise the result is unpredictable.  Please read the
 227     instruction description for details.
 228
 229 The following multi-instruction functions are also available.
 230 In each case, :samp:`{cond}` can be any of the 16 floating-point conditions:
 231 ``f``, ``un``, ``eq``, ``ueq``, ``olt``, ``ult``,
 232 ``ole``, ``ule``, ``sf``, ``ngle``, ``seq``, ``ngl``,
 233 ``lt``, ``nge``, ``le`` or ``ngt``.
 234
 235 .. function:: v2sf __builtin_mips_movt_c_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
 236 .. function:: v2sf __builtin_mips_movf_c_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
 237
 238   Conditional move based on floating-point comparison (``c.cond.ps``,
 239   ``movt.ps`` / ``movf.ps``).
 240
 241   The ``movt`` functions return the value :samp:`{x}` computed by:
 242
 243   .. code-block:: none
 244
 245     c.cond.ps cc,a,b
 246     mov.ps x,c
 247     movt.ps x,d,cc
 248
 249   The ``movf`` functions are similar but use ``movf.ps`` instead
 250   of ``movt.ps``.
 251
 252 .. function:: int __builtin_mips_upper_c_cond_ps (v2sf a, v2sf b)
 253 .. function:: int __builtin_mips_lower_c_cond_ps (v2sf a, v2sf b)
 254
 255   Comparison of two paired-single values (``c.cond.ps``,
 256   ``bc1t`` / ``bc1f``).
 257
 258   These functions compare :samp:`{a}` and :samp:`{b}` using ``c.cond.ps``
 259   and return either the upper or lower half of the result.  For example:
 260
 261   .. code-block:: c++
 262
 263     v2sf a, b;
 264     if (__builtin_mips_upper_c_eq_ps (a, b))
 265       upper_halves_are_equal ();
 266     else
 267       upper_halves_are_unequal ();
 268
 269     if (__builtin_mips_lower_c_eq_ps (a, b))
 270       lower_halves_are_equal ();
 271     else
 272       lower_halves_are_unequal ();
 273
 274 .. _mips-3d-built-in-functions:
 275
 276 MIPS-3D Built-in Functions
 277 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 278
 279 The MIPS-3D Application-Specific Extension (ASE) includes additional
 280 paired-single instructions that are designed to improve the performance
 281 of 3D graphics operations.  Support for these instructions is controlled
 282 by the :option:`-mips3d` command-line option.
 283
 284 The functions listed below map directly to a particular MIPS-3D
 285 instruction.  Please refer to the architecture specification for
 286 more details on what each instruction does.
 287
 288 .. function:: v2sf __builtin_mips_addr_ps (v2sf, v2sf)
 289
 290   Reduction add (``addr.ps``).
 291
 292 .. function:: v2sf __builtin_mips_mulr_ps (v2sf, v2sf)
 293
 294   Reduction multiply (``mulr.ps``).
 295
 296 .. function:: v2sf __builtin_mips_cvt_pw_ps (v2sf)
 297
 298   Convert paired single to paired word (``cvt.pw.ps``).
 299
 300 .. function:: v2sf __builtin_mips_cvt_ps_pw (v2sf)
 301
 302   Convert paired word to paired single (``cvt.ps.pw``).
 303
 304 .. function:: float __builtin_mips_recip1_s (float)
 305 .. function:: double __builtin_mips_recip1_d (double)
 306 .. function:: v2sf __builtin_mips_recip1_ps (v2sf)
 307
 308   Reduced-precision reciprocal (sequence step 1) (``recip1.fmt``).
 309
 310 .. function:: float __builtin_mips_recip2_s (float, float)
 311 .. function:: double __builtin_mips_recip2_d (double, double)
 312 .. function:: v2sf __builtin_mips_recip2_ps (v2sf, v2sf)
 313
 314   Reduced-precision reciprocal (sequence step 2) (``recip2.fmt``).
 315
 316 .. function:: float __builtin_mips_rsqrt1_s (float)
 317 .. function:: double __builtin_mips_rsqrt1_d (double)
 318 .. function:: v2sf __builtin_mips_rsqrt1_ps (v2sf)
 319
 320   Reduced-precision reciprocal square root (sequence step 1)
 321   (``rsqrt1.fmt``).
 322
 323 .. function:: float __builtin_mips_rsqrt2_s (float, float)
 324 .. function:: double __builtin_mips_rsqrt2_d (double, double)
 325 .. function:: v2sf __builtin_mips_rsqrt2_ps (v2sf, v2sf)
 326
 327   Reduced-precision reciprocal square root (sequence step 2)
 328   (``rsqrt2.fmt``).
 329
 330 The following multi-instruction functions are also available.
 331 In each case, :samp:`{cond}` can be any of the 16 floating-point conditions:
 332
 333 ``f``, ``un``, ``eq``, ``ueq``, ``olt``, ``ult``,
 334 ``ole``, ``ule``, ``sf``, ``ngle``, ``seq``,
 335 ``ngl``, ``lt``, ``nge``, ``le`` or ``ngt``.
 336
 337 .. function:: int __builtin_mips_cabs_cond_s (float a, float b)
 338 .. function:: int __builtin_mips_cabs_cond_d (double a, double b)
 339
 340   Absolute comparison of two scalar values (``cabs.cond.fmt``,
 341   ``bc1t`` / ``bc1f``).
 342
 343   These functions compare :samp:`{a}` and :samp:`{b}` using ``cabs.cond.s``
 344   or ``cabs.cond.d`` and return the result as a boolean value.
 345   For example:
 346
 347   .. code-block:: c++
 348
 349     float a, b;
 350     if (__builtin_mips_cabs_eq_s (a, b))
 351       true ();
 352     else
 353       false ();
 354
 355 .. function:: int __builtin_mips_upper_cabs_cond_ps (v2sf a, v2sf b)
 356 .. function:: int __builtin_mips_lower_cabs_cond_ps (v2sf a, v2sf b)
 357
 358   Absolute comparison of two paired-single values (``cabs.cond.ps``,
 359   ``bc1t`` / ``bc1f``).
 360
 361   These functions compare :samp:`{a}` and :samp:`{b}` using ``cabs.cond.ps``
 362   and return either the upper or lower half of the result.  For example:
 363
 364   .. code-block:: c++
 365
 366     v2sf a, b;
 367     if (__builtin_mips_upper_cabs_eq_ps (a, b))
 368       upper_halves_are_equal ();
 369     else
 370       upper_halves_are_unequal ();
 371
 372     if (__builtin_mips_lower_cabs_eq_ps (a, b))
 373       lower_halves_are_equal ();
 374     else
 375       lower_halves_are_unequal ();
 376
 377 .. function:: v2sf __builtin_mips_movt_cabs_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
 378 .. function:: v2sf __builtin_mips_movf_cabs_cond_ps (v2sf a, v2sf b, v2sf c, v2sf d)
 379
 380   Conditional move based on absolute comparison (``cabs.cond.ps``,
 381   ``movt.ps`` / ``movf.ps``).
 382
 383   The ``movt`` functions return the value :samp:`{x}` computed by:
 384
 385   .. code-block:: none
 386
 387     cabs.cond.ps cc,a,b
 388     mov.ps x,c
 389     movt.ps x,d,cc
 390
 391   The ``movf`` functions are similar but use ``movf.ps`` instead
 392   of ``movt.ps``.
 393
 394 .. function:: int __builtin_mips_any_c_cond_ps (v2sf a, v2sf b)
 395 .. function:: int __builtin_mips_all_c_cond_ps (v2sf a, v2sf b)
 396 .. function:: int __builtin_mips_any_cabs_cond_ps (v2sf a, v2sf b)
 397 .. function:: int __builtin_mips_all_cabs_cond_ps (v2sf a, v2sf b)
 398
 399   Comparison of two paired-single values
 400   (``c.cond.ps`` / ``cabs.cond.ps``,
 401   ``bc1any2t`` / ``bc1any2f``).
 402
 403   These functions compare :samp:`{a}` and :samp:`{b}` using ``c.cond.ps``
 404   or ``cabs.cond.ps``.  The ``any`` forms return ``true`` if either
 405   result is ``true`` and the ``all`` forms return ``true`` if both results are ``true``.
 406   For example:
 407
 408   .. code-block:: c++
 409
 410     v2sf a, b;
 411     if (__builtin_mips_any_c_eq_ps (a, b))
 412       one_is_true ();
 413     else
 414       both_are_false ();
 415
 416     if (__builtin_mips_all_c_eq_ps (a, b))
 417       both_are_true ();
 418     else
 419       one_is_false ();
 420
 421 .. function:: int __builtin_mips_any_c_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
 422 .. function:: int __builtin_mips_all_c_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
 423 .. function:: int __builtin_mips_any_cabs_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
 424 .. function:: int __builtin_mips_all_cabs_cond_4s (v2sf a, v2sf b, v2sf c, v2sf d)
 425
 426   Comparison of four paired-single values
 427   (``c.cond.ps`` / ``cabs.cond.ps``,
 428   ``bc1any4t`` / ``bc1any4f``).
 429
 430   These functions use ``c.cond.ps`` or ``cabs.cond.ps``
 431   to compare :samp:`{a}` with :samp:`{b}` and to compare :samp:`{c}` with :samp:`{d}`.
 432   The ``any`` forms return ``true`` if any of the four results are ``true``
 433   and the ``all`` forms return ``true`` if all four results are ``true``.
 434   For example:
 435
 436   .. code-block:: c++
 437
 438     v2sf a, b, c, d;
 439     if (__builtin_mips_any_c_eq_4s (a, b, c, d))
 440       some_are_true ();
 441     else
 442       all_are_false ();
 443
 444     if (__builtin_mips_all_c_eq_4s (a, b, c, d))
 445       all_are_true ();
 446     else
 447       some_are_false ();