configure.ac: Add Visium support.

[thirdparty/gcc.git] / gcc / doc / extend.texi
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi

index 721c9b10daf5523fd3a187985e5e3302b4184446..25226824043dd404ec030b335880a59268991cf1 100644 (file)
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1,4 +1,4 @@
-@c Copyright (C) 1988-2013 Free Software Foundation, Inc.
+@c Copyright (C) 1988-2015 Free Software Foundation, Inc.
  
  @c This is part of the GCC manual.
  @c For copying conditions, see the file gcc.texi.
@@ -30,7 +30,7 @@ extensions, accepted by GCC in C90 mode and in C++.
  * Constructing Calls::  Dispatching a call to another function.
  * Typeof::              @code{typeof}: referring to the type of an expression.
  * Conditionals::        Omitting the middle operand of a @samp{?:} expression.
-* __int128::                   128-bit integers---@code{__int128}.
+* __int128::           128-bit integers---@code{__int128}.
  * Long Long::           Double-word integers---@code{long long int}.
  * Complex::             Data types for complex numbers.
  * Floating Types::      Additional Floating Types.
@@ -46,6 +46,7 @@ extensions, accepted by GCC in C90 mode and in C++.
  * Escaped Newlines::    Slightly looser rules for escaped newlines.
  * Subscripting::        Any array can be subscripted, even if not an lvalue.
  * Pointer Arith::       Arithmetic on @code{void}-pointers and function pointers.
+* Pointers to Arrays::  Pointers to arrays with qualifiers work as expected.
  * Initializers::        Non-constant initializers.
  * Compound Literals::   Compound literals give structures, unions
                          or arrays as values.
@@ -55,6 +56,7 @@ extensions, accepted by GCC in C90 mode and in C++.
  * Mixed Declarations::  Mixing declarations and code.
  * Function Attributes:: Declaring that functions have no side effects,
                          or that they can never return.
+* Label Attributes::    Specifying attributes on labels.
  * Attribute Syntax::    Formal syntax for attributes.
  * Function Prototypes:: Prototype declarations and old-style definitions.
  * C++ Comments::        C++ comments are recognized.
@@ -65,11 +67,7 @@ extensions, accepted by GCC in C90 mode and in C++.
  * Alignment::           Inquiring about the alignment of a type or variable.
  * Inline::              Defining inline functions (as fast as macros).
  * Volatiles::           What constitutes an access to a volatile object.
-* Extended Asm::        Assembler instructions with C expressions as operands.
-                        (With them you can define ``built-in'' functions.)
-* Constraints::         Constraints for asm operands
-* Asm Labels::          Specifying the assembler name to use for a C symbol.
-* Explicit Reg Vars::   Defining variables residing in specified registers.
+* Using Assembly Language with C:: Instructions and extensions for interfacing C with assembler.
  * Alternate Keywords::  @code{__const__}, @code{__asm__}, etc., for header files.
  * Incomplete Enums::    @code{enum foo;}, with details to follow.
  * Function Names::      Printable strings which are the name of the current
@@ -79,9 +77,12 @@ extensions, accepted by GCC in C90 mode and in C++.
  * Offsetof::            Special syntax for implementing @code{offsetof}.
  * __sync Builtins::     Legacy built-in functions for atomic memory access.
  * __atomic Builtins::   Atomic built-in functions with memory model.
+* Integer Overflow Builtins:: Built-in functions to perform arithmetic with
+                        overflow checking.
  * x86 specific memory model extensions for transactional memory:: x86 memory models.
  * Object Size Checking:: Built-in functions for limited buffer overflow
                          checking.
+* Pointer Bounds Checker builtins:: Built-in functions for Pointer Bounds Checker.
  * Cilk Plus Builtins::  Built-in functions for the Cilk Plus language extension.
  * Other Builtins::      Other built-in functions.
  * Target Builtins::     Built-in functions specific to particular targets.
@@ -152,7 +153,7 @@ the value of an enumeration constant, the width of a bit-field, or
  the initial value of a static variable.
  
  If you don't know the type of the operand, you can still do this, but you
-must use @code{typeof} (@pxref{Typeof}).
+must use @code{typeof} or @code{__auto_type} (@pxref{Typeof}).
  
  In G++, the result value of a statement expression undergoes array and
  function pointer decay, and is returned by value to the enclosing
@@ -376,6 +377,8 @@ goto *(&&foo + array[i]);
  This is more friendly to code living in shared libraries, as it reduces
  the number of dynamic relocations that are needed, and by consequence,
  allows the data to be read-only.
+This alternative with label differences is not supported for the AVR target;
+please use the first approach for AVR programs.
  
  The @code{&&foo} expressions for the same label might have different
  values if the containing function is inlined or cloned.  If a program
@@ -754,6 +757,35 @@ Thus, @code{array (pointer (char), 4)} is the type of arrays of 4
  pointers to @code{char}.
  @end itemize
  
+In GNU C, but not GNU C++, you may also declare the type of a variable
+as @code{__auto_type}.  In that case, the declaration must declare
+only one variable, whose declarator must just be an identifier, the
+declaration must be initialized, and the type of the variable is
+determined by the initializer; the name of the variable is not in
+scope until after the initializer.  (In C++, you should use C++11
+@code{auto} for this purpose.)  Using @code{__auto_type}, the
+``maximum'' macro above could be written as:
+
+@smallexample
+#define max(a,b) \
+  (@{ __auto_type _a = (a); \
+      __auto_type _b = (b); \
+    _a > _b ? _a : _b; @})
+@end smallexample
+
+Using @code{__auto_type} instead of @code{typeof} has two advantages:
+
+@itemize @bullet
+@item Each argument to the macro appears only once in the expansion of
+the macro.  This prevents the size of the macro expansion growing
+exponentially when calls to such macros are nested inside arguments of
+such macros.
+
+@item If the argument to the macro has variably modified type, it is
+evaluated only once when using @code{__auto_type}, but twice if
+@code{typeof} is used.
+@end itemize
+
  @emph{Compatibility Note:} In addition to @code{typeof}, GCC 2 supported
  a more limited extension that permitted one to write
  
@@ -1512,7 +1544,7 @@ struct bar @{ struct foo z; @};
  struct foo a = @{ 1, @{ 2, 3, 4 @} @};        // @r{Valid.}
  struct bar b = @{ @{ 1, @{ 2, 3, 4 @} @} @};    // @r{Invalid.}
  struct bar c = @{ @{ 1, @{ @} @} @};            // @r{Valid.}
-struct foo d[1] = @{ @{ 1 @{ 2, 3, 4 @} @} @};  // @r{Invalid.}
+struct foo d[1] = @{ @{ 1, @{ 2, 3, 4 @} @} @};  // @r{Invalid.}
  @end smallexample
  
  @node Empty Structures
@@ -1563,6 +1595,18 @@ Jumping or breaking out of the scope of the array name deallocates the
  storage.  Jumping into the scope is not allowed; you get an error
  message for it.
  
+@cindex variable-length array in a structure
+As an extension, GCC accepts variable-length arrays as a member of
+a structure or a union.  For example:
+
+@smallexample
+void
+foo (int n)
+@{
+  struct S @{ int x[n]; @};
+@}
+@end smallexample
+
  @cindex @code{alloca} vs variable-length arrays
  You can use the function @code{alloca} to get an effect much like
  variable-length arrays.  The function @code{alloca} is available in
@@ -1741,6 +1785,27 @@ and on function types, and returns 1.
  The option @option{-Wpointer-arith} requests a warning if these extensions
  are used.
  
+@node Pointers to Arrays
+@section Pointers to arrays with qualifiers work as expected
+@cindex pointers to arrays
+@cindex const qualifier
+
+In GNU C, pointers to arrays with qualifiers work similarly to pointers
+to other qualified types.  For example, a value of type @code{int (*)[5]}
+can be used to initialize a variable of type @code{const int (*)[5]}.
+These types are incompatible in ISO C because the @code{const} qualifier
+is formally attached to the element type of the array and not the
+array itself.
+
+@smallexample
+extern void
+transpose (int N, int M, double out[M][N], const double in[N][M]);
+double x[3][2];
+double y[2][3];
+@r{@dots{}}
+transpose(3, 2, y, x);
+@end smallexample
+
  @node Initializers
  @section Non-Constant Initializers
  @cindex initializers, non-constant
@@ -1938,6 +2003,9 @@ Another syntax that has the same meaning, obsolete since GCC 2.5, is
  struct point p = @{ y: yvalue, x: xvalue @};
  @end smallexample
  
+Omitted field members are implicitly initialized the same as objects
+that have static storage duration.
+
  @cindex designators
  The @samp{[@var{index}]} or @samp{.@var{fieldname}} is known as a
  @dfn{designator}.  You can also use a designator (or the obsolete colon
@@ -2125,21 +2193,25 @@ The keyword @code{__attribute__} allows you to specify special
  attributes when making a declaration.  This keyword is followed by an
  attribute specification inside double parentheses.  The following
  attributes are currently defined for functions on all targets:
-@code{aligned}, @code{alloc_size}, @code{noreturn},
-@code{returns_twice}, @code{noinline}, @code{noclone},
+@code{aligned}, @code{alloc_size}, @code{alloc_align}, @code{assume_aligned},
+@code{noreturn}, @code{returns_twice}, @code{noinline}, @code{noclone},
  @code{always_inline}, @code{flatten}, @code{pure}, @code{const},
  @code{nothrow}, @code{sentinel}, @code{format}, @code{format_arg},
  @code{no_instrument_function}, @code{no_split_stack},
  @code{section}, @code{constructor},
  @code{destructor}, @code{used}, @code{unused}, @code{deprecated},
  @code{weak}, @code{malloc}, @code{alias}, @code{ifunc},
-@code{warn_unused_result}, @code{nonnull}, @code{gnu_inline},
+@code{warn_unused_result}, @code{nonnull},
+@code{returns_nonnull}, @code{gnu_inline},
  @code{externally_visible}, @code{hot}, @code{cold}, @code{artificial},
  @code{no_sanitize_address}, @code{no_address_safety_analysis},
+@code{no_sanitize_undefined}, @code{no_reorder}, @code{bnd_legacy},
+@code{bnd_instrument},
  @code{error} and @code{warning}.
  Several other attributes are defined for functions on particular
  target systems.  Other attributes, including @code{section} are
-supported for variables declarations (@pxref{Variable Attributes})
+supported for variables declarations (@pxref{Variable Attributes}),
+labels (@pxref{Label Attributes})
  and for types (@pxref{Type Attributes}).
  
  GCC plugins may provide their own attributes.
@@ -2210,7 +2282,7 @@ For instance,
  
  @smallexample
  void* my_calloc(size_t, size_t) __attribute__((alloc_size(1,2)))
-void my_realloc(void*, size_t) __attribute__((alloc_size(2)))
+void* my_realloc(void*, size_t) __attribute__((alloc_size(2)))
  @end smallexample
  
  @noindent
@@ -2218,11 +2290,55 @@ declares that @code{my_calloc} returns memory of the size given by
  the product of parameter 1 and 2 and that @code{my_realloc} returns memory
  of the size given by parameter 2.
  
+@item alloc_align
+@cindex @code{alloc_align} attribute
+The @code{alloc_align} attribute is used to tell the compiler that the
+function return value points to memory, where the returned pointer minimum
+alignment is given by one of the function's parameters.  GCC uses this
+information to improve pointer alignment analysis.
+
+The function parameter denoting the allocated alignment is specified by
+one integer argument, whose number is the argument of the attribute.
+Argument numbering starts at one.
+
+For instance,
+
+@smallexample
+void* my_memalign(size_t, size_t) __attribute__((alloc_align(1)))
+@end smallexample
+
+@noindent
+declares that @code{my_memalign} returns memory with minimum alignment
+given by parameter 1.
+
+@item assume_aligned
+@cindex @code{assume_aligned} attribute
+The @code{assume_aligned} attribute is used to tell the compiler that the
+function return value points to memory, where the returned pointer minimum
+alignment is given by the first argument.
+If the attribute has two arguments, the second argument is the misalignment offset.
+
+For instance,
+
+@smallexample
+void* my_alloc1(size_t) __attribute__((assume_aligned(16)))
+void* my_alloc2(size_t) __attribute__((assume_aligned(32, 8)))
+@end smallexample
+
+@noindent
+declares that @code{my_alloc1} returns a 16-byte aligned pointer and
+that @code{my_alloc2} returns a pointer whose value modulo 32 is equal
+to 8.
+
  @item always_inline
  @cindex @code{always_inline} function attribute
  Generally, functions are not inlined unless optimization is specified.
-For functions declared inline, this attribute inlines the function even
-if no optimization level is specified.
+For functions declared inline, this attribute inlines the function
+independent of any restrictions that otherwise apply to inlining.
+Failure to inline such a function is diagnosed as an error.
+Note that if such a function is called indirectly the compiler may
+or may not inline it depending on optimization level and a failure
+to inline an indirect call may or may not be diagnosed.
  
  @item gnu_inline
  @cindex @code{gnu_inline} function attribute
@@ -2502,6 +2618,12 @@ on data in the eight-bit data area.  Note the eight-bit data area is limited to
  You must use GAS and GLD from GNU binutils version 2.7 or later for
  this attribute to work correctly.
  
+@item exception
+@cindex exception handler functions
+Use this attribute on the NDS32 target to indicate that the specified function
+is an exception handler.  The compiler will generate corresponding sections
+for use in an exception handler.
+
  @item exception_handler
  @cindex exception handler functions on the Blackfin processor
  Use this attribute on the Blackfin to indicate that the specified function
@@ -2812,16 +2934,26 @@ least version 2.20.1), and GNU C library (at least version 2.11.1).
  
  @item interrupt
  @cindex interrupt handler functions
-Use this attribute on the ARM, AVR, CR16, Epiphany, M32C, M32R/D, m68k, MeP, MIPS,
-RL78, RX and Xstormy16 ports to indicate that the specified function is an
-interrupt handler.  The compiler generates function entry and exit
-sequences suitable for use in an interrupt handler when this attribute
-is present.  With Epiphany targets it may also generate a special section with
-code to initialize the interrupt vector table.
+Use this attribute on the ARC, ARM, AVR, CR16, Epiphany, M32C, M32R/D,
+m68k, MeP, MIPS, MSP430, RL78, RX, Visium and Xstormy16 ports to indicate
+that the specified function is an interrupt handler.  The compiler generates
+function entry and exit sequences suitable for use in an interrupt handler
+when this attribute is present.  With Epiphany targets it may also generate
+a special section with code to initialize the interrupt vector table.
  
  Note, interrupt handlers for the Blackfin, H8/300, H8/300H, H8S, MicroBlaze,
  and SH processors can be specified via the @code{interrupt_handler} attribute.
  
+Note, on the ARC, you must specify the kind of interrupt to be handled
+in a parameter to the interrupt attribute like this:
+
+@smallexample
+void f () __attribute__ ((interrupt ("ilink1")));
+@end smallexample
+
+Permissible values for this parameter are: @w{@code{ilink1}} and
+@w{@code{ilink2}}.
+
  Note, on the AVR, the hardware globally disables interrupts when an
  interrupt is executed.  The first instruction of an interrupt handler
  declared with this attribute is a @code{SEI} instruction to
@@ -2844,6 +2976,42 @@ Permissible values for this parameter are: @code{IRQ}, @code{FIQ},
  On ARMv7-M the interrupt type is ignored, and the attribute means the function
  may be called with a word-aligned stack pointer.
  
+Note, for the MSP430 you can provide an argument to the interrupt
+attribute which specifies a name or number.  If the argument is a
+number it indicates the slot in the interrupt vector table (0 - 31) to
+which this handler should be assigned.  If the argument is a name it
+is treated as a symbolic name for the vector slot.  These names should
+match up with appropriate entries in the linker script.  By default
+the names @code{watchdog} for vector 26, @code{nmi} for vector 30 and
+@code{reset} for vector 31 are recognised.
+
+You can also use the following function attributes to modify how
+normal functions interact with interrupt functions:
+
+@table @code
+@item critical
+@cindex @code{critical} attribute
+Critical functions disable interrupts upon entry and restore the
+previous interrupt state upon exit.  Critical functions cannot also
+have the @code{naked} or @code{reentrant} attributes.  They can have
+the @code{interrupt} attribute.
+
+@item reentrant
+@cindex @code{reentrant} attribute
+Reentrant functions disable interrupts upon entry and enable them
+upon exit.  Reentrant functions cannot also have the @code{naked}
+or @code{critical} attributes.  They can have the @code{interrupt}
+attribute.
+
+@item wakeup
+@cindex @code{wakeup} attribute
+This attribute only applies to interrupt functions.  It is silently
+ignored if applied to a non-interrupt function.  A wakeup interrupt
+function will rouse the processor from any low-power state that it
+might be in when the function exits.
+
+@end table
+
  On Epiphany targets one or more optional parameters can be added like this:
  
  @smallexample
@@ -2922,10 +3090,58 @@ void __attribute__ ((interrupt, use_shadow_register_set,
                       use_debug_exception_return)) v7 ();
  @end smallexample
  
+On the NDS32 target, this attribute indicates that the specified function
+is an interrupt handler.  The compiler will generate corresponding sections
+for use in an interrupt handler.  You can use the following attributes
+to modify the behavior:
+@table @code
+@item nested
+@cindex @code{nested} attribute
+This interrupt service routine is interruptible.
+@item not_nested
+@cindex @code{not_nested} attribute
+This interrupt service routine is not interruptible.
+@item nested_ready
+@cindex @code{nested_ready} attribute
+This interrupt service routine is interruptible after @code{PSW.GIE}
+(global interrupt enable) is set.  This allows the interrupt service routine to
+finish some short critical code before enabling interrupts.
+@item save_all
+@cindex @code{save_all} attribute
+The system will help save all registers onto the stack before entering the
+interrupt handler.
+@item partial_save
+@cindex @code{partial_save} attribute
+The system will help save caller registers onto the stack before entering the
+interrupt handler.
+@end table
+
  On RL78, use @code{brk_interrupt} instead of @code{interrupt} for
  handlers intended to be used with the @code{BRK} opcode (i.e.@: those
  that must end with @code{RETB} instead of @code{RETI}).
  
+On RX targets, you may specify one or more vector numbers as arguments
+to the attribute, as well as naming an alternate table name.
+Parameters are handled sequentially, so one handler can be assigned to
+multiple entries in multiple tables.  One may also pass the magic
+string @code{"$default"} which causes the function to be used for any
+unfilled slots in the current table.
+
+This example shows a simple assignment of a function to one vector in
+the default table (note that preprocessor macros may be used for
+chip-specific symbolic vector names):
+@smallexample
+void __attribute__ ((interrupt (5))) txd1_handler ();
+@end smallexample
+
+This example assigns a function to two slots in the default table
+(using preprocessor macros defined elsewhere) and makes it the default
+for the @code{dct} table:
+@smallexample
+void __attribute__ ((interrupt (RXD1_VECT,RXD2_VECT,"dct","$default")))
+       txd1_handler ();
+@end smallexample
+
  @item interrupt_handler
  @cindex interrupt handler functions on the Blackfin, m68k, H8/300 and SH processors
  Use this attribute on the Blackfin, m68k, H8/300, H8/300H, H8S, and SH to
@@ -2991,18 +3207,33 @@ unit.  This is to allow easy merging of multiple compilation units into one,
  for example, by using the link-time optimization.  For this reason the
  attribute is not allowed on types to annotate indirect calls.
  
-@item long_call/short_call
+@item long_call/medium_call/short_call
+@cindex indirect calls on ARC
  @cindex indirect calls on ARM
-This attribute specifies how a particular function is called on
-ARM and Epiphany.  Both attributes override the
-@option{-mlong-calls} (@pxref{ARM Options})
-command-line switch and @code{#pragma long_calls} settings.  The
+@cindex indirect calls on Epiphany
+These attributes specify how a particular function is called on
+ARC, ARM and Epiphany---with @code{medium_call} being specific to ARC.
+These attributes override the
+@option{-mlong-calls} (@pxref{ARM Options} and @ref{ARC Options})
+and @option{-mmedium-calls} (@pxref{ARC Options})
+command-line switches and @code{#pragma long_calls} settings.  For ARM, the
  @code{long_call} attribute indicates that the function might be far
  away from the call site and require a different (more expensive)
  calling sequence.   The @code{short_call} attribute always places
  the offset to the function from the call site into the @samp{BL}
  instruction directly.
  
+For ARC, a function marked with the @code{long_call} attribute is
+always called using register-indirect jump-and-link instructions,
+thereby enabling the called function to be placed anywhere within the
+32-bit address space.  A function marked with the @code{medium_call}
+attribute will always be close enough to be called with an unconditional
+branch-and-link instruction, which has a 25-bit offset from
+the call site.  A function marked with the @code{short_call}
+attribute will always be close enough to be called with a conditional
+branch-and-link instruction, which has a 21-bit offset from
+the call site.
+
  @item longcall/shortcall
  @cindex functions called via pointer on the RS/6000 and PowerPC
  On the Blackfin, RS/6000 and PowerPC, the @code{longcall} attribute
@@ -3029,14 +3260,16 @@ efficient @code{jal} instruction.
  
  @item malloc
  @cindex @code{malloc} attribute
-The @code{malloc} attribute is used to tell the compiler that a function
-may be treated as if any non-@code{NULL} pointer it returns cannot
-alias any other pointer valid when the function returns and that the memory
-has undefined content.
-This often improves optimization.
-Standard functions with this property include @code{malloc} and
-@code{calloc}.  @code{realloc}-like functions do not have this
-property as the memory pointed to does not have undefined content.
+This tells the compiler that a function is @code{malloc}-like, i.e.,
+that the pointer @var{P} returned by the function cannot alias any
+other pointer valid when the function returns, and moreover no
+pointers to valid objects occur in any storage addressed by @var{P}.
+
+Using this attribute can improve optimization.  Functions like
+@code{malloc} and @code{calloc} have this property because they return
+a pointer to uninitialized or zeroed-out storage.  However, functions
+like @code{realloc} do not have this property, as they can return a
+pointer to storage containing pointers.
  
  @item mips16/nomips16
  @cindex @code{mips16} attribute
@@ -3141,17 +3374,28 @@ this function attribute to make GCC generate the ``hot-patching'' function
  prologue used in Win32 API functions in Microsoft Windows XP Service Pack 2
  and newer.
  
+@item hotpatch [(@var{prologue-halfwords})]
+@cindex @code{hotpatch} attribute
+
+On S/390 System z targets, you can use this function attribute to
+make GCC generate a ``hot-patching'' function prologue.  The
+@code{hotpatch} attribute has no effect on functions that are explicitly
+inline.  If the @option{-mhotpatch} or @option{-mno-hotpatch}
+command-line option is used at the same time, the @code{hotpatch}
+attribute takes precedence.  If an argument is given, the maximum
+allowed value is 1000000.
+
  @item naked
  @cindex function without a prologue/epilogue code
-Use this attribute on the ARM, AVR, MCORE, RL78, RX and SPU ports to indicate that
-the specified function does not need prologue/epilogue sequences generated by
-the compiler.  It is up to the programmer to provide these sequences. The
-only statements that can be safely included in naked functions are
-@code{asm} statements that do not have operands.  All other statements,
-including declarations of local variables, @code{if} statements, and so
-forth, should be avoided.  Naked functions should be used to implement the
-body of an assembly function, while allowing the compiler to construct
-the requisite function declaration for the assembler.
+This attribute is available on the ARM, AVR, MCORE, MSP430, NDS32,
+RL78, RX and SPU ports.  It allows the compiler to construct the
+requisite function declaration, while allowing the body of the
+function to be assembly code. The specified function will not have
+prologue/epilogue sequences generated by the compiler. Only Basic
+@code{asm} statements can safely be included in naked functions
+(@pxref{Basic Asm}). While using Extended @code{asm} or a mixture of
+Basic @code{asm} and ``C'' code may appear to work, they cannot be
+depended upon to work reliably and are not supported.
  
  @item near
  @cindex functions that do not handle memory bank switching on 68HC11/68HC12
@@ -3253,6 +3497,30 @@ my_memcpy (void *dest, const void *src, size_t len)
          __attribute__((nonnull));
  @end smallexample
  
+@item no_reorder
+@cindex @code{no_reorder} function or variable attribute
+Do not reorder functions or variables marked @code{no_reorder}
+against each other or top-level assembler statements in the executable.
+The actual order in the program will depend on the linker command
+line.  Static variables marked like this are also not removed.
+This has a similar effect
+as the @option{-fno-toplevel-reorder} option, but only applies to the
+marked symbols.
+
+@item returns_nonnull
+@cindex @code{returns_nonnull} function attribute
+The @code{returns_nonnull} attribute specifies that the function
+return value should be a non-null pointer.  For instance, the declaration:
+
+@smallexample
+extern void *
+mymalloc (size_t len) __attribute__((returns_nonnull));
+@end smallexample
+
+@noindent
+lets the compiler optimize callers based on the knowledge
+that the return value will never be null.
+
  @item noreturn
  @cindex @code{noreturn} function attribute
  A few standard library functions, such as @code{abort} and @code{exit},
@@ -3415,8 +3683,8 @@ than 2.96.
  @cindex @code{hot} function attribute
  The @code{hot} attribute on a function is used to inform the compiler that
  the function is a hot spot of the compiled program.  The function is
-optimized more aggressively and on many target it is placed into special
-subsection of the text section so all hot functions appears close together
+optimized more aggressively and on many targets it is placed into a special
+subsection of the text section so all hot functions appear close together,
  improving locality.
  
  When profile feedback is available, via @option{-fprofile-use}, hot functions
@@ -3425,23 +3693,14 @@ are automatically detected and this attribute is ignored.
  The @code{hot} attribute on functions is not implemented in GCC versions
  earlier than 4.3.
  
-@cindex @code{hot} label attribute
-The @code{hot} attribute on a label is used to inform the compiler that
-path following the label are more likely than paths that are not so
-annotated.  This attribute is used in cases where @code{__builtin_expect}
-cannot be used, for instance with computed goto or @code{asm goto}.
-
-The @code{hot} attribute on labels is not implemented in GCC versions
-earlier than 4.8.
-
  @item cold
  @cindex @code{cold} function attribute
  The @code{cold} attribute on functions is used to inform the compiler that
  the function is unlikely to be executed.  The function is optimized for
-size rather than speed and on many targets it is placed into special
-subsection of the text section so all cold functions appears close together
+size rather than speed and on many targets it is placed into a special
+subsection of the text section so all cold functions appear close together,
  improving code locality of non-cold parts of program.  The paths leading
-to call of cold functions within code are marked as unlikely by the branch
+to calls of cold functions within code are marked as unlikely by the branch
  prediction mechanism.  It is thus useful to mark functions used to handle
  unlikely conditions, such as @code{perror}, as cold to improve optimization
  of hot functions that do call marked functions in rare occasions.
@@ -3452,15 +3711,6 @@ are automatically detected and this attribute is ignored.
  The @code{cold} attribute on functions is not implemented in GCC versions
  earlier than 4.3.
  
-@cindex @code{cold} label attribute
-The @code{cold} attribute on labels is used to inform the compiler that
-the path following the label is unlikely to be executed.  This attribute
-is used in cases where @code{__builtin_expect} cannot be used, for instance
-with computed goto or @code{asm goto}.
-
-The @code{cold} attribute on labels is not implemented in GCC versions
-earlier than 4.8.
-
  @item no_sanitize_address
  @itemx no_address_safety_analysis
  @cindex @code{no_sanitize_address} function attribute
@@ -3471,6 +3721,24 @@ The @code{no_address_safety_analysis} is a deprecated alias of the
  @code{no_sanitize_address} attribute, new code should use
  @code{no_sanitize_address}.
  
+@item no_sanitize_undefined
+@cindex @code{no_sanitize_undefined} function attribute
+The @code{no_sanitize_undefined} attribute on functions is used
+to inform the compiler that it should not check for undefined behavior
+in the function when compiling with the @option{-fsanitize=undefined} option.
+
+@item bnd_legacy
+@cindex @code{bnd_legacy} function attribute
+The @code{bnd_legacy} attribute on functions is used to inform the
+compiler that the function should not be instrumented when compiled
+with the @option{-fcheck-pointer-bounds} option.
+
+@item bnd_instrument
+@cindex @code{bnd_instrument} function attribute
+The @code{bnd_instrument} attribute on functions is used to inform the
+compiler that the function should be instrumented when compiled
+with the @option{-fchkp-instrument-marked-only} option.
+
  @item regparm (@var{number})
  @cindex @code{regparm} attribute
  @cindex functions that are passed arguments in registers on the 386
@@ -3491,6 +3759,21 @@ safe since the loaders there save EAX, EDX and ECX.  (Lazy binding can be
  disabled with the linker or the loader if desired, to avoid the
  problem.)
  
+@item reset
+@cindex reset handler functions
+Use this attribute on the NDS32 target to indicate that the specified function
+is a reset handler.  The compiler will generate corresponding sections
+for use in a reset handler.  You can use the following attributes
+to provide extra exception handling:
+@table @code
+@item nmi
+@cindex @code{nmi} attribute
+Provide a user-defined function to handle NMI exception.
+@item warm
+@cindex @code{warm} attribute
+Provide a user-defined function to handle warm reset exception.
+@end table
+
  @item sseregparm
  @cindex @code{sseregparm} attribute
  On the Intel 386 with SSE support, the @code{sseregparm} attribute
@@ -3549,6 +3832,18 @@ registers) are saved in the function prologue.  If the function is a leaf
  function, only volatiles used by the function are saved.  A normal function
  return is generated instead of a return from interrupt.
  
+@item break_handler
+@cindex break handler functions
+Use this attribute on the MicroBlaze ports to indicate that
+the specified function is a break handler.  The compiler generates function
+entry and exit sequences suitable for use in a break handler when this
+attribute is present.  The return from @code{break_handler} is done through
+the @code{rtbd} instead of @code{rtsd}.
+
+@smallexample
+void f () __attribute__ ((break_handler));
+@end smallexample
+
  @item section ("@var{section-name}")
  @cindex @code{section} function attribute
  Normally, the compiler places the code it generates in the @code{text} section.
@@ -3675,6 +3970,14 @@ int core2_func (void) __attribute__ ((__target__ ("arch=core2")));
  int sse3_func (void) __attribute__ ((__target__ ("sse3")));
  @end smallexample
  
+You can either use multiple
+strings to specify multiple options, or separate the options
+with a comma (@samp{,}).
+
+The @code{target} attribute is presently implemented for
+i386/x86_64, PowerPC, and Nios II targets only.
+The options supported are specific to each target.
+
  On the 386, the following options are allowed:
  
  @table @samp
@@ -3996,9 +4299,29 @@ compilation tunes for the @var{CPU} architecture, and not the
  default tuning specified on the command line.
  @end table
  
-On the 386/x86_64 and PowerPC back ends, you can use either multiple
-strings to specify multiple options, or you can separate the option
-with a comma (@code{,}).
+When compiling for Nios II, the following options are allowed:
+
+@table @samp
+@item custom-@var{insn}=@var{N}
+@itemx no-custom-@var{insn}
+@cindex @code{target("custom-@var{insn}=@var{N}")} attribute
+@cindex @code{target("no-custom-@var{insn}")} attribute
+Each @samp{custom-@var{insn}=@var{N}} attribute locally enables use of a
+custom instruction with encoding @var{N} when generating code that uses 
+@var{insn}.  Similarly, @samp{no-custom-@var{insn}} locally inhibits use of
+the custom instruction @var{insn}.
+These target attributes correspond to the
+@option{-mcustom-@var{insn}=@var{N}} and @option{-mno-custom-@var{insn}}
+command-line options, and support the same set of @var{insn} keywords.
+@xref{Nios II Options}, for more information.
+
+@item custom-fpu-cfg=@var{name}
+@cindex @code{target("custom-fpu-cfg=@var{name}")} attribute
+This attribute corresponds to the @option{-mcustom-fpu-cfg=@var{name}}
+command-line option, to select a predefined set of custom instructions
+named @var{name}.
+@xref{Nios II Options}, for more information.
+@end table
  
  On the 386/x86_64 and PowerPC back ends, the inliner does not inline a
  function that has different target options than the caller, unless the
@@ -4006,10 +4329,6 @@ callee has a subset of the target options of the caller.  For example
  a function declared with @code{target("sse3")} can inline a function
  with @code{target("sse2")}, since @code{-msse3} implies @code{-msse2}.
  
-The @code{target} attribute is not implemented in GCC versions earlier
-than 4.4 for the i386/x86_64 and 4.6 for the PowerPC back ends.  It is
-not currently implemented for other back ends.
-
  @item tiny_data
  @cindex tiny data section on the H8/300H and H8S
  Use this attribute on the H8/300H and H8S to indicate that the specified
@@ -4046,6 +4365,13 @@ When applied to a member function of a C++ class template, the
  attribute also means that the function is instantiated if the
  class itself is instantiated.
  
+@item vector
+@cindex @code{vector} attribute
+This RX attribute is similar to the @code{interrupt} attribute, including its
+parameters, but does not make the function an interrupt-handler type
+function (i.e. it retains the normal C function calling ABI).  See the
+@code{interrupt} attribute for a description of its arguments.
+
  @item version_id
  @cindex @code{version_id} attribute
  This IA-64 HP-UX attribute, attached to a global variable or function, renames a
@@ -4143,6 +4469,12 @@ the One Definition Rule; for example, it is usually not useful to mark
  an inline method as hidden without marking the whole class as hidden.
  
  A C++ namespace declaration can also have the visibility attribute.
+
+@smallexample
+namespace nspace1 __attribute__ ((visibility ("protected")))
+@{ /* @r{Do something.} */; @}
+@end smallexample
+
  This attribute applies only to the particular namespace body, not to
  other definitions of the same namespace; it is equivalent to using
  @samp{#pragma GCC visibility} before and after the namespace
@@ -4262,6 +4594,65 @@ attachment of attributes to their corresponding declarations, whereas
  @code{#pragma GCC} is of use for constructs that do not naturally form
  part of the grammar.  @xref{Pragmas,,Pragmas Accepted by GCC}.
  
+@node Label Attributes
+@section Label Attributes
+@cindex Label Attributes
+
+GCC allows attributes to be set on C labels.  @xref{Attribute Syntax}, for 
+details of the exact syntax for using attributes.  Other attributes are 
+available for functions (@pxref{Function Attributes}), variables 
+(@pxref{Variable Attributes}) and for types (@pxref{Type Attributes}).
+
+This example uses the @code{cold} and @code{unused} label attributes to
+indicate that the @code{ErrorHandling} branch is unlikely to be taken and
+that the @code{ErrorHandling} label is unused:
+
+@smallexample
+
+   asm goto ("some asm" : : : : NoError);
+
+/* This branch (the fallthru from the asm) is less commonly used */
+ErrorHandling: 
+   __attribute__((cold, unused)); /* Semi-colon is required here */
+   printf("error\n");
+   return 0;
+
+NoError:
+   printf("no error\n");
+   return 1;
+@end smallexample
+
+@table @code
+@item unused
+@cindex @code{unused} label attribute
+This feature is intended for program-generated code that may contain 
+unused labels, but which is compiled with @option{-Wall}.  It is
+not normally appropriate to use it in human-written code, though it
+could be useful in cases where the code that jumps to the label is
+contained within an @code{#ifdef} conditional.
+
+@item hot
+@cindex @code{hot} label attribute
+The @code{hot} attribute on a label is used to inform the compiler that
+the path following the label is more likely than paths that are not so
+annotated.  This attribute is used in cases where @code{__builtin_expect}
+cannot be used, for instance with computed goto or @code{asm goto}.
+
+The @code{hot} attribute on labels is not implemented in GCC versions
+earlier than 4.8.
+
+@item cold
+@cindex @code{cold} label attribute
+The @code{cold} attribute on labels is used to inform the compiler that
+the path following the label is unlikely to be executed.  This attribute
+is used in cases where @code{__builtin_expect} cannot be used, for instance
+with computed goto or @code{asm goto}.
+
+The @code{cold} attribute on labels is not implemented in GCC versions
+earlier than 4.8.
+
+@end table
+
  @node Attribute Syntax
  @section Attribute Syntax
  @cindex attribute syntax
@@ -4285,6 +4676,8 @@ applying to functions.  @xref{Variable Attributes}, for details of the
  semantics of attributes applying to variables.  @xref{Type Attributes},
  for details of the semantics of attributes applying to structure, union
  and enumerated types.
+@xref{Label Attributes}, for details of the semantics of attributes 
+applying to labels.
  
  An @dfn{attribute specifier} is of the form
  @code{__attribute__ ((@var{attribute-list}))}.  An @dfn{attribute list}
@@ -4322,14 +4715,10 @@ with the list being a single string constant.
  An @dfn{attribute specifier list} is a sequence of one or more attribute
  specifiers, not separated by any other tokens.
  
+@subsubheading Label Attributes
+
  In GNU C, an attribute specifier list may appear after the colon following a
-label, other than a @code{case} or @code{default} label.  The only
-attribute it makes sense to use after a label is @code{unused}.  This
-feature is intended for program-generated code that may contain unused labels,
-but which is compiled with @option{-Wall}.  It is
-not normally appropriate to use in it human-written code, though it
-could be useful in cases where the code that jumps to the label is
-contained within an @code{#ifdef} conditional.  GNU C++ only permits
+label, other than a @code{case} or @code{default} label.  GNU C++ only permits
  attributes on labels if the attribute specifier is immediately
  followed by a semicolon (i.e., the label applies to an empty
  statement).  If the semicolon is missing, C++ label attributes are
@@ -4337,6 +4726,8 @@ ambiguous, as it is permissible for a declaration, which could begin
  with an attribute list, to be labelled in C++.  Declarations cannot be
  labelled in C90 or C99, so the ambiguity does not arise there.
  
+@subsubheading Type Attributes
+
  An attribute specifier list may appear as part of a @code{struct},
  @code{union} or @code{enum} specifier.  It may go either immediately
  after the @code{struct}, @code{union} or @code{enum} keyword, or after
@@ -4351,6 +4742,9 @@ defined is not complete until after the attribute specifiers.
  @c attributes could use sizeof for the structure, but the size could be
  @c changed later by "packed" attributes.
  
+
+@subsubheading All other attributes
+
  Otherwise, an attribute specifier appears as part of a declaration,
  counting declarations of unnamed parameters and type names, and relates
  to that declaration (which may be nested in another declaration, for
@@ -4591,7 +4985,8 @@ by an attribute specification inside double parentheses.  Some
  attributes are currently defined generically for variables.
  Other attributes are defined for variables on particular target
  systems.  Other attributes are available for functions
-(@pxref{Function Attributes}) and for types (@pxref{Type Attributes}).
+(@pxref{Function Attributes}), labels (@pxref{Label Attributes}) and for 
+types (@pxref{Type Attributes}).
  Other front ends might define more attributes
  (@pxref{C++ Extensions,,Extensions to the C++ Language}).
  
@@ -4855,8 +5250,9 @@ to be possibly unused.  GCC does not produce a warning for this
  variable.
  
  @item used
-This attribute, attached to a variable, means that the variable must be
-emitted even if it appears that the variable is not referenced.
+This attribute, attached to a variable with static storage, means that
+the variable must be emitted even if it appears that the variable is not
+referenced.
  
  When applied to a static data member of a C++ class template, the
  attribute also means that the member is instantiated if the
@@ -4959,6 +5355,47 @@ normally resides in the data memory (RAM).
  
  See also the @ref{AVR Named Address Spaces} section for
  an alternate way to locate and access data in flash memory.
+
+@item io
+@itemx io (@var{addr})
+Variables with the @code{io} attribute are used to address
+memory-mapped peripherals in the io address range.
+If an address is specified, the variable
+is assigned that address, and the value is interpreted as an
+address in the data address space.
+Example:
+
+@smallexample
+volatile int porta __attribute__((io (0x22)));
+@end smallexample
+
+The address specified is an address in the data address range.
+
+Otherwise, the variable is not assigned an address, but the
+compiler will still use in/out instructions where applicable,
+assuming some other module assigns an address in the io address range.
+Example:
+
+@smallexample
+extern volatile int porta __attribute__((io));
+@end smallexample
+
+@item io_low
+@itemx io_low (@var{addr})
+This is like the @code{io} attribute, but additionally it informs the
+compiler that the object lies in the lower half of the I/O area,
+allowing the use of @code{cbi}, @code{sbi}, @code{sbic} and @code{sbis}
+instructions.
+
+@item address
+@itemx address (@var{addr})
+Variables with the @code{address} attribute are used to address
+memory-mapped peripherals that may lie outside the io address range.
+
+@smallexample
+volatile int porta __attribute__((address (0x600)));
+@end smallexample
+
  @end table
  
  @subsection Blackfin Variable Attributes
@@ -5242,12 +5679,12 @@ placed in either the @code{.bss_below100} section or the
  The keyword @code{__attribute__} allows you to specify special
  attributes of @code{struct} and @code{union} types when you define
  such types.  This keyword is followed by an attribute specification
-inside double parentheses.  Seven attributes are currently defined for
+inside double parentheses.  Eight attributes are currently defined for
  types: @code{aligned}, @code{packed}, @code{transparent_union},
-@code{unused}, @code{deprecated}, @code{visibility}, and
-@code{may_alias}.  Other attributes are defined for functions
-(@pxref{Function Attributes}) and for variables (@pxref{Variable
-Attributes}).
+@code{unused}, @code{deprecated}, @code{visibility}, @code{may_alias}
+and @code{bnd_variable_size}.  Other attributes are defined for
+functions (@pxref{Function Attributes}), labels (@pxref{Label 
+Attributes}) and for variables (@pxref{Variable Attributes}).
  
  You may also specify any one of these attributes with @samp{__}
  preceding and following its keyword.  This allows you to use these
@@ -5386,6 +5823,8 @@ You may only specify this attribute on the definition of an @code{enum},
  also define the enumerated type, structure or union.
  
  @item transparent_union
+@cindex @code{transparent_union} attribute
+
  This attribute, attached to a @code{union} type definition, indicates
  that any function parameter having that union type causes calls to that
  function to be treated in a special way.
@@ -5539,6 +5978,46 @@ and caught in another, the class must have default visibility.
  Otherwise the two shared objects are unable to use the same
  typeinfo node and exception handling will break.
  
+@item designated_init
+This attribute may only be applied to structure types.  It indicates
+that any initialization of an object of this type must use designated
+initializers rather than positional initializers.  The intent of this
+attribute is to allow the programmer to indicate that a structure's
+layout may change, and that therefore relying on positional
+initialization will result in future breakage.
+
+GCC emits warnings based on this attribute by default; use
+@option{-Wno-designated-init} to suppress them.
+
+@item bnd_variable_size
+When applied to a structure field, this attribute tells the Pointer
+Bounds Checker that the size of this field should not be computed
+using static type information.  It may be used to mark variable
+sized static array fields placed at the end of a structure.
+
+@smallexample
+struct S
+@{
+  int size;
+  char data[1];
+@}
+S *p = (S *)malloc (sizeof(S) + 100);
+p->data[10] = 0; //Bounds violation
+@end smallexample
+
+By using the attribute on a field we can avoid a bounds violation
+that we most probably do not want to see:
+
+@smallexample
+struct S
+@{
+  int size;
+  char data[1] __attribute__((bnd_variable_size));
+@}
+S *p = (S *)malloc (sizeof(S) + 100);
+p->data[10] = 0; //OK
+@end smallexample
+
  @end table
  
  To specify multiple attributes, separate them by commas within the
@@ -5879,491 +6358,954 @@ bit-fields are only partially accessed, if they straddle a storage unit
  boundary.  For these reasons it is unwise to use volatile bit-fields to
  access hardware.
  
-@node Extended Asm
-@section Assembler Instructions with C Expression Operands
-@cindex extended @code{asm}
-@cindex @code{asm} expressions
-@cindex assembler instructions
-@cindex registers
-
-In an assembler instruction using @code{asm}, you can specify the
-operands of the instruction using C expressions.  This means you need not
-guess which registers or memory locations contain the data you want
-to use.
-
-You must specify an assembler instruction template much like what
-appears in a machine description, plus an operand constraint string for
-each operand.
-
-For example, here is how to use the 68881's @code{fsinx} instruction:
-
-@smallexample
-asm ("fsinx %1,%0" : "=f" (result) : "f" (angle));
-@end smallexample
-
-@noindent
-Here @code{angle} is the C expression for the input operand while
-@code{result} is that of the output operand.  Each has @samp{"f"} as its
-operand constraint, saying that a floating-point register is required.
-The @samp{=} in @samp{=f} indicates that the operand is an output; all
-output operands' constraints must use @samp{=}.  The constraints use the
-same language used in the machine description (@pxref{Constraints}).
-
-Each operand is described by an operand-constraint string followed by
-the C expression in parentheses.  A colon separates the assembler
-template from the first output operand and another separates the last
-output operand from the first input, if any.  Commas separate the
-operands within each group.  The total number of operands is currently
-limited to 30; this limitation may be lifted in some future version of
-GCC@.
-
-If there are no output operands but there are input operands, you must
-place two consecutive colons surrounding the place where the output
-operands would go.
-
-As of GCC version 3.1, it is also possible to specify input and output
-operands using symbolic names which can be referenced within the
-assembler code.  These names are specified inside square brackets
-preceding the constraint string, and can be referenced inside the
-assembler code using @code{%[@var{name}]} instead of a percentage sign
-followed by the operand number.  Using named operands the above example
-could look like:
-
-@smallexample
-asm ("fsinx %[angle],%[output]"
-     : [output] "=f" (result)
-     : [angle] "f" (angle));
-@end smallexample
-
-@noindent
-Note that the symbolic operand names have no relation whatsoever to
-other C identifiers.  You may use any name you like, even those of
-existing C symbols, but you must ensure that no two operands within the same
-assembler construct use the same symbolic name.
-
-Output operand expressions must be lvalues; the compiler can check this.
-The input operands need not be lvalues.  The compiler cannot check
-whether the operands have data types that are reasonable for the
-instruction being executed.  It does not parse the assembler instruction
-template and does not know what it means or even whether it is valid
-assembler input.  The extended @code{asm} feature is most often used for
-machine instructions the compiler itself does not know exist.  If
-the output expression cannot be directly addressed (for example, it is a
-bit-field), your constraint must allow a register.  In that case, GCC
-uses the register as the output of the @code{asm}, and then stores
-that register into the output.
-
-The ordinary output operands must be write-only; GCC assumes that
-the values in these operands before the instruction are dead and need
-not be generated.  Extended asm supports input-output or read-write
-operands.  Use the constraint character @samp{+} to indicate such an
-operand and list it with the output operands.
-
-You may, as an alternative, logically split its function into two
-separate operands, one input operand and one write-only output
-operand.  The connection between them is expressed by constraints
-that say they need to be in the same location when the instruction
-executes.  You can use the same C expression for both operands, or
-different expressions.  For example, here we write the (fictitious)
-@samp{combine} instruction with @code{bar} as its read-only source
-operand and @code{foo} as its read-write destination:
-
-@smallexample
-asm ("combine %2,%0" : "=r" (foo) : "0" (foo), "g" (bar));
-@end smallexample
-
-@noindent
-The constraint @samp{"0"} for operand 1 says that it must occupy the
-same location as operand 0.  A number in constraint is allowed only in
-an input operand and it must refer to an output operand.
-
-Only a number in the constraint can guarantee that one operand is in
-the same place as another.  The mere fact that @code{foo} is the value
-of both operands is not enough to guarantee that they are in the
-same place in the generated assembler code.  The following does not
-work reliably:
-
-@smallexample
-asm ("combine %2,%0" : "=r" (foo) : "r" (foo), "g" (bar));
-@end smallexample
-
-Various optimizations or reloading could cause operands 0 and 1 to be in
-different registers; GCC knows no reason not to do so.  For example, the
-compiler might find a copy of the value of @code{foo} in one register and
-use it for operand 1, but generate the output operand 0 in a different
-register (copying it afterward to @code{foo}'s own address).  Of course,
-since the register for operand 1 is not even mentioned in the assembler
-code, the result will not work, but GCC can't tell that.
+@node Using Assembly Language with C
+@section How to Use Inline Assembly Language in C Code
  
-As of GCC version 3.1, one may write @code{[@var{name}]} instead of
-the operand number for a matching constraint.  For example:
-
-@smallexample
-asm ("cmoveq %1,%2,%[result]"
-     : [result] "=r"(result)
-     : "r" (test), "r"(new), "[result]"(old));
-@end smallexample
-
-Sometimes you need to make an @code{asm} operand be a specific register,
-but there's no matching constraint letter for that register @emph{by
-itself}.  To force the operand into that register, use a local variable
-for the operand and specify the register in the variable declaration.
-@xref{Explicit Reg Vars}.  Then for the @code{asm} operand, use any
-register constraint letter that matches the register:
-
-@smallexample
-register int *p1 asm ("r0") = @dots{};
-register int *p2 asm ("r1") = @dots{};
-register int *result asm ("r0");
-asm ("sysint" : "=r" (result) : "0" (p1), "r" (p2));
-@end smallexample
-
-@anchor{Example of asm with clobbered asm reg}
-In the above example, beware that a register that is call-clobbered by
-the target ABI will be overwritten by any function call in the
-assignment, including library calls for arithmetic operators.
-Also a register may be clobbered when generating some operations,
-like variable shift, memory copy or memory move on x86.
-Assuming it is a call-clobbered register, this may happen to @code{r0}
-above by the assignment to @code{p2}.  If you have to use such a
-register, use temporary variables for expressions between the register
-assignment and use:
-
-@smallexample
-int t1 = @dots{};
-register int *p1 asm ("r0") = @dots{};
-register int *p2 asm ("r1") = t1;
-register int *result asm ("r0");
-asm ("sysint" : "=r" (result) : "0" (p1), "r" (p2));
-@end smallexample
+GCC provides various extensions that allow you to embed assembler within 
+C code.
  
-Some instructions clobber specific hard registers.  To describe this,
-write a third colon after the input operands, followed by the names of
-the clobbered hard registers (given as strings).  Here is a realistic
-example for the VAX:
+@menu
+* Basic Asm::          Inline assembler with no operands.
+* Extended Asm::       Inline assembler with operands.
+* Constraints::        Constraints for @code{asm} operands.
+* Asm Labels::         Specifying the assembler name to use for a C symbol.
+* Explicit Reg Vars::  Defining variables residing in specified registers.
+* Size of an asm::     How GCC calculates the size of an @code{asm} block.
+@end menu
  
-@smallexample
-asm volatile ("movc3 %0,%1,%2"
-              : /* @r{no outputs} */
-              : "g" (from), "g" (to), "g" (count)
-              : "r0", "r1", "r2", "r3", "r4", "r5");
-@end smallexample
+@node Basic Asm
+@subsection Basic Asm --- Assembler Instructions with No Operands
+@cindex basic @code{asm}
+
+The @code{asm} keyword allows you to embed assembler instructions within 
+C code.
+
+@example
+asm [ volatile ] ( AssemblerInstructions )
+@end example
+
+To create headers compatible with ISO C, write @code{__asm__} instead of 
+@code{asm} (@pxref{Alternate Keywords}).
+
+By definition, a Basic @code{asm} statement is one with no operands. 
+@code{asm} statements that contain one or more colons (used to delineate 
+operands) are considered to be Extended (for example, @code{asm("int $3")} 
+is Basic, and @code{asm("int $3" : )} is Extended). @xref{Extended Asm}.
+
+@subsubheading Qualifiers
+@emph{volatile}
+@*
+This optional qualifier has no effect. All Basic @code{asm} blocks are 
+implicitly volatile.
+
+@subsubheading Parameters
+@emph{AssemblerInstructions}
+@*
+This is a literal string that specifies the assembler code. The string can 
+contain any instructions recognized by the assembler, including directives. 
+GCC does not parse the assembler instructions themselves and 
+does not know what they mean or even whether they are valid assembler input. 
+The compiler copies it verbatim to the assembly language output file, without 
+processing dialects or any of the "%" operators that are available with
+Extended @code{asm}. This results in minor differences between Basic 
+@code{asm} strings and Extended @code{asm} templates. For example, to refer to 
+registers you might use @samp{%%eax} in Extended @code{asm} and @samp{%eax} in Basic 
+@code{asm}.
+
+You may place multiple assembler instructions together in a single @code{asm} 
+string, separated by the characters normally used in assembly code for the 
+system. A combination that works in most places is a newline to break the 
+line, plus a tab character (written as "\n\t").
+Some assemblers allow semicolons as a line separator. However, 
+note that some assembler dialects use semicolons to start a comment. 
+
+Do not expect a sequence of @code{asm} statements to remain perfectly 
+consecutive after compilation. If certain instructions need to remain 
+consecutive in the output, put them in a single multi-instruction asm 
+statement. Note that GCC's optimizers can move @code{asm} statements 
+relative to other code, including across jumps.
+
+@code{asm} statements may not perform jumps into other @code{asm} statements. 
+GCC does not know about these jumps, and therefore cannot take 
+account of them when deciding how to optimize. Jumps from @code{asm} to C 
+labels are only supported in Extended @code{asm}.
+
+@subsubheading Remarks
+Using Extended @code{asm} will typically produce smaller, safer, and more 
+efficient code, and in most cases it is a better solution. When writing 
+inline assembly language outside of C functions, however, you must use Basic 
+@code{asm}. Extended @code{asm} statements have to be inside a C function.
+Functions declared with the @code{naked} attribute also require Basic 
+@code{asm} (@pxref{Function Attributes}).
+
+Under certain circumstances, GCC may duplicate (or remove duplicates of) your 
+assembly code when optimizing. This can lead to unexpected duplicate 
+symbol errors during compilation if your assembly code defines symbols or 
+labels.
+
+Safely accessing C data and calling functions from Basic @code{asm} is more 
+complex than it may appear. To access C data, it is better to use Extended 
+@code{asm}.
+
+Since GCC does not parse the AssemblerInstructions, it has no 
+visibility of any symbols it references. This may result in GCC discarding 
+those symbols as unreferenced.
+
+Unlike Extended @code{asm}, all Basic @code{asm} blocks are implicitly 
+volatile. @xref{Volatile}.  However, Basic @code{asm} blocks are not treated 
+as though they used a "memory" clobber (@pxref{Clobbers}).
+
+All Basic @code{asm} blocks use the assembler dialect specified by the 
+@option{-masm} command-line option. Basic @code{asm} provides no
+mechanism to provide different assembler strings for different dialects.
+
+Here is an example of Basic @code{asm} for i386:
+
+@example
+/* Note that this code will not compile with -masm=intel */
+#define DebugBreak() asm("int $3")
+@end example
  
-You may not write a clobber description in a way that overlaps with an
-input or output operand.  For example, you may not have an operand
-describing a register class with one member if you mention that register
-in the clobber list.  Variables declared to live in specific registers
-(@pxref{Explicit Reg Vars}), and used as asm input or output operands must
-have no part mentioned in the clobber description.
-There is no way for you to specify that an input
-operand is modified without also specifying it as an output
-operand.  Note that if all the output operands you specify are for this
-purpose (and hence unused), you then also need to specify
-@code{volatile} for the @code{asm} construct, as described below, to
-prevent GCC from deleting the @code{asm} statement as unused.
+@node Extended Asm
+@subsection Extended Asm --- Assembler Instructions with C Expression Operands
+@cindex @code{asm} keyword
+@cindex extended @code{asm}
+@cindex assembler instructions
  
-If you refer to a particular hardware register from the assembler code,
-you probably have to list the register after the third colon to
-tell the compiler the register's value is modified.  In some assemblers,
-the register names begin with @samp{%}; to produce one @samp{%} in the
-assembler code, you must write @samp{%%} in the input.
+The @code{asm} keyword allows you to embed assembler instructions within C 
+code. With Extended @code{asm} you can read and write C variables from 
+assembler and perform jumps from assembler code to C labels.
+
+@example
+@ifhtml
+asm [volatile] ( AssemblerTemplate : [OutputOperands] [ : [InputOperands] [ : [Clobbers] ] ] )
+
+asm [volatile] goto ( AssemblerTemplate : : [InputOperands] : [Clobbers] : GotoLabels )
+@end ifhtml
+@ifnothtml
+asm [volatile] ( AssemblerTemplate 
+                 : [OutputOperands] 
+                 [ : [InputOperands] 
+                 [ : [Clobbers] ] ])
+
+asm [volatile] goto ( AssemblerTemplate 
+                      : 
+                      : [InputOperands] 
+                      : [Clobbers] 
+                      : GotoLabels)
+@end ifnothtml
+@end example
+
+To create headers compatible with ISO C, write @code{__asm__} instead of 
+@code{asm} and @code{__volatile__} instead of @code{volatile} 
+(@pxref{Alternate Keywords}). There is no alternate for @code{goto}.
+
+By definition, Extended @code{asm} is an @code{asm} statement that contains 
+operands. To separate the classes of operands, you use colons. Basic 
+@code{asm} statements contain no colons. (So, for example, 
+@code{asm("int $3")} is Basic @code{asm}, and @code{asm("int $3" : )} is 
+Extended @code{asm}.  @xref{Basic Asm}.)
+
+@subsubheading Qualifiers
+@emph{volatile}
+@*
+The typical use of Extended @code{asm} statements is to manipulate input 
+values to produce output values. However, your @code{asm} statements may 
+also produce side effects. If so, you may need to use the @code{volatile} 
+qualifier to disable certain optimizations. @xref{Volatile}.
+
+@emph{goto}
+@*
+This qualifier informs the compiler that the @code{asm} statement may 
+perform a jump to one of the labels listed in the GotoLabels section. 
+@xref{GotoLabels}.
+
+@subsubheading Parameters
+@emph{AssemblerTemplate}
+@*
+This is a literal string that contains the assembler code. It is a 
+combination of fixed text and tokens that refer to the input, output, 
+and goto parameters. @xref{AssemblerTemplate}.
+
+@emph{OutputOperands}
+@*
+A comma-separated list of the C variables modified by the instructions in the 
+AssemblerTemplate. @xref{OutputOperands}.
+
+@emph{InputOperands}
+@*
+A comma-separated list of C expressions read by the instructions in the 
+AssemblerTemplate. @xref{InputOperands}.
+
+@emph{Clobbers}
+@*
+A comma-separated list of registers or other values changed by the 
+AssemblerTemplate, beyond those listed as outputs. @xref{Clobbers}.
+
+@emph{GotoLabels}
+@*
+When you are using the @code{goto} form of @code{asm}, this section contains 
+the list of all C labels to which the AssemblerTemplate may jump. 
+@xref{GotoLabels}.
+
+@subsubheading Remarks
+The @code{asm} statement allows you to include assembly instructions directly 
+within C code. This may help you to maximize performance in time-sensitive 
+code or to access assembly instructions that are not readily available to C 
+programs.
+
+Note that Extended @code{asm} statements must be inside a function. Only 
+Basic @code{asm} may be outside functions (@pxref{Basic Asm}).
+Functions declared with the @code{naked} attribute also require Basic 
+@code{asm} (@pxref{Function Attributes}).
+
+While the uses of @code{asm} are many and varied, it may help to think of an 
+@code{asm} statement as a series of low-level instructions that convert input 
+parameters to output parameters. So a simple (if not particularly useful) 
+example for i386 using @code{asm} might look like this:
+
+@example
+int src = 1;
+int dst;   
+
+asm ("mov %1, %0\n\t"
+    "add $1, %0"
+    : "=r" (dst) 
+    : "r" (src));
+
+printf("%d\n", dst);
+@end example
+
+This code will copy @var{src} to @var{dst} and add 1 to @var{dst}.
+
+@anchor{Volatile}
+@subsubsection Volatile
+@cindex volatile @code{asm}
+@cindex @code{asm} volatile
+
+GCC's optimizers sometimes discard @code{asm} statements if they determine 
+there is no need for the output variables. Also, the optimizers may move 
+code out of loops if they believe that the code will always return the same 
+result (i.e. none of its input values change between calls). Using the 
+@code{volatile} qualifier disables these optimizations. @code{asm} statements 
+that have no output operands are implicitly volatile.
+
+Examples:
+
+This i386 code demonstrates a case that does not use (or require) the 
+@code{volatile} qualifier. If it is performing assertion checking, this code 
+uses @code{asm} to perform the validation. Otherwise, @var{dwRes} is 
+unreferenced by any code. As a result, the optimizers can discard the 
+@code{asm} statement, which in turn removes the need for the entire 
+@code{DoCheck} routine. By omitting the @code{volatile} qualifier when it 
+isn't needed you allow the optimizers to produce the most efficient code 
+possible.
+
+@example
+void DoCheck(uint32_t dwSomeValue)
+@{
+   uint32_t dwRes;
  
-If your assembler instruction can alter the condition code register, add
-@samp{cc} to the list of clobbered registers.  GCC on some machines
-represents the condition codes as a specific hardware register;
-@samp{cc} serves to name this register.  On other machines, the
-condition code is handled differently, and specifying @samp{cc} has no
-effect.  But it is valid no matter what the machine.
+   // Assumes dwSomeValue is not zero.
+   asm ("bsfl %1,%0"
+     : "=r" (dwRes)
+     : "r" (dwSomeValue)
+     : "cc");
  
-If your assembler instructions access memory in an unpredictable
-fashion, add @samp{memory} to the list of clobbered registers.  This
-causes GCC to not keep memory values cached in registers across the
-assembler instruction and not optimize stores or loads to that memory.
-You also should add the @code{volatile} keyword if the memory
-affected is not listed in the inputs or outputs of the @code{asm}, as
-the @samp{memory} clobber does not count as a side-effect of the
-@code{asm}.  If you know how large the accessed memory is, you can add
-it as input or output but if this is not known, you should add
-@samp{memory}.  As an example, if you access ten bytes of a string, you
-can use a memory input like:
+   assert(dwRes > 3);
+@}
+@end example
  
-@smallexample
-@{"m"( (@{ struct @{ char x[10]; @} *p = (void *)ptr ; *p; @}) )@}.
-@end smallexample
+The next example shows a case where the optimizers can recognize that the input 
+(@var{dwSomeValue}) never changes during the execution of the function and can 
+therefore move the @code{asm} outside the loop to produce more efficient code. 
+Again, using @code{volatile} disables this type of optimization.
  
-Note that in the following example the memory input is necessary,
-otherwise GCC might optimize the store to @code{x} away:
-@smallexample
-int foo ()
+@example
+void do_print(uint32_t dwSomeValue)
  @{
-  int x = 42;
-  int *y = &x;
-  int result;
-  asm ("magic stuff accessing an 'int' pointed to by '%1'"
-       : "=&d" (r) : "a" (y), "m" (*y));
-  return result;
+   uint32_t dwRes;
+
+   for (uint32_t x=0; x < 5; x++)
+   @{
+      // Assumes dwSomeValue is not zero.
+      asm ("bsfl %1,%0"
+        : "=r" (dwRes)
+        : "r" (dwSomeValue)
+        : "cc");
+
+      printf("%u: %u %u\n", x, dwSomeValue, dwRes);
+   @}
  @}
-@end smallexample
-
-You can put multiple assembler instructions together in a single
-@code{asm} template, separated by the characters normally used in assembly
-code for the system.  A combination that works in most places is a newline
-to break the line, plus a tab character to move to the instruction field
-(written as @samp{\n\t}).  Sometimes semicolons can be used, if the
-assembler allows semicolons as a line-breaking character.  Note that some
-assembler dialects use semicolons to start a comment.
-The input operands are guaranteed not to use any of the clobbered
-registers, and neither do the output operands' addresses, so you can
-read and write the clobbered registers as many times as you like.  Here
-is an example of multiple instructions in a template; it assumes the
-subroutine @code{_foo} accepts arguments in registers 9 and 10:
-
-@smallexample
-asm ("movl %0,r9\n\tmovl %1,r10\n\tcall _foo"
-     : /* no outputs */
-     : "g" (from), "g" (to)
-     : "r9", "r10");
-@end smallexample
-
-Unless an output operand has the @samp{&} constraint modifier, GCC
-may allocate it in the same register as an unrelated input operand, on
-the assumption the inputs are consumed before the outputs are produced.
-This assumption may be false if the assembler code actually consists of
-more than one instruction.  In such a case, use @samp{&} for each output
-operand that may not overlap an input.  @xref{Modifiers}.
-
-If you want to test the condition code produced by an assembler
-instruction, you must include a branch and a label in the @code{asm}
-construct, as follows:
-
-@smallexample
-asm ("clr %0\n\tfrob %1\n\tbeq 0f\n\tmov #1,%0\n0:"
-     : "g" (result)
-     : "g" (input));
-@end smallexample
+@end example
+
+The following example demonstrates a case where you need to use the 
+@code{volatile} qualifier. It uses the i386 RDTSC instruction, which reads 
+the computer's time-stamp counter. Without the @code{volatile} qualifier, 
+the optimizers might assume that the @code{asm} block will always return the 
+same value and therefore optimize away the second call.
+
+@example
+uint64_t msr;
+
+asm volatile ( "rdtsc\n\t"    // Returns the time in EDX:EAX.
+        "shl $32, %%rdx\n\t"  // Shift the upper bits left.
+        "or %%rdx, %0"        // 'Or' in the lower bits.
+        : "=a" (msr)
+        : 
+        : "rdx");
+
+printf("msr: %llx\n", msr);
+
+// Do other work...
+
+// Reprint the timestamp
+asm volatile ( "rdtsc\n\t"    // Returns the time in EDX:EAX.
+        "shl $32, %%rdx\n\t"  // Shift the upper bits left.
+        "or %%rdx, %0"        // 'Or' in the lower bits.
+        : "=a" (msr)
+        : 
+        : "rdx");
+
+printf("msr: %llx\n", msr);
+@end example
+
+GCC's optimizers will not treat this code like the non-volatile code in the 
+earlier examples. They do not move it out of loops or omit it on the 
+assumption that the result from a previous call is still valid.
+
+Note that the compiler can move even volatile @code{asm} instructions relative 
+to other code, including across jump instructions. For example, on many 
+targets there is a system register that controls the rounding mode of 
+floating-point operations. Setting it with a volatile @code{asm}, as in the 
+following PowerPC example, will not work reliably.
+
+@example
+asm volatile("mtfsf 255, %0" : : "f" (fpenv));
+sum = x + y;
+@end example
+
+The compiler may move the addition back before the volatile @code{asm}. To 
+make it work as expected, add an artificial dependency to the @code{asm} by 
+referencing a variable in the subsequent code, for example: 
+
+@example
+asm volatile ("mtfsf 255,%1" : "=X" (sum) : "f" (fpenv));
+sum = x + y;
+@end example
+
+Under certain circumstances, GCC may duplicate (or remove duplicates of) your 
+assembly code when optimizing. This can lead to unexpected duplicate symbol 
+errors during compilation if your asm code defines symbols or labels. Using %= 
+(@pxref{AssemblerTemplate}) may help resolve this problem.
+
+@anchor{AssemblerTemplate}
+@subsubsection Assembler Template
+@cindex @code{asm} assembler template
+
+An assembler template is a literal string containing assembler instructions. 
+The compiler will replace any references to inputs, outputs, and goto labels 
+in the template, and then output the resulting string to the assembler. The 
+string can contain any instructions recognized by the assembler, including 
+directives. GCC does not parse the assembler instructions 
+themselves and does not know what they mean or even whether they are valid 
+assembler input. However, it does count the statements 
+(@pxref{Size of an asm}).
+
+You may place multiple assembler instructions together in a single @code{asm} 
+string, separated by the characters normally used in assembly code for the 
+system. A combination that works in most places is a newline to break the 
+line, plus a tab character to move to the instruction field (written as 
+"\n\t"). Some assemblers allow semicolons as a line separator. However, note 
+that some assembler dialects use semicolons to start a comment. 
+
+Do not expect a sequence of @code{asm} statements to remain perfectly 
+consecutive after compilation, even when you are using the @code{volatile} 
+qualifier. If certain instructions need to remain consecutive in the output, 
+put them in a single multi-instruction asm statement.
+
+Accessing data from C programs without using input/output operands (such as 
+by using global symbols directly from the assembler template) may not work as 
+expected. Similarly, calling functions directly from an assembler template 
+requires a detailed understanding of the target assembler and ABI.
+
+Since GCC does not parse the AssemblerTemplate, it has no visibility of any 
+symbols it references. This may result in GCC discarding those symbols as 
+unreferenced unless they are also listed as input, output, or goto operands.
+
+GCC can support multiple assembler dialects (for example, GCC for i386 
+supports "att" and "intel" dialects) for inline assembler. In builds that 
+support this capability, the @option{-masm} option controls which dialect 
+GCC uses as its default. The hardware-specific documentation for the 
+@option{-masm} option contains the list of supported dialects, as well as the 
+default dialect if the option is not specified. This information may be 
+important to understand, since assembler code that works correctly when 
+compiled using one dialect will likely fail if compiled using another.
+
+@subsubheading Using braces in @code{asm} templates
+
+If your code needs to support multiple assembler dialects (for example, if 
+you are writing public headers that need to support a variety of compilation 
+options), use constructs of this form:
+
+@example
+@{ dialect0 | dialect1 | dialect2... @}
+@end example
+
+This construct outputs 'dialect0' when using dialect #0 to compile the code, 
+'dialect1' for dialect #1, etc. If there are fewer alternatives within the 
+braces than the number of dialects the compiler supports, the construct 
+outputs nothing for any dialect that has no corresponding alternative.
+
+For example, if an i386 compiler supports two dialects (att, intel), an 
+assembler template such as this:
+
+@example
+"bt@{l %[Offset],%[Base] | %[Base],%[Offset]@}; jc %l2"
+@end example
+
+would produce the output:
+
+@example
+For att: "btl %[Offset],%[Base] ; jc %l2"
+For intel: "bt %[Base],%[Offset]; jc %l2"
+@end example
+
+Using that same compiler, this code:
+
+@example
+"xchg@{l@}\t@{%%@}ebx, %1"
+@end example
+
+would produce 
+
+@example
+For att: "xchgl\t%%ebx, %1"
+For intel: "xchg\tebx, %1"
+@end example
+
+There is no support for nesting dialect alternatives. Also, there is no 
+``escape'' for an open brace (@{), so do not use open braces in an Extended 
+@code{asm} template other than as a dialect indicator.
+
+@subsubheading Other format strings
+
+In addition to the tokens described by the input, output, and goto operands, 
+there are a few special cases:
  
-@noindent
-This assumes your assembler supports local labels, as the GNU assembler
-and most Unix assemblers do.
-
-Speaking of labels, jumps from one @code{asm} to another are not
-supported.  The compiler's optimizers do not know about these jumps, and
-therefore they cannot take account of them when deciding how to
-optimize.  @xref{Extended asm with goto}.
-
-@cindex macros containing @code{asm}
-Usually the most convenient way to use these @code{asm} instructions is to
-encapsulate them in macros that look like functions.  For example,
-
-@smallexample
-#define sin(x)       \
-(@{ double __value, __arg = (x);   \
-   asm ("fsinx %1,%0": "=f" (__value): "f" (__arg));  \
-   __value; @})
-@end smallexample
-
-@noindent
-Here the variable @code{__arg} is used to make sure that the instruction
-operates on a proper @code{double} value, and to accept only those
-arguments @code{x} that can convert automatically to a @code{double}.
-
-Another way to make sure the instruction operates on the correct data
-type is to use a cast in the @code{asm}.  This is different from using a
-variable @code{__arg} in that it converts more different types.  For
-example, if the desired type is @code{int}, casting the argument to
-@code{int} accepts a pointer with no complaint, while assigning the
-argument to an @code{int} variable named @code{__arg} warns about
-using a pointer unless the caller explicitly casts it.
-
-If an @code{asm} has output operands, GCC assumes for optimization
-purposes the instruction has no side effects except to change the output
-operands.  This does not mean instructions with a side effect cannot be
-used, but you must be careful, because the compiler may eliminate them
-if the output operands aren't used, or move them out of loops, or
-replace two with one if they constitute a common subexpression.  Also,
-if your instruction does have a side effect on a variable that otherwise
-appears not to change, the old value of the variable may be reused later
-if it happens to be found in a register.
-
-You can prevent an @code{asm} instruction from being deleted
-by writing the keyword @code{volatile} after
-the @code{asm}.  For example:
-
-@smallexample
-#define get_and_set_priority(new)              \
-(@{ int __old;                                  \
-   asm volatile ("get_and_set_priority %0, %1" \
-                 : "=g" (__old) : "g" (new));  \
-   __old; @})
-@end smallexample
-
-@noindent
-The @code{volatile} keyword indicates that the instruction has
-important side-effects.  GCC does not delete a volatile @code{asm} if
-it is reachable.  (The instruction can still be deleted if GCC can
-prove that control flow never reaches the location of the
-instruction.)  Note that even a volatile @code{asm} instruction
-can be moved relative to other code, including across jump
-instructions.  For example, on many targets there is a system
-register that can be set to control the rounding mode of
-floating-point operations.  You might try
-setting it with a volatile @code{asm}, like this PowerPC example:
-
-@smallexample
-       asm volatile("mtfsf 255,%0" : : "f" (fpenv));
-       sum = x + y;
-@end smallexample
-
-@noindent
-This does not work reliably, as the compiler may move the addition back
-before the volatile @code{asm}.  To make it work you need to add an
-artificial dependency to the @code{asm} referencing a variable in the code
-you don't want moved, for example:
+@itemize
+@item
+"%%" outputs a single "%" into the assembler code.
  
-@smallexample
-    asm volatile ("mtfsf 255,%1" : "=X"(sum): "f"(fpenv));
-    sum = x + y;
-@end smallexample
+@item
+"%=" outputs a number that is unique to each instance of the @code{asm} 
+statement in the entire compilation. This option is useful when creating local 
+labels and referring to them multiple times in a single template that 
+generates multiple assembler instructions. 
  
-Similarly, you can't expect a
-sequence of volatile @code{asm} instructions to remain perfectly
-consecutive.  If you want consecutive output, use a single @code{asm}.
-Also, GCC performs some optimizations across a volatile @code{asm}
-instruction; GCC does not ``forget everything'' when it encounters
-a volatile @code{asm} instruction the way some other compilers do.
+@end itemize
  
-An @code{asm} instruction without any output operands is treated
-identically to a volatile @code{asm} instruction.
+@anchor{OutputOperands}
+@subsubsection Output Operands
+@cindex @code{asm} output operands
+
+An @code{asm} statement has zero or more output operands indicating the names
+of C variables modified by the assembler code.
+
+In this i386 example, @var{old} (referred to in the template string as 
+@code{%0}) and @var{*Base} (as @code{%1}) are outputs and @var{Offset} 
+(@code{%2}) is an input:
+
+@example
+bool old;
+
+__asm__ ("btsl %2,%1\n\t" // Turn on zero-based bit #Offset in Base.
+         "sbb %0,%0"      // Use the CF to calculate old.
+   : "=r" (old), "+rm" (*Base)
+   : "Ir" (Offset)
+   : "cc");
+
+return old;
+@end example
+
+Operands use this format:
+
+@example
+[ [asmSymbolicName] ] "constraint" (cvariablename)
+@end example
+
+@emph{asmSymbolicName}
+@*
+When not using asmSymbolicNames, use the (zero-based) position of the operand 
+in the list of operands in the assembler template. For example if there are 
+three output operands, use @code{%0} in the template to refer to the first, 
+@code{%1} for the second, and @code{%2} for the third. When using an 
+asmSymbolicName, reference it by enclosing the name in square brackets 
+(e.g. @code{%[Value]}). The scope of the name is the @code{asm} statement 
+that contains the definition. Any valid C variable name is acceptable, 
+including names already defined in the surrounding code. No two operands 
+within the same @code{asm} statement can use the same symbolic name.
+
+@emph{constraint}
+@*
+Output constraints must begin with either @code{"="} (a variable overwriting an 
+existing value) or @code{"+"} (when reading and writing). When using 
+@code{"="}, do not assume the location will contain the existing value (except 
+when tying the variable to an input; @pxref{InputOperands,,Input Operands}).
+
+After the prefix, there must be one or more additional constraints 
+(@pxref{Constraints}) that describe where the value resides. Common 
+constraints include @code{"r"} for register and @code{"m"} for memory. 
+When you list more than one possible location (for example @code{"=rm"}), the 
+compiler chooses the most efficient one based on the current context. If you 
+list as many alternates as the @code{asm} statement allows, you will permit 
+the optimizers to produce the best possible code. If you must use a specific
+register, but your Machine Constraints do not provide sufficient 
+control to select the specific register you want, Local Reg Vars may provide 
+a solution (@pxref{Local Reg Vars}).
+
+@emph{cvariablename}
+@*
+Specifies the C variable name of the output (enclosed by parentheses). Accepts 
+any (non-constant) variable within scope.
+
+Remarks:
+
+The total number of input + output + goto operands has a limit of 30. Commas 
+separate the operands. When the compiler selects the registers to use to 
+represent the output operands, it will not use any of the clobbered registers 
+(@pxref{Clobbers}).
+
+Output operand expressions must be lvalues. The compiler cannot check whether 
+the operands have data types that are reasonable for the instruction being 
+executed. For output expressions that are not directly addressable (for 
+example a bit-field), the constraint must allow a register. In that case, GCC 
+uses the register as the output of the @code{asm}, and then stores that 
+register into the output. 
+
+Unless an output operand has the @samp{&} constraint modifier 
+(@pxref{Modifiers}), GCC may allocate it in the same register as an unrelated 
+input operand, on the assumption that the assembler code will consume its 
+inputs before producing outputs. This assumption may be false if the assembler 
+code actually consists of more than one instruction. In this case, use 
+@samp{&} on each output operand that must not overlap an input.
+
+The same problem can occur if one output parameter (@var{a}) allows a register 
+constraint and another output parameter (@var{b}) allows a memory constraint.
+The code generated by GCC to access the memory address in @var{b} can contain
+registers which @emph{might} be shared by @var{a}, and GCC considers those 
+registers to be inputs to the asm. As above, GCC assumes that such input
+registers are consumed before any outputs are written. This assumption may 
+result in incorrect behavior if the asm writes to @var{a} before using 
+@var{b}. Combining the @samp{&} modifier with the register constraint 
+ensures that modifying @var{a} will not affect what address is referenced by 
+@var{b}. Omitting the @samp{&} modifier means that the location of @var{b} 
+will be undefined if @var{a} is modified before using @var{b}.
+
+@code{asm} supports operand modifiers on operands (for example @code{%k2} 
+instead of simply @code{%2}). Typically these modifiers are hardware 
+dependent. The list of supported modifiers for i386 is found at 
+@ref{i386Operandmodifiers,i386 Operand modifiers}.
+
+If the C code that follows the @code{asm} makes no use of any of the output 
+operands, use @code{volatile} for the @code{asm} statement to prevent the 
+optimizers from discarding the @code{asm} statement as unneeded 
+(see @ref{Volatile}).
+
+Examples:
+
+This code makes no use of the optional asmSymbolicName. Therefore it 
+references the first output operand as @code{%0} (were there a second, it 
+would be @code{%1}, etc). The number of the first input operand is one greater 
+than that of the last output operand. In this i386 example, that makes 
+@var{Mask} @code{%1}:
+
+@example
+uint32_t Mask = 1234;
+uint32_t Index;
+
+  asm ("bsfl %1, %0"
+     : "=r" (Index)
+     : "r" (Mask)
+     : "cc");
+@end example
+
+That code overwrites the variable Index ("="), placing the value in a register 
+("r"). The generic "r" constraint instead of a constraint for a specific 
+register allows the compiler to pick the register to use, which can result 
+in more efficient code. This may not be possible if an assembler instruction 
+requires a specific register.
+
+The following i386 example uses the asmSymbolicName operand. It produces the 
+same result as the code above, but some may consider it more readable or more 
+maintainable since reordering index numbers is not necessary when adding or 
+removing operands. The names aIndex and aMask are only used to emphasize which 
+names get used where. It is acceptable to reuse the names Index and Mask.
+
+@example
+uint32_t Mask = 1234;
+uint32_t Index;
+
+  asm ("bsfl %[aMask], %[aIndex]"
+     : [aIndex] "=r" (Index)
+     : [aMask] "r" (Mask)
+     : "cc");
+@end example
+
+Here are some more examples of output operands.
+
+@example
+uint32_t c = 1;
+uint32_t d;
+uint32_t *e = &c;
+
+asm ("mov %[e], %[d]"
+   : [d] "=rm" (d)
+   : [e] "rm" (*e));
+@end example
+
+Here, @var{d} may either be in a register or in memory. Since the compiler 
+might already have the current value of the uint32_t pointed to by @var{e} 
+in a register, you can enable it to choose the best location
+for @var{d} by specifying both constraints.
+
+@anchor{InputOperands}
+@subsubsection Input Operands
+@cindex @code{asm} input operands
+@cindex @code{asm} expressions
  
-It is a natural idea to look for a way to give access to the condition
-code left by the assembler instruction.  However, when we attempted to
-implement this, we found no way to make it work reliably.  The problem
-is that output operands might need reloading, which result in
-additional following ``store'' instructions.  On most machines, these
-instructions alter the condition code before there is time to
-test it.  This problem doesn't arise for ordinary ``test'' and
-``compare'' instructions because they don't have any output operands.
+Input operands make inputs from C variables and expressions available to the 
+assembly code.
+
+Specify input operands by using the format:
+
+@example
+[ [asmSymbolicName] ] "constraint" (cexpression)
+@end example
+
+@emph{asmSymbolicName}
+@*
+When not using asmSymbolicNames, use the (zero-based) position of the operand 
+in the list of operands, including outputs, in the assembler template. For 
+example, if there are two output parameters and three inputs, @code{%2} refers 
+to the first input, @code{%3} to the second, and @code{%4} to the third.
+When using an asmSymbolicName, reference it by enclosing the name in square 
+brackets (e.g. @code{%[Value]}). The scope of the name is the @code{asm} 
+statement that contains the definition. Any valid C variable name is 
+acceptable, including names already defined in the surrounding code. No two 
+operands within the same @code{asm} statement can use the same symbolic name.
+
+@emph{constraint}
+@*
+Input constraints must be a string containing one or more constraints 
+(@pxref{Constraints}). When you give more than one possible constraint 
+(for example, @code{"irm"}), the compiler will choose the most efficient 
+method based on the current context. Input constraints may not begin with 
+either "=" or "+". If you must use a specific register, but your Machine
+Constraints do not provide sufficient control to select the specific 
+register you want, Local Reg Vars may provide a solution 
+(@pxref{Local Reg Vars}).
+
+Input constraints can also be digits (for example, @code{"0"}). This indicates 
+that the specified input will be in the same place as the output constraint 
+at the (zero-based) index in the output constraint list. When using 
+asmSymbolicNames for the output operands, you may use these names (enclosed 
+in brackets []) instead of digits.
+
+@emph{cexpression}
+@*
+This is the C variable or expression being passed to the @code{asm} statement 
+as input.
+
+When the compiler selects the registers to use to represent the input 
+operands, it will not use any of the clobbered registers (@pxref{Clobbers}).
+
+If there are no output operands but there are input operands, place two 
+consecutive colons where the output operands would go:
+
+@example
+__asm__ ("some instructions"
+   : /* No outputs. */
+   : "r" (Offset / 8));
+@end example
+
+@strong{Warning:} Do @emph{not} modify the contents of input-only operands 
+(except for inputs tied to outputs). The compiler assumes that on exit from 
+the @code{asm} statement these operands will contain the same values as they 
+had before executing the assembler. It is @emph{not} possible to use Clobbers 
+to inform the compiler that the values in these inputs are changing. One 
+common work-around is to tie the changing input variable to an output variable 
+that never gets used. Note, however, that if the code that follows the 
+@code{asm} statement makes no use of any of the output operands, the GCC 
+optimizers may discard the @code{asm} statement as unneeded 
+(see @ref{Volatile}).
+
+Remarks:
+
+The total number of input + output + goto operands has a limit of 30.
+
+@code{asm} supports operand modifiers on operands (for example @code{%k2} 
+instead of simply @code{%2}). Typically these modifiers are hardware 
+dependent. The list of supported modifiers for i386 is found at 
+@ref{i386Operandmodifiers,i386 Operand modifiers}.
+
+Examples:
+
+In this example using the fictitious @code{combine} instruction, the 
+constraint @code{"0"} for input operand 1 says that it must occupy the same 
+location as output operand 0. Only input operands may use numbers in 
+constraints, and they must each refer to an output operand. Only a number (or 
+the symbolic assembler name) in the constraint can guarantee that one operand 
+is in the same place as another. The mere fact that @var{foo} is the value of 
+both operands is not enough to guarantee that they are in the same place in 
+the generated assembler code.
+
+@example
+asm ("combine %2, %0" 
+   : "=r" (foo) 
+   : "0" (foo), "g" (bar));
+@end example
+
+Here is an example using symbolic names.
+
+@example
+asm ("cmoveq %1, %2, %[result]" 
+   : [result] "=r"(result) 
+   : "r" (test), "r" (new), "[result]" (old));
+@end example
+
+@anchor{Clobbers}
+@subsubsection Clobbers
+@cindex @code{asm} clobbers
+
+While the compiler is aware of changes to entries listed in the output 
+operands, the assembler code may modify more than just the outputs. For 
+example, calculations may require additional registers, or the processor may 
+overwrite a register as a side effect of a particular assembler instruction. 
+In order to inform the compiler of these changes, list them in the clobber 
+list. Clobber list items are either register names or the special clobbers 
+(listed below). Each clobber list item is enclosed in double quotes and 
+separated by commas.
+
+Clobber descriptions may not in any way overlap with an input or output 
+operand. For example, you may not have an operand describing a register class 
+with one member when listing that register in the clobber list. Variables 
+declared to live in specific registers (@pxref{Explicit Reg Vars}), and used 
+as @code{asm} input or output operands, must have no part mentioned in the 
+clobber description. In particular, there is no way to specify that input 
+operands get modified without also specifying them as output operands.
+
+When the compiler selects which registers to use to represent input and output 
+operands, it will not use any of the clobbered registers. As a result, 
+clobbered registers are available for any use in the assembler code.
+
+Here is a realistic example for the VAX showing the use of clobbered 
+registers: 
+
+@example
+asm volatile ("movc3 %0, %1, %2"
+                   : /* No outputs. */
+                   : "g" (from), "g" (to), "g" (count)
+                   : "r0", "r1", "r2", "r3", "r4", "r5");
+@end example
+
+Also, there are two special clobber arguments:
  
-For reasons similar to those described above, it is not possible to give
-an assembler instruction access to the condition code left by previous
-instructions.
+@enumerate
+@item
+The @code{"cc"} clobber indicates that the assembler code modifies the flags 
+register. On some machines, GCC represents the condition codes as a specific 
+hardware register; "cc" serves to name this register. On other machines, 
+condition code handling is different, and specifying "cc" has no effect. But 
+it is valid no matter what the machine.
  
-@anchor{Extended asm with goto}
-As of GCC version 4.5, @code{asm goto} may be used to have the assembly
-jump to one or more C labels.  In this form, a fifth section after the
-clobber list contains a list of all C labels to which the assembly may jump.
-Each label operand is implicitly self-named.  The @code{asm} is also assumed
-to fall through to the next statement.
+@item
+The "memory" clobber tells the compiler that the assembly code performs memory 
+reads or writes to items other than those listed in the input and output 
+operands (for example accessing the memory pointed to by one of the input 
+parameters). To ensure memory contains correct values, GCC may need to flush 
+specific register values to memory before executing the @code{asm}. Further, 
+the compiler will not assume that any values read from memory before an 
+@code{asm} will remain unchanged after that @code{asm}; it will reload them as 
+needed. This effectively forms a read/write memory barrier for the compiler.
+
+Note that this clobber does not prevent the @emph{processor} from doing 
+speculative reads past the @code{asm} statement. To prevent that, you need 
+processor-specific fence instructions.
+
+Flushing registers to memory has performance implications and may be an issue 
+for time-sensitive code. One trick to avoid this is available if the size of 
+the memory being accessed is known at compile time. For example, if accessing 
+ten bytes of a string, use a memory input like: 
+
+@code{@{"m"( (@{ struct @{ char x[10]; @} *p = (void *)ptr ; *p; @}) )@}}.
  
-This form of @code{asm} is restricted to not have outputs.  This is due
-to a internal restriction in the compiler that control transfer instructions
-cannot have outputs.  This restriction on @code{asm goto} may be lifted
-in some future version of the compiler.  In the meantime, @code{asm goto}
-may include a memory clobber, and so leave outputs in memory.
+@end enumerate
  
-@smallexample
+@anchor{GotoLabels}
+@subsubsection Goto Labels
+@cindex @code{asm} goto labels
+
+@code{asm goto} allows assembly code to jump to one or more C labels. The 
+GotoLabels section in an @code{asm goto} statement contains a comma-separated 
+list of all C labels to which the assembler code may jump. GCC assumes that 
+@code{asm} execution falls through to the next statement (if this is not the 
+case, consider using the @code{__builtin_unreachable} intrinsic after the 
+@code{asm} statement). Optimization of @code{asm goto} may be improved by 
+using the @code{hot} and @code{cold} label attributes (@pxref{Label 
+Attributes}). The total number of input + output + goto operands has 
+a limit of 30.
+
+An @code{asm goto} statement cannot have outputs (which means that the 
+statement is implicitly volatile). This is due to an internal restriction of 
+the compiler: control transfer instructions cannot have outputs. If the 
+assembler code does modify anything, use the "memory" clobber to force the 
+optimizers to flush all register values to memory, and reload them if 
+necessary, after the @code{asm} statement.
+
+To reference a label, prefix it with @code{%l} (that's a lowercase L) followed 
+by its (zero-based) position in GotoLabels plus the number of input 
+arguments.  For example, if the @code{asm} has three inputs and references two 
+labels, refer to the first label as @code{%l3} and the second as @code{%l4}.
+
+@code{asm} statements may not perform jumps into other @code{asm} statements. 
+GCC's optimizers do not know about these jumps; therefore they cannot take 
+account of them when deciding how to optimize.
+
+Example code for i386 might look like:
+
+@example
+asm goto (
+    "btl %1, %0\n\t"
+    "jc %l2"
+    : /* No outputs. */
+    : "r" (p1), "r" (p2) 
+    : "cc" 
+    : carry);
+
+return 0;
+
+carry:
+return 1;
+@end example
+
+The following example shows an @code{asm goto} that uses the memory clobber.
+
+@example
  int frob(int x)
  @{
    int y;
    asm goto ("frob %%r5, %1; jc %l[error]; mov (%2), %%r5"
-            : : "r"(x), "r"(&y) : "r5", "memory" : error);
+            : /* No outputs. */
+            : "r"(x), "r"(&y)
+            : "r5", "memory" 
+            : error);
    return y;
- error:
+error:
    return -1;
  @}
-@end smallexample
+@end example
  
-@noindent
-In this (inefficient) example, the @code{frob} instruction sets the
-carry bit to indicate an error.  The @code{jc} instruction detects
-this and branches to the @code{error} label.  Finally, the output
-of the @code{frob} instruction (@code{%r5}) is stored into the memory
-for variable @code{y}, which is later read by the @code{return} statement.
+@anchor{i386Operandmodifiers}
+@subsubsection i386 Operand modifiers
  
-@smallexample
-void doit(void)
-@{
-  int i = 0;
-  asm goto ("mfsr %%r1, 123; jmp %%r1;"
-            ".pushsection doit_table;"
-            ".long %l0, %l1, %l2, %l3;"
-            ".popsection"
-            : : : "r1" : label1, label2, label3, label4);
-  __builtin_unreachable ();
-
- label1:
-  f1();
-  return;
- label2:
-  f2();
-  return;
- label3:
-  i = 1;
- label4:
-  f3(i);
-@}
-@end smallexample
+Input, output, and goto operands for extended @code{asm} statements can use 
+modifiers to affect the code output to the assembler. For example, the 
+following code uses the "h" and "b" modifiers for i386:
  
-@noindent
-In this (also inefficient) example, the @code{mfsr} instruction reads
-an address from some out-of-band machine register, and the following
-@code{jmp} instruction branches to that address.  The address read by
-the @code{mfsr} instruction is assumed to have been previously set via
-some application-specific mechanism to be one of the four values stored
-in the @code{doit_table} section.  Finally, the @code{asm} is followed
-by a call to @code{__builtin_unreachable} to indicate that the @code{asm}
-does not in fact fall through.
-
-@smallexample
-#define TRACE1(NUM)                         \
-  do @{                                      \
-    asm goto ("0: nop;"                     \
-              ".pushsection trace_table;"   \
-              ".long 0b, %l0;"              \
-              ".popsection"                 \
-              : : : : trace#NUM);           \
-    if (0) @{ trace#NUM: trace(); @}          \
-  @} while (0)
-#define TRACE  TRACE1(__COUNTER__)
-@end smallexample
+@example
+uint16_t  num;
+asm volatile ("xchg %h0, %b0" : "+a" (num) );
+@end example
  
-@noindent
-In this example (which in fact inspired the @code{asm goto} feature)
-we want on rare occasions to call the @code{trace} function; on other
-occasions we'd like to keep the overhead to the absolute minimum.
-The normal code path consists of a single @code{nop} instruction.
-However, we record the address of this @code{nop} together with the
-address of a label that calls the @code{trace} function.  This allows
-the @code{nop} instruction to be patched at run time to be an
-unconditional branch to the stored label.  It is assumed that an
-optimizing compiler moves the labeled block out of line, to
-optimize the fall through path from the @code{asm}.
-
-If you are writing a header file that should be includable in ISO C
-programs, write @code{__asm__} instead of @code{asm}.  @xref{Alternate
-Keywords}.
+These modifiers generate this assembler code:
  
-@subsection Size of an @code{asm}
+@example
+xchg %ah, %al
+@end example
  
-Some targets require that GCC track the size of each instruction used in
-order to generate correct code.  Because the final length of an
-@code{asm} is only known by the assembler, GCC must make an estimate as
-to how big it will be.  The estimate is formed by counting the number of
-statements in the pattern of the @code{asm} and multiplying that by the
-length of the longest instruction on that processor.  Statements in the
-@code{asm} are identified by newline characters and whatever statement
-separator characters are supported by the assembler; on most processors
-this is the @samp{;} character.
-
-Normally, GCC's estimate is perfectly adequate to ensure that correct
-code is generated, but it is possible to confuse the compiler if you use
-pseudo instructions or assembler macros that expand into multiple real
-instructions or if you use assembler directives that expand to more
-space in the object file than is needed for a single instruction.
-If this happens then the assembler produces a diagnostic saying that
-a label is unreachable.
+The rest of this discussion uses the following code for illustrative purposes.
+
+@example
+int main()
+@{
+   int iInt = 1;
+
+top:
+
+   asm volatile goto ("some assembler instructions here"
+   : /* No outputs. */
+   : "q" (iInt), "X" (sizeof(unsigned char) + 1)
+   : /* No clobbers. */
+   : top);
+@}
+@end example
+
+With no modifiers, this is what the output from the operands would be for the 
+att and intel dialects of assembler:
+
+@multitable {Operand} {masm=att} {OFFSET FLAT:.L2}
+@headitem Operand @tab masm=att @tab masm=intel
+@item @code{%0}
+@tab @code{%eax}
+@tab @code{eax}
+@item @code{%1}
+@tab @code{$2}
+@tab @code{2}
+@item @code{%2}
+@tab @code{$.L2}
+@tab @code{OFFSET FLAT:.L2}
+@end multitable
+
+The table below shows the list of supported modifiers and their effects.
+
+@multitable {Modifier} {Print the opcode suffix for the size of th} {Operand} {masm=att} {masm=intel}
+@headitem Modifier @tab Description @tab Operand @tab @option{masm=att} @tab @option{masm=intel}
+@item @code{z}
+@tab Print the opcode suffix for the size of the current integer operand (one of @code{b}/@code{w}/@code{l}/@code{q}).
+@tab @code{%z0}
+@tab @code{l}
+@tab 
+@item @code{b}
+@tab Print the QImode name of the register.
+@tab @code{%b0}
+@tab @code{%al}
+@tab @code{al}
+@item @code{h}
+@tab Print the QImode name for a ``high'' register.
+@tab @code{%h0}
+@tab @code{%ah}
+@tab @code{ah}
+@item @code{w}
+@tab Print the HImode name of the register.
+@tab @code{%w0}
+@tab @code{%ax}
+@tab @code{ax}
+@item @code{k}
+@tab Print the SImode name of the register.
+@tab @code{%k0}
+@tab @code{%eax}
+@tab @code{eax}
+@item @code{q}
+@tab Print the DImode name of the register.
+@tab @code{%q0}
+@tab @code{%rax}
+@tab @code{rax}
+@item @code{l}
+@tab Print the label name with no punctuation.
+@tab @code{%l2}
+@tab @code{.L2}
+@tab @code{.L2}
+@item @code{c}
+@tab Require a constant operand and print the constant expression with no punctuation.
+@tab @code{%c1}
+@tab @code{2}
+@tab @code{2}
+@end multitable
  
-@subsection i386 floating-point asm operands
+@anchor{i386floatingpointasmoperands}
+@subsubsection i386 floating-point asm operands
  
  On i386 targets, there are several rules on the usage of stack-like registers
  in the operands of an @code{asm}.  These rules apply only to the operands
@@ -6457,10 +7399,12 @@ for the compiler to know that @code{fyl2xp1} pops both inputs.
  asm ("fyl2xp1" : "=t" (result) : "0" (x), "u" (y) : "st(1)");
  @end smallexample
  
+@lowersections
  @include md.texi
+@raisesections
  
  @node Asm Labels
-@section Controlling Names Used in Assembler Code
+@subsection Controlling Names Used in Assembler Code
  @cindex assembler names for identifiers
  @cindex names used in assembler code
  @cindex identifiers, names in assembler code
@@ -6508,7 +7452,7 @@ does not as yet have the ability to store static variables in registers.
  Perhaps that will be added.
  
  @node Explicit Reg Vars
-@section Variables in Specified Registers
+@subsection Variables in Specified Registers
  @cindex explicit register variables
  @cindex variables in specified registers
  @cindex specified registers
@@ -6548,7 +7492,7 @@ specified for that operand in the @code{asm}.)
  @end menu
  
  @node Global Reg Vars
-@subsection Defining Global Register Variables
+@subsubsection Defining Global Register Variables
  @cindex global register variables
  @cindex registers, global variables in
  
@@ -6645,7 +7589,7 @@ On the 68000, a2 @dots{} a5 should be suitable, as should d2 @dots{} d7.
  Of course, it does not do to use more than a few of those.
  
  @node Local Reg Vars
-@subsection Specifying Registers for Local Variables
+@subsubsection Specifying Registers for Local Variables
  @cindex local variables, specifying registers
  @cindex specifying registers for local variables
  @cindex registers for local variables
@@ -6688,21 +7632,59 @@ Stores into local register variables may be deleted when they appear to be dead
  according to dataflow analysis.  References to local register variables may
  be deleted or moved or simplified.
  
-As for global register variables, it's recommended that you choose a
+As with global register variables, it is recommended that you choose a
  register that is normally saved and restored by function calls on
-your machine, so that library routines will not clobber it.  A common
-pitfall is to initialize multiple call-clobbered registers with
-arbitrary expressions, where a function call or library call for an
-arithmetic operator overwrites a register value from a previous
-assignment, for example @code{r0} below:
+your machine, so that library routines will not clobber it.  
+
+Sometimes when writing inline @code{asm} code, you need to make an operand be a 
+specific register, but there's no matching constraint letter for that 
+register. To force the operand into that register, create a local variable 
+and specify the register in the variable's declaration. Then use the local 
+variable for the asm operand and specify any constraint letter that matches 
+the register:
+
  @smallexample
  register int *p1 asm ("r0") = @dots{};
  register int *p2 asm ("r1") = @dots{};
+register int *result asm ("r0");
+asm ("sysint" : "=r" (result) : "0" (p1), "r" (p2));
  @end smallexample
  
-@noindent
-In those cases, a solution is to use a temporary variable for
-each arbitrary expression.   @xref{Example of asm with clobbered asm reg}.
+@emph{Warning:} In the above example, be aware that a register (for example r0) can be 
+call-clobbered by subsequent code, including function calls and library calls 
+for arithmetic operators on other variables (for example the initialization 
+of p2). In this case, use temporary variables for expressions between the 
+register assignments:
+
+@smallexample
+int t1 = @dots{};
+register int *p1 asm ("r0") = @dots{};
+register int *p2 asm ("r1") = t1;
+register int *result asm ("r0");
+asm ("sysint" : "=r" (result) : "0" (p1), "r" (p2));
+@end smallexample
+
+@node Size of an asm
+@subsection Size of an @code{asm}
+
+Some targets require that GCC track the size of each instruction used
+in order to generate correct code.  Because the final length of the
+code produced by an @code{asm} statement is only known by the
+assembler, GCC must make an estimate as to how big it will be.  It
+does this by counting the number of instructions in the pattern of the
+@code{asm} and multiplying that by the length of the longest
+instruction supported by that processor.  (When working out the number
+of instructions, it assumes that any occurrence of a newline or of
+whatever statement separator character is supported by the assembler ---
+typically @samp{;} --- indicates the end of an instruction.)
+
+Normally, GCC's estimate is adequate to ensure that correct
+code is generated, but it is possible to confuse the compiler if you use
+pseudo instructions or assembler macros that expand into multiple real
+instructions, or if you use assembler directives that expand to more
+space in the object file than is needed for a single instruction.
+If this happens then the assembler may produce a diagnostic saying that
+a label is unreachable.
  
  @node Alternate Keywords
  @section Alternate Keywords
@@ -7030,6 +8012,26 @@ c = a >  b;     /* The result would be @{0, 0,-1, 0@}  */
  c = a == b;     /* The result would be @{0,-1, 0,-1@}  */
  @end smallexample
  
+In C++, the ternary operator @code{?:} is available. @code{a?b:c}, where
+@code{b} and @code{c} are vectors of the same type and @code{a} is an
+integer vector with the same number of elements of the same size as @code{b}
+and @code{c}, computes all three arguments and creates a vector
+@code{@{a[0]?b[0]:c[0], a[1]?b[1]:c[1], @dots{}@}}.  Note that unlike in
+OpenCL, @code{a} is thus interpreted as @code{a != 0} and not @code{a < 0}.
+As in the case of binary operations, this syntax is also accepted when
+one of @code{b} or @code{c} is a scalar that is then transformed into a
+vector. If both @code{b} and @code{c} are scalars and the type of
+@code{true?b:c} has the same size as the element type of @code{a}, then
+@code{b} and @code{c} are converted to a vector type whose elements have
+this type and with the same number of elements as @code{a}.
+
+In C++, the logic operators @code{!, &&, ||} are available for vectors.
+@code{!v} is equivalent to @code{v == 0}, @code{a && b} is equivalent to
+@code{a!=0 & b!=0} and @code{a || b} is equivalent to @code{a!=0 | b!=0}.
+For mixed operations between a scalar @code{s} and a vector @code{v},
+@code{s && v} is equivalent to @code{s?v!=0:0} (the evaluation is
+short-circuit) and @code{v && s} is equivalent to @code{v!=0 & (s?-1:0)}.
+
  Vector shuffling is available using functions
  @code{__builtin_shuffle (vec, mask)} and
  @code{__builtin_shuffle (vec0, vec1, mask)}.
@@ -7508,6 +8510,65 @@ alignment.  A value of 0 indicates typical alignment should be used.  The
  compiler may also ignore this parameter.
  @end deftypefn
  
+@node Integer Overflow Builtins
+@section Built-in Functions to Perform Arithmetic with Overflow Checking
+
+The following built-in functions allow performing simple arithmetic operations
+together with checking whether the operations overflowed.
+
+@deftypefn {Built-in Function} bool __builtin_add_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)
+@deftypefnx {Built-in Function} bool __builtin_sadd_overflow (int a, int b, int *res)
+@deftypefnx {Built-in Function} bool __builtin_saddl_overflow (long int a, long int b, long int *res)
+@deftypefnx {Built-in Function} bool __builtin_saddll_overflow (long long int a, long long int b, long long int *res)
+@deftypefnx {Built-in Function} bool __builtin_uadd_overflow (unsigned int a, unsigned int b, unsigned int *res)
+@deftypefnx {Built-in Function} bool __builtin_uaddl_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)
+@deftypefnx {Built-in Function} bool __builtin_uaddll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)
+
+These built-in functions promote the first two operands into infinite precision signed
+type and perform addition on those promoted operands.  The result is then
+cast to the type the third pointer argument points to and stored there.
+If the stored result is equal to the infinite precision result, the built-in
+functions return false, otherwise they return true.  As the addition is
+performed in infinite signed precision, these built-in functions have fully defined
+behavior for all argument values.
+
+The first built-in function allows arbitrary integral types for operands and
+the result type must be a pointer to some integer type; the rest of the built-in
+functions have explicit integer types.
+
+The compiler will attempt to use hardware instructions to implement
+these built-in functions where possible, like conditional jump on overflow
+after addition, conditional jump on carry etc.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} bool __builtin_sub_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)
+@deftypefnx {Built-in Function} bool __builtin_ssub_overflow (int a, int b, int *res)
+@deftypefnx {Built-in Function} bool __builtin_ssubl_overflow (long int a, long int b, long int *res)
+@deftypefnx {Built-in Function} bool __builtin_ssubll_overflow (long long int a, long long int b, long long int *res)
+@deftypefnx {Built-in Function} bool __builtin_usub_overflow (unsigned int a, unsigned int b, unsigned int *res)
+@deftypefnx {Built-in Function} bool __builtin_usubl_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)
+@deftypefnx {Built-in Function} bool __builtin_usubll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)
+
+These built-in functions are similar to the add overflow checking built-in
+functions above, except they perform subtraction, subtract the second argument
+from the first one, instead of addition.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} bool __builtin_mul_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)
+@deftypefnx {Built-in Function} bool __builtin_smul_overflow (int a, int b, int *res)
+@deftypefnx {Built-in Function} bool __builtin_smull_overflow (long int a, long int b, long int *res)
+@deftypefnx {Built-in Function} bool __builtin_smulll_overflow (long long int a, long long int b, long long int *res)
+@deftypefnx {Built-in Function} bool __builtin_umul_overflow (unsigned int a, unsigned int b, unsigned int *res)
+@deftypefnx {Built-in Function} bool __builtin_umull_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)
+@deftypefnx {Built-in Function} bool __builtin_umulll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)
+
+These built-in functions are similar to the add overflow checking built-in
+functions above, except they perform multiplication, instead of addition.
+
+@end deftypefn
+
  @node x86 specific memory model extensions for transactional memory
  @section x86 specific memory model extensions for transactional memory
  
@@ -7674,9 +8735,206 @@ format string @var{fmt}.  If the compiler is able to optimize them to
  @code{fputc} etc.@: functions, it does, otherwise the checking function
  is called and the @var{flag} argument passed to it.
  
+@node Pointer Bounds Checker builtins
+@section Pointer Bounds Checker Built-in Functions
+@findex __builtin___bnd_set_ptr_bounds
+@findex __builtin___bnd_narrow_ptr_bounds
+@findex __builtin___bnd_copy_ptr_bounds
+@findex __builtin___bnd_init_ptr_bounds
+@findex __builtin___bnd_null_ptr_bounds
+@findex __builtin___bnd_store_ptr_bounds
+@findex __builtin___bnd_chk_ptr_lbounds
+@findex __builtin___bnd_chk_ptr_ubounds
+@findex __builtin___bnd_chk_ptr_bounds
+@findex __builtin___bnd_get_ptr_lbound
+@findex __builtin___bnd_get_ptr_ubound
+
+GCC provides a set of built-in functions to control Pointer Bounds Checker
+instrumentation.  Note that all Pointer Bounds Checker builtins can be used
+even if you compile with Pointer Bounds Checker off.  The behavior of the
+builtins may differ in such a case, as documented below.
+
+@deftypefn {Built-in Function} void * __builtin___bnd_set_ptr_bounds (const void * @var{q}, size_t @var{size})
+
+This built-in function returns a new pointer with the value of @var{q}, and
+associates it with the bounds [@var{q}, @var{q}+@var{size}-1].  With Pointer
+Bounds Checker off, the built-in function just returns the first argument.
+
+@smallexample
+extern void *__wrap_malloc (size_t n)
+@{
+  void *p = (void *)__real_malloc (n);
+  if (!p) return __builtin___bnd_null_ptr_bounds (p);
+  return __builtin___bnd_set_ptr_bounds (p, n);
+@}
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void * __builtin___bnd_narrow_ptr_bounds (const void * @var{p}, const void * @var{q}, size_t  @var{size})
+
+This built-in function returns a new pointer with the value of @var{p}
+and associates it with the narrowed bounds formed by the intersection
+of the bounds associated with @var{q} and the bounds
+[@var{p}, @var{p} + @var{size} - 1].
+With Pointer Bounds Checker off, the built-in function just returns the first
+argument.
+
+@smallexample
+void init_objects (object *objs, size_t size)
+@{
+  size_t i;
+  /* Initialize objects one-by-one passing pointers with bounds of an object,
+     not the full array of objects.  */
+  for (i = 0; i < size; i++)
+    init_object (__builtin___bnd_narrow_ptr_bounds (objs + i, objs, sizeof(object)));
+@}
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void * __builtin___bnd_copy_ptr_bounds (const void * @var{q}, const void * @var{r})
+
+This built-in function returns a new pointer with the value of @var{q},
+and associates it with the bounds already associated with pointer @var{r}.
+With Pointer Bounds Checker off, the built-in function just returns the first
+argument.
+
+@smallexample
+/* Here is a way to get pointer to object's field but
+   still with the full object's bounds.  */
+int *field_ptr = __builtin___bnd_copy_ptr_bounds (&objptr->int_field, objptr);
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void * __builtin___bnd_init_ptr_bounds (const void * @var{q})
+
+This built-in function returns a new pointer with the value of @var{q}, and
+associates it with INIT (allowing full memory access) bounds.  With Pointer
+Bounds Checker off, the built-in function just returns the first argument.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void * __builtin___bnd_null_ptr_bounds (const void * @var{q})
+
+This built-in function returns a new pointer with the value of @var{q}, and
+associates it with NULL (allowing no memory access) bounds.  With Pointer
+Bounds Checker off, the built-in function just returns the first argument.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin___bnd_store_ptr_bounds (const void ** @var{ptr_addr}, const void * @var{ptr_val})
+
+This built-in function stores the bounds associated with pointer @var{ptr_val}
+and location @var{ptr_addr} into Bounds Table.  This can be useful to propagate
+bounds from legacy code without touching the associated pointer's memory when
+pointers were copied as integers.  With Pointer Bounds Checker off built-in
+function call is ignored.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin___bnd_chk_ptr_lbounds (const void * @var{q})
+
+This built-in function checks if the pointer @var{q} is within the lower
+bound of its associated bounds.  With Pointer Bounds Checker off built-in
+function call is ignored.
+
+@smallexample
+extern void *__wrap_memset (void *dst, int c, size_t len)
+@{
+  if (len > 0)
+    @{
+      __builtin___bnd_chk_ptr_lbounds (dst);
+      __builtin___bnd_chk_ptr_ubounds ((char *)dst + len - 1);
+      __real_memset (dst, c, len);
+    @}
+  return dst;
+@}
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin___bnd_chk_ptr_ubounds (const void * @var{q})
+
+This built-in function checks if the pointer @var{q} is within the upper
+bound of its associated bounds.  With Pointer Bounds Checker off built-in
+function call is ignored.
+
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin___bnd_chk_ptr_bounds (const void * @var{q}, size_t @var{size})
+
+This built-in function checks if [@var{q}, @var{q} + @var{size} - 1] is within
+the lower and upper bounds associated with @var{q}.  With Pointer Bounds Checker
+off built-in function call is ignored.
+
+@smallexample
+extern void *__wrap_memcpy (void *dst, const void *src, size_t n)
+@{
+  if (n > 0)
+    @{
+      __builtin___bnd_chk_ptr_bounds (dst, n);
+      __builtin___bnd_chk_ptr_bounds (src, n);
+      __real_memcpy (dst, src, n);
+    @}
+  return dst;
+@}
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} const void * __builtin___bnd_get_ptr_lbound (const void * @var{q})
+
+This built-in function returns the lower bound (which is a pointer) associated
+with the pointer @var{q}.  This is at least useful for debugging using printf.
+With Pointer Bounds Checker off built-in function returns 0.
+
+@smallexample
+void *lb = __builtin___bnd_get_ptr_lbound (q);
+void *ub = __builtin___bnd_get_ptr_ubound (q);
+printf ("q = %p  lb(q) = %p  ub(q) = %p", q, lb, ub);
+@end smallexample
+
+@end deftypefn
+
+@deftypefn {Built-in Function} const void * __builtin___bnd_get_ptr_ubound (const void * @var{q})
+
+This built-in function returns the upper bound (which is a pointer) associated
+with the pointer @var{q}.  With Pointer Bounds Checker off built-in function
+returns -1.
+
+@end deftypefn
+
+@node Cilk Plus Builtins
+@section Cilk Plus C/C++ Language Extension Built-in Functions
+
+GCC provides support for the following built-in reduction functions if Cilk Plus
+is enabled. Cilk Plus can be enabled using the @option{-fcilkplus} flag.
+
+@itemize @bullet
+@item __sec_implicit_index
+@item __sec_reduce
+@item __sec_reduce_add
+@item __sec_reduce_all_nonzero
+@item __sec_reduce_all_zero
+@item __sec_reduce_any_nonzero
+@item __sec_reduce_any_zero
+@item __sec_reduce_max
+@item __sec_reduce_min
+@item __sec_reduce_max_ind
+@item __sec_reduce_min_ind
+@item __sec_reduce_mul
+@item __sec_reduce_mutating
+@end itemize
+
+Further details and examples about these built-in functions are described 
+in the Cilk Plus language manual which can be found at 
+@uref{http://www.cilkplus.org}.
+
  @node Other Builtins
  @section Other Built-in Functions Provided by GCC
  @cindex built-in functions
+@findex __builtin_call_with_static_chain
  @findex __builtin_fpclassify
  @findex __builtin_isfinite
  @findex __builtin_isnormal
@@ -8265,6 +9523,18 @@ depending on the arguments' types.  For example:
  
  @end deftypefn
  
+@deftypefn {Built-in Function} @var{type} __builtin_call_with_static_chain (@var{call_exp}, @var{pointer_exp})
+
+The @var{call_exp} expression must be a function call, and the
+@var{pointer_exp} expression must be a pointer.  The @var{pointer_exp}
+is passed to the function call in the target's static chain location.
+The result of the builtin is the result of the function call.
+
+@emph{Note:} This builtin is only available for C@.
+This builtin can be used to call Go closures from C.
+
+@end deftypefn
+
  @deftypefn {Built-in Function} @var{type} __builtin_choose_expr (@var{const_exp}, @var{exp1}, @var{exp2})
  
  You can use the built-in function @code{__builtin_choose_expr} to
@@ -8497,16 +9767,20 @@ means that the compiler can assume for @code{x}, set to @code{arg}, that
  @deftypefn {Built-in Function} int __builtin_LINE ()
  This function is the equivalent to the preprocessor @code{__LINE__}
  macro and returns the line number of the invocation of the built-in.
+In a C++ default argument for a function @var{F}, it gets the line number of
+the call to @var{F}.
  @end deftypefn
  
-@deftypefn {Built-in Function} int __builtin_FUNCTION ()
+@deftypefn {Built-in Function} {const char *} __builtin_FUNCTION ()
  This function is the equivalent to the preprocessor @code{__FUNCTION__}
  macro and returns the function name the invocation of the built-in is in.
  @end deftypefn
  
-@deftypefn {Built-in Function} int __builtin_FILE ()
+@deftypefn {Built-in Function} {const char *} __builtin_FILE ()
  This function is the equivalent to the preprocessor @code{__FILE__}
  macro and returns the file name the invocation of the built-in is in.
+In a C++ default argument for a function @var{F}, it gets the file name of
+the call to @var{F}.
  @end deftypefn
  
  @deftypefn {Built-in Function} void __builtin___clear_cache (char *@var{begin}, char *@var{end})
@@ -8618,8 +9892,9 @@ type is @code{long double}.
  @end deftypefn
  
  @deftypefn {Built-in Function} int __builtin_isinf_sign (...)
-Similar to @code{isinf}, except the return value is negative for
-an argument of @code{-Inf}.  Note while the parameter list is an
+Similar to @code{isinf}, except the return value is -1 for
+an argument of @code{-Inf} and 1 for an argument of @code{+Inf}.
+Note while the parameter list is an
  ellipsis, this function only accepts exactly one floating-point
  argument.  GCC treats this parameter as type-generic, which means it
  does not do default promotion from float to double.
@@ -8676,7 +9951,7 @@ Similar to @code{__builtin_nans}, except the return type is @code{float}.
  Similar to @code{__builtin_nans}, except the return type is @code{long double}.
  @end deftypefn
  
-@deftypefn {Built-in Function} int __builtin_ffs (unsigned int x)
+@deftypefn {Built-in Function} int __builtin_ffs (int x)
  Returns one plus the index of the least significant 1-bit of @var{x}, or
  if @var{x} is zero, returns zero.
  @end deftypefn
@@ -8706,9 +9981,9 @@ Returns the parity of @var{x}, i.e.@: the number of 1-bits in @var{x}
  modulo 2.
  @end deftypefn
  
-@deftypefn {Built-in Function} int __builtin_ffsl (unsigned long)
+@deftypefn {Built-in Function} int __builtin_ffsl (long)
  Similar to @code{__builtin_ffs}, except the argument type is
-@code{unsigned long}.
+@code{long}.
  @end deftypefn
  
  @deftypefn {Built-in Function} int __builtin_clzl (unsigned long)
@@ -8736,9 +10011,9 @@ Similar to @code{__builtin_parity}, except the argument type is
  @code{unsigned long}.
  @end deftypefn
  
-@deftypefn {Built-in Function} int __builtin_ffsll (unsigned long long)
+@deftypefn {Built-in Function} int __builtin_ffsll (long long)
  Similar to @code{__builtin_ffs}, except the argument type is
-@code{unsigned long long}.
+@code{long long}.
  @end deftypefn
  
  @deftypefn {Built-in Function} int __builtin_clzll (unsigned long long)
@@ -8797,32 +10072,6 @@ Similar to @code{__builtin_bswap32}, except the argument and return types
  are 64 bit.
  @end deftypefn
  
-@node Cilk Plus Builtins
-@section Cilk Plus C/C++ language extension Built-in Functions.
-
-GCC provides support for the following built-in reduction funtions if Cilk Plus
-is enabled. Cilk Plus can be enabled using the @option{-fcilkplus} flag.
-
-@itemize @bullet
-@item __sec_implicit_index
-@item __sec_reduce
-@item __sec_reduce_add
-@item __sec_reduce_all_nonzero
-@item __sec_reduce_all_zero
-@item __sec_reduce_any_nonzero
-@item __sec_reduce_any_zero
-@item __sec_reduce_max
-@item __sec_reduce_min
-@item __sec_reduce_max_ind
-@item __sec_reduce_min_ind
-@item __sec_reduce_mul
-@item __sec_reduce_mutating
-@end itemize
-
-Further details and examples about these built-in functions are described 
-in the Cilk Plus language manual which can be found at 
-@uref{http://www.cilkplus.org}.
-
  @node Target Builtins
  @section Built-in Functions Specific to Particular Target Machines
  
@@ -8831,9 +10080,14 @@ to those machines.  Generally these generate calls to specific machine
  instructions, but allow the compiler to schedule those calls.
  
  @menu
+* AArch64 Built-in Functions::
  * Alpha Built-in Functions::
+* Altera Nios II Built-in Functions::
+* ARC Built-in Functions::
+* ARC SIMD Built-in Functions::
  * ARM iWMMXt Built-in Functions::
-* ARM NEON Intrinsics::
+* ARM C Language Extensions (ACLE)::
+* ARM Floating Point Status and Control Intrinsics::
  * AVR Built-in Functions::
  * Blackfin Built-in Functions::
  * FR-V Built-in Functions::
@@ -8843,9 +10097,12 @@ instructions, but allow the compiler to schedule those calls.
  * MIPS Paired-Single Support::
  * MIPS Loongson Built-in Functions::
  * Other MIPS Built-in Functions::
+* MSP430 Built-in Functions::
+* NDS32 Built-in Functions::
  * picoChip Built-in Functions::
  * PowerPC Built-in Functions::
  * PowerPC AltiVec/VSX Built-in Functions::
+* PowerPC Hardware Transactional Memory Built-in Functions::
  * RX Built-in Functions::
  * S/390 System z Built-in Functions::
  * SH Built-in Functions::
@@ -8856,6 +10113,18 @@ instructions, but allow the compiler to schedule those calls.
  * TILEPro Built-in Functions::
  @end menu
  
+@node AArch64 Built-in Functions
+@subsection AArch64 Built-in Functions
+
+These built-in functions are available for the AArch64 family of
+processors.
+@smallexample
+unsigned int __builtin_aarch64_get_fpcr ()
+void __builtin_aarch64_set_fpcr (unsigned int)
+unsigned int __builtin_aarch64_get_fpsr ()
+void __builtin_aarch64_set_fpsr (unsigned int)
+@end smallexample
+
  @node Alpha Built-in Functions
  @subsection Alpha Built-in Functions
  
@@ -8938,6 +10207,534 @@ void *__builtin_thread_pointer (void)
  void __builtin_set_thread_pointer (void *)
  @end smallexample
  
+@node Altera Nios II Built-in Functions
+@subsection Altera Nios II Built-in Functions
+
+These built-in functions are available for the Altera Nios II
+family of processors.
+
+The following built-in functions are always available.  They
+all generate the machine instruction that is part of the name.
+
+@example
+int __builtin_ldbio (volatile const void *)
+int __builtin_ldbuio (volatile const void *)
+int __builtin_ldhio (volatile const void *)
+int __builtin_ldhuio (volatile const void *)
+int __builtin_ldwio (volatile const void *)
+void __builtin_stbio (volatile void *, int)
+void __builtin_sthio (volatile void *, int)
+void __builtin_stwio (volatile void *, int)
+void __builtin_sync (void)
+int __builtin_rdctl (int) 
+void __builtin_wrctl (int, int)
+@end example
+
+The following built-in functions are always available.  They
+all generate a Nios II Custom Instruction. The name of the
+function represents the types that the function takes and
+returns. The letter before the @code{n} is the return type
+or @code{void} if absent. The @code{n} represents the first parameter
+to all the custom instructions, the custom instruction number.
+The two letters after the @code{n} represent the up to two
+parameters to the function.
+
+The letters represent the following data types:
+@table @code
+@item <no letter>
+@code{void} for return type and no parameter for parameter types.
+
+@item i
+@code{int} for return type and parameter type
+
+@item f
+@code{float} for return type and parameter type
+
+@item p
+@code{void *} for return type and parameter type
+
+@end table
+
+And the function names are:
+@example
+void __builtin_custom_n (void)
+void __builtin_custom_ni (int)
+void __builtin_custom_nf (float)
+void __builtin_custom_np (void *)
+void __builtin_custom_nii (int, int)
+void __builtin_custom_nif (int, float)
+void __builtin_custom_nip (int, void *)
+void __builtin_custom_nfi (float, int)
+void __builtin_custom_nff (float, float)
+void __builtin_custom_nfp (float, void *)
+void __builtin_custom_npi (void *, int)
+void __builtin_custom_npf (void *, float)
+void __builtin_custom_npp (void *, void *)
+int __builtin_custom_in (void)
+int __builtin_custom_ini (int)
+int __builtin_custom_inf (float)
+int __builtin_custom_inp (void *)
+int __builtin_custom_inii (int, int)
+int __builtin_custom_inif (int, float)
+int __builtin_custom_inip (int, void *)
+int __builtin_custom_infi (float, int)
+int __builtin_custom_inff (float, float)
+int __builtin_custom_infp (float, void *)
+int __builtin_custom_inpi (void *, int)
+int __builtin_custom_inpf (void *, float)
+int __builtin_custom_inpp (void *, void *)
+float __builtin_custom_fn (void)
+float __builtin_custom_fni (int)
+float __builtin_custom_fnf (float)
+float __builtin_custom_fnp (void *)
+float __builtin_custom_fnii (int, int)
+float __builtin_custom_fnif (int, float)
+float __builtin_custom_fnip (int, void *)
+float __builtin_custom_fnfi (float, int)
+float __builtin_custom_fnff (float, float)
+float __builtin_custom_fnfp (float, void *)
+float __builtin_custom_fnpi (void *, int)
+float __builtin_custom_fnpf (void *, float)
+float __builtin_custom_fnpp (void *, void *)
+void * __builtin_custom_pn (void)
+void * __builtin_custom_pni (int)
+void * __builtin_custom_pnf (float)
+void * __builtin_custom_pnp (void *)
+void * __builtin_custom_pnii (int, int)
+void * __builtin_custom_pnif (int, float)
+void * __builtin_custom_pnip (int, void *)
+void * __builtin_custom_pnfi (float, int)
+void * __builtin_custom_pnff (float, float)
+void * __builtin_custom_pnfp (float, void *)
+void * __builtin_custom_pnpi (void *, int)
+void * __builtin_custom_pnpf (void *, float)
+void * __builtin_custom_pnpp (void *, void *)
+@end example
+
+@node ARC Built-in Functions
+@subsection ARC Built-in Functions
+
+The following built-in functions are provided for ARC targets.  The
+built-ins generate the corresponding assembly instructions.  In the
+examples given below, the generated code often requires an operand or
+result to be in a register.  Where necessary further code will be
+generated to ensure this is true, but for brevity this is not
+described in each case.
+
+@emph{Note:} Using a built-in to generate an instruction not supported
+by a target may cause problems. At present the compiler is not
+guaranteed to detect such misuse, and as a result an internal compiler
+error may be generated.
+
+@deftypefn {Built-in Function} int __builtin_arc_aligned (void *@var{val}, int @var{alignval})
+Return 1 if @var{val} is known to have the byte alignment given
+by @var{alignval}, otherwise return 0.
+Note that this is different from
+@smallexample
+__alignof__(*(char *)@var{val}) >= @var{alignval}
+@end smallexample
+because @code{__alignof__} sees only the type of the dereference, whereas
+@code{__builtin_arc_aligned} uses alignment information from the pointer
+as well as from the pointed-to type.
+The information available will depend on optimization level.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_brk (void)
+Generates:
+@example
+brk
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} {unsigned int} __builtin_arc_core_read (unsigned int @var{regno})
+The operand is the number of a register to be read.  Generates:
+@example
+mov  @var{dest}, r@var{regno}
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_core_write (unsigned int @var{regno}, unsigned int @var{val})
+The first operand is the number of a register to be written, the
+second operand is a compile time constant to write into that
+register.  Generates:
+@example
+mov  r@var{regno}, @var{val}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_arc_divaw (int @var{a}, int @var{b})
+Only available if either @option{-mcpu=ARC700} or @option{-meA} is set.
+Generates:
+@example
+divaw  @var{dest}, @var{a}, @var{b}
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_flag (unsigned int @var{a})
+Generates:
+@example
+flag  @var{a}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} {unsigned int} __builtin_arc_lr (unsigned int @var{auxr})
+The operand, @var{auxr}, is the address of an auxiliary register and
+must be a compile time constant.  Generates:
+@example
+lr  @var{dest}, [@var{auxr}]
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_mul64 (int @var{a}, int @var{b})
+Only available with @option{-mmul64}.  Generates:
+@example
+mul64  @var{a}, @var{b}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_mulu64 (unsigned int @var{a}, unsigned int @var{b})
+Only available with @option{-mmul64}.  Generates:
+@example
+mulu64  @var{a}, @var{b}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_arc_nop (void)
+Generates:
+@example
+nop
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_arc_norm (int @var{src})
+Only valid if the @samp{norm} instruction is available through the
+@option{-mnorm} option or by default with @option{-mcpu=ARC700}.
+Generates:
+@example
+norm  @var{dest}, @var{src}
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function}  {short int} __builtin_arc_normw (short int @var{src})
+Only valid if the @samp{normw} instruction is available through the
+@option{-mnorm} option or by default with @option{-mcpu=ARC700}.
+Generates:
+@example
+normw  @var{dest}, @var{src}
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_rtie (void)
+Generates:
+@example
+rtie
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_sleep (int @var{a})
+Generates:
+@example
+sleep  @var{a}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_sr (unsigned int @var{auxr}, unsigned int @var{val})
+The first argument, @var{auxr}, is the address of an auxiliary
+register, the second argument, @var{val}, is a compile time constant
+to be written to the register.  Generates:
+@example
+sr  @var{auxr}, [@var{val}]
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  int __builtin_arc_swap (int @var{src})
+Only valid with @option{-mswap}.  Generates:
+@example
+swap  @var{dest}, @var{src}
+@end example
+where the value in @var{dest} will be the result returned from the
+built-in.
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_swi (void)
+Generates:
+@example
+swi
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_sync (void)
+Only available with @option{-mcpu=ARC700}.  Generates:
+@example
+sync
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_trap_s (unsigned int @var{c})
+Only available with @option{-mcpu=ARC700}.  Generates:
+@example
+trap_s  @var{c}
+@end example
+@end deftypefn
+
+@deftypefn {Built-in Function}  void __builtin_arc_unimp_s (void)
+Only available with @option{-mcpu=ARC700}.  Generates:
+@example
+unimp_s
+@end example
+@end deftypefn
+
+The instructions generated by the following builtins are not
+considered as candidates for scheduling.  They are not moved around by
+the compiler during scheduling, and thus can be expected to appear
+where they are put in the C code:
+@example
+__builtin_arc_brk()
+__builtin_arc_core_read()
+__builtin_arc_core_write()
+__builtin_arc_flag()
+__builtin_arc_lr()
+__builtin_arc_sleep()
+__builtin_arc_sr()
+__builtin_arc_swi()
+@end example
+
+@node ARC SIMD Built-in Functions
+@subsection ARC SIMD Built-in Functions
+
+SIMD builtins provided by the compiler can be used to generate the
+vector instructions.  This section describes the available builtins
+and their usage in programs.  With the @option{-msimd} option, the
+compiler provides 128-bit vector types, which can be specified using
+the @code{vector_size} attribute.  The header file @file{arc-simd.h}
+can be included to use the following predefined types:
+@example
+typedef int __v4si   __attribute__((vector_size(16)));
+typedef short __v8hi __attribute__((vector_size(16)));
+@end example
+
+These types can be used to define 128-bit variables.  The built-in
+functions listed in the following section can be used on these
+variables to generate the vector operations.
+
+For all builtins, @code{__builtin_arc_@var{someinsn}}, the header file
+@file{arc-simd.h} also provides equivalent macros called
+@code{_@var{someinsn}} that can be used for programming ease and
+improved readability.  The following macros for DMA control are also
+provided:
+@example
+#define _setup_dma_in_channel_reg _vdiwr
+#define _setup_dma_out_channel_reg _vdowr
+@end example
+
+The following is a complete list of all the SIMD built-ins provided
+for ARC, grouped by calling signature.
+
+The following take two @code{__v8hi} arguments and return a
+@code{__v8hi} result:
+@example
+__v8hi __builtin_arc_vaddaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vaddw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vand (__v8hi, __v8hi)
+__v8hi __builtin_arc_vandaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vavb (__v8hi, __v8hi)
+__v8hi __builtin_arc_vavrb (__v8hi, __v8hi)
+__v8hi __builtin_arc_vbic (__v8hi, __v8hi)
+__v8hi __builtin_arc_vbicaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vdifaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vdifw (__v8hi, __v8hi)
+__v8hi __builtin_arc_veqw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vh264f (__v8hi, __v8hi)
+__v8hi __builtin_arc_vh264ft (__v8hi, __v8hi)
+__v8hi __builtin_arc_vh264fw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vlew (__v8hi, __v8hi)
+__v8hi __builtin_arc_vltw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmaxaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmaxw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vminaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vminw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr1aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr1w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr2aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr2w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr3aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr3w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr4aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr4w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr5aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr5w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr6aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr6w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr7aw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmr7w (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmrb (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmulaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmulfaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmulfw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vmulw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vnew (__v8hi, __v8hi)
+__v8hi __builtin_arc_vor (__v8hi, __v8hi)
+__v8hi __builtin_arc_vsubaw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vsubw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vsummw (__v8hi, __v8hi)
+__v8hi __builtin_arc_vvc1f (__v8hi, __v8hi)
+__v8hi __builtin_arc_vvc1ft (__v8hi, __v8hi)
+__v8hi __builtin_arc_vxor (__v8hi, __v8hi)
+__v8hi __builtin_arc_vxoraw (__v8hi, __v8hi)
+@end example
+
+The following take one @code{__v8hi} and one @code{int} argument and return a
+@code{__v8hi} result:
+
+@example
+__v8hi __builtin_arc_vbaddw (__v8hi, int)
+__v8hi __builtin_arc_vbmaxw (__v8hi, int)
+__v8hi __builtin_arc_vbminw (__v8hi, int)
+__v8hi __builtin_arc_vbmulaw (__v8hi, int)
+__v8hi __builtin_arc_vbmulfw (__v8hi, int)
+__v8hi __builtin_arc_vbmulw (__v8hi, int)
+__v8hi __builtin_arc_vbrsubw (__v8hi, int)
+__v8hi __builtin_arc_vbsubw (__v8hi, int)
+@end example
+
+The following take one @code{__v8hi} argument and one @code{int} argument which
+must be a 3-bit compile time constant indicating a register number
+I0-I7.  They return a @code{__v8hi} result.
+@example
+__v8hi __builtin_arc_vasrw (__v8hi, const int)
+__v8hi __builtin_arc_vsr8 (__v8hi, const int)
+__v8hi __builtin_arc_vsr8aw (__v8hi, const int)
+@end example
+
+The following take one @code{__v8hi} argument and one @code{int}
+argument which must be a 6-bit compile time constant.  They return a
+@code{__v8hi} result.
+@example
+__v8hi __builtin_arc_vasrpwbi (__v8hi, const int)
+__v8hi __builtin_arc_vasrrpwbi (__v8hi, const int)
+__v8hi __builtin_arc_vasrrwi (__v8hi, const int)
+__v8hi __builtin_arc_vasrsrwi (__v8hi, const int)
+__v8hi __builtin_arc_vasrwi (__v8hi, const int)
+__v8hi __builtin_arc_vsr8awi (__v8hi, const int)
+__v8hi __builtin_arc_vsr8i (__v8hi, const int)
+@end example
+
+The following take one @code{__v8hi} argument and one @code{int} argument which
+must be an 8-bit compile time constant.  They return a @code{__v8hi}
+result.
+@example
+__v8hi __builtin_arc_vd6tapf (__v8hi, const int)
+__v8hi __builtin_arc_vmvaw (__v8hi, const int)
+__v8hi __builtin_arc_vmvw (__v8hi, const int)
+__v8hi __builtin_arc_vmvzw (__v8hi, const int)
+@end example
+
+The following take two @code{int} arguments, the second of which
+must be an 8-bit compile time constant.  They return a @code{__v8hi}
+result:
+@example
+__v8hi __builtin_arc_vmovaw (int, const int)
+__v8hi __builtin_arc_vmovw (int, const int)
+__v8hi __builtin_arc_vmovzw (int, const int)
+@end example
+
+The following take a single @code{__v8hi} argument and return a
+@code{__v8hi} result:
+@example
+__v8hi __builtin_arc_vabsaw (__v8hi)
+__v8hi __builtin_arc_vabsw (__v8hi)
+__v8hi __builtin_arc_vaddsuw (__v8hi)
+__v8hi __builtin_arc_vexch1 (__v8hi)
+__v8hi __builtin_arc_vexch2 (__v8hi)
+__v8hi __builtin_arc_vexch4 (__v8hi)
+__v8hi __builtin_arc_vsignw (__v8hi)
+__v8hi __builtin_arc_vupbaw (__v8hi)
+__v8hi __builtin_arc_vupbw (__v8hi)
+__v8hi __builtin_arc_vupsbaw (__v8hi)
+__v8hi __builtin_arc_vupsbw (__v8hi)
+@end example
+
+The following take two @code{int} arguments and return no result:
+@example
+void __builtin_arc_vdirun (int, int)
+void __builtin_arc_vdorun (int, int)
+@end example
+
+The following take two @code{int} arguments and return no result.  The
+first argument must be a 3-bit compile time constant indicating one of
+the DR0-DR7 DMA setup channels:
+@example
+void __builtin_arc_vdiwr (const int, int)
+void __builtin_arc_vdowr (const int, int)
+@end example
+
+The following take an @code{int} argument and return no result:
+@example
+void __builtin_arc_vendrec (int)
+void __builtin_arc_vrec (int)
+void __builtin_arc_vrecrun (int)
+void __builtin_arc_vrun (int)
+@end example
+
+The following take a @code{__v8hi} argument and two @code{int}
+arguments and return a @code{__v8hi} result.  The second argument must
+be a 3-bit compile time constant, indicating one of the registers I0-I7,
+and the third argument must be an 8-bit compile time constant.
+
+@emph{Note:} Although the equivalent hardware instructions do not take
+an SIMD register as an operand, these builtins overwrite the relevant
+bits of the @code{__v8hi} register provided as the first argument with
+the value loaded from the @code{[Ib, u8]} location in the SDM.
+
+@example
+__v8hi __builtin_arc_vld32 (__v8hi, const int, const int)
+__v8hi __builtin_arc_vld32wh (__v8hi, const int, const int)
+__v8hi __builtin_arc_vld32wl (__v8hi, const int, const int)
+__v8hi __builtin_arc_vld64 (__v8hi, const int, const int)
+@end example
+
+The following take two @code{int} arguments and return a @code{__v8hi}
+result.  The first argument must be a 3-bit compile time constant,
+indicating one of the registers I0-I7, and the second argument must be an
+8-bit compile time constant.
+
+@example
+__v8hi __builtin_arc_vld128 (const int, const int)
+__v8hi __builtin_arc_vld64w (const int, const int)
+@end example
+
+The following take a @code{__v8hi} argument and two @code{int}
+arguments and return no result.  The second argument must be a 3-bit
+compile time constant, indicating one of the registers I0-I7, and the
+third argument must be an 8-bit compile time constant.
+
+@example
+void __builtin_arc_vst128 (__v8hi, const int, const int)
+void __builtin_arc_vst64 (__v8hi, const int, const int)
+@end example
+
+The following take a @code{__v8hi} argument and three @code{int}
+arguments and return no result.  The second argument must be a 3-bit
+compile-time constant, identifying the 16-bit sub-register to be
+stored, the third argument must be a 3-bit compile time constant,
+indicating one of the registers I0-I7, and the fourth argument must be an
+8-bit compile time constant.
+
+@example
+void __builtin_arc_vst16_n (__v8hi, const int, const int, const int)
+void __builtin_arc_vst32_n (__v8hi, const int, const int, const int)
+@end example
+
  @node ARM iWMMXt Built-in Functions
  @subsection ARM iWMMXt Built-in Functions
  
@@ -9091,13 +10888,40 @@ long long __builtin_arm_wxor (long long, long long)
  long long __builtin_arm_wzero ()
  @end smallexample
  
-@node ARM NEON Intrinsics
-@subsection ARM NEON Intrinsics
  
-These built-in intrinsics for the ARM Advanced SIMD extension are available
-when the @option{-mfpu=neon} switch is used:
+@node ARM C Language Extensions (ACLE)
+@subsection ARM C Language Extensions (ACLE)
+
+GCC implements extensions for C as described in the ARM C Language
+Extensions (ACLE) specification, which can be found at
+@uref{http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053c/IHI0053C_acle_2_0.pdf}.
+
+As a part of ACLE, GCC implements extensions for Advanced SIMD as described in
+the ARM C Language Extensions Specification.  The complete list of Advanced SIMD
+intrinsics can be found at
+@uref{http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf}.
+The built-in intrinsics for the Advanced SIMD extension are available when
+NEON is enabled.
  
-@include arm-neon-intrinsics.texi
+Currently, ARM and AArch64 back-ends do not support ACLE 2.0 fully.  Both
+back-ends support CRC32 intrinsics from @file{arm_acle.h}.  The ARM backend's
+16-bit floating-point Advanced SIMD Intrinsics currently comply with ACLE v1.1.
+AArch64's backend does not have support for 16-bit floating point Advanced SIMD
+Intrinsics yet.
+
+See @ref{ARM Options} and @ref{AArch64 Options} for more information on the
+availability of extensions.
+
+@node ARM Floating Point Status and Control Intrinsics
+@subsection ARM Floating Point Status and Control Intrinsics
+
+These built-in functions are available for the ARM family of
+processors with a floating-point unit.
+
+@smallexample
+unsigned int __builtin_arm_get_fpscr ()
+void __builtin_arm_set_fpscr (unsigned int)
+@end smallexample
  
  @node AVR Built-in Functions
  @subsection AVR Built-in Functions
@@ -9791,6 +11615,9 @@ AMD Family 15h Bulldozer version 2.
  @item bdver3
  AMD Family 15h Bulldozer version 3.
  
+@item bdver4
+AMD Family 15h Bulldozer version 4.
+
  @item btver2
  AMD Family 16h CPU.
  @end table
@@ -9836,6 +11663,8 @@ SSE4.2 instructions.
  AVX instructions.
  @item avx2
  AVX2 instructions.
+@item avx512f
+AVX512F instructions.
  @end table
  
  Here is an example:
@@ -9925,8 +11754,6 @@ v8qi __builtin_ia32_pmaxub (v8qi, v8qi)
  v4hi __builtin_ia32_pmaxsw (v4hi, v4hi)
  v8qi __builtin_ia32_pminub (v8qi, v8qi)
  v4hi __builtin_ia32_pminsw (v4hi, v4hi)
-int __builtin_ia32_pextrw (v4hi, int)
-v4hi __builtin_ia32_pinsrw (v4hi, int, int)
  int __builtin_ia32_pmovmskb (v8qi)
  void __builtin_ia32_maskmovq (v8qi, v8qi, char *)
  void __builtin_ia32_movntq (di *, di)
@@ -9957,26 +11784,26 @@ v4sf __builtin_ia32_addss (v4sf, v4sf)
  v4sf __builtin_ia32_subss (v4sf, v4sf)
  v4sf __builtin_ia32_mulss (v4sf, v4sf)
  v4sf __builtin_ia32_divss (v4sf, v4sf)
-v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
-v4si __builtin_ia32_cmpltps (v4sf, v4sf)
-v4si __builtin_ia32_cmpleps (v4sf, v4sf)
-v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
-v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
-v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
-v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
-v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
-v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
-v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
-v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
-v4si __builtin_ia32_cmpordps (v4sf, v4sf)
-v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
-v4si __builtin_ia32_cmpltss (v4sf, v4sf)
-v4si __builtin_ia32_cmpless (v4sf, v4sf)
-v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
-v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
-v4si __builtin_ia32_cmpnlts (v4sf, v4sf)
-v4si __builtin_ia32_cmpnless (v4sf, v4sf)
-v4si __builtin_ia32_cmpordss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpeqps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpltps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpleps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpgtps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpgeps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpunordps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpneqps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpnltps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpnleps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpngtps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpngeps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpordps (v4sf, v4sf)
+v4sf __builtin_ia32_cmpeqss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpltss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpless (v4sf, v4sf)
+v4sf __builtin_ia32_cmpunordss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpneqss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpnltss (v4sf, v4sf)
+v4sf __builtin_ia32_cmpnless (v4sf, v4sf)
+v4sf __builtin_ia32_cmpordss (v4sf, v4sf)
  v4sf __builtin_ia32_maxps (v4sf, v4sf)
  v4sf __builtin_ia32_maxss (v4sf, v4sf)
  v4sf __builtin_ia32_minps (v4sf, v4sf)
@@ -10010,18 +11837,12 @@ int __builtin_ia32_movmskps (v4sf)
  The following built-in functions are available when @option{-msse} is used.
  
  @table @code
-@item v4sf __builtin_ia32_loadaps (float *)
-Generates the @code{movaps} machine instruction as a load from memory.
-@item void __builtin_ia32_storeaps (float *, v4sf)
-Generates the @code{movaps} machine instruction as a store to memory.
  @item v4sf __builtin_ia32_loadups (float *)
  Generates the @code{movups} machine instruction as a load from memory.
  @item void __builtin_ia32_storeups (float *, v4sf)
  Generates the @code{movups} machine instruction as a store to memory.
-@item v4sf __builtin_ia32_loadsss (float *)
+@item v4sf __builtin_ia32_loadss (float *)
  Generates the @code{movss} machine instruction as a load from memory.
-@item void __builtin_ia32_storess (float *, v4sf)
-Generates the @code{movss} machine instruction as a store to memory.
  @item v4sf __builtin_ia32_loadhps (v4sf, const v2sf *)
  Generates the @code{movhps} machine instruction as a load from memory.
  @item v4sf __builtin_ia32_loadlps (v4sf, const v2sf *)
@@ -10205,22 +12026,13 @@ v2df __builtin_ia32_hsubpd (v2df, v2df)
  v4sf __builtin_ia32_hsubps (v4sf, v4sf)
  v16qi __builtin_ia32_lddqu (char const *)
  void __builtin_ia32_monitor (void *, unsigned int, unsigned int)
-v2df __builtin_ia32_movddup (v2df)
  v4sf __builtin_ia32_movshdup (v4sf)
  v4sf __builtin_ia32_movsldup (v4sf)
  void __builtin_ia32_mwait (unsigned int, unsigned int)
  @end smallexample
  
-The following built-in functions are available when @option{-msse3} is used.
-
-@table @code
-@item v2df __builtin_ia32_loadddup (double const *)
-Generates the @code{movddup} machine instruction as a load from memory.
-@end table
-
  The following built-in functions are available when @option{-mssse3} is used.
-All of them generate the machine instruction that is part of the name
-with MMX registers.
+All of them generate the machine instruction that is part of the name.
  
  @smallexample
  v2si __builtin_ia32_phaddd (v2si, v2si)
@@ -10242,8 +12054,7 @@ v4hi __builtin_ia32_pabsw (v4hi)
  @end smallexample
  
  The following built-in functions are available when @option{-mssse3} is used.
-All of them generate the machine instruction that is part of the name
-with SSE registers.
+All of them generate the machine instruction that is part of the name.
  
  @smallexample
  v4si __builtin_ia32_phaddd128 (v4si, v4si)
@@ -10531,7 +12342,7 @@ used. All of them generate the machine instruction that is part of the
  name.
  
  @smallexample
-v32qi __builtin_ia32_mpsadbw256 (v32qi,v32qi,v32qi,int)
+v32qi __builtin_ia32_mpsadbw256 (v32qi,v32qi,int)
  v32qi __builtin_ia32_pabsb256 (v32qi)
  v16hi __builtin_ia32_pabsw256 (v16hi)
  v8si __builtin_ia32_pabsd256 (v8si)
@@ -10766,8 +12577,8 @@ The following built-in functions are available when @option{-mxop} is used.
  @smallexample
  v2df __builtin_ia32_vfrczpd (v2df)
  v4sf __builtin_ia32_vfrczps (v4sf)
-v2df __builtin_ia32_vfrczsd (v2df, v2df)
-v4sf __builtin_ia32_vfrczss (v4sf, v4sf)
+v2df __builtin_ia32_vfrczsd (v2df)
+v4sf __builtin_ia32_vfrczss (v4sf)
  v4df __builtin_ia32_vfrczpd256 (v4df)
  v8sf __builtin_ia32_vfrczps256 (v8sf)
  v2di __builtin_ia32_vpcmov (v2di, v2di, v2di)
@@ -10891,42 +12702,41 @@ v8hi __builtin_ia32_vpshlw (v8hi, v8hi)
  @end smallexample
  
  The following built-in functions are available when @option{-mfma4} is used.
-All of them generate the machine instruction that is part of the name
-with MMX registers.
-
-@smallexample
-v2df __builtin_ia32_fmaddpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmaddps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmaddsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmaddss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmsubpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmsubps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmsubsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmsubss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmaddpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmaddps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmaddsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmaddss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmsubpd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmsubps (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fnmsubsd (v2df, v2df, v2df)
-v4sf __builtin_ia32_fnmsubss (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmaddsubpd  (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmaddsubps  (v4sf, v4sf, v4sf)
-v2df __builtin_ia32_fmsubaddpd  (v2df, v2df, v2df)
-v4sf __builtin_ia32_fmsubaddps  (v4sf, v4sf, v4sf)
-v4df __builtin_ia32_fmaddpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fmaddps256 (v8sf, v8sf, v8sf)
-v4df __builtin_ia32_fmsubpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fmsubps256 (v8sf, v8sf, v8sf)
-v4df __builtin_ia32_fnmaddpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fnmaddps256 (v8sf, v8sf, v8sf)
-v4df __builtin_ia32_fnmsubpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fnmsubps256 (v8sf, v8sf, v8sf)
-v4df __builtin_ia32_fmaddsubpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fmaddsubps256 (v8sf, v8sf, v8sf)
-v4df __builtin_ia32_fmsubaddpd256 (v4df, v4df, v4df)
-v8sf __builtin_ia32_fmsubaddps256 (v8sf, v8sf, v8sf)
+All of them generate the machine instruction that is part of the name.
+
+@smallexample
+v2df __builtin_ia32_vfmaddpd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmaddps (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfmaddsd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmaddss (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfmsubpd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmsubps (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfmsubsd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmsubss (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfnmaddpd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfnmaddps (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfnmaddsd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfnmaddss (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfnmsubpd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfnmsubps (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfnmsubsd (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfnmsubss (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfmaddsubpd  (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmaddsubps  (v4sf, v4sf, v4sf)
+v2df __builtin_ia32_vfmsubaddpd  (v2df, v2df, v2df)
+v4sf __builtin_ia32_vfmsubaddps  (v4sf, v4sf, v4sf)
+v4df __builtin_ia32_vfmaddpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfmaddps256 (v8sf, v8sf, v8sf)
+v4df __builtin_ia32_vfmsubpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfmsubps256 (v8sf, v8sf, v8sf)
+v4df __builtin_ia32_vfnmaddpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfnmaddps256 (v8sf, v8sf, v8sf)
+v4df __builtin_ia32_vfnmsubpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfnmsubps256 (v8sf, v8sf, v8sf)
+v4df __builtin_ia32_vfmaddsubpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfmaddsubps256 (v8sf, v8sf, v8sf)
+v4df __builtin_ia32_vfmsubaddpd256 (v4df, v4df, v4df)
+v8sf __builtin_ia32_vfmsubaddps256 (v8sf, v8sf, v8sf)
  
  @end smallexample
  
@@ -10973,6 +12783,31 @@ unsigned int __builtin_ia32_lzcnt_u32(unsigned int);
  unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long);
  @end smallexample
  
+The following built-in functions are available when @option{-mfxsr} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+void __builtin_ia32_fxsave (void *)
+void __builtin_ia32_fxrstor (void *)
+void __builtin_ia32_fxsave64 (void *)
+void __builtin_ia32_fxrstor64 (void *)
+@end smallexample
+
+The following built-in functions are available when @option{-mxsave} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+void __builtin_ia32_xsave (void *, long long)
+void __builtin_ia32_xrstor (void *, long long)
+void __builtin_ia32_xsave64 (void *, long long)
+void __builtin_ia32_xrstor64 (void *, long long)
+@end smallexample
+
+The following built-in functions are available when @option{-mxsaveopt} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+void __builtin_ia32_xsaveopt (void *, long long)
+void __builtin_ia32_xsaveopt64 (void *, long long)
+@end smallexample
+
  The following built-in functions are available when @option{-mtbm} is used.
  Both of them generate the immediate form of the bextr machine instruction.
  @smallexample
@@ -11000,7 +12835,6 @@ v2sf __builtin_ia32_pfrcp (v2sf)
  v2sf __builtin_ia32_pfrcpit1 (v2sf, v2sf)
  v2sf __builtin_ia32_pfrcpit2 (v2sf, v2sf)
  v2sf __builtin_ia32_pfrsqrt (v2sf)
-v2sf __builtin_ia32_pfrsqrtit1 (v2sf, v2sf)
  v2sf __builtin_ia32_pfsub (v2sf, v2sf)
  v2sf __builtin_ia32_pfsubr (v2sf, v2sf)
  v2sf __builtin_ia32_pi2fd (v2si)
@@ -11044,7 +12878,7 @@ A memory transaction commits all changes to memory in an atomic way,
  as visible to other threads. If the transaction fails it is rolled back
  and all side effects discarded.
  
-Generally there is no guarantee that a memory transaction ever suceeds
+Generally there is no guarantee that a memory transaction ever succeeds
  and suitable fallback code always needs to be supplied.
  
  @deftypefn {RTM Function} {unsigned} _xbegin ()
@@ -11845,8 +13679,84 @@ GCC provides other MIPS-specific built-in functions:
  Insert a @samp{cache} instruction with operands @var{op} and @var{addr}.
  GCC defines the preprocessor macro @code{___GCC_HAVE_BUILTIN_MIPS_CACHE}
  when this function is available.
+
+@item unsigned int __builtin_mips_get_fcsr (void)
+@itemx void __builtin_mips_set_fcsr (unsigned int @var{value})
+Get and set the contents of the floating-point control and status register
+(FPU control register 31).  These functions are only available in hard-float
+code but can be called in both MIPS16 and non-MIPS16 contexts.
+
+@code{__builtin_mips_set_fcsr} can be used to change any bit of the
+register except the condition codes, which GCC assumes are preserved.
  @end table
  
+@node MSP430 Built-in Functions
+@subsection MSP430 Built-in Functions
+
+GCC provides a couple of special builtin functions to aid in the
+writing of interrupt handlers in C.
+
+@table @code
+@item __bic_SR_register_on_exit (int @var{mask})
+This clears the indicated bits in the saved copy of the status register
+currently residing on the stack.  This only works inside interrupt
+handlers and the changes to the status register will only take effect
+once the handler returns.
+
+@item __bis_SR_register_on_exit (int @var{mask})
+This sets the indicated bits in the saved copy of the status register
+currently residing on the stack.  This only works inside interrupt
+handlers and the changes to the status register will only take effect
+once the handler returns.
+
+@item __delay_cycles (long long @var{cycles})
+This inserts an instruction sequence that takes exactly @var{cycles}
+cycles (between 0 and about 17E9) to complete.  The inserted sequence
+may use jumps, loops, or no-ops, and does not interfere with any other
+instructions.  Note that @var{cycles} must be a compile-time constant
+integer---that is, you must pass a number, not a variable that may be
+optimized to a constant later.  The number of cycles delayed by this
+builtin is exact.
+@end table
+
+@node NDS32 Built-in Functions
+@subsection NDS32 Built-in Functions
+
+These built-in functions are available for the NDS32 target:
+
+@deftypefn {Built-in Function} void __builtin_nds32_isync (int *@var{addr})
+Insert an ISYNC instruction into the instruction stream where
+@var{addr} is an instruction address for serialization.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_nds32_isb (void)
+Insert an ISB instruction into the instruction stream.
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_nds32_mfsr (int @var{sr})
+Return the content of a system register which is mapped by @var{sr}.
+@end deftypefn
+
+@deftypefn {Built-in Function} int __builtin_nds32_mfusr (int @var{usr})
+Return the content of a user space register which is mapped by @var{usr}.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_nds32_mtsr (int @var{value}, int @var{sr})
+Move the @var{value} to a system register which is mapped by @var{sr}.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_nds32_mtusr (int @var{value}, int @var{usr})
+Move the @var{value} to a user space register which is mapped by @var{usr}.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_nds32_setgie_en (void)
+Enable global interrupt.
+@end deftypefn
+
+@deftypefn {Built-in Function} void __builtin_nds32_setgie_dis (void)
+Disable global interrupt.
+@end deftypefn
+
  @node picoChip Built-in Functions
  @subsection picoChip Built-in Functions
  
@@ -11893,9 +13803,10 @@ float __builtin_recipdivf (float, float);
  float __builtin_rsqrtf (float);
  double __builtin_recipdiv (double, double);
  double __builtin_rsqrt (double);
-long __builtin_bpermd (long, long);
  uint64_t __builtin_ppc_get_timebase ();
  unsigned long __builtin_ppc_mftb ();
+double __builtin_unpack_longdouble (long double, int);
+long double __builtin_pack_longdouble (double, double);
  @end smallexample
  
  The @code{vec_rsqrt}, @code{__builtin_rsqrt}, and
@@ -11915,6 +13826,57 @@ The @code{__builtin_ppc_mftb} function always generates one instruction and
  returns the Time Base Register value as an unsigned long, throwing away
  the most significant word on 32-bit environments.
  
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 2.06 or later (@option{-mcpu=power7}
+or @option{-mpopcntd}):
+@smallexample
+long __builtin_bpermd (long, long);
+int __builtin_divwe (int, int);
+int __builtin_divweo (int, int);
+unsigned int __builtin_divweu (unsigned int, unsigned int);
+unsigned int __builtin_divweuo (unsigned int, unsigned int);
+long __builtin_divde (long, long);
+long __builtin_divdeo (long, long);
+unsigned long __builtin_divdeu (unsigned long, unsigned long);
+unsigned long __builtin_divdeuo (unsigned long, unsigned long);
+unsigned int __builtin_cdtbcd (unsigned int);
+unsigned int __builtin_cbcdtd (unsigned int);
+unsigned int __builtin_addg6s (unsigned int, unsigned int);
+@end smallexample
+
+The @code{__builtin_divde}, @code{__builtin_divdeo},
+@code{__builtin_divdeu}, and @code{__builtin_divdeuo} functions require a
+64-bit environment supporting ISA 2.06 or later.
+
+The following built-in functions are available for the PowerPC family
+of processors when hardware decimal floating point
+(@option{-mhard-dfp}) is available:
+@smallexample
+_Decimal64 __builtin_dxex (_Decimal64);
+_Decimal128 __builtin_dxexq (_Decimal128);
+_Decimal64 __builtin_ddedpd (int, _Decimal64);
+_Decimal128 __builtin_ddedpdq (int, _Decimal128);
+_Decimal64 __builtin_denbcd (int, _Decimal64);
+_Decimal128 __builtin_denbcdq (int, _Decimal128);
+_Decimal64 __builtin_diex (_Decimal64, _Decimal64);
+_Decimal128 __builtin_diexq (_Decimal128, _Decimal128);
+_Decimal64 __builtin_dscli (_Decimal64, int);
+_Decimal128 __builtin_dscliq (_Decimal128, int);
+_Decimal64 __builtin_dscri (_Decimal64, int);
+_Decimal128 __builtin_dscriq (_Decimal128, int);
+unsigned long long __builtin_unpack_dec128 (_Decimal128, int);
+_Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long);
+@end smallexample
+
+The following built-in functions are available for the PowerPC family
+of processors when the Vector Scalar (vsx) instruction set is
+available:
+@smallexample
+unsigned long long __builtin_unpack_vector_int128 (vector __int128_t, int);
+vector __int128_t __builtin_pack_vector_int128 (unsigned long long,
+                                                unsigned long long);
+@end smallexample
+
  @node PowerPC AltiVec/VSX Built-in Functions
  @subsection PowerPC AltiVec Built-in Functions
  
@@ -12288,16 +14250,22 @@ vector bool int vec_cmplt (vector unsigned int, vector unsigned int);
  vector bool int vec_cmplt (vector signed int, vector signed int);
  vector bool int vec_cmplt (vector float, vector float);
  
+vector float vec_cpsgn (vector float, vector float);
+
  vector float vec_ctf (vector unsigned int, const int);
  vector float vec_ctf (vector signed int, const int);
+vector double vec_ctf (vector unsigned long, const int);
+vector double vec_ctf (vector signed long, const int);
  
  vector float vec_vcfsx (vector signed int, const int);
  
  vector float vec_vcfux (vector unsigned int, const int);
  
  vector signed int vec_cts (vector float, const int);
+vector signed long vec_cts (vector double, const int);
  
  vector unsigned int vec_ctu (vector float, const int);
+vector unsigned long vec_ctu (vector double, const int);
  
  void vec_dss (const int);
  
@@ -13133,6 +15101,16 @@ vector float vec_splat (vector float, const int);
  vector signed int vec_splat (vector signed int, const int);
  vector unsigned int vec_splat (vector unsigned int, const int);
  vector bool int vec_splat (vector bool int, const int);
+vector signed long vec_splat (vector signed long, const int);
+vector unsigned long vec_splat (vector unsigned long, const int);
+
+vector signed char vec_splats (signed char);
+vector unsigned char vec_splats (unsigned char);
+vector signed short vec_splats (signed short);
+vector unsigned short vec_splats (unsigned short);
+vector signed int vec_splats (signed int);
+vector unsigned int vec_splats (unsigned int);
+vector float vec_splats (float);
  
  vector float vec_vspltw (vector float, const int);
  vector signed int vec_vspltw (vector signed int, const int);
@@ -13837,17 +15815,32 @@ vector double vec_add (vector double, vector double);
  vector double vec_and (vector double, vector double);
  vector double vec_and (vector double, vector bool long);
  vector double vec_and (vector bool long, vector double);
+vector long vec_and (vector long, vector long);
+vector long vec_and (vector long, vector bool long);
+vector long vec_and (vector bool long, vector long);
+vector unsigned long vec_and (vector unsigned long, vector unsigned long);
+vector unsigned long vec_and (vector unsigned long, vector bool long);
+vector unsigned long vec_and (vector bool long, vector unsigned long);
  vector double vec_andc (vector double, vector double);
  vector double vec_andc (vector double, vector bool long);
  vector double vec_andc (vector bool long, vector double);
+vector long vec_andc (vector long, vector long);
+vector long vec_andc (vector long, vector bool long);
+vector long vec_andc (vector bool long, vector long);
+vector unsigned long vec_andc (vector unsigned long, vector unsigned long);
+vector unsigned long vec_andc (vector unsigned long, vector bool long);
+vector unsigned long vec_andc (vector bool long, vector unsigned long);
  vector double vec_ceil (vector double);
  vector bool long vec_cmpeq (vector double, vector double);
  vector bool long vec_cmpge (vector double, vector double);
  vector bool long vec_cmpgt (vector double, vector double);
  vector bool long vec_cmple (vector double, vector double);
  vector bool long vec_cmplt (vector double, vector double);
+vector double vec_cpsgn (vector double, vector double);
  vector float vec_div (vector float, vector float);
  vector double vec_div (vector double, vector double);
+vector long vec_div (vector long, vector long);
+vector unsigned long vec_div (vector unsigned long, vector unsigned long);
  vector double vec_floor (vector double);
  vector double vec_ld (int, const vector double *);
  vector double vec_ld (int, const double *);
@@ -13857,38 +15850,83 @@ vector unsigned char vec_lvsl (int, const volatile double *);
  vector unsigned char vec_lvsr (int, const volatile double *);
  vector double vec_madd (vector double, vector double, vector double);
  vector double vec_max (vector double, vector double);
+vector signed long vec_mergeh (vector signed long, vector signed long);
+vector signed long vec_mergeh (vector signed long, vector bool long);
+vector signed long vec_mergeh (vector bool long, vector signed long);
+vector unsigned long vec_mergeh (vector unsigned long, vector unsigned long);
+vector unsigned long vec_mergeh (vector unsigned long, vector bool long);
+vector unsigned long vec_mergeh (vector bool long, vector unsigned long);
+vector signed long vec_mergel (vector signed long, vector signed long);
+vector signed long vec_mergel (vector signed long, vector bool long);
+vector signed long vec_mergel (vector bool long, vector signed long);
+vector unsigned long vec_mergel (vector unsigned long, vector unsigned long);
+vector unsigned long vec_mergel (vector unsigned long, vector bool long);
+vector unsigned long vec_mergel (vector bool long, vector unsigned long);
  vector double vec_min (vector double, vector double);
  vector float vec_msub (vector float, vector float, vector float);
  vector double vec_msub (vector double, vector double, vector double);
  vector float vec_mul (vector float, vector float);
  vector double vec_mul (vector double, vector double);
+vector long vec_mul (vector long, vector long);
+vector unsigned long vec_mul (vector unsigned long, vector unsigned long);
  vector float vec_nearbyint (vector float);
  vector double vec_nearbyint (vector double);
  vector float vec_nmadd (vector float, vector float, vector float);
  vector double vec_nmadd (vector double, vector double, vector double);
  vector double vec_nmsub (vector double, vector double, vector double);
  vector double vec_nor (vector double, vector double);
+vector long vec_nor (vector long, vector long);
+vector long vec_nor (vector long, vector bool long);
+vector long vec_nor (vector bool long, vector long);
+vector unsigned long vec_nor (vector unsigned long, vector unsigned long);
+vector unsigned long vec_nor (vector unsigned long, vector bool long);
+vector unsigned long vec_nor (vector bool long, vector unsigned long);
  vector double vec_or (vector double, vector double);
  vector double vec_or (vector double, vector bool long);
  vector double vec_or (vector bool long, vector double);
-vector double vec_perm (vector double,
-                        vector double,
-                        vector unsigned char);
+vector long vec_or (vector long, vector long);
+vector long vec_or (vector long, vector bool long);
+vector long vec_or (vector bool long, vector long);
+vector unsigned long vec_or (vector unsigned long, vector unsigned long);
+vector unsigned long vec_or (vector unsigned long, vector bool long);
+vector unsigned long vec_or (vector bool long, vector unsigned long);
+vector double vec_perm (vector double, vector double, vector unsigned char);
+vector long vec_perm (vector long, vector long, vector unsigned char);
+vector unsigned long vec_perm (vector unsigned long, vector unsigned long,
+                               vector unsigned char);
  vector double vec_rint (vector double);
  vector double vec_recip (vector double, vector double);
  vector double vec_rsqrt (vector double);
  vector double vec_rsqrte (vector double);
  vector double vec_sel (vector double, vector double, vector bool long);
  vector double vec_sel (vector double, vector double, vector unsigned long);
-vector double vec_sub (vector double, vector double);
+vector long vec_sel (vector long, vector long, vector long);
+vector long vec_sel (vector long, vector long, vector unsigned long);
+vector long vec_sel (vector long, vector long, vector bool long);
+vector unsigned long vec_sel (vector unsigned long, vector unsigned long,
+                              vector long);
+vector unsigned long vec_sel (vector unsigned long, vector unsigned long,
+                              vector unsigned long);
+vector unsigned long vec_sel (vector unsigned long, vector unsigned long,
+                              vector bool long);
+vector double vec_splats (double);
+vector signed long vec_splats (signed long);
+vector unsigned long vec_splats (unsigned long);
  vector float vec_sqrt (vector float);
  vector double vec_sqrt (vector double);
  void vec_st (vector double, int, vector double *);
  void vec_st (vector double, int, double *);
+vector double vec_sub (vector double, vector double);
  vector double vec_trunc (vector double);
  vector double vec_xor (vector double, vector double);
  vector double vec_xor (vector double, vector bool long);
  vector double vec_xor (vector bool long, vector double);
+vector long vec_xor (vector long, vector long);
+vector long vec_xor (vector long, vector bool long);
+vector long vec_xor (vector bool long, vector long);
+vector unsigned long vec_xor (vector unsigned long, vector unsigned long);
+vector unsigned long vec_xor (vector unsigned long, vector bool long);
+vector unsigned long vec_xor (vector bool long, vector unsigned long);
  int vec_all_eq (vector double, vector double);
  int vec_all_ge (vector double, vector double);
  int vec_all_gt (vector double, vector double);
@@ -13965,6 +16003,35 @@ void vec_vsx_st (vector unsigned char, int, unsigned char *);
  void vec_vsx_st (vector bool char, int, vector bool char *);
  void vec_vsx_st (vector bool char, int, unsigned char *);
  void vec_vsx_st (vector bool char, int, signed char *);
+
+vector double vec_xxpermdi (vector double, vector double, int);
+vector float vec_xxpermdi (vector float, vector float, int);
+vector long long vec_xxpermdi (vector long long, vector long long, int);
+vector unsigned long long vec_xxpermdi (vector unsigned long long,
+                                        vector unsigned long long, int);
+vector int vec_xxpermdi (vector int, vector int, int);
+vector unsigned int vec_xxpermdi (vector unsigned int,
+                                  vector unsigned int, int);
+vector short vec_xxpermdi (vector short, vector short, int);
+vector unsigned short vec_xxpermdi (vector unsigned short,
+                                    vector unsigned short, int);
+vector signed char vec_xxpermdi (vector signed char, vector signed char, int);
+vector unsigned char vec_xxpermdi (vector unsigned char,
+                                   vector unsigned char, int);
+
+vector double vec_xxsldi (vector double, vector double, int);
+vector float vec_xxsldi (vector float, vector float, int);
+vector long long vec_xxsldi (vector long long, vector long long, int);
+vector unsigned long long vec_xxsldi (vector unsigned long long,
+                                      vector unsigned long long, int);
+vector int vec_xxsldi (vector int, vector int, int);
+vector unsigned int vec_xxsldi (vector unsigned int, vector unsigned int, int);
+vector short vec_xxsldi (vector short, vector short, int);
+vector unsigned short vec_xxsldi (vector unsigned short,
+                                  vector unsigned short, int);
+vector signed char vec_xxsldi (vector signed char, vector signed char, int);
+vector unsigned char vec_xxsldi (vector unsigned char,
+                                 vector unsigned char, int);
  @end smallexample
  
  Note that the @samp{vec_ld} and @samp{vec_st} built-in functions always
@@ -13988,17 +16055,30 @@ vector unsigned long long vec_add (vector unsigned long long,
                                     vector unsigned long long);
  
  int vec_all_eq (vector long long, vector long long);
+int vec_all_eq (vector unsigned long long, vector unsigned long long);
  int vec_all_ge (vector long long, vector long long);
+int vec_all_ge (vector unsigned long long, vector unsigned long long);
  int vec_all_gt (vector long long, vector long long);
+int vec_all_gt (vector unsigned long long, vector unsigned long long);
  int vec_all_le (vector long long, vector long long);
+int vec_all_le (vector unsigned long long, vector unsigned long long);
  int vec_all_lt (vector long long, vector long long);
+int vec_all_lt (vector unsigned long long, vector unsigned long long);
  int vec_all_ne (vector long long, vector long long);
+int vec_all_ne (vector unsigned long long, vector unsigned long long);
+
  int vec_any_eq (vector long long, vector long long);
+int vec_any_eq (vector unsigned long long, vector unsigned long long);
  int vec_any_ge (vector long long, vector long long);
+int vec_any_ge (vector unsigned long long, vector unsigned long long);
  int vec_any_gt (vector long long, vector long long);
+int vec_any_gt (vector unsigned long long, vector unsigned long long);
  int vec_any_le (vector long long, vector long long);
+int vec_any_le (vector unsigned long long, vector unsigned long long);
  int vec_any_lt (vector long long, vector long long);
+int vec_any_lt (vector unsigned long long, vector unsigned long long);
  int vec_any_ne (vector long long, vector long long);
+int vec_any_ne (vector unsigned long long, vector unsigned long long);
  
  vector long long vec_eqv (vector long long, vector long long);
  vector long long vec_eqv (vector bool long long, vector long long);
@@ -14036,6 +16116,14 @@ vector long long vec_max (vector long long, vector long long);
  vector unsigned long long vec_max (vector unsigned long long,
                                     vector unsigned long long);
  
+vector signed int vec_mergee (vector signed int, vector signed int);
+vector unsigned int vec_mergee (vector unsigned int, vector unsigned int);
+vector bool int vec_mergee (vector bool int, vector bool int);
+
+vector signed int vec_mergeo (vector signed int, vector signed int);
+vector unsigned int vec_mergeo (vector unsigned int, vector unsigned int);
+vector bool int vec_mergeo (vector bool int, vector bool int);
+
  vector long long vec_min (vector long long, vector long long);
  vector unsigned long long vec_min (vector unsigned long long,
                                     vector unsigned long long);
@@ -14114,6 +16202,8 @@ vector unsigned int vec_packs (vector unsigned long long,
                                 vector unsigned long long);
  
  vector unsigned int vec_packsu (vector long long, vector long long);
+vector unsigned int vec_packsu (vector unsigned long long,
+                                vector unsigned long long);
  
  vector long long vec_rl (vector long long,
                           vector unsigned long long);
@@ -14152,6 +16242,18 @@ vector unsigned long long vec_vaddudm (vector bool unsigned long long,
  vector unsigned long long vec_vaddudm (vector unsigned long long,
                                         vector bool unsigned long long);
  
+vector long long vec_vbpermq (vector signed char, vector signed char);
+vector long long vec_vbpermq (vector unsigned char, vector unsigned char);
+
+vector long long vec_cntlz (vector long long);
+vector unsigned long long vec_cntlz (vector unsigned long long);
+vector int vec_cntlz (vector int);
+vector unsigned int vec_cntlz (vector unsigned int);
+vector short vec_cntlz (vector short);
+vector unsigned short vec_cntlz (vector unsigned short);
+vector signed char vec_cntlz (vector signed char);
+vector unsigned char vec_cntlz (vector unsigned char);
+
  vector long long vec_vclz (vector long long);
  vector unsigned long long vec_vclz (vector unsigned long long);
  vector int vec_vclz (vector int);
@@ -14173,6 +16275,9 @@ vector unsigned short vec_vclzh (vector unsigned short);
  vector int vec_vclzw (vector int);
  vector unsigned int vec_vclzw (vector int);
  
+vector signed char vec_vgbbd (vector signed char);
+vector unsigned char vec_vgbbd (vector unsigned char);
+
  vector long long vec_vmaxsd (vector long long, vector long long);
  
  vector unsigned long long vec_vmaxud (vector unsigned long long,
@@ -14248,6 +16353,62 @@ vector long long vec_vupklsw (vector int);
  vector unsigned long long vec_vupklsw (vector int);
  @end smallexample
  
+If the ISA 2.07 additions to the vector/scalar (power8-vector)
+instruction set are available, the following additional functions are
+available for 64-bit targets.  New vector types
+(@var{vector __int128_t} and @var{vector __uint128_t}) are available
+to hold the @var{__int128_t} and @var{__uint128_t} types to use these
+builtins.
+
+The normal vector extract and set operations work on
+@var{vector __int128_t} and @var{vector __uint128_t} types,
+but the index value must be 0.
+
+@smallexample
+vector __int128_t vec_vaddcuq (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vaddcuq (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vadduqm (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vadduqm (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vaddecuq (vector __int128_t, vector __int128_t,
+                                vector __int128_t);
+vector __uint128_t vec_vaddecuq (vector __uint128_t, vector __uint128_t, 
+                                 vector __uint128_t);
+
+vector __int128_t vec_vaddeuqm (vector __int128_t, vector __int128_t,
+                                vector __int128_t);
+vector __uint128_t vec_vaddeuqm (vector __uint128_t, vector __uint128_t, 
+                                 vector __uint128_t);
+
+vector __int128_t vec_vsubecuq (vector __int128_t, vector __int128_t,
+                                vector __int128_t);
+vector __uint128_t vec_vsubecuq (vector __uint128_t, vector __uint128_t, 
+                                 vector __uint128_t);
+
+vector __int128_t vec_vsubeuqm (vector __int128_t, vector __int128_t,
+                                vector __int128_t);
+vector __uint128_t vec_vsubeuqm (vector __uint128_t, vector __uint128_t,
+                                 vector __uint128_t);
+
+vector __int128_t vec_vsubcuq (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vsubcuq (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t vec_vsubuqm (vector __int128_t, vector __int128_t);
+vector __uint128_t vec_vsubuqm (vector __uint128_t, vector __uint128_t);
+
+vector __int128_t __builtin_bcdadd (vector __int128_t, vector __int128_t);
+int __builtin_bcdadd_lt (vector __int128_t, vector __int128_t);
+int __builtin_bcdadd_eq (vector __int128_t, vector __int128_t);
+int __builtin_bcdadd_gt (vector __int128_t, vector __int128_t);
+int __builtin_bcdadd_ov (vector __int128_t, vector __int128_t);
+vector __int128_t __builtin_bcdsub (vector __int128_t, vector __int128_t);
+int __builtin_bcdsub_lt (vector __int128_t, vector __int128_t);
+int __builtin_bcdsub_eq (vector __int128_t, vector __int128_t);
+int __builtin_bcdsub_gt (vector __int128_t, vector __int128_t);
+int __builtin_bcdsub_ov (vector __int128_t, vector __int128_t);
+@end smallexample
+
  If the cryptographic instructions are enabled (@option{-mcrypto} or
  @option{-mcpu=power8}), the following builtins are enabled.
  
@@ -14308,6 +16469,196 @@ The second argument to the @var{__builtin_crypto_vshasigmad} and
  integer that is 0 or 1.  The third argument to these builtin functions
  must be a constant integer in the range of 0 to 15.
  
+@node PowerPC Hardware Transactional Memory Built-in Functions
+@subsection PowerPC Hardware Transactional Memory Built-in Functions
+GCC provides two interfaces for accessing the Hardware Transactional
+Memory (HTM) instructions available on some of the PowerPC family
+of processors (e.g., POWER8).  The two interfaces come in a low level
+interface, consisting of built-in functions specific to PowerPC and a
+higher level interface consisting of inline functions that are common
+between PowerPC and S/390.
+
+@subsubsection PowerPC HTM Low Level Built-in Functions
+
+The following low level built-in functions are available with
+@option{-mhtm} or @option{-mcpu=CPU} where CPU is `power8' or later.
+They all generate the machine instruction that is part of the name.
+
+The HTM built-ins return true or false depending on their success and
+their arguments match exactly the type and order of the associated
+hardware instruction's operands.  Refer to the ISA manual for a
+description of each instruction's operands.
+
+@smallexample
+unsigned int __builtin_tbegin (unsigned int)
+unsigned int __builtin_tend (unsigned int)
+
+unsigned int __builtin_tabort (unsigned int)
+unsigned int __builtin_tabortdc (unsigned int, unsigned int, unsigned int)
+unsigned int __builtin_tabortdci (unsigned int, unsigned int, int)
+unsigned int __builtin_tabortwc (unsigned int, unsigned int, unsigned int)
+unsigned int __builtin_tabortwci (unsigned int, unsigned int, int)
+
+unsigned int __builtin_tcheck (unsigned int)
+unsigned int __builtin_treclaim (unsigned int)
+unsigned int __builtin_trechkpt (void)
+unsigned int __builtin_tsr (unsigned int)
+@end smallexample
+
+In addition to the above HTM built-ins, we have added built-ins for
+some common extended mnemonics of the HTM instructions:
+
+@smallexample
+unsigned int __builtin_tendall (void)
+unsigned int __builtin_tresume (void)
+unsigned int __builtin_tsuspend (void)
+@end smallexample
+
+The following set of built-in functions are available to gain access
+to the HTM specific special purpose registers.
+
+@smallexample
+unsigned long __builtin_get_texasr (void)
+unsigned long __builtin_get_texasru (void)
+unsigned long __builtin_get_tfhar (void)
+unsigned long __builtin_get_tfiar (void)
+
+void __builtin_set_texasr (unsigned long);
+void __builtin_set_texasru (unsigned long);
+void __builtin_set_tfhar (unsigned long);
+void __builtin_set_tfiar (unsigned long);
+@end smallexample
+
+Example usage of these low level built-in functions may look like:
+
+@smallexample
+#include <htmintrin.h>
+
+int num_retries = 10;
+
+while (1)
+  @{
+    if (__builtin_tbegin (0))
+      @{
+        /* Transaction State Initiated.  */
+        if (is_locked (lock))
+          __builtin_tabort (0);
+        ... transaction code...
+        __builtin_tend (0);
+        break;
+      @}
+    else
+      @{
+        /* Transaction State Failed.  Use locks if the transaction
+           failure is "persistent" or we've tried too many times.  */
+        if (num_retries-- <= 0
+            || _TEXASRU_FAILURE_PERSISTENT (__builtin_get_texasru ()))
+          @{
+            acquire_lock (lock);
+            ... non transactional fallback path...
+            release_lock (lock);
+            break;
+          @}
+      @}
+  @}
+@end smallexample
+
+One final built-in function has been added that returns the value of
+the 2-bit Transaction State field of the Machine Status Register (MSR)
+as stored in @code{CR0}.
+
+@smallexample
+unsigned long __builtin_ttest (void)
+@end smallexample
+
+This built-in can be used to determine the current transaction state
+using the following code example:
+
+@smallexample
+#include <htmintrin.h>
+
+unsigned char tx_state = _HTM_STATE (__builtin_ttest ());
+
+if (tx_state == _HTM_TRANSACTIONAL)
+  @{
+    /* Code to use in transactional state.  */
+  @}
+else if (tx_state == _HTM_NONTRANSACTIONAL)
+  @{
+    /* Code to use in non-transactional state.  */
+  @}
+else if (tx_state == _HTM_SUSPENDED)
+  @{
+    /* Code to use in transaction suspended state.  */
+  @}
+@end smallexample
+
+@subsubsection PowerPC HTM High Level Inline Functions
+
+The following high level HTM interface is made available by including
+@code{<htmxlintrin.h>} and using @option{-mhtm} or @option{-mcpu=CPU}
+where CPU is `power8' or later.  This interface is common between PowerPC
+and S/390, allowing users to write one HTM source implementation that
+can be compiled and executed on either system.
+
+@smallexample
+long __TM_simple_begin (void)
+long __TM_begin (void* const TM_buff)
+long __TM_end (void)
+void __TM_abort (void)
+void __TM_named_abort (unsigned char const code)
+void __TM_resume (void)
+void __TM_suspend (void)
+
+long __TM_is_user_abort (void* const TM_buff)
+long __TM_is_named_user_abort (void* const TM_buff, unsigned char *code)
+long __TM_is_illegal (void* const TM_buff)
+long __TM_is_footprint_exceeded (void* const TM_buff)
+long __TM_nesting_depth (void* const TM_buff)
+long __TM_is_nested_too_deep (void* const TM_buff)
+long __TM_is_conflict (void* const TM_buff)
+long __TM_is_failure_persistent (void* const TM_buff)
+long __TM_failure_address (void* const TM_buff)
+long long __TM_failure_code (void* const TM_buff)
+@end smallexample
+
+Using this common set of HTM inline functions, we can create
+a more portable version of the HTM example in the previous
+section that will work on either PowerPC or S/390:
+
+@smallexample
+#include <htmxlintrin.h>
+
+int num_retries = 10;
+TM_buff_type TM_buff;
+
+while (1)
+  @{
+    if (__TM_begin (TM_buff))
+      @{
+        /* Transaction State Initiated.  */
+        if (is_locked (lock))
+          __TM_abort ();
+        ... transaction code...
+        __TM_end ();
+        break;
+      @}
+    else
+      @{
+        /* Transaction State Failed.  Use locks if the transaction
+           failure is "persistent" or we've tried too many times.  */
+        if (num_retries-- <= 0
+            || __TM_is_failure_persistent (TM_buff))
+          @{
+            acquire_lock (lock);
+            ... non transactional fallback path...
+            release_lock (lock);
+            break;
+          @}
+      @}
+  @}
+@end smallexample
+
  @node RX Built-in Functions
  @subsection RX Built-in Functions
  GCC supports some of the RX instructions which cannot be expressed in
@@ -14530,7 +16881,7 @@ depth is returned as integer value.  For a nesting depth of 0 the code
  is not executed as part of a transaction.
  @end deftypefn
  
-@deftypefn {Built-in Function} void __builtin_non_tx_store (unsigned long long *, unsigned long long)
+@deftypefn {Built-in Function} void __builtin_non_tx_store (uint64_t *, uint64_t)
  
  Generates the @code{ntstg} machine instruction.  The second argument
  is written to the first argument's location.  The store operation will
@@ -14572,6 +16923,15 @@ int get_tcb_value (void)
  @end smallexample
  @end deftypefn
  
+@deftypefn {Built-in Function} {unsigned int} __builtin_sh_get_fpscr (void)
+Returns the value that is currently set in the @samp{FPSCR} register.
+@end deftypefn
+
+@deftypefn {Built-in Function} {void} __builtin_sh_set_fpscr (unsigned int @var{val})
+Sets the @samp{FPSCR} register to the specified value @var{val}, while
+preserving the current values of the FR, SZ and PR bits.
+@end deftypefn
+
  @node SPARC VIS Built-in Functions
  @subsection SPARC VIS Built-in Functions
  
@@ -14942,6 +17302,7 @@ for further explanation.
  * Visibility Pragmas::
  * Push/Pop Macro Pragmas::
  * Function Specific Option Pragmas::
+* Loop-Specific Pragmas::
  @end menu
  
  @node ARM Pragmas
@@ -15162,11 +17523,9 @@ adding a call to the @code{.init} section.
  @node Symbol-Renaming Pragmas
  @subsection Symbol-Renaming Pragmas
  
-For compatibility with the Solaris system headers, GCC
-supports two @code{#pragma} directives that change the name used in
-assembly for a given declaration. To get this effect
-on all platforms supported by GCC, use the asm labels extension (@pxref{Asm
-Labels}).
+GCC supports a @code{#pragma} directive that changes the name used in
+assembly for a given declaration. This effect can also be achieved
+using the asm labels extension (@pxref{Asm Labels}).
  
  @table @code
  @item redefine_extname @var{oldname} @var{newname}
@@ -15178,17 +17537,17 @@ is defined if this pragma is available (currently on all platforms).
  @end table
  
  This pragma and the asm labels extension interact in a complicated
-manner.  Here are some corner cases you may want to be aware of.
+manner.  Here are some corner cases you may want to be aware of:
  
  @enumerate
-@item Both pragmas silently apply only to declarations with external
+@item This pragma silently applies only to declarations with external
  linkage.  Asm labels do not have this restriction.
  
-@item In C++, both pragmas silently apply only to declarations with
+@item In C++, this pragma silently applies only to declarations with
  ``C'' linkage.  Again, asm labels do not have this restriction.
  
-@item If any of the three ways of changing the assembly name of a
-declaration is applied to a declaration whose assembly name has
+@item If either of the ways of changing the assembly name of a
+declaration is applied to a declaration whose assembly name has
  already been determined (either by a previous use of one of these
  features, or because the compiler needed the assembly name in order to
  generate code), and the new name is different, a warning issues and
@@ -15354,8 +17713,7 @@ TODO - Remember to fix this}.
  
  This pragma allows the user to set the visibility for multiple
  declarations without having to give each a visibility attribute
-@xref{Function Attributes}, for more information about visibility and
-the attribute syntax.
+(@pxref{Function Attributes}).
  
  In C++, @samp{#pragma GCC visibility} affects only namespace-scope
  declarations.  Class members and template specializations are not
@@ -15415,9 +17773,8 @@ function.  The parenthesis around the options is optional.
  @xref{Function Attributes}, for more information about the
  @code{target} attribute and the attribute syntax.
  
-The @code{#pragma GCC target} attribute is not implemented in GCC versions earlier
-than 4.4 for the i386/x86_64 and 4.6 for the PowerPC back ends.  At
-present, it is not implemented for other back ends.
+The @code{#pragma GCC target} pragma is presently implemented for
+i386/x86_64, PowerPC, and Nios II targets only.
  @end table
  
  @table @code
@@ -15464,6 +17821,48 @@ The @samp{#pragma GCC reset_options} pragma is not implemented in GCC
  versions earlier than 4.4.
  @end table
  
+@node Loop-Specific Pragmas
+@subsection Loop-Specific Pragmas
+
+@table @code
+@item #pragma GCC ivdep
+@cindex pragma GCC ivdep
+@end table
+
+With this pragma, the programmer asserts that there are no loop-carried
+dependencies which would prevent consecutive iterations of
+the following loop from executing concurrently with SIMD
+(single instruction multiple data) instructions.
+
+For example, the compiler can only unconditionally vectorize the following
+loop with the pragma:
+
+@smallexample
+void foo (int n, int *a, int *b, int *c)
+@{
+  int i, j;
+#pragma GCC ivdep
+  for (i = 0; i < n; ++i)
+    a[i] = b[i] + c[i];
+@}
+@end smallexample
+
+@noindent
+In this example, using the @code{restrict} qualifier would have the same
+effect. In the following example, that would not be possible. Assume
+@math{k < -m} or @math{k >= m}. Only with the pragma does the compiler know
+that it can unconditionally vectorize the following loop:
+
+@smallexample
+void ignore_vec_dep (int *a, int k, int c, int m)
+@{
+#pragma GCC ivdep
+  for (int i = 0; i < m; i++)
+    a[i] = a[i + k] * c;
+@}
+@end smallexample
+
+
  @node Unnamed Fields
  @section Unnamed struct/union fields within structs/unions
  @cindex @code{struct}
@@ -16259,7 +18658,7 @@ syntax to support instantiation of the compiler support data for a
  template class (i.e.@: the vtable) without instantiating any of its
  members (with @code{inline}), and instantiation of only the static data
  members of a template class, without the support data or member
-functions (with (@code{static}):
+functions (with @code{static}):
  
  @smallexample
  extern template int max (int, int);
@@ -16339,6 +18738,10 @@ unimportant.
  A redeclaration of a function or class must not add new ABI tags,
  since doing so would change the mangled name.
  
+The ABI tags apply to a name, so all instantiations and
+specializations of a template have the same tags.  The attribute will
+be ignored if applied to an explicit specialization or instantiation.
+
  The @option{-Wabi-tag} flag enables a warning about a class which does
  not have all the ABI tags used by its subobjects and virtual functions; for users with code
  that needs to coexist with an earlier ABI, using this option can help
@@ -16489,7 +18892,7 @@ namespace std @{
      template <class T> struct A @{ @};
    @}
    using namespace debug __attribute ((__strong__));
-  template <> struct A<int> @{ @};   // @r{ok to specialize}
+  template <> struct A<int> @{ @};   // @r{OK to specialize}
  
    template <class T> void f (A<T>);
  @}
@@ -16767,3 +19170,6 @@ implicitly scoped inside a C language scope.  Also, an empty prototype
  @code{()} is treated as an unspecified number of arguments, rather
  than no arguments, as C++ demands.
  @end table
+
+@c  LocalWords:  emph deftypefn builtin ARCv2EM SIMD builtins msimd
+@c  LocalWords:  typedef v4si v8hi DMA dma vdiwr vdowr followign