;; GCC machine description for CRIS atomic memory sequences.
;; Copyright (C) 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The CRIS atomic support yields code in three flavors, depending on
;; the CPU for which code is generated:
;;
;; - Plain old CRIS v0 (..v8)
;; - CRIS v10 (as used in ETRAX 100 LX)
;; - CRIS v32 (as used in ETRAX FS)
;;
;; The last two alternatives are similar, of LL/SC type.  They may
;; fail for reasons other than contention: an exception, a cache miss
;; or a bus request from other parts of the system.  The difference
;; between them is just in which condition codes are used to track the
;; LL and the success or failure of the store.  See the chapter on
;; integral read-write operations, chapter 1.13 in the "ETRAX 100LX
;; Programmer's Manual",
;; <http://www.axis.com/files/tech_notes/etrax_100lx_prog_man-050519.pdf>
;; and chapter 2.1 in the "ETRAX FS Designer's Reference",
;; <http://www.axis.com/files/manuals/etrax_fs_des_ref-070821.pdf>.
;; Note that the datum being stored has to be contained fully within a
;; cache-line to be integral.  A failure to store the data integrally
;; will be flagged, but the store may still have happened in part,
;; which translates most usefully into the data having to be
;; "naturally aligned" to work.  Natural alignment is verified in the
;; generated code and will by default cause a "break 8" to be executed
;; for unaligned pointers, or optionally a call to abort().  Beware
;; that the options -m16bit and -m8bit may cause data to be unaligned
;; where it would otherwise be aligned.  Data has a better chance of
;; being aligned if it is declared with e.g.
;; __attribute__ ((__aligned__ (4))).
;;
;; The "plain old v0..v8" flavor just assumes there is a single CPU in
;; the system, that no other parts of the system have access to the
;; memory used for atomic accesses and, since there is no user mode
;; without access to the interrupt flags (another assumption), it just
;; turns off interrupts while doing the access.  Here, alignment is
;; neither required nor asserted.
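
;; As an illustrative sketch only (not part of the patterns below): a
;; C-level use that ends up in these expanders, declaring the datum
;; with the explicit alignment attribute recommended above.  The
;; variable and function names are invented for the example.
;;
;;   static int counter __attribute__ ((__aligned__ (4)));
;;
;;   int
;;   fetch_then_bump (void)
;;   {
;;     /* Expands through the atomic_fetch_add pattern below; when
;;        TARGET_TRAP_UNALIGNED_ATOMIC is in effect, the generated
;;        code first verifies that &counter is naturally aligned.  */
;;     return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
;;   }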

(define_c_enum ""
  [
   CRIS_UNSPEC_ATOMIC_OP
   CRIS_UNSPEC_ATOMIC_SWAP_MEM
   CRIS_UNSPEC_ATOMIC_SWAP_BOOL
  ])

(define_constants [(CRIS_CCR_INTERRUPT_BIT 5)])

;; We use "mult" as a placeholder for "nand" (which does not have a
;; separate binary rtx operation) so we can use an iterator in the
;; define_expand and define_insn and avoid having a separate
;; mostly-identical copy.  You will see the "mult" operator in rtl
;; dumps, but it shouldn't matter as its use has one of its operands
;; inside an unspec_volatile.

(define_code_iterator atomic_op [plus minus ior and xor mult])

(define_code_attr atomic_op_name
 [(plus "add") (minus "sub") (and "and") (ior "or") (xor "xor") (mult "nand")])

;; Pairs of these are used to insert the "not" after the "and" for nand.
(define_code_attr atomic_op_mnem_pre ;; Upper-case only to simplify testing.
 [(plus "Add.d") (minus "Sub.d") (and "And.d") (ior "Or.d") (xor "Xor")
  (mult "aNd.d")])
(define_code_attr atomic_op_mnem_post_op3
 [(plus "") (minus "") (and "") (ior "") (xor "") (mult "not %3\;")])
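
;; For reference, "nand" here has the usual __atomic_fetch_nand
;; meaning: the new memory value is ~(old & operand), and the "not %3"
;; post-op supplies the inversion that the "and" mnemonic alone lacks.
;; A hypothetical C-level equivalent of one round of the operation
;; (names invented for the example):
;;
;;   unsigned int
;;   nand_step (unsigned int old, unsigned int value)
;;   {
;;     return ~(old & value);
;;   }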

(define_expand "atomic_fetch_<atomic_op_name><mode>"
  [(match_operand:BWD 0 "register_operand")
   (match_operand:BWD 1 "memory_operand")
   (match_operand:BWD 2 "register_operand")
   (match_operand 3)
   (atomic_op:BWD (match_dup 0) (match_dup 1))]
  ""
{
  if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
    cris_emit_trap_for_misalignment (operands[1]);

  expand_mem_thread_fence (INTVAL (operands[3]));
  emit_insn (gen_cris_atomic_fetch_<atomic_op_name><mode>_1 (operands[0],
                                                             operands[1],
                                                             operands[2]));
  expand_mem_thread_fence (INTVAL (operands[3]));
  DONE;
})
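
;; A sketch of how this expander is reached (illustrative only; the
;; names are invented): the builtin's memory-model argument arrives as
;; operand 3, and the expander brackets the operation with the fences
;; that expand_mem_thread_fence emits for that model.
;;
;;   static unsigned int flags __attribute__ ((__aligned__ (4)));
;;
;;   unsigned int
;;   set_flag_bits (unsigned int mask)
;;   {
;;     /* __ATOMIC_SEQ_CST becomes operands[3] above.  */
;;     return __atomic_fetch_or (&flags, mask, __ATOMIC_SEQ_CST);
;;   }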

(define_insn "cris_atomic_fetch_<atomic_op_name><mode>_1"
  [(set (match_operand:BWD 1 "memory_operand" "+Q")
        (atomic_op:BWD
         (unspec_volatile:BWD [(match_dup 1)] CRIS_UNSPEC_ATOMIC_OP)
         ;; FIXME: relax this for plus, minus, and, ior.
         (match_operand:BWD 2 "register_operand" "r")))
   (set (match_operand:BWD 0 "register_operand" "=&r")
        (match_dup 1))
   (clobber (match_scratch:SI 3 "=&r"))]
  ""
{
  /* Can't be too sure; better ICE if this happens.  */
  gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1]));

  if (TARGET_V32)
    return
      "clearf p\n"
      ".Lsync.%=:\;"
      "move<m> %1,%0\;"
      "move.d %0,%3\;"
      "<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
      "ax\;"
      "move<m> %3,%1\;"
      "bcs .Lsync.%=\;"
      "clearf p";
  else if (cris_cpu_version == 10)
    return
      "clearf\n"
      ".Lsync.%=:\;"
      "move<m> %1,%0\;"
      "move.d %0,%3\;"
      "<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
      "ax\;"
      "move<m> %3,%1\;"
      "bwf .Lsync.%=\;"
      "clearf";
  else
    {
      /* This one is for CRIS versions without load-locked-store-conditional
         machinery; assume single-core-non-shared-memory without user
         mode/supervisor mode distinction, and just disable interrupts
         while performing the operation.
         Rather than making this pattern more complex by freeing another
         register or stack position to save condition codes (the value
         of the interrupt-enabled bit), we check whether interrupts were
         enabled before we disabled them and branch to a version
         with/without afterwards re-enabling them.  */
      rtx ops[5];

      /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT.  */
      memcpy (ops, operands, sizeof (ops));
      ops[4] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);

      output_asm_insn ("move $ccr,%3\;"
                       "di\;"
                       "move<m> %1,%0\;"
                       "btstq %4,%3",
                       ops);
      return
        "bmi .Lsync.irqon.%=\;"
        "move.d %0,%3\;"

        "<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
        "ba .Lsync.irqoff.%=\;"
        "move<m> %3,%1\n"

        ".Lsync.irqon.%=:\;"
        "<atomic_op_mnem_pre> %2,%3\;<atomic_op_mnem_post_op3>"
        "move<m> %3,%1\;"
        "ei\n"
        ".Lsync.irqoff.%=:";
    }
})

;; This pattern is more-or-less assumed to always exist if any of the
;; other atomic patterns exist (see e.g. comment at the
;; can_compare_and_swap_p call in omp-low.c, 4.8 era).  We'd slightly
;; prefer atomic_exchange<mode> over this, but having both would be
;; redundant.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "register_operand")
   (match_operand:BWD 1 "register_operand")
   (match_operand:BWD 2 "memory_operand")
   (match_operand:BWD 3 "general_operand")
   (match_operand:BWD 4 "register_operand")
   (match_operand 5)
   (match_operand 6)
   (match_operand 7)]
  ""
{
  if (<MODE>mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC)
    cris_emit_trap_for_misalignment (operands[2]);

  expand_mem_thread_fence (INTVAL (operands[6]));
  emit_insn (gen_cris_atomic_compare_and_swap<mode>_1 (operands[0],
                                                       operands[1],
                                                       operands[2],
                                                       operands[3],
                                                       operands[4]));
  expand_mem_thread_fence (INTVAL (operands[6]));
  DONE;
})
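
;; Illustrative only: a compare-and-exchange builtin like the one
;; below is what normally reaches this expander.  Operand 0 receives
;; the success flag, operand 1 the loaded value, operands 2-4 are the
;; memory location, expected value and desired value, and of operands
;; 5-7 (weak flag and the two memory models) only the success model,
;; operand 6, is used here for the surrounding fences.  The names are
;; invented for the example.
;;
;;   static int lockword __attribute__ ((__aligned__ (4)));
;;
;;   int
;;   try_lock (void)
;;   {
;;     int expected = 0;
;;     return __atomic_compare_exchange_n (&lockword, &expected, 1,
;;                                         0 /* weak */,
;;                                         __ATOMIC_SEQ_CST,
;;                                         __ATOMIC_SEQ_CST);
;;   }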

(define_insn "cris_atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SI 0 "register_operand" "=&r")
        (unspec_volatile:SI
         [(match_operand:BWD 2 "memory_operand" "+Q")
          (match_operand:BWD 3 "general_operand" "g")]
         CRIS_UNSPEC_ATOMIC_SWAP_BOOL))
   (set (match_operand:BWD 1 "register_operand" "=&r") (match_dup 2))
   (set (match_dup 2)
        (unspec_volatile:BWD
         [(match_dup 2)
          (match_dup 3)
          (match_operand:BWD 4 "register_operand" "r")]
         CRIS_UNSPEC_ATOMIC_SWAP_MEM))]
  ""
{
  if (TARGET_V32)
    return
      "clearf p\n"
      ".Lsync.repeat.%=:\;"
      "move<m> %2,%1\;"
      "cmp<m> %3,%1\;"
      "bne .Lsync.after.%=\;"
      "seq %0\;"

      "ax\;"
      "move<m> %4,%2\;"
      "bcs .Lsync.repeat.%=\;"
      "clearf p\n"
      ".Lsync.after.%=:";
  else if (cris_cpu_version == 10)
    return
      "clearf\n"
      ".Lsync.repeat.%=:\;"
      "move<m> %2,%1\;"
      "cmp<m> %3,%1\;"
      "bne .Lsync.after.%=\;"
      "seq %0\;"

      "ax\;"
      "move<m> %4,%2\;"
      "bwf .Lsync.repeat.%=\;"
      "clearf\n"
      ".Lsync.after.%=:";
  else
    {
      /* This one is for CRIS versions without load-locked-store-conditional
         machinery; assume single-core-non-shared-memory without user
         mode/supervisor mode distinction, and just disable interrupts
         while performing the operation.
         Rather than making this pattern more complex by freeing another
         register or stack position to save condition codes (the value
         of the interrupt-enabled bit), we check whether interrupts were
         enabled before we disabled them and branch to a version
         with/without afterwards re-enabling them.  */
      rtx ops[4];

      /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT.  */
      memcpy (ops, operands, sizeof (ops));
      ops[3] = GEN_INT (CRIS_CCR_INTERRUPT_BIT);

      output_asm_insn ("move $ccr,%0\;"
                       "di\;"
                       "move<m> %2,%1\;"
                       "btstq %3,%0",
                       ops);
      return
        "bmi .Lsync.irqon.%=\;"
        "nop\;"

        "cmp<m> %3,%1\;"
        "bne .Lsync.after.%=\;"
        "seq %0\;"
        "ba .Lsync.after.%=\;"
        "move<m> %4,%2\n"

        ".Lsync.irqon.%=:\;"
        "cmp<m> %3,%1\;"
        "bne .Lsync.after.%=\;"
        "seq %0\;"
        "move<m> %4,%2\;"
        "ei\n"
        ".Lsync.after.%=:";
    }
})