]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/c-family/c-indentation.c
re PR target/70059 (Invalid codegen on AVX-512 when using _mm512_inserti64x4(x, y...
[thirdparty/gcc.git] / gcc / c-family / c-indentation.c
CommitLineData
c3388e62 1/* Implementation of -Wmisleading-indentation
818ab71a 2 Copyright (C) 2015-2016 Free Software Foundation, Inc.
c3388e62
DM
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
2adfab87 24#include "c-common.h"
992118a1 25#include "c-indentation.h"
c3388e62
DM
26
27extern cpp_options *cpp_opts;
28
29/* Convert libcpp's notion of a column (a 1-based char count) to
30 the "visual column" (0-based column, respecting tabs), by reading the
31 relevant line.
1a1e101f 32
c3388e62 33 Returns true if a conversion was possible, writing the result to OUT,
1a1e101f
PP
34 otherwise returns false. If FIRST_NWS is not NULL, then write to it
35 the visual column corresponding to the first non-whitespace character
36 on the line. */
c3388e62
DM
37
38static bool
c7df95d8 39get_visual_column (expanded_location exploc, location_t loc,
1a1e101f 40 unsigned int *out,
c589e975 41 unsigned int *first_nws)
c3388e62 42{
c7df95d8
DM
43 /* PR c++/68819: if the column number is zero, we presumably
44 had a location_t > LINE_MAP_MAX_LOCATION_WITH_COLS, and so
45 we have no column information.
46 Act as if no conversion was possible, triggering the
47 error-handling path in the caller. */
48 if (!exploc.column)
49 {
50 static bool issued_note = false;
51 if (!issued_note)
52 {
53 /* Notify the user the first time this happens. */
54 issued_note = true;
55 inform (loc,
56 "-Wmisleading-indentation is disabled from this point"
57 " onwards, since column-tracking was disabled due to"
58 " the size of the code/headers");
59 }
60 return false;
61 }
62
c3388e62 63 int line_len;
31bdd08a
DM
64 const char *line = location_get_source_line (exploc.file, exploc.line,
65 &line_len);
c3388e62
DM
66 if (!line)
67 return false;
68 unsigned int vis_column = 0;
69 for (int i = 1; i < exploc.column; i++)
70 {
71 unsigned char ch = line[i - 1];
1a1e101f
PP
72
73 if (first_nws != NULL && !ISSPACE (ch))
74 {
75 *first_nws = vis_column;
76 first_nws = NULL;
77 }
78
c3388e62
DM
79 if (ch == '\t')
80 {
81 /* Round up to nearest tab stop. */
82 const unsigned int tab_width = cpp_opts->tabstop;
83 vis_column = ((vis_column + tab_width) / tab_width) * tab_width;
84 }
85 else
86 vis_column++;
87 }
88
1a1e101f
PP
89 if (first_nws != NULL)
90 *first_nws = vis_column;
c3388e62 91
1a1e101f 92 *out = vis_column;
c3388e62
DM
93 return true;
94}
95
96/* Does the given source line appear to contain a #if directive?
97 (or #ifdef/#ifndef). Ignore the possibility of it being inside a
98 comment, for simplicity.
99 Helper function for detect_preprocessor_logic. */
100
101static bool
102line_contains_hash_if (const char *file, int line_num)
103{
c3388e62 104 int line_len;
31bdd08a 105 const char *line = location_get_source_line (file, line_num, &line_len);
c3388e62
DM
106 if (!line)
107 return false;
108
109 int idx;
110
111 /* Skip leading whitespace. */
112 for (idx = 0; idx < line_len; idx++)
113 if (!ISSPACE (line[idx]))
114 break;
115 if (idx == line_len)
116 return false;
117
118 /* Require a '#' character. */
119 if (line[idx] != '#')
120 return false;
121 idx++;
122
123 /* Skip whitespace. */
124 while (idx < line_len)
125 {
126 if (!ISSPACE (line[idx]))
127 break;
128 idx++;
129 }
130
131 /* Match #if/#ifdef/#ifndef. */
132 if (idx + 2 <= line_len)
133 if (line[idx] == 'i')
134 if (line[idx + 1] == 'f')
135 return true;
136
137 return false;
138}
139
140
141/* Determine if there is preprocessor logic between
142 BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't
143 issue a warning for cases like this:
144
145 if (flagA)
146 foo ();
147 ^ BODY_EXPLOC
148 #if SOME_CONDITION_THAT_DOES_NOT_HOLD
149 if (flagB)
150 #endif
151 bar ();
152 ^ NEXT_STMT_EXPLOC
153
154 despite "bar ();" being visually aligned below "foo ();" and
155 being (as far as the parser sees) the next token.
156
157 Return true if such logic is detected. */
158
159static bool
160detect_preprocessor_logic (expanded_location body_exploc,
161 expanded_location next_stmt_exploc)
162{
163 gcc_assert (next_stmt_exploc.file == body_exploc.file);
164 gcc_assert (next_stmt_exploc.line > body_exploc.line);
165
166 if (next_stmt_exploc.line - body_exploc.line < 4)
167 return false;
168
169 /* Is there a #if/#ifdef/#ifndef directive somewhere in the lines
170 between the given locations?
171
172 This is something of a layering violation, but by necessity,
173 given the nature of what we're testing for. For example,
174 in theory we could be fooled by a #if within a comment, but
175 it's unlikely to matter. */
176 for (int line = body_exploc.line + 1; line < next_stmt_exploc.line; line++)
177 if (line_contains_hash_if (body_exploc.file, line))
178 return true;
179
180 /* Not found. */
181 return false;
182}
183
184
185/* Helper function for warn_for_misleading_indentation; see
186 description of that function below. */
187
188static bool
992118a1
PP
189should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
190 const token_indent_info &body_tinfo,
191 const token_indent_info &next_tinfo)
c3388e62 192{
992118a1
PP
193 location_t guard_loc = guard_tinfo.location;
194 location_t body_loc = body_tinfo.location;
195 location_t next_stmt_loc = next_tinfo.location;
196
8ebca419 197 enum cpp_ttype body_type = body_tinfo.type;
992118a1
PP
198 enum cpp_ttype next_tok_type = next_tinfo.type;
199
c3388e62
DM
200 /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC
201 if either are within macros. */
202 if (linemap_location_from_macro_expansion_p (line_table, body_loc)
203 || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc))
204 return false;
205
206 /* Don't attempt to compare indentation if #line or # 44 "file"-style
207 directives are present, suggesting generated code.
208
209 All bets are off if these are present: the file that the #line
210 directive could have an entirely different coding layout to C/C++
211 (e.g. .md files).
212
213 To determine if a #line is present, in theory we could look for a
214 map with reason == LC_RENAME_VERBATIM. However, if there has
215 subsequently been a long line requiring a column number larger than
216 that representable by the original LC_RENAME_VERBATIM map, then
217 we'll have a map with reason LC_RENAME.
218 Rather than attempting to search all of the maps for a
219 LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one
220 is seen, and we check for the flag here.
221 */
222 if (line_table->seen_line_directive)
223 return false;
224
21efdd80
PP
225 /* We can't usefully warn about do-while statements since the bodies of these
226 statements are always explicitly delimited at both ends, so control flow is
227 quite obvious. */
228 if (guard_tinfo.keyword == RID_DO)
229 return false;
230
992118a1
PP
231 /* If the token following the body is a close brace or an "else"
232 then while indentation may be sloppy, there is not much ambiguity
233 about control flow, e.g.
234
235 if (foo) <- GUARD
236 bar (); <- BODY
237 else baz (); <- NEXT
238
239 {
240 while (foo) <- GUARD
241 bar (); <- BODY
242 } <- NEXT
243 baz ();
244 */
245 if (next_tok_type == CPP_CLOSE_BRACE
246 || next_tinfo.keyword == RID_ELSE)
c3388e62
DM
247 return false;
248
8ebca419
PP
249 /* Likewise, if the body of the guard is a compound statement then control
250 flow is quite visually explicit regardless of the code's possibly poor
251 indentation, e.g.
252
253 while (foo) <- GUARD
254 { <- BODY
255 bar ();
256 }
257 baz (); <- NEXT
258
259 Things only get muddy when the body of the guard does not have
260 braces, e.g.
261
262 if (foo) <- GUARD
263 bar (); <- BODY
264 baz (); <- NEXT
265 */
266 if (body_type == CPP_OPEN_BRACE)
267 return false;
268
c3388e62
DM
269 /* Don't warn here about spurious semicolons. */
270 if (next_tok_type == CPP_SEMICOLON)
271 return false;
272
6ac48155
DM
273 expanded_location body_exploc = expand_location (body_loc);
274 expanded_location next_stmt_exploc = expand_location (next_stmt_loc);
8ebca419 275 expanded_location guard_exploc = expand_location (guard_loc);
c3388e62
DM
276
277 /* They must be in the same file. */
278 if (next_stmt_exploc.file != body_exploc.file)
279 return false;
280
281 /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider
282 the location of the guard.
283
284 Cases where we want to issue a warning:
285
286 if (flag)
287 foo (); bar ();
288 ^ WARN HERE
289
290 if (flag) foo (); bar ();
291 ^ WARN HERE
292
8ebca419
PP
293
294 if (flag) ; {
295 ^ WARN HERE
296
297 if (flag)
298 ; {
299 ^ WARN HERE
300
c3388e62
DM
301 Cases where we don't want to issue a warning:
302
303 various_code (); if (flag) foo (); bar (); more_code ();
304 ^ DON'T WARN HERE. */
305 if (next_stmt_exploc.line == body_exploc.line)
306 {
c3388e62
DM
307 if (guard_exploc.file != body_exploc.file)
308 return true;
309 if (guard_exploc.line < body_exploc.line)
310 /* The guard is on a line before a line that contains both
311 the body and the next stmt. */
312 return true;
313 else if (guard_exploc.line == body_exploc.line)
314 {
315 /* They're all on the same line. */
316 gcc_assert (guard_exploc.file == next_stmt_exploc.file);
317 gcc_assert (guard_exploc.line == next_stmt_exploc.line);
1a1e101f
PP
318 unsigned int guard_vis_column;
319 unsigned int guard_line_first_nws;
c7df95d8 320 if (!get_visual_column (guard_exploc, guard_loc,
1a1e101f
PP
321 &guard_vis_column,
322 &guard_line_first_nws))
323 return false;
c3388e62
DM
324 /* Heuristic: only warn if the guard is the first thing
325 on its line. */
1a1e101f 326 if (guard_vis_column == guard_line_first_nws)
c3388e62
DM
327 return true;
328 }
329 }
330
331 /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider
332 their relative locations, and of the guard.
333
334 Cases where we want to issue a warning:
335 if (flag)
336 foo ();
337 bar ();
338 ^ WARN HERE
339
340 Cases where we don't want to issue a warning:
341 if (flag)
342 foo ();
343 bar ();
344 ^ DON'T WARN HERE (autogenerated code?)
345
346 if (flagA)
347 foo ();
348 #if SOME_CONDITION_THAT_DOES_NOT_HOLD
349 if (flagB)
350 #endif
351 bar ();
352 ^ DON'T WARN HERE
6ac48155 353
8ebca419
PP
354 if (flag)
355 ;
356 foo ();
357 ^ DON'T WARN HERE
c589e975
DM
358
359 #define emit
360 if (flag)
361 foo ();
362 emit bar ();
363 ^ DON'T WARN HERE
364
c3388e62
DM
365 */
366 if (next_stmt_exploc.line > body_exploc.line)
367 {
368 /* Determine if GUARD_LOC and NEXT_STMT_LOC are aligned on the same
369 "visual column"... */
370 unsigned int next_stmt_vis_column;
c589e975 371 unsigned int next_stmt_line_first_nws;
c3388e62 372 unsigned int body_vis_column;
8ebca419 373 unsigned int body_line_first_nws;
d5398058
PP
374 unsigned int guard_vis_column;
375 unsigned int guard_line_first_nws;
c3388e62
DM
376 /* If we can't determine it, don't issue a warning. This is sometimes
377 the case for input files containing #line directives, and these
378 are often for autogenerated sources (e.g. from .md files), where
379 it's not clear that it's meaningful to look at indentation. */
c7df95d8
DM
380 if (!get_visual_column (next_stmt_exploc, next_stmt_loc,
381 &next_stmt_vis_column,
c589e975 382 &next_stmt_line_first_nws))
c3388e62 383 return false;
c7df95d8 384 if (!get_visual_column (body_exploc, body_loc,
8ebca419
PP
385 &body_vis_column,
386 &body_line_first_nws))
c3388e62 387 return false;
c7df95d8 388 if (!get_visual_column (guard_exploc, guard_loc,
d5398058
PP
389 &guard_vis_column,
390 &guard_line_first_nws))
391 return false;
392
c589e975
DM
393 /* If the line where the next stmt starts has non-whitespace
394 on it before the stmt, then don't warn:
395 #define emit
396 if (flag)
397 foo ();
398 emit bar ();
399 ^ DON'T WARN HERE
400 (PR c/69122). */
401 if (next_stmt_line_first_nws < next_stmt_vis_column)
402 return false;
403
8ebca419
PP
404 if ((body_type != CPP_SEMICOLON
405 && next_stmt_vis_column == body_vis_column)
406 /* As a special case handle the case where the body is a semicolon
407 that may be hidden by a preceding comment, e.g. */
408
409 // if (p)
410 // /* blah */;
411 // foo (1);
412
413 /* by looking instead at the column of the first non-whitespace
414 character on the body line. */
415 || (body_type == CPP_SEMICOLON
416 && body_exploc.line > guard_exploc.line
417 && body_line_first_nws != body_vis_column
d5398058 418 && next_stmt_vis_column > guard_line_first_nws))
c3388e62 419 {
8ebca419
PP
420 /* Don't warn if they are aligned on the same column
421 as the guard itself (suggesting autogenerated code that doesn't
422 bother indenting at all). We consider the column of the first
423 non-whitespace character on the guard line instead of the column
424 of the actual guard token itself because it is more sensible.
425 Consider:
426
427 if (p) {
428 foo (1);
429 } else // GUARD
430 foo (2); // BODY
431 foo (3); // NEXT
432
433 and:
434
435 if (p)
436 foo (1);
437 } else // GUARD
438 foo (2); // BODY
439 foo (3); // NEXT
440
441 If we just used the column of the guard token, we would warn on
442 the first example and not warn on the second. But we want the
443 exact opposite to happen: to not warn on the first example (which
444 is probably autogenerated) and to warn on the second (whose
445 indentation is misleading). Using the column of the first
446 non-whitespace character on the guard line makes that
447 happen. */
8ebca419 448 if (guard_line_first_nws == body_vis_column)
c3388e62
DM
449 return false;
450
8ebca419
PP
451 /* We may have something like:
452
453 if (p)
454 {
455 foo (1);
456 } else // GUARD
457 foo (2); // BODY
458 foo (3); // NEXT
459
460 in which case the columns are not aligned but the code is not
461 misleadingly indented. If the column of the body is less than
462 that of the guard line then don't warn. */
463 if (body_vis_column < guard_line_first_nws)
6ac48155
DM
464 return false;
465
c3388e62
DM
466 /* Don't warn if there is multiline preprocessor logic between
467 the two statements. */
468 if (detect_preprocessor_logic (body_exploc, next_stmt_exploc))
469 return false;
470
471 /* Otherwise, they are visually aligned: issue a warning. */
472 return true;
473 }
8ebca419
PP
474
475 /* Also issue a warning for code having the form:
476
477 if (flag);
478 foo ();
479
480 while (flag);
481 {
482 ...
483 }
484
485 for (...);
486 {
487 ...
488 }
489
490 if (flag)
491 ;
492 else if (flag);
493 foo ();
494
495 where the semicolon at the end of each guard is most likely spurious.
496
497 But do not warn on:
498
499 for (..);
500 foo ();
501
502 where the next statement is aligned with the guard.
503 */
504 if (body_type == CPP_SEMICOLON)
505 {
506 if (body_exploc.line == guard_exploc.line)
507 {
8ebca419
PP
508 if (next_stmt_vis_column > guard_line_first_nws
509 || (next_tok_type == CPP_OPEN_BRACE
6b95d7cc 510 && next_stmt_vis_column == guard_line_first_nws))
8ebca419
PP
511 return true;
512 }
513 }
c3388e62
DM
514 }
515
516 return false;
517}
518
992118a1
PP
519/* Return the string identifier corresponding to the given guard token. */
520
521static const char *
522guard_tinfo_to_string (const token_indent_info &guard_tinfo)
523{
524 switch (guard_tinfo.keyword)
525 {
526 case RID_FOR:
527 return "for";
528 case RID_ELSE:
529 return "else";
530 case RID_IF:
531 return "if";
532 case RID_WHILE:
533 return "while";
534 case RID_DO:
535 return "do";
536 default:
537 gcc_unreachable ();
538 }
539}
540
c3388e62
DM
541/* Called by the C/C++ frontends when we have a guarding statement at
542 GUARD_LOC containing a statement at BODY_LOC, where the block wasn't
543 written using braces, like this:
544
545 if (flag)
546 foo ();
547
548 along with the location of the next token, at NEXT_STMT_LOC,
549 so that we can detect followup statements that are within
550 the same "visual block" as the guarded statement, but which
551 aren't logically grouped within the guarding statement, such
552 as:
553
554 GUARD_LOC
555 |
556 V
557 if (flag)
558 foo (); <- BODY_LOC
559 bar (); <- NEXT_STMT_LOC
560
561 In the above, "bar ();" isn't guarded by the "if", but
562 is indented to misleadingly suggest that it is in the same
563 block as "foo ();".
564
565 GUARD_KIND identifies the kind of clause e.g. "if", "else" etc. */
566
567void
992118a1
PP
568warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
569 const token_indent_info &body_tinfo,
570 const token_indent_info &next_tinfo)
c3388e62 571{
773ce42e
DM
572 /* Early reject for the case where -Wmisleading-indentation is disabled,
573 to avoid doing work only to have the warning suppressed inside the
574 diagnostic machinery. */
575 if (!warn_misleading_indentation)
576 return;
577
992118a1
PP
578 if (should_warn_for_misleading_indentation (guard_tinfo,
579 body_tinfo,
580 next_tinfo))
581 {
582 if (warning_at (next_tinfo.location, OPT_Wmisleading_indentation,
583 "statement is indented as if it were guarded by..."))
584 inform (guard_tinfo.location,
585 "...this %qs clause, but it is not",
586 guard_tinfo_to_string (guard_tinfo));
587 }
c3388e62 588}