]>
Commit | Line | Data |
---|---|---|
c3388e62 | 1 | /* Implementation of -Wmisleading-indentation |
818ab71a | 2 | Copyright (C) 2015-2016 Free Software Foundation, Inc. |
c3388e62 DM |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING3. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
2adfab87 | 24 | #include "c-common.h" |
992118a1 | 25 | #include "c-indentation.h" |
c3388e62 DM |
26 | |
27 | extern cpp_options *cpp_opts; | |
28 | ||
29 | /* Convert libcpp's notion of a column (a 1-based char count) to | |
30 | the "visual column" (0-based column, respecting tabs), by reading the | |
31 | relevant line. | |
1a1e101f | 32 | |
c3388e62 | 33 | Returns true if a conversion was possible, writing the result to OUT, |
1a1e101f PP |
34 | otherwise returns false. If FIRST_NWS is not NULL, then write to it |
35 | the visual column corresponding to the first non-whitespace character | |
36 | on the line. */ | |
c3388e62 DM |
37 | |
38 | static bool | |
c7df95d8 | 39 | get_visual_column (expanded_location exploc, location_t loc, |
1a1e101f | 40 | unsigned int *out, |
c589e975 | 41 | unsigned int *first_nws) |
c3388e62 | 42 | { |
c7df95d8 DM |
43 | /* PR c++/68819: if the column number is zero, we presumably |
44 | had a location_t > LINE_MAP_MAX_LOCATION_WITH_COLS, and so | |
45 | we have no column information. | |
46 | Act as if no conversion was possible, triggering the | |
47 | error-handling path in the caller. */ | |
48 | if (!exploc.column) | |
49 | { | |
50 | static bool issued_note = false; | |
51 | if (!issued_note) | |
52 | { | |
53 | /* Notify the user the first time this happens. */ | |
54 | issued_note = true; | |
55 | inform (loc, | |
56 | "-Wmisleading-indentation is disabled from this point" | |
57 | " onwards, since column-tracking was disabled due to" | |
58 | " the size of the code/headers"); | |
59 | } | |
60 | return false; | |
61 | } | |
62 | ||
c3388e62 | 63 | int line_len; |
31bdd08a DM |
64 | const char *line = location_get_source_line (exploc.file, exploc.line, |
65 | &line_len); | |
c3388e62 DM |
66 | if (!line) |
67 | return false; | |
68 | unsigned int vis_column = 0; | |
69 | for (int i = 1; i < exploc.column; i++) | |
70 | { | |
71 | unsigned char ch = line[i - 1]; | |
1a1e101f PP |
72 | |
73 | if (first_nws != NULL && !ISSPACE (ch)) | |
74 | { | |
75 | *first_nws = vis_column; | |
76 | first_nws = NULL; | |
77 | } | |
78 | ||
c3388e62 DM |
79 | if (ch == '\t') |
80 | { | |
81 | /* Round up to nearest tab stop. */ | |
82 | const unsigned int tab_width = cpp_opts->tabstop; | |
83 | vis_column = ((vis_column + tab_width) / tab_width) * tab_width; | |
84 | } | |
85 | else | |
86 | vis_column++; | |
87 | } | |
88 | ||
1a1e101f PP |
89 | if (first_nws != NULL) |
90 | *first_nws = vis_column; | |
c3388e62 | 91 | |
1a1e101f | 92 | *out = vis_column; |
c3388e62 DM |
93 | return true; |
94 | } | |
95 | ||
96 | /* Does the given source line appear to contain a #if directive? | |
97 | (or #ifdef/#ifndef). Ignore the possibility of it being inside a | |
98 | comment, for simplicity. | |
99 | Helper function for detect_preprocessor_logic. */ | |
100 | ||
101 | static bool | |
102 | line_contains_hash_if (const char *file, int line_num) | |
103 | { | |
c3388e62 | 104 | int line_len; |
31bdd08a | 105 | const char *line = location_get_source_line (file, line_num, &line_len); |
c3388e62 DM |
106 | if (!line) |
107 | return false; | |
108 | ||
109 | int idx; | |
110 | ||
111 | /* Skip leading whitespace. */ | |
112 | for (idx = 0; idx < line_len; idx++) | |
113 | if (!ISSPACE (line[idx])) | |
114 | break; | |
115 | if (idx == line_len) | |
116 | return false; | |
117 | ||
118 | /* Require a '#' character. */ | |
119 | if (line[idx] != '#') | |
120 | return false; | |
121 | idx++; | |
122 | ||
123 | /* Skip whitespace. */ | |
124 | while (idx < line_len) | |
125 | { | |
126 | if (!ISSPACE (line[idx])) | |
127 | break; | |
128 | idx++; | |
129 | } | |
130 | ||
131 | /* Match #if/#ifdef/#ifndef. */ | |
132 | if (idx + 2 <= line_len) | |
133 | if (line[idx] == 'i') | |
134 | if (line[idx + 1] == 'f') | |
135 | return true; | |
136 | ||
137 | return false; | |
138 | } | |
139 | ||
140 | ||
141 | /* Determine if there is preprocessor logic between | |
142 | BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't | |
143 | issue a warning for cases like this: | |
144 | ||
145 | if (flagA) | |
146 | foo (); | |
147 | ^ BODY_EXPLOC | |
148 | #if SOME_CONDITION_THAT_DOES_NOT_HOLD | |
149 | if (flagB) | |
150 | #endif | |
151 | bar (); | |
152 | ^ NEXT_STMT_EXPLOC | |
153 | ||
154 | despite "bar ();" being visually aligned below "foo ();" and | |
155 | being (as far as the parser sees) the next token. | |
156 | ||
157 | Return true if such logic is detected. */ | |
158 | ||
159 | static bool | |
160 | detect_preprocessor_logic (expanded_location body_exploc, | |
161 | expanded_location next_stmt_exploc) | |
162 | { | |
163 | gcc_assert (next_stmt_exploc.file == body_exploc.file); | |
164 | gcc_assert (next_stmt_exploc.line > body_exploc.line); | |
165 | ||
166 | if (next_stmt_exploc.line - body_exploc.line < 4) | |
167 | return false; | |
168 | ||
169 | /* Is there a #if/#ifdef/#ifndef directive somewhere in the lines | |
170 | between the given locations? | |
171 | ||
172 | This is something of a layering violation, but by necessity, | |
173 | given the nature of what we're testing for. For example, | |
174 | in theory we could be fooled by a #if within a comment, but | |
175 | it's unlikely to matter. */ | |
176 | for (int line = body_exploc.line + 1; line < next_stmt_exploc.line; line++) | |
177 | if (line_contains_hash_if (body_exploc.file, line)) | |
178 | return true; | |
179 | ||
180 | /* Not found. */ | |
181 | return false; | |
182 | } | |
183 | ||
184 | ||
185 | /* Helper function for warn_for_misleading_indentation; see | |
186 | description of that function below. */ | |
187 | ||
188 | static bool | |
992118a1 PP |
189 | should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo, |
190 | const token_indent_info &body_tinfo, | |
191 | const token_indent_info &next_tinfo) | |
c3388e62 | 192 | { |
992118a1 PP |
193 | location_t guard_loc = guard_tinfo.location; |
194 | location_t body_loc = body_tinfo.location; | |
195 | location_t next_stmt_loc = next_tinfo.location; | |
196 | ||
8ebca419 | 197 | enum cpp_ttype body_type = body_tinfo.type; |
992118a1 PP |
198 | enum cpp_ttype next_tok_type = next_tinfo.type; |
199 | ||
c3388e62 DM |
200 | /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC |
201 | if either are within macros. */ | |
202 | if (linemap_location_from_macro_expansion_p (line_table, body_loc) | |
203 | || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc)) | |
204 | return false; | |
205 | ||
206 | /* Don't attempt to compare indentation if #line or # 44 "file"-style | |
207 | directives are present, suggesting generated code. | |
208 | ||
209 | All bets are off if these are present: the file that the #line | |
210 | directive could have an entirely different coding layout to C/C++ | |
211 | (e.g. .md files). | |
212 | ||
213 | To determine if a #line is present, in theory we could look for a | |
214 | map with reason == LC_RENAME_VERBATIM. However, if there has | |
215 | subsequently been a long line requiring a column number larger than | |
216 | that representable by the original LC_RENAME_VERBATIM map, then | |
217 | we'll have a map with reason LC_RENAME. | |
218 | Rather than attempting to search all of the maps for a | |
219 | LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one | |
220 | is seen, and we check for the flag here. | |
221 | */ | |
222 | if (line_table->seen_line_directive) | |
223 | return false; | |
224 | ||
21efdd80 PP |
225 | /* We can't usefully warn about do-while statements since the bodies of these |
226 | statements are always explicitly delimited at both ends, so control flow is | |
227 | quite obvious. */ | |
228 | if (guard_tinfo.keyword == RID_DO) | |
229 | return false; | |
230 | ||
992118a1 PP |
231 | /* If the token following the body is a close brace or an "else" |
232 | then while indentation may be sloppy, there is not much ambiguity | |
233 | about control flow, e.g. | |
234 | ||
235 | if (foo) <- GUARD | |
236 | bar (); <- BODY | |
237 | else baz (); <- NEXT | |
238 | ||
239 | { | |
240 | while (foo) <- GUARD | |
241 | bar (); <- BODY | |
242 | } <- NEXT | |
243 | baz (); | |
244 | */ | |
245 | if (next_tok_type == CPP_CLOSE_BRACE | |
246 | || next_tinfo.keyword == RID_ELSE) | |
c3388e62 DM |
247 | return false; |
248 | ||
8ebca419 PP |
249 | /* Likewise, if the body of the guard is a compound statement then control |
250 | flow is quite visually explicit regardless of the code's possibly poor | |
251 | indentation, e.g. | |
252 | ||
253 | while (foo) <- GUARD | |
254 | { <- BODY | |
255 | bar (); | |
256 | } | |
257 | baz (); <- NEXT | |
258 | ||
259 | Things only get muddy when the body of the guard does not have | |
260 | braces, e.g. | |
261 | ||
262 | if (foo) <- GUARD | |
263 | bar (); <- BODY | |
264 | baz (); <- NEXT | |
265 | */ | |
266 | if (body_type == CPP_OPEN_BRACE) | |
267 | return false; | |
268 | ||
c3388e62 DM |
269 | /* Don't warn here about spurious semicolons. */ |
270 | if (next_tok_type == CPP_SEMICOLON) | |
271 | return false; | |
272 | ||
6ac48155 DM |
273 | expanded_location body_exploc = expand_location (body_loc); |
274 | expanded_location next_stmt_exploc = expand_location (next_stmt_loc); | |
8ebca419 | 275 | expanded_location guard_exploc = expand_location (guard_loc); |
c3388e62 DM |
276 | |
277 | /* They must be in the same file. */ | |
278 | if (next_stmt_exploc.file != body_exploc.file) | |
279 | return false; | |
280 | ||
281 | /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider | |
282 | the location of the guard. | |
283 | ||
284 | Cases where we want to issue a warning: | |
285 | ||
286 | if (flag) | |
287 | foo (); bar (); | |
288 | ^ WARN HERE | |
289 | ||
290 | if (flag) foo (); bar (); | |
291 | ^ WARN HERE | |
292 | ||
8ebca419 PP |
293 | |
294 | if (flag) ; { | |
295 | ^ WARN HERE | |
296 | ||
297 | if (flag) | |
298 | ; { | |
299 | ^ WARN HERE | |
300 | ||
c3388e62 DM |
301 | Cases where we don't want to issue a warning: |
302 | ||
303 | various_code (); if (flag) foo (); bar (); more_code (); | |
304 | ^ DON'T WARN HERE. */ | |
305 | if (next_stmt_exploc.line == body_exploc.line) | |
306 | { | |
c3388e62 DM |
307 | if (guard_exploc.file != body_exploc.file) |
308 | return true; | |
309 | if (guard_exploc.line < body_exploc.line) | |
310 | /* The guard is on a line before a line that contains both | |
311 | the body and the next stmt. */ | |
312 | return true; | |
313 | else if (guard_exploc.line == body_exploc.line) | |
314 | { | |
315 | /* They're all on the same line. */ | |
316 | gcc_assert (guard_exploc.file == next_stmt_exploc.file); | |
317 | gcc_assert (guard_exploc.line == next_stmt_exploc.line); | |
1a1e101f PP |
318 | unsigned int guard_vis_column; |
319 | unsigned int guard_line_first_nws; | |
c7df95d8 | 320 | if (!get_visual_column (guard_exploc, guard_loc, |
1a1e101f PP |
321 | &guard_vis_column, |
322 | &guard_line_first_nws)) | |
323 | return false; | |
c3388e62 DM |
324 | /* Heuristic: only warn if the guard is the first thing |
325 | on its line. */ | |
1a1e101f | 326 | if (guard_vis_column == guard_line_first_nws) |
c3388e62 DM |
327 | return true; |
328 | } | |
329 | } | |
330 | ||
331 | /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider | |
332 | their relative locations, and of the guard. | |
333 | ||
334 | Cases where we want to issue a warning: | |
335 | if (flag) | |
336 | foo (); | |
337 | bar (); | |
338 | ^ WARN HERE | |
339 | ||
340 | Cases where we don't want to issue a warning: | |
341 | if (flag) | |
342 | foo (); | |
343 | bar (); | |
344 | ^ DON'T WARN HERE (autogenerated code?) | |
345 | ||
346 | if (flagA) | |
347 | foo (); | |
348 | #if SOME_CONDITION_THAT_DOES_NOT_HOLD | |
349 | if (flagB) | |
350 | #endif | |
351 | bar (); | |
352 | ^ DON'T WARN HERE | |
6ac48155 | 353 | |
8ebca419 PP |
354 | if (flag) |
355 | ; | |
356 | foo (); | |
357 | ^ DON'T WARN HERE | |
c589e975 DM |
358 | |
359 | #define emit | |
360 | if (flag) | |
361 | foo (); | |
362 | emit bar (); | |
363 | ^ DON'T WARN HERE | |
364 | ||
c3388e62 DM |
365 | */ |
366 | if (next_stmt_exploc.line > body_exploc.line) | |
367 | { | |
368 | /* Determine if GUARD_LOC and NEXT_STMT_LOC are aligned on the same | |
369 | "visual column"... */ | |
370 | unsigned int next_stmt_vis_column; | |
c589e975 | 371 | unsigned int next_stmt_line_first_nws; |
c3388e62 | 372 | unsigned int body_vis_column; |
8ebca419 | 373 | unsigned int body_line_first_nws; |
d5398058 PP |
374 | unsigned int guard_vis_column; |
375 | unsigned int guard_line_first_nws; | |
c3388e62 DM |
376 | /* If we can't determine it, don't issue a warning. This is sometimes |
377 | the case for input files containing #line directives, and these | |
378 | are often for autogenerated sources (e.g. from .md files), where | |
379 | it's not clear that it's meaningful to look at indentation. */ | |
c7df95d8 DM |
380 | if (!get_visual_column (next_stmt_exploc, next_stmt_loc, |
381 | &next_stmt_vis_column, | |
c589e975 | 382 | &next_stmt_line_first_nws)) |
c3388e62 | 383 | return false; |
c7df95d8 | 384 | if (!get_visual_column (body_exploc, body_loc, |
8ebca419 PP |
385 | &body_vis_column, |
386 | &body_line_first_nws)) | |
c3388e62 | 387 | return false; |
c7df95d8 | 388 | if (!get_visual_column (guard_exploc, guard_loc, |
d5398058 PP |
389 | &guard_vis_column, |
390 | &guard_line_first_nws)) | |
391 | return false; | |
392 | ||
c589e975 DM |
393 | /* If the line where the next stmt starts has non-whitespace |
394 | on it before the stmt, then don't warn: | |
395 | #define emit | |
396 | if (flag) | |
397 | foo (); | |
398 | emit bar (); | |
399 | ^ DON'T WARN HERE | |
400 | (PR c/69122). */ | |
401 | if (next_stmt_line_first_nws < next_stmt_vis_column) | |
402 | return false; | |
403 | ||
8ebca419 PP |
404 | if ((body_type != CPP_SEMICOLON |
405 | && next_stmt_vis_column == body_vis_column) | |
406 | /* As a special case handle the case where the body is a semicolon | |
407 | that may be hidden by a preceding comment, e.g. */ | |
408 | ||
409 | // if (p) | |
410 | // /* blah */; | |
411 | // foo (1); | |
412 | ||
413 | /* by looking instead at the column of the first non-whitespace | |
414 | character on the body line. */ | |
415 | || (body_type == CPP_SEMICOLON | |
416 | && body_exploc.line > guard_exploc.line | |
417 | && body_line_first_nws != body_vis_column | |
d5398058 | 418 | && next_stmt_vis_column > guard_line_first_nws)) |
c3388e62 | 419 | { |
8ebca419 PP |
420 | /* Don't warn if they are aligned on the same column |
421 | as the guard itself (suggesting autogenerated code that doesn't | |
422 | bother indenting at all). We consider the column of the first | |
423 | non-whitespace character on the guard line instead of the column | |
424 | of the actual guard token itself because it is more sensible. | |
425 | Consider: | |
426 | ||
427 | if (p) { | |
428 | foo (1); | |
429 | } else // GUARD | |
430 | foo (2); // BODY | |
431 | foo (3); // NEXT | |
432 | ||
433 | and: | |
434 | ||
435 | if (p) | |
436 | foo (1); | |
437 | } else // GUARD | |
438 | foo (2); // BODY | |
439 | foo (3); // NEXT | |
440 | ||
441 | If we just used the column of the guard token, we would warn on | |
442 | the first example and not warn on the second. But we want the | |
443 | exact opposite to happen: to not warn on the first example (which | |
444 | is probably autogenerated) and to warn on the second (whose | |
445 | indentation is misleading). Using the column of the first | |
446 | non-whitespace character on the guard line makes that | |
447 | happen. */ | |
8ebca419 | 448 | if (guard_line_first_nws == body_vis_column) |
c3388e62 DM |
449 | return false; |
450 | ||
8ebca419 PP |
451 | /* We may have something like: |
452 | ||
453 | if (p) | |
454 | { | |
455 | foo (1); | |
456 | } else // GUARD | |
457 | foo (2); // BODY | |
458 | foo (3); // NEXT | |
459 | ||
460 | in which case the columns are not aligned but the code is not | |
461 | misleadingly indented. If the column of the body is less than | |
462 | that of the guard line then don't warn. */ | |
463 | if (body_vis_column < guard_line_first_nws) | |
6ac48155 DM |
464 | return false; |
465 | ||
c3388e62 DM |
466 | /* Don't warn if there is multiline preprocessor logic between |
467 | the two statements. */ | |
468 | if (detect_preprocessor_logic (body_exploc, next_stmt_exploc)) | |
469 | return false; | |
470 | ||
471 | /* Otherwise, they are visually aligned: issue a warning. */ | |
472 | return true; | |
473 | } | |
8ebca419 PP |
474 | |
475 | /* Also issue a warning for code having the form: | |
476 | ||
477 | if (flag); | |
478 | foo (); | |
479 | ||
480 | while (flag); | |
481 | { | |
482 | ... | |
483 | } | |
484 | ||
485 | for (...); | |
486 | { | |
487 | ... | |
488 | } | |
489 | ||
490 | if (flag) | |
491 | ; | |
492 | else if (flag); | |
493 | foo (); | |
494 | ||
495 | where the semicolon at the end of each guard is most likely spurious. | |
496 | ||
497 | But do not warn on: | |
498 | ||
499 | for (..); | |
500 | foo (); | |
501 | ||
502 | where the next statement is aligned with the guard. | |
503 | */ | |
504 | if (body_type == CPP_SEMICOLON) | |
505 | { | |
506 | if (body_exploc.line == guard_exploc.line) | |
507 | { | |
8ebca419 PP |
508 | if (next_stmt_vis_column > guard_line_first_nws |
509 | || (next_tok_type == CPP_OPEN_BRACE | |
6b95d7cc | 510 | && next_stmt_vis_column == guard_line_first_nws)) |
8ebca419 PP |
511 | return true; |
512 | } | |
513 | } | |
c3388e62 DM |
514 | } |
515 | ||
516 | return false; | |
517 | } | |
518 | ||
992118a1 PP |
519 | /* Return the string identifier corresponding to the given guard token. */ |
520 | ||
521 | static const char * | |
522 | guard_tinfo_to_string (const token_indent_info &guard_tinfo) | |
523 | { | |
524 | switch (guard_tinfo.keyword) | |
525 | { | |
526 | case RID_FOR: | |
527 | return "for"; | |
528 | case RID_ELSE: | |
529 | return "else"; | |
530 | case RID_IF: | |
531 | return "if"; | |
532 | case RID_WHILE: | |
533 | return "while"; | |
534 | case RID_DO: | |
535 | return "do"; | |
536 | default: | |
537 | gcc_unreachable (); | |
538 | } | |
539 | } | |
540 | ||
c3388e62 DM |
541 | /* Called by the C/C++ frontends when we have a guarding statement at |
542 | GUARD_LOC containing a statement at BODY_LOC, where the block wasn't | |
543 | written using braces, like this: | |
544 | ||
545 | if (flag) | |
546 | foo (); | |
547 | ||
548 | along with the location of the next token, at NEXT_STMT_LOC, | |
549 | so that we can detect followup statements that are within | |
550 | the same "visual block" as the guarded statement, but which | |
551 | aren't logically grouped within the guarding statement, such | |
552 | as: | |
553 | ||
554 | GUARD_LOC | |
555 | | | |
556 | V | |
557 | if (flag) | |
558 | foo (); <- BODY_LOC | |
559 | bar (); <- NEXT_STMT_LOC | |
560 | ||
561 | In the above, "bar ();" isn't guarded by the "if", but | |
562 | is indented to misleadingly suggest that it is in the same | |
563 | block as "foo ();". | |
564 | ||
565 | GUARD_KIND identifies the kind of clause e.g. "if", "else" etc. */ | |
566 | ||
567 | void | |
992118a1 PP |
568 | warn_for_misleading_indentation (const token_indent_info &guard_tinfo, |
569 | const token_indent_info &body_tinfo, | |
570 | const token_indent_info &next_tinfo) | |
c3388e62 | 571 | { |
773ce42e DM |
572 | /* Early reject for the case where -Wmisleading-indentation is disabled, |
573 | to avoid doing work only to have the warning suppressed inside the | |
574 | diagnostic machinery. */ | |
575 | if (!warn_misleading_indentation) | |
576 | return; | |
577 | ||
992118a1 PP |
578 | if (should_warn_for_misleading_indentation (guard_tinfo, |
579 | body_tinfo, | |
580 | next_tinfo)) | |
581 | { | |
582 | if (warning_at (next_tinfo.location, OPT_Wmisleading_indentation, | |
583 | "statement is indented as if it were guarded by...")) | |
584 | inform (guard_tinfo.location, | |
585 | "...this %qs clause, but it is not", | |
586 | guard_tinfo_to_string (guard_tinfo)); | |
587 | } | |
c3388e62 | 588 | } |