From: Jakub Jelinek Date: Wed, 30 Jul 2025 11:20:59 +0000 (+0200) Subject: libcpp: Fix up comma diagnostics in preprocessor for C++ [PR120778] X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8f185d3d7a2bcbbfb1a8f70ac602ee6e4ac34080;p=thirdparty%2Fgcc.git libcpp: Fix up comma diagnostics in preprocessor for C++ [PR120778] The P2843R3 Preprocessing is never undefined paper contains comments that various compilers handle comma operators in preprocessor expressions incorrectly and I think they are right. In both C and C++ the grammar uses constant-expression non-terminal for #if/#elif and in both C and C++ that NT is conditional-expression, so #if 1, 2 is IMHO clearly wrong in both languages. C89 then says for constant-expression "Constant expressions shall not contain assignment, increment, decrement, function-call, or comma operators, except when they are contained within the operand of a sizeof operator." Because all the remaining identifiers in the #if/#elif expression are replaced with 0 I think assignments, increment, decrement and function-call aren't that big deal because (0 = 1) or ++4 etc. are all invalid, but for comma expressions I think it matters. In r0-56429 PR456 Joseph has added !CPP_OPTION (pfile, c99) to handle that correctly. Then C99 changed that to: "Constant expressions shall not contain assignment, increment, decrement, function-call, or comma operators, except when they are contained within a subexpression that is not evaluated." That made for C99+ #if 1 || (1, 2) etc. valid but #if (1, 2) is still invalid, ditto #if 1 ? 1, 2 : 3 In C++ I can't find anything like that though, and as can be seen on say int a[(1, 2)]; int b[1 ? 1, 2 : 3]; being accepted by C++ and rejected by C while int c[1, 2]; int d[1 ? 2 : 3, 4]; being rejected in both C and C++, so I think for C++ it is indeed just the grammar that prevents #if 1, 2. When it is the second operand of ?: or inside of () the grammar just uses expression and that allows comma operator. So, the following patch uses different decisions for C++ when to diagnose comma operator in preprocessor expressions, for C++ tracks if it is inside of () (obviously () around #embed clauses don't count unless one uses limit ((1, 2)) etc.) or inside of the second ?: operand and allows comma operator there and disallows elsewhere. BTW, I wonder if anything in the standard disallows <=> in the preprocessor expressions. Say #if (0 <=> 1) < 0 etc. #include constexpr int a = (0 <=> 1) < 0; is valid (but not valid without #include ) and the expressions don't use any identifiers. 2025-07-30 Jakub Jelinek PR c++/120778 * internal.h (struct lexer_state): Add comma_ok member. * expr.cc (_cpp_parse_expr): Initialize it to 0, increment on CPP_OPEN_PAREN and CPP_QUERY, decrement on CPP_CLOSE_PAREN and CPP_COLON. (num_binary_op): For C++ pedwarn on comma operator if pfile->state.comma_ok is 0 instead of !c99 or skip_eval. * g++.dg/cpp/if-comma-1.C: New test. --- diff --git a/gcc/testsuite/g++.dg/cpp/if-comma-1.C b/gcc/testsuite/g++.dg/cpp/if-comma-1.C new file mode 100644 index 00000000000..0daaff9922f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/if-comma-1.C @@ -0,0 +1,42 @@ +// PR c++/120778 +// { dg-do preprocess } +// { dg-options "-pedantic-errors" } + +#if (1, 2) +#define M1 1 +#else +#error +#endif +#if 1 ? 2, 3 : 4 +#define M2 2 +#else +#error +#endif +#if 0 ? 2, 0 : 1 +#define M3 3 +#else +#error +#endif +#if 0 || (1, 2) +#define M4 4 +#else +#error +#endif +#if 1 || (1, 2) +#define M5 5 +#else +#error +#endif +#if (1, 2) && 1 +#define M6 6 +#else +#error +#endif +#if 1 && (1, 2) +#define M7 7 +#else +#error +#endif +#if M1 + M2 + M3 + M4 + M5 + M6 + M7 != 28 +#error +#endif diff --git a/libcpp/expr.cc b/libcpp/expr.cc index 7bb57a340d8..910848d4f78 100644 --- a/libcpp/expr.cc +++ b/libcpp/expr.cc @@ -1460,6 +1460,7 @@ _cpp_parse_expr (cpp_reader *pfile, const char *dir, location_t virtual_location = 0; pfile->state.skip_eval = 0; + pfile->state.comma_ok = 0; /* Set up detection of #if ! defined(). */ pfile->mi_ind_cmacro = 0; @@ -1521,6 +1522,10 @@ _cpp_parse_expr (cpp_reader *pfile, const char *dir, lex_count--; continue; + case CPP_OPEN_PAREN: + pfile->state.comma_ok++; + break; + default: if ((int) op.op <= (int) CPP_EQ || (int) op.op >= (int) CPP_PLUS_EQ) SYNTAX_ERROR2_AT (op.loc, @@ -1574,13 +1579,16 @@ _cpp_parse_expr (cpp_reader *pfile, const char *dir, case CPP_CLOSE_PAREN: if (pfile->state.in_directive == 3 && top == pfile->op_stack) goto embed_done; + pfile->state.comma_ok--; continue; case CPP_OR_OR: if (!num_zerop (top->value)) pfile->state.skip_eval++; break; - case CPP_AND_AND: case CPP_QUERY: + pfile->state.comma_ok++; + /* FALLTHRU */ + case CPP_AND_AND: if (num_zerop (top->value)) pfile->state.skip_eval++; break; @@ -1592,6 +1600,8 @@ _cpp_parse_expr (cpp_reader *pfile, const char *dir, pfile->state.skip_eval++; else pfile->state.skip_eval--; + pfile->state.comma_ok--; + break; default: break; } @@ -2209,8 +2219,10 @@ num_binary_op (cpp_reader *pfile, cpp_num lhs, cpp_num rhs, enum cpp_ttype op) /* Comma. */ default: /* case CPP_COMMA: */ - if (CPP_PEDANTIC (pfile) && (!CPP_OPTION (pfile, c99) - || !pfile->state.skip_eval)) + if (CPP_PEDANTIC (pfile) + && (CPP_OPTION (pfile, cplusplus) + ? !pfile->state.comma_ok + : (!CPP_OPTION (pfile, c99) || !pfile->state.skip_eval))) cpp_pedwarning (pfile, CPP_W_PEDANTIC, "comma operator in operand of #%s", pfile->state.in_directive == 3 diff --git a/libcpp/internal.h b/libcpp/internal.h index 8ca4c75b6cc..bcf55593b0c 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -282,6 +282,9 @@ struct lexer_state /* Nonzero to skip evaluating part of an expression. */ unsigned int skip_eval; + /* Nonzero if CPP_COMMA is valid in expression in C++. */ + unsigned int comma_ok; + /* Nonzero when tokenizing a deferred pragma. */ unsigned char in_deferred_pragma;