struct function_format_info
{
- int format_type; /* type of format (printf, scanf, etc.) */
+ enum format_type format_type; /* type of format (printf, scanf, etc.) */
+ /* IS_RAW is relevant only for GCC diagnostic format functions.
+ It is set for "raw" formatting functions like pp_printf that
+ are not intended to produce complete diagnostics according to
+ GCC guidelines, and clear for others like error and warning
+ whose format string is checked for proper quoting and spelling. */
+ bool is_raw;
unsigned HOST_WIDE_INT format_num; /* number of format argument */
unsigned HOST_WIDE_INT first_arg_num; /* number of first arg (zero for varargs) */
};
static bool decode_format_attr (const_tree, tree, tree, function_format_info *,
bool);
-static int decode_format_type (const char *);
+static format_type decode_format_type (const char *, bool * = NULL);
static bool check_format_string (const_tree argument,
unsigned HOST_WIDE_INT format_num,
return warned;
}
+
+/* Emit a warning as per format_warning_va, but construct the substring_loc
+ for the substring at offset (POS1, POS2 - 1) within a string constant
+ FORMAT_STRING_CST at FMT_STRING_LOC. */
+
+ATTRIBUTE_GCC_DIAG (6,7)
+static bool
+format_warning_substr (location_t fmt_string_loc, tree format_string_cst,
+ int pos1, int pos2, int opt, const char *gmsgid, ...)
+{
+ va_list ap;
+ va_start (ap, gmsgid);
+ tree string_type = TREE_TYPE (format_string_cst);
+
+ pos2 -= 1;
+
+ substring_loc fmt_loc (fmt_string_loc, string_type, pos1, pos1, pos2);
+ format_string_diagnostic_t diag (fmt_loc, NULL, UNKNOWN_LOCATION, NULL,
+ NULL);
+ bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
+ va_end (ap);
+
+ return warned;
+}
+
+
/* Check that we have a pointer to a string suitable for use as a format.
The default is to check for a char type.
For objective-c dialects, this is extended to include references to string
{
const char *p = IDENTIFIER_POINTER (format_type_id);
- p = convert_format_name_to_system_name (p);
+ info->format_type = decode_format_type (p, &info->is_raw);
- info->format_type = decode_format_type (p);
-
if (!c_dialect_objc ()
&& info->format_type == gcc_objc_string_format_type)
{
if (info->first_arg_num != 0 && info->first_arg_num <= info->format_num)
{
gcc_assert (!validated_p);
- error ("format string argument follows the args to be formatted");
+ error ("format string argument follows the arguments to be formatted");
return false;
}
char conversion_char);
/* Decode a format type from a string, returning the type, or
- format_type_error if not valid, in which case the caller should print an
- error message. */
-static int
-decode_format_type (const char *s)
+ format_type_error if not valid, in which case the caller should
+ print an error message. On success, when IS_RAW is non-null, set
+ *IS_RAW when the format type corresponds to a GCC "raw" diagnostic
+ formatting function and clear it otherwise. */
+static format_type
+decode_format_type (const char *s, bool *is_raw /* = NULL */)
{
- int i;
- int slen;
+ bool is_raw_buf;
+
+ if (!is_raw)
+ is_raw = &is_raw_buf;
+
+ *is_raw = false;
s = convert_format_name_to_system_name (s);
- slen = strlen (s);
- for (i = 0; i < n_format_types; i++)
+
+ size_t slen = strlen (s);
+ for (int i = 0; i < n_format_types; i++)
{
- int alen;
+ /* Check for a match with no underscores. */
if (!strcmp (s, format_types[i].name))
- return i;
- alen = strlen (format_types[i].name);
+ return static_cast<format_type> (i);
+
+ /* Check for leading and trailing underscores. */
+ size_t alen = strlen (format_types[i].name);
if (slen == alen + 4 && s[0] == '_' && s[1] == '_'
&& s[slen - 1] == '_' && s[slen - 2] == '_'
&& !strncmp (s + 2, format_types[i].name, alen))
- return i;
+ return static_cast<format_type>(i);
+
+ /* Check for the "_raw" suffix and no leading underscores. */
+ if (slen == alen + 4
+ && !strncmp (s, format_types[i].name, alen)
+ && !strcmp (s + alen, "_raw"))
+ {
+ *is_raw = true;
+ return static_cast<format_type>(i);
+ }
+
+ /* Check for the "_raw__" suffix and leading underscores. */
+ if (slen == alen + 8 && s[0] == '_' && s[1] == '_'
+ && !strncmp (s + 2, format_types[i].name, alen)
+ && !strcmp (s + 2 + alen, "_raw__"))
+ {
+ *is_raw = true;
+ return static_cast<format_type>(i);
+ }
}
+
return format_type_error;
}
format++;
if (*format == '$')
{
- warning (OPT_Wformat_, "$ operand number used after format without operand number");
+ warning (OPT_Wformat_,
+ "%<$%>operand number used after format without operand number");
return true;
}
return false;
found_pointer_gap = true;
else
warning_at (res->format_string_loc, OPT_Wformat_,
- "format argument %d unused before used argument %d in $-style format",
+ "format argument %d unused before used argument %d "
+ "in %<$%>-style format",
i + 1, dollar_max_arg_used);
}
}
}
if (res.number_dollar_extra_args > 0 && res.number_non_literal == 0
&& res.number_other == 0)
- warning_at (loc, OPT_Wformat_extra_args, "unused arguments in $-style format");
+ warning_at (loc, OPT_Wformat_extra_args,
+ "unused arguments in %<$%>-style format");
if (res.number_empty > 0 && res.number_non_literal == 0
&& res.number_other == 0)
warning_at (loc, OPT_Wformat_zero_length, "zero-length %s format string",
return true;
}
+/* Describes "paired tokens" within the format string that are
+ expected to be balanced. */
+
+struct baltoks_t
+{
+ baltoks_t (): singlequote (), doublequote () { }
+
+ typedef auto_vec<const char *> balanced_tokens_t;
+ /* Vectors of pointers to opening brackets ('['), curly brackets ('{'),
+ quoting directives (like GCC "%<"), parentheses, and angle brackets
+ ('<'). Used to detect unbalanced tokens. */
+ balanced_tokens_t brackets;
+ balanced_tokens_t curly;
+ balanced_tokens_t quotdirs;
+ balanced_tokens_t parens;
+ balanced_tokens_t pointy;
+ /* Pointer to the last opening quote. */
+ const char *singlequote;
+ const char *doublequote;
+};
+
+/* Describes a keyword, operator, or other name. */
+
+struct token_t
+{
+ const char *name; /* Keyword/operator name. */
+ unsigned char len; /* Its length. */
+ const char *alt; /* Alternate spelling. */
+};
+
+/* Helper for initializing global token_t arrays below. */
+#define NAME(name) { name, sizeof name - 1, NULL }
+
+/* C/C++ operators that are expected to be quoted within the format
+ string. */
+
+static const token_t c_opers[] =
+ {
+ NAME ("!="), NAME ("%="), NAME ("&&"), NAME ("&="), NAME ("*="),
+ NAME ("++"), NAME ("+="), NAME ("--"), NAME ("-="), NAME ("->"),
+ NAME ("/="), NAME ("<<"), NAME ("<<="), NAME ("<="), NAME ("=="),
+ NAME (">="), NAME (">>="), NAME (">>"), NAME ("?:"), NAME ("^="),
+ NAME ("|="), NAME ("||")
+ };
+
+static const token_t cxx_opers[] =
+ {
+ NAME ("->*"), NAME (".*"), NAME ("::"), NAME ("<=>")
+ };
+
+/* Common C/C++ keywords that are expected to be quoted within the format
+ string. Keywords like auto, inline, or volatile are exccluded because
+ they are sometimes used in common terms like /auto variables/, /inline
+ function/, or /volatile access/ where they should not be quoted. */
+
+static const token_t c_keywords[] =
+ {
+#undef NAME
+#define NAME(name, alt) { name, sizeof name - 1, alt }
+
+ NAME ("alignas", NULL),
+ NAME ("alignof", NULL),
+ NAME ("asm", NULL),
+ NAME ("bool", NULL),
+ NAME ("char", NULL),
+ NAME ("const %", NULL),
+ NAME ("const-qualified", "%<const%>-qualified"),
+ NAME ("float", NULL),
+ NAME ("ifunc", NULL),
+ NAME ("int", NULL),
+ NAME ("long double", NULL),
+ NAME ("long int", NULL),
+ NAME ("long long", NULL),
+ NAME ("malloc", NULL),
+ NAME ("noclone", NULL),
+ NAME ("noinline", NULL),
+ NAME ("nonnull", NULL),
+ NAME ("noreturn", NULL),
+ NAME ("nothrow", NULL),
+ NAME ("offsetof", NULL),
+ NAME ("readonly", "read-only"),
+ NAME ("readwrite", "read-write"),
+ NAME ("restrict %", NULL),
+ NAME ("restrict-qualified", "%<restrict%>-qualified"),
+ NAME ("short int", NULL),
+ NAME ("signed char", NULL),
+ NAME ("signed int", NULL),
+ NAME ("signed long", NULL),
+ NAME ("signed short", NULL),
+ NAME ("sizeof", NULL),
+ NAME ("typeof", NULL),
+ NAME ("unsigned char", NULL),
+ NAME ("unsigned int", NULL),
+ NAME ("unsigned long", NULL),
+ NAME ("unsigned short", NULL),
+ NAME ("volatile %", NULL),
+ NAME ("volatile-qualified", "%<volatile%>-qualified"),
+ NAME ("weakref", NULL),
+ };
+
+static const token_t cxx_keywords[] =
+ {
+ /* C++ only keywords and operators. */
+ NAME ("catch", NULL),
+ NAME ("constexpr if", NULL),
+ NAME ("constexpr", NULL),
+ NAME ("consteval", NULL),
+ NAME ("decltype", NULL),
+ NAME ("nullptr", NULL),
+ NAME ("operator delete", NULL),
+ NAME ("operator new", NULL),
+ NAME ("typeid", NULL),
+ NAME ("typeinfo", NULL)
+ };
+
+/* Blacklisted words such as misspellings that should be avoided in favor
+ of the specified alternatives. */
+static const struct
+{
+ const char *name; /* Bad word. */
+ unsigned char len; /* Its length. */
+ const char *alt; /* Preferred alternative. */
+} badwords[] =
+ {
+ NAME ("arg", "argument"),
+ NAME ("bitfield", "bit-field"),
+ NAME ("builtin function", "built-in function"),
+ NAME ("can not", "cannot"),
+ NAME ("commandline option", "command-line option"),
+ NAME ("commandline", "command line"),
+ NAME ("command line option", "command-line option"),
+ NAME ("decl", "declaration"),
+ NAME ("enumeral", "enumerated"),
+ NAME ("floating point", "floating-point"),
+ NAME ("non-zero", "nonzero"),
+ NAME ("reg", "register"),
+ NAME ("stmt", "statement"),
+ };
+
+/* Common contractions that should be avoided in favor of the specified
+ alternatives. */
+
+static const struct
+{
+ const char *name; /* Contraction. */
+ unsigned char len; /* Its length. */
+ const char *alt; /* Preferred alternative. */
+} contrs[] =
+ {
+ NAME ("can't", "cannot"),
+ NAME ("didn't", "did not"),
+ /* These are commonly abused. Avoid diagnosing them for now.
+ NAME ("isn't", "is not"),
+ NAME ("don't", "is not"),
+ */
+ NAME ("mustn't", "must not"),
+ NAME ("needn't", "need not"),
+ NAME ("should't", "should not"),
+ NAME ("that's", "that is"),
+ NAME ("there's", "there is"),
+ NAME ("they're", "they are"),
+ NAME ("what's", "what is"),
+ NAME ("won't", "will not")
+ };
+
+/* Check for unquoted TOKENS. FORMAT_STRING_LOC is the location of
+ the format string, FORMAT_STRING_CST the format string itself (as
+ a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are pointers to
+ the beginning of the format string and the character currently
+ being processed, and BALTOKS describes paired "tokens" within
+ the format string that are expected to be balanced.
+ Returns a pointer to the last processed character or null when
+ nothing was done. */
+
+static const char*
+check_tokens (const token_t *tokens, unsigned ntoks,
+ location_t format_string_loc, tree format_string_cst,
+ const char *orig_format_chars, const char *format_chars,
+ baltoks_t &baltoks)
+{
+ /* For brevity. */
+ const int opt = OPT_Wformat_diag;
+ /* Zero-based starting position of a problem sequence. */
+ int fmtchrpos = format_chars - orig_format_chars;
+
+ /* For identifier-like "words," set to the word length. */
+ unsigned wlen = 0;
+ /* Set for an operator, clear for an identifier/word. */
+ bool is_oper = false;
+ bool underscore = false;
+
+ if (format_chars[0] == '_' || ISALPHA (format_chars[0]))
+ {
+ while (format_chars[wlen] == '_' || ISALNUM (format_chars[wlen]))
+ {
+ underscore |= format_chars[wlen] == '_';
+ ++wlen;
+ }
+ }
+ else
+ is_oper = true;
+
+ for (unsigned i = 0; i != ntoks; ++i)
+ {
+ unsigned toklen = tokens[i].len;
+
+ if (toklen < wlen
+ || strncmp (format_chars, tokens[i].name, toklen))
+ continue;
+
+ if (toklen == 2
+ && format_chars - orig_format_chars > 0
+ && (TOUPPER (format_chars[-1]) == 'C'
+ || TOUPPER (format_chars[-1]) == 'G'))
+ return format_chars + toklen - 1; /* Reference to C++ or G++. */
+
+ if (ISPUNCT (format_chars[toklen - 1]))
+ {
+ if (format_chars[toklen - 1] == format_chars[toklen])
+ return NULL; /* Operator followed by another punctuator. */
+ }
+ else if (ISALNUM (format_chars[toklen]))
+ return NULL; /* Keyword prefix for a longer word. */
+
+ if (toklen == 2
+ && format_chars[0] == '-'
+ && format_chars[1] == '-'
+ && ISALNUM (format_chars[2]))
+ return NULL; /* Probably option like --help. */
+
+ /* Allow this ugly warning for the time being. */
+ if (toklen == 2
+ && format_chars - orig_format_chars > 6
+ && !strncmp (format_chars - 7, " count >= width of ", 19))
+ return format_chars + 10;
+
+ /* The token is a type if it ends in an alphabetic character. */
+ bool is_type = (ISALPHA (tokens[i].name[toklen - 1])
+ && strchr (tokens[i].name, ' '));
+
+ /* Backtrack to the last alphabetic character (for tokens whose
+ names end in '%'). */
+ if (!is_oper)
+ while (!ISALPHA (tokens[i].name[toklen - 1]))
+ --toklen;
+
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + toklen, opt,
+ (is_type
+ ? G_("unquoted type name %<%.*s%> in format")
+ : (is_oper
+ ? G_("unquoted operator %<%.*s%> in format")
+ : G_("unquoted keyword %<%.*s%> in format"))),
+ toklen, format_chars)
+ && tokens[i].alt)
+ inform (format_string_loc, "use %qs instead", tokens[i].alt);
+
+ return format_chars + toklen - 1;
+ }
+
+ /* Diagnose unquoted __attribute__. Consider any parenthesized
+ argument to the attribute to avoid redundant warnings for
+ the double parentheses that might follow. */
+ if (!strncmp (format_chars, "__attribute", sizeof "__attribute" - 1))
+ {
+ unsigned nchars = sizeof "__attribute" - 1;
+ while ('_' == format_chars[nchars])
+ ++nchars;
+
+ for (int i = nchars; format_chars[i]; ++i)
+ if (' ' != format_chars[i])
+ {
+ nchars = i;
+ break;
+ }
+
+ if (format_chars[nchars] == '(')
+ {
+ baltoks.parens.safe_push (format_chars + nchars);
+
+ ++nchars;
+ bool close = false;
+ if (format_chars[nchars] == '(')
+ {
+ baltoks.parens.safe_push (format_chars + nchars);
+ close = true;
+ ++nchars;
+ }
+ for (int i = nchars; format_chars[i]; ++i)
+ if (')' == format_chars[i])
+ {
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ nchars = i + 1;
+ break;
+ }
+
+ if (close && format_chars[nchars] == ')')
+ {
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ ++nchars;
+ }
+ }
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted attribute in format");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose unquoted built-ins. */
+ if (format_chars[0] == '_'
+ && format_chars[1] == '_'
+ && (!strncmp (format_chars + 2, "atomic", sizeof "atomic" - 1)
+ || !strncmp (format_chars + 2, "builtin", sizeof "builtin" - 1)
+ || !strncmp (format_chars + 2, "sync", sizeof "sync" - 1)))
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + wlen, opt,
+ "unquoted name of built-in function %<%.*s%> "
+ "in format",
+ wlen, format_chars);
+ return format_chars + wlen - 1;
+ }
+
+ /* Diagnose unquoted substrings of alphanumeric characters containing
+ underscores. They most likely refer to identifiers and should be
+ quoted. */
+ if (underscore)
+ format_warning_substr (format_string_loc, format_string_cst,
+ format_chars - orig_format_chars,
+ format_chars + wlen - orig_format_chars,
+ opt,
+ "unquoted identifier or keyword %<%.*s%> in format",
+ wlen, format_chars);
+ else
+ {
+ /* Diagnose some common missspellings. */
+ for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
+ {
+ unsigned badwlen = strspn (badwords[i].name, " -");
+ if (wlen >= badwlen
+ && (wlen <= badwords[i].len
+ || (wlen == badwords[i].len + 1U
+ && TOUPPER (format_chars[wlen - 1]) == 'S'))
+ && !strncasecmp (format_chars, badwords[i].name, badwords[i].len))
+ {
+ /* Handle singular as well as plural forms of all bad words
+ even though the latter don't necessarily make sense for
+ all of the former (like "can nots"). */
+ badwlen = badwords[i].len;
+ const char *plural = "";
+ if (TOUPPER (format_chars[badwlen]) == 'S')
+ {
+ ++badwlen;
+ plural = "s";
+ }
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + badwords[i].len,
+ opt,
+ "misspelled term %<%.*s%> in format; "
+ "use %<%s%s%> instead",
+ badwlen, format_chars,
+ badwords[i].alt, plural);
+
+ return format_chars + badwords[i].len - 1;
+ }
+ }
+
+ /* Skip C++/G++. */
+ if (!strncasecmp (format_chars, "c++", 3)
+ || !strncasecmp (format_chars, "g++", 3))
+ return format_chars + 2;
+ }
+
+ return wlen ? format_chars + wlen - 1 : NULL;
+}
+
+/* Check plain text in a format string of a GCC diagnostic function
+ for common quoting, punctuation, and spelling mistakes, and issue
+ -Wformat-diag warnings if they are found. FORMAT_STRING_LOC is
+ the location of the format string, FORMAT_STRING_CST the format
+ string itself (as a tree), ORIG_FORMAT_CHARS and FORMAT_CHARS are
+ pointers to the beginning of the format string and the character
+ currently being processed, and BALTOKS describes paired "tokens"
+ within the format string that are expected to be balanced.
+ Returns a pointer to the last processed character. */
+
+static const char*
+check_plain (location_t format_string_loc, tree format_string_cst,
+ const char *orig_format_chars, const char *format_chars,
+ baltoks_t &baltoks)
+{
+ /* For brevity. */
+ const int opt = OPT_Wformat_diag;
+ /* Zero-based starting position of a problem sequence. */
+ int fmtchrpos = format_chars - orig_format_chars;
+
+ if (*format_chars == '%')
+ {
+ /* Diagnose %<%s%> and suggest using %qs instead. */
+ if (!strncmp (format_chars, "%<%s%>", 6))
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + 6, opt,
+ "quoted %qs directive in format; "
+ "use %qs instead", "%s", "%qs");
+ else if (format_chars - orig_format_chars > 2
+ && !strncasecmp (format_chars - 3, "can%'t", 5)
+ && !ISALPHA (format_chars[1]))
+ format_warning_substr (format_string_loc,
+ format_string_cst,
+ fmtchrpos - 3, fmtchrpos + 3, opt,
+ "contraction %<%.*s%> in format; "
+ "use %qs instead",
+ 6, format_chars - 3, "cannot");
+
+ return format_chars;
+ }
+
+ if (baltoks.quotdirs.length ())
+ {
+ /* Skip over all plain text within a quoting directive until
+ the next directive. */
+ while (*format_chars && '%' != *format_chars)
+ ++format_chars;
+
+ return format_chars;
+ }
+
+ /* The length of the problem sequence. */
+ int nchars = 0;
+
+ /* Diagnose any whitespace characters other than <space> but only
+ leading, trailing, and two or more consecutive <space>s. Do
+ this before diagnosing control characters because whitespace
+ is a subset of controls. */
+ const char *other_than_space = NULL;
+ while (ISSPACE (format_chars[nchars]))
+ {
+ if (format_chars[nchars] != ' ' && !other_than_space)
+ other_than_space = format_chars + nchars;
+ ++nchars;
+ }
+
+ if (nchars)
+ {
+ /* This is the most common problem: go the extra mile to describe
+ the problem in as much helpful detail as possible. */
+ if (other_than_space)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted whitespace character %qc in format",
+ *other_than_space);
+ return format_chars + nchars - 1;
+ }
+
+ if (fmtchrpos == 0)
+ /* Accept strings of leading spaces with no warning. */
+ return format_chars + nchars - 1;
+
+ if (!format_chars[nchars])
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious trailing space in format");
+ return format_chars + nchars - 1;
+ }
+
+ if (nchars > 1)
+ {
+ if (nchars == 2
+ && orig_format_chars < format_chars
+ && format_chars[-1] == '.'
+ && format_chars[0] == ' '
+ && format_chars[1] == ' ')
+ {
+ /* A period followed by two spaces. */
+ if (ISUPPER (*orig_format_chars))
+ {
+ /* If the part before the period is a capitalized
+ sentence check to make sure that what follows
+ is also capitalized. */
+ if (ISLOWER (format_chars[2]))
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "inconsistent capitalization in "
+ "format");
+ }
+ }
+ else
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted sequence of %i consecutive "
+ "space characters in format", nchars);
+ return format_chars + nchars - 1;
+ }
+
+ format_chars += nchars;
+ nchars = 0;
+ }
+
+ fmtchrpos = format_chars - orig_format_chars;
+
+ /* Diagnose any unquoted control characters other than the terminating
+ NUL. */
+ while (format_chars[nchars] && ISCNTRL (format_chars[nchars]))
+ ++nchars;
+
+ if (nchars > 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted control characters in format");
+ return format_chars + nchars - 1;
+ }
+ if (nchars)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted control character %qc in format",
+ *format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ if (ISPUNCT (format_chars[0]))
+ {
+ size_t nelts = sizeof c_opers / sizeof *c_opers;
+ if (const char *ret = check_tokens (c_opers, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+
+ nelts = c_dialect_cxx () ? sizeof cxx_opers / sizeof *cxx_opers : 0;
+ if (const char *ret = check_tokens (cxx_opers, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+ }
+
+ if (ISALPHA (format_chars[0]))
+ {
+ size_t nelts = sizeof c_keywords / sizeof *c_keywords;
+ if (const char *ret = check_tokens (c_keywords, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+
+ nelts = c_dialect_cxx () ? sizeof cxx_keywords / sizeof *cxx_keywords : 0;
+ if (const char *ret = check_tokens (cxx_keywords, nelts,
+ format_string_loc, format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks))
+ return ret;
+ }
+
+ nchars = 0;
+
+ /* Diagnose unquoted options. */
+ if ((format_chars == orig_format_chars
+ || format_chars[-1] == ' ')
+ && format_chars[0] == '-'
+ && ((format_chars[1] == '-'
+ && ISALPHA (format_chars[2]))
+ || ISALPHA (format_chars[1])))
+ {
+ nchars = 1;
+ while (ISALNUM (format_chars[nchars])
+ || '_' == format_chars[nchars]
+ || '-' == format_chars[nchars]
+ || '+' == format_chars[nchars])
+ ++nchars;
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted option name %<%.*s%> in format",
+ nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose leading, trailing, and two or more consecutive punctuation
+ characters. */
+ const char *unbalanced = NULL;
+ while ('%' != format_chars[nchars]
+ && ISPUNCT (format_chars[nchars])
+ && !unbalanced)
+ {
+ switch (format_chars[nchars])
+ {
+ case '[':
+ baltoks.brackets.safe_push (format_chars + nchars);
+ break;
+ case '{':
+ baltoks.curly.safe_push (format_chars + nchars);
+ break;
+ case '(':
+ baltoks.parens.safe_push (format_chars + nchars);
+ break;
+ case '<':
+ baltoks.pointy.safe_push (format_chars + nchars);
+ break;
+
+ case ']':
+ if (baltoks.brackets.length () > 0)
+ baltoks.brackets.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case '}':
+ if (baltoks.curly.length () > 0)
+ baltoks.curly.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case ')':
+ if (baltoks.parens.length () > 0)
+ baltoks.parens.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ case '>':
+ if (baltoks.pointy.length () > 0)
+ baltoks.pointy.pop ();
+ else
+ unbalanced = format_chars + nchars;
+ break;
+ }
+
+ ++nchars;
+ }
+
+ if (unbalanced)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unbalanced punctuation character %qc in format",
+ *unbalanced);
+ return format_chars + nchars - 1;
+ }
+
+ if (nchars)
+ {
+ /* Consider any identifier that follows the pound ('#') sign
+ a preprocessing directive. */
+ if (nchars == 1
+ && format_chars[0] == '#'
+ && ISALPHA (format_chars[1]))
+ {
+ while (ISALNUM (format_chars[nchars])
+ || format_chars[nchars] == '_')
+ ++nchars;
+
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted preprocessing directive %<%.*s%> "
+ "in format", nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a bare single quote. */
+ if (nchars == 1
+ && format_chars[0] == '\''
+ && format_chars - orig_format_chars
+ && ISALPHA (format_chars[-1])
+ && ISALPHA (format_chars[1]))
+ {
+ /* Diagnose a subset of contractions that are best avoided. */
+ for (unsigned i = 0; i != sizeof contrs / sizeof *contrs; ++i)
+ {
+ const char *apos = strchr (contrs[i].name, '\'');
+ gcc_assert (apos != NULL);
+ int off = apos - contrs[i].name;
+
+ if (format_chars - orig_format_chars >= off
+ && !strncmp (format_chars - off,
+ contrs[i].name, contrs[i].len))
+ {
+ format_warning_substr (format_string_loc,
+ format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "contraction %<%.*s%> in format; "
+ "use %qs instead",
+ contrs[i].len, contrs[i].name,
+ contrs[i].alt);
+ return format_chars + nchars - 1;
+ }
+ }
+
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "bare apostrophe %<'%> in format"))
+ inform (format_string_loc,
+ "if avoiding the apostrophe is not feasible, enclose "
+ "it in a pair of %qs and %qs directives instead",
+ "%<", "%>");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a backtick (grave accent). */
+ if (nchars == 1
+ && format_chars[0] == '`')
+ {
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "grave accent %<`%> in format"))
+ inform (format_string_loc,
+ "use the apostrophe directive %qs instead", "%'");
+ return format_chars + nchars - 1;
+ }
+
+ /* Diagnose a punctuation character after a space. */
+ if (nchars == 1
+ && format_chars - orig_format_chars
+ && format_chars[-1] == ' '
+ && strspn (format_chars, "!?:;.,") == 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos - 1, fmtchrpos, opt,
+ "space followed by punctuation character "
+ "%<%c%>", format_chars[0]);
+ return format_chars;
+ }
+
+ if (nchars == 1)
+ {
+ if (!strncmp (format_chars, "\"%s\"", 4))
+ {
+ if (format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + 4, opt,
+ "quoted %qs directive in format",
+ "%s"))
+ inform (format_string_loc, "if using %qs is not feasible, "
+ "use %qs instead", "%qs", "\"%-s\"");
+ }
+
+ if (format_chars[0] == '"')
+ {
+ baltoks.doublequote = baltoks.doublequote ? NULL : format_chars;
+ return format_chars + nchars - 1;
+ }
+ if (format_chars[0] == '\'')
+ {
+ baltoks.singlequote = baltoks.singlequote ? NULL : format_chars;
+ return format_chars + nchars - 1;
+ }
+ }
+
+ if (fmtchrpos == 0)
+ {
+ if (nchars == 1
+ && format_chars[0] == '(')
+ ; /* Text beginning in an open parenthesis. */
+ else if (nchars == 3
+ && !strncmp (format_chars, "...", 3)
+ && format_chars[3])
+ ; /* Text beginning in an ellipsis. */
+ else
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious leading punctuation sequence "
+ "%<%.*s%> in format",
+ nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+ }
+ else if (!format_chars[nchars])
+ {
+ if (nchars == 1
+ && (format_chars[nchars - 1] == ':'
+ || format_chars[nchars - 1] == ')'))
+ ; /* Text ending in a colon or a closing parenthesis. */
+ else if (nchars == 1
+ && ((ISUPPER (*orig_format_chars)
+ && format_chars[nchars - 1] == '.')
+ || strspn (format_chars + nchars - 1, "?])") == 1))
+ ; /* Capitalized sentence terminated by a single period,
+ or text ending in a question mark, closing bracket,
+ or parenthesis. */
+ else if (nchars == 2
+ && format_chars[0] == '?'
+ && format_chars[1] == ')')
+ ; /* A question mark after a closing parenthetical note. */
+ else if (nchars == 2
+ && format_chars[0] == ')'
+ && (format_chars[1] == '?'
+ || format_chars[1] == ';'
+ || format_chars[1] == ':'
+ || (ISUPPER (*orig_format_chars)
+ && format_chars[1] == '.')))
+ ; /* Closing parenthetical note followed by a question mark,
+ semicolon, or colon at the end of the string, or by
+ a period at the end of a capitalized sentence. */
+ else if (nchars == 3
+ && format_chars - orig_format_chars > 0
+ && !strncmp (format_chars, "...", 3))
+ ; /* Text ending in the ellipsis. */
+ else
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "spurious trailing punctuation sequence "
+ "%<%.*s%> in format",
+ nchars, format_chars);
+
+ return format_chars + nchars - 1;
+ }
+ else if (nchars == 2
+ && format_chars[0] == ')'
+ && (format_chars[1] == ':'
+ || format_chars[1] == ';'
+ || format_chars[1] == ',')
+ && format_chars[2] == ' ')
+ ; /* Closing parenthetical note followed by a colon, semicolon
+ or a comma followed by a space in the middle of the string. */
+ else if (nchars > 1)
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted sequence of %i consecutive "
+ "punctuation characters %q.*s in format",
+ nchars, nchars, format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ nchars = 0;
+
+ /* Finally, diagnose any unquoted non-graph, non-punctuation characters
+ other than the terminating NUL. */
+ while (format_chars[nchars]
+ && '%' != format_chars[nchars]
+ && !ISPUNCT (format_chars[nchars])
+ && !ISGRAPH (format_chars[nchars]))
+ ++nchars;
+
+ if (nchars > 1)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted non-graph characters in format");
+ return format_chars + nchars - 1;
+ }
+ if (nchars)
+ {
+ format_warning_substr (format_string_loc, format_string_cst,
+ fmtchrpos, fmtchrpos + nchars, opt,
+ "unquoted non-graph character %qc in format",
+ *format_chars);
+ return format_chars + nchars - 1;
+ }
+
+ return format_chars;
+}
+
+/* Diagnose unbalanced tokens described by BALTOKS in format string
+ ORIG_FORMAT_CHARS and the corresponding FORMAT_STRING_CST. */
+
+static void
+maybe_diag_unbalanced_tokens (location_t format_string_loc,
+ const char *orig_format_chars,
+ tree format_string_cst,
+ baltoks_t &baltoks)
+{
+ const char *unbalanced = NULL;
+
+ if (baltoks.brackets.length ())
+ unbalanced = baltoks.brackets.pop ();
+ else if (baltoks.curly.length ())
+ unbalanced = baltoks.curly.pop ();
+ else if (baltoks.parens.length ())
+ unbalanced = baltoks.parens.pop ();
+ else if (baltoks.pointy.length ())
+ unbalanced = baltoks.pointy.pop ();
+
+ if (unbalanced)
+ format_warning_at_char (format_string_loc, format_string_cst,
+ unbalanced - orig_format_chars + 1,
+ OPT_Wformat_diag,
+ "unbalanced punctuation character %<%c%> in format",
+ *unbalanced);
+
+ if (baltoks.quotdirs.length ())
+ format_warning_at_char (format_string_loc, format_string_cst,
+ baltoks.quotdirs.pop () - orig_format_chars,
+ OPT_Wformat_,
+ "unterminated quoting directive");
+
+ const char *quote
+ = baltoks.singlequote ? baltoks.singlequote : baltoks.doublequote;
+
+ if (quote)
+ format_warning_at_char (format_string_loc, format_string_cst,
+ quote - orig_format_chars + 1,
+ OPT_Wformat_diag,
+ "unterminated quote character %<%c%> in format",
+ *quote);
+}
+
/* Do the main part of checking a call to a format function. FORMAT_CHARS
is the NUL-terminated format string (which at this point may contain
internal NUL characters); FORMAT_LENGTH is its length (excluding the
and it didn't use $; 1 if $ formats are in use. */
int has_operand_number = -1;
- /* Vector of pointers to opening quoting directives (like GCC "%<"). */
- auto_vec<const char*> quotdirs;
+ /* Vectors of pointers to opening quoting directives (like GCC "%<"),
+ opening braces, brackets, and parentheses. Used to detect unbalanced
+ tokens. */
+ baltoks_t baltoks;
/* Pointers to the most recent color directives (like GCC's "%r or %R").
A starting color directive much be terminated before the end of
init_dollar_format_checking (info->first_arg_num, first_fillin_param);
+ /* In GCC diagnostic functions check plain directives (substrings within
+ the format string that don't start with %) for quoting and punctuations
+ problems. */
+ bool ck_plain = (!info->is_raw
+ && (info->format_type == gcc_diag_format_type
+ || info->format_type == gcc_tdiag_format_type
+ || info->format_type == gcc_cdiag_format_type
+ || info->format_type == gcc_cxxdiag_format_type));
+
while (*format_chars != 0)
{
- if (*format_chars++ != '%')
+ if (ck_plain)
+ format_chars = check_plain (format_string_loc,
+ format_string_cst,
+ orig_format_chars, format_chars,
+ baltoks);
+
+ if (*format_chars == 0 || *format_chars++ != '%')
continue;
+
if (*format_chars == 0)
{
format_warning_at_char (format_string_loc, format_string_cst,
continue;
}
+ /* ARGUMENT_PARSER ctor takes FORMAT_CHARS by reference and calls
+ to ARG_PARSER members may modify the variable. */
flag_chars_t flag_chars;
argument_parser arg_parser (info, format_chars, format_string_cst,
orig_format_chars, format_string_loc,
flag_chars.validate (fki, fci, flag_specs, format_chars,
format_string_cst,
format_string_loc, orig_format_chars, format_char,
- quotdirs.length () > 0);
+ baltoks.quotdirs.length () > 0);
const int alloc_flag = flag_chars.get_alloc_flag (fki);
const bool suppressed = flag_chars.assignment_suppression_p (fki);
if (quot_begin_p && !quot_end_p)
{
- if (quotdirs.length ())
+ if (baltoks.quotdirs.length ())
format_warning_at_char (format_string_loc, format_string_cst,
format_chars - orig_format_chars,
OPT_Wformat_,
"nested quoting directive");
- quotdirs.safe_push (format_chars);
+ baltoks.quotdirs.safe_push (format_chars);
}
else if (!quot_begin_p && quot_end_p)
{
- if (quotdirs.length ())
- quotdirs.pop ();
+ if (baltoks.quotdirs.length ())
+ baltoks.quotdirs.pop ();
else
format_warning_at_char (format_string_loc, format_string_cst,
format_chars - orig_format_chars,
/* Diagnose directives that shouldn't appear in a quoted sequence.
(They are denoted by a double quote in FLAGS2.) */
- if (quotdirs.length ())
+ if (baltoks.quotdirs.length ())
{
if (strchr (fci->flags2, '"'))
format_warning_at_char (format_string_loc, format_string_cst,
if (has_operand_number > 0)
finish_dollar_format_checking (res, fki->flags & (int) FMT_FLAG_DOLLAR_GAP_POINTER_OK);
- if (quotdirs.length ())
- format_warning_at_char (format_string_loc, format_string_cst,
- quotdirs.pop () - orig_format_chars,
- OPT_Wformat_, "unterminated quoting directive");
+ maybe_diag_unbalanced_tokens (format_string_loc, orig_format_chars,
+ format_string_cst, baltoks);
+
if (color_begin && !color_end)
format_warning_at_char (format_string_loc, format_string_cst,
color_begin - orig_format_chars,
if (arg_num != info.first_arg_num)
{
if (!(flags & (int) ATTR_FLAG_BUILT_IN))
- error ("args to be formatted is not %<...%>");
+ error ("argument to be formatted is not %<...%>");
*no_add_attrs = true;
return NULL_TREE;
}