#include "if-error.h"
#include "sentence.h"
#include "c-ctype.h"
+#include "c-strstr.h"
#include "unictype.h"
#include "unistr.h"
#include "quote.h"
#define _(str) gettext (str)
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
/* Function that implements a single syntax check.
MP is a message.
}
+/* Determine whether a string (msgid or msgid_plural) contains a URL. */
+static bool
+string_has_url (const char *string)
+{
+ /* Test for the common pattern of URLs that reside on the internet
+ (not "file:"). */
+ static const char *patterns[] =
+ {
+ "mailto:",
+ "http://", "https://",
+ "ftp://",
+ "irc://", "ircs://"
+ };
+ size_t i;
+
+ for (i = 0; i < SIZEOF (patterns); i++)
+ {
+ const char *pattern = patterns[i];
+ /* msgid and msgid_plural are typically entirely ASCII. Therefore here
+ it's OK to use the <c-ctype.h> functions; no need for UTF-8 aware
+ <unictype.h> functions. */
+ const char *string_tail;
+ for (string_tail = string;;)
+ {
+ const char *found = c_strstr (string_tail, pattern);
+ if (found == NULL)
+ break;
+ /* Test whether the pattern starts at a word boundary. */
+ if (found == string_tail || !(c_isalnum (found[-1]) || found[-1] == '_'))
+ {
+ /* Find the end of the URL. */
+ const char *found_end = found + strlen (pattern);
+ const char *p = found_end;
+ while (*p != '\0'
+ && !(c_isspace (*p) || *p == '<' || *p == '>' || *p == '"'))
+ p++;
+ if (p > found_end)
+ {
+ /* Here *p == '\0' or
+ (c_isspace (*p) || *p == '<' || *p == '>' || *p == '"').
+ This implies !(c_isalnum (*p) || *p == '_'). */
+ /* In case of a "mailto" URL, test for a '@'. */
+ if (!(i == 0) || memchr (found, '@', p - found_end) != NULL)
+ {
+ /* Yes, it looks like a URL. */
+ return true;
+ }
+ }
+ }
+ string_tail = found + 1;
+ }
+ }
+
+ return false;
+}
+
+/* Determine whether a message contains a URL. */
+static bool
+message_has_url (const message_ty *mp)
+{
+ return string_has_url (mp->msgid)
+ || (mp->msgid_plural != NULL && string_has_url (mp->msgid_plural));
+}
+
+
+/* Determine whether a string (msgid or msgid_plural) contains an
+ email address. */
+static bool
+string_has_email (const char *string)
+{
+ const char *string_tail;
+ for (string_tail = string;;)
+ {
+ /* An email address consists of LOCALPART@DOMAIN. */
+ const char *at = strchr (string_tail, '@');
+ if (at == NULL)
+ break;
+ /* Find the start of the email address. */
+ const char *start;
+ {
+ const char *p = at;
+ while (p > string)
+ {
+ char c = p[-1];
+ if (!(c_isalnum (c)
+ || c == '!' || c == '#' || c == '$' || c == '%' || c == '&'
+ || c == '\'' || c == '*' || c == '+' || c == '-' || c == '.'
+ || c == '/' || c == '=' || c == '?' || c == '^' || c == '_'
+ || c == '`' || c == '{' || c == '|' || c == '}' || c == '~'))
+ break;
+ /* Consecutive dots not allowed. */
+ if (c == '.' && p[0] == '.')
+ break;
+ p--;
+ }
+ start = p;
+ }
+ if (start < at && start[0] != '.' && at[-1] != '.')
+ {
+ /* Find the end of the email address. */
+ const char *end;
+ const char *last_dot_in_domain = NULL;
+ {
+ const char *p = at + 1;
+ while (*p != '\0')
+ {
+ char c = *p;
+ if (!(c_isalnum (c) || c == '-' || c == '.'))
+ break;
+ /* Consecutive dots not allowed. */
+ if (c == '.' && p[-1] == '.')
+ break;
+ if (c == '.')
+ last_dot_in_domain = p;
+ p++;
+ }
+ end = p;
+ }
+ if (at + 1 < end && at[1] != '.' && end[-1] != '.'
+ /* The domain should contain a dot. */
+ && last_dot_in_domain != NULL
+ /* We can't enumerate all the possible top-level domains, but at
+ least we know that they are all 2 or more characters long. */
+ && end - (last_dot_in_domain + 1) >= 2)
+ {
+ /* Yes, it looks like an email address. */
+ return true;
+ }
+ }
+ string_tail = at + 1;
+ }
+
+ return false;
+}
+
+/* Determine whether a message contains an email address. */
+static bool
+message_has_email (const message_ty *mp)
+{
+ return string_has_email (mp->msgid)
+ || (mp->msgid_plural != NULL && string_has_email (mp->msgid_plural));
+}
+
+
+/* Perform the URL check on a non-obsolete message. */
+static void
+url_check_message (const message_ty *mp)
+{
+ if (message_has_url (mp))
+ if_error (IF_SEVERITY_WARNING,
+ mp->pos.file_name, mp->pos.line_number, (size_t)(-1), false,
+ _("Message contains an embedded URL. Better move it out of the translatable string, see %s"),
+ "https://www.gnu.org/software/gettext/manual/html_node/No-embedded-URLs.html");
+ else if (message_has_email (mp))
+ if_error (IF_SEVERITY_WARNING,
+ mp->pos.file_name, mp->pos.line_number, (size_t)(-1), false,
+ _("Message contains an embedded email address. Better move it out of the translatable string, see %s"),
+ "https://www.gnu.org/software/gettext/manual/html_node/No-embedded-URLs.html");
+}
+
+
/* Perform all checks on a message list.
Return the number of errors that were seen. */
int
message_ty *mp = mlp->item[j];
if (!is_header (mp))
- seen_errors += syntax_check_message (mp) + format_check_message (mp);
+ {
+ seen_errors += syntax_check_message (mp) + format_check_message (mp);
+ url_check_message (mp);
+ }
}
return seen_errors;