Cleanup: finer-grained unit tests for valid_utf8_string().
File: util/valid_utf8_string.c.
+
+ Style: converted failed test reports to "got before want"
+ order, and converted tests to "fail before pass" order.
+ Files: util/valid_utf8_string.c, util/printable.c.
+
+ Cleanup: added a valid_utf8_stringz() function to simplify
+ most calls to validate null-terminated strings, eliminating
+ the runtime cost and code maintenance cost of 17 strlen()
+ calls. Files: src/bounce/bounce_notify_util.c,
+ src/cleanup/cleanup_addr.c, src/global/dict_ldap.c,
+ src/global/dict_mysql.c, src/global/dict_pgsql.c,
+ src/global/dict_sqlite.c, src/oqmgr/qmgr_deliver.c,
+ src/postalias/postalias.c, src/postmap/postmap.c,
+ src/postscreen/postscreen_smtpd.c, src/qmgr/qmgr_deliver.c,
+ src/smtpd/smtpd.c, src/smtpd/smtpd_check.c,
+ src/trivial-rewrite/resolve.c, src/util/casefold.c,
+ src/util/dict_inline.c, src/util/dict_thash.c,
+ src/util/dict_utf8.c, src/util/midna_domain.c,
+ src/util/printable.c, src/util/stringops.h,
+ src/util/valid_utf8_string.c.
rpk
sni
Amawalk
+resychronization
smtpd smtpd c
proto postconf proto postscreen postscreen c
global maillog_client c master master c smtp smtp c
+ src postalias postalias c src postmap postmap c
+ src postalias postalias c src postmap postmap c
+ src smtpd smtpd c src smtpd smtpd_check c
post_mail_fprintf(bounce, "X-%s-Queue-ID: %s",
bounce_info->mail_name, bounce_info->queue_id);
-#define IS_UTF8_ADDRESS(str, len) \
- ((str)[0] != 0 && !allascii(str) && valid_utf8_string((str), (len)))
+#define IS_UTF8_ADDRESS(str) \
+ ((str)[0] != 0 && !allascii(str) && valid_utf8_stringz(str))
/* Fix 20140708: use "utf-8" or "rfc822" as appropriate. */
if (VSTRING_LEN(bounce_info->sender) > 0)
post_mail_fprintf(bounce, "X-%s-Sender: %s; %s",
bounce_info->mail_name, bounce_info->smtputf8
- && IS_UTF8_ADDRESS(STR(bounce_info->sender),
- VSTRING_LEN(bounce_info->sender)) ?
+ && IS_UTF8_ADDRESS(STR(bounce_info->sender)) ?
"utf-8" : "rfc822", STR(bounce_info->sender));
if (bounce_info->arrival_time > 0)
post_mail_fprintf(bounce, "Arrival-Date: %s",
/* Fix 20140708: Don't send "utf-8" type with non-UTF8 address. */
post_mail_fprintf(bounce, "Final-Recipient: %s; %s",
bounce_info->smtputf8
- && IS_UTF8_ADDRESS(rcpt->address,
- strlen(rcpt->address)) ?
+ && IS_UTF8_ADDRESS(rcpt->address) ?
"utf-8" : "rfc822", rcpt->address);
/*
/* Fix 20140708: Don't send "utf-8" type with non-UTF8 address. */
post_mail_fprintf(bounce, "Original-Recipient: %s; %s",
bounce_info->smtputf8
- && IS_UTF8_ADDRESS(rcpt->orig_addr,
- strlen(rcpt->orig_addr)) ?
+ && IS_UTF8_ADDRESS(rcpt->orig_addr) ?
"utf-8" : "rfc822", rcpt->orig_addr);
}
post_mail_fprintf(bounce, "Action: %s",
}
/* Fix 20140711: Auto-detect an UTF8 sender. */
if (var_smtputf8_enable && *STR(clean_addr) && !allascii(STR(clean_addr))
- && valid_utf8_string(STR(clean_addr), LEN(clean_addr))) {
+ && valid_utf8_stringz(STR(clean_addr))) {
state->smtputf8 |= SMTPUTF8_FLAG_SENDER;
/* Fix 20140713: request SMTPUTF8 support selectively. */
if (state->flags & CLEANUP_FLAG_AUTOUTF8)
}
/* Fix 20140711: Auto-detect an UTF8 recipient. */
if (var_smtputf8_enable && *STR(clean_addr) && !allascii(STR(clean_addr))
- && valid_utf8_string(STR(clean_addr), LEN(clean_addr))) {
+ && valid_utf8_stringz(STR(clean_addr))) {
/* Fix 20140713: request SMTPUTF8 support selectively. */
if (state->flags & CLEANUP_FLAG_AUTOUTF8)
state->smtputf8 |= SMTPUTF8_FLAG_REQUESTED;
}
/* Fix 20140711: Auto-detect an UTF8 recipient. */
if (var_smtputf8_enable && *STR(clean_addr) && !allascii(STR(clean_addr))
- && valid_utf8_string(STR(clean_addr), LEN(clean_addr))) {
+ && valid_utf8_stringz(STR(clean_addr))) {
/* Fix 20140713: request SMTPUTF8 support selectively. */
if (state->flags & CLEANUP_FLAG_AUTOUTF8)
state->smtputf8 |= SMTPUTF8_FLAG_REQUESTED;
/* url_attrs - attributes we want from LDAP URL */
-static char **url_attrs(DICT_LDAP *dict_ldap, LDAPURLDesc * url)
+static char **url_attrs(DICT_LDAP *dict_ldap, LDAPURLDesc *url)
{
static ARGV *attrs;
char **a1;
* Don't frustrate future attempts to make Postfix UTF-8 transparent.
*/
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) == 0
- && !valid_utf8_string(name, strlen(name))) {
+ && !valid_utf8_stringz(name)) {
if (msg_verbose)
msg_info("%s: %s: Skipping lookup of non-UTF-8 key '%s'",
myname, dict_ldap->parser->name, name);
*/
#ifdef SNAPSHOT
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) == 0
- && !valid_utf8_string(name, strlen(name))) {
+ && !valid_utf8_stringz(name)) {
if (msg_verbose)
msg_info("%s: %s: Skipping lookup of non-UTF-8 key '%s'",
myname, dict_mysql->parser->name, name);
*/
#ifdef SNAPSHOT
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) == 0
- && !valid_utf8_string(name, strlen(name))) {
+ && !valid_utf8_stringz(name)) {
if (msg_verbose)
msg_info("%s: %s: Skipping lookup of non-UTF-8 key '%s'",
myname, dict_pgsql->parser->name, name);
* Don't frustrate future attempts to make Postfix UTF-8 transparent.
*/
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE) == 0
- && !valid_utf8_string(name, strlen(name))) {
+ && !valid_utf8_stringz(name)) {
if (msg_verbose)
msg_info("%s: %s: Skipping lookup of non-UTF-8 key '%s'",
myname, dict_sqlite->parser->name, name);
* Patches change both the patchlevel and the release date. Snapshots have no
* patchlevel; they change the release date only.
*/
-#define MAIL_RELEASE_DATE "20231011"
+#define MAIL_RELEASE_DATE "20231012"
#define MAIL_VERSION_NUMBER "3.9"
#ifdef SNAPSHOT
*/
for (recipient = list.info; recipient < list.info + list.len; recipient++)
if (var_smtputf8_enable && (addr = recipient->address)[0]
- && !allascii(addr) && valid_utf8_string(addr, strlen(addr))) {
+ && !allascii(addr) && valid_utf8_stringz(addr)) {
smtputf8 |= SMTPUTF8_FLAG_RECIPIENT;
if (message->verp_delims)
smtputf8 |= SMTPUTF8_FLAG_SENDER;
*/
if ((mkmap->dict->flags & DICT_FLAG_UTF8_ACTIVE)
&& !allascii(STR(line_buffer))
- && !valid_utf8_string(STR(line_buffer), LEN(line_buffer))) {
+ && !valid_utf8_stringz(STR(line_buffer))) {
msg_warn("%s, line %d: non-UTF-8 input \"%s\""
" -- ignoring this line",
VSTREAM_PATH(source_fp), lineno, STR(line_buffer));
*/
if ((mkmap->dict->flags & DICT_FLAG_UTF8_ACTIVE)
&& !allascii(STR(line_buffer))
- && !valid_utf8_string(STR(line_buffer), LEN(line_buffer))) {
+ && !valid_utf8_stringz(STR(line_buffer))) {
msg_warn("%s, line %d: non-UTF-8 input \"%s\""
" -- ignoring this line",
VSTREAM_PATH(source_fp), lineno, STR(line_buffer));
}
/*
- * Bare newline test.
+ * Bare newline test. Note: at this point, state->cmd_buffer is
+ * not null-terminated and may contain embedded null bytes.
*/
if (ch == '\n') {
if ((state->flags & PSC_STATE_MASK_BARLF_TODO_SKIP)
/*
* Avoid complaints from Postfix maps about malformed content.
*/
-#define PSC_BAD_UTF8(str, len) \
- (var_smtputf8_enable && !valid_utf8_string((str), (len)))
+#define PSC_BAD_UTF8(str) \
+ (var_smtputf8_enable && !valid_utf8_stringz(str))
/*
* Terminate the command buffer, and apply the last-resort command
* editing workaround.
*/
VSTRING_TERMINATE(state->cmd_buffer);
- if (psc_cmd_filter != 0 && !PSC_BAD_UTF8(STR(state->cmd_buffer),
- LEN(state->cmd_buffer))) {
+ if (psc_cmd_filter != 0 && !PSC_BAD_UTF8(STR(state->cmd_buffer))) {
const char *cp;
for (cp = STR(state->cmd_buffer); *cp && IS_SPACE_TAB(*cp); cp++)
if ((state->flags & PSC_STATE_MASK_NSMTP_TODO_SKIP)
== PSC_STATE_FLAG_NSMTP_TODO && cmdp->name == 0
&& (is_header(command)
- || PSC_BAD_UTF8(command, strlen(command))
+ || PSC_BAD_UTF8(command)
/* Ignore forbid_cmds lookup errors. Non-critical feature. */
|| (*var_psc_forbid_cmds
&& string_list_match(psc_forbid_cmds, command)))) {
*/
for (recipient = list.info; recipient < list.info + list.len; recipient++)
if (var_smtputf8_enable && (addr = recipient->address)[0]
- && !allascii(addr) && valid_utf8_string(addr, strlen(addr))) {
+ && !allascii(addr) && valid_utf8_stringz(addr)) {
smtputf8 |= SMTPUTF8_FLAG_RECIPIENT;
if (message->verp_delims)
smtputf8 |= SMTPUTF8_FLAG_SENDER;
watchdog_pat();
smtpd_chat_query(state);
/* Safety: protect internal interfaces against malformed UTF-8. */
- if (var_smtputf8_enable && valid_utf8_string(STR(state->buffer),
- LEN(state->buffer)) == 0) {
+ if (var_smtputf8_enable
+ && valid_utf8_stringz(STR(state->buffer)) == 0) {
state->error_mask |= MAIL_ERROR_PROTOCOL;
smtpd_chat_reply(state, "500 5.5.2 Error: bad UTF-8 syntax");
state->error_count++;
if (result == SMTPD_CHECK_DUNNO)
result = *respt;
if (!var_smtpd_tls_enable_rpk
- || *action == SMTPD_ACL_SEARCH_CODE_PKEY_FPRINT)
+ || *action == SMTPD_ACL_SEARCH_CODE_PKEY_FPRINT)
break;
}
} else if (!var_smtpd_tls_ask_ccert) {
&& cert_result != SMTPD_CHECK_DUNNO
&& cert_result != pkey_result) {
msg_warn("%s: %s: %s: Fragile access policy: %s=yes, but"
- " the action for certificate fingerprint \"%s\" !="
+ " the action for certificate fingerprint \"%s\" !="
" the action for public key fingerprint \"%s\"",
myname, state->namaddr, acl->map_type_name,
VAR_SMTPD_TLS_ENABLE_RPK,
{
int retval;
- if ((retval = valid_utf8_string(action, strlen(action))) == 0)
+ if ((retval = valid_utf8_stringz(action)) == 0)
msg_warn("malformed UTF-8 in policy server %s response: \"%s\"",
server, action);
return (retval);
ENCODE_CN(issuer, issuer_buf, state->tls_context->issuer_CN);
#define NONEMPTY(x) ((x) != 0 && (*x) != 0)
+
/*
* XXX: Too noisy to warn for each policy lookup, especially because we
* don't even know whether the policy server will use the fingerprint. So
if (!valid_mailhost_literal(rcpt_domain, DONT_GRIPE))
*flags |= RESOLVE_FLAG_ERROR;
} else if (var_smtputf8_enable
- && valid_utf8_string(STR(nextrcpt), LEN(nextrcpt)) == 0) {
+ && valid_utf8_stringz(STR(nextrcpt)) == 0) {
*flags |= RESOLVE_FLAG_ERROR;
} else if (!valid_utf8_hostname(var_smtputf8_enable, rcpt_domain,
DONT_GRIPE)) {
encode_utf8(buffer, codepoint);
if (msg_verbose)
vstream_printf("U+%X -> %s\n", codepoint, STR(buffer));
- if (valid_utf8_string(STR(buffer), LEN(buffer)) == 0)
+ if (valid_utf8_stringz(STR(buffer)) == 0)
msg_fatal("bad utf-8 encoding for U+%X\n", codepoint);
casefold(dest, STR(buffer));
}
*/
if (DICT_NEED_UTF8_ACTIVATION(util_utf8_enable, dict_flags)
&& allascii(name) == 0
- && valid_utf8_string(name, strlen(name)) == 0)
+ && valid_utf8_stringz(name) == 0)
DICT_INLINE_RETURN(dict_surrogate(DICT_TYPE_INLINE, name,
open_flags, dict_flags,
"bad UTF-8 syntax: \"%s:%s\"; "
*/
if ((dict->flags & DICT_FLAG_UTF8_ACTIVE)
&& allascii(STR(line_buffer)) == 0
- && valid_utf8_string(STR(line_buffer), LEN(line_buffer)) == 0) {
+ && valid_utf8_stringz(STR(line_buffer)) == 0) {
msg_warn("%s, line %d: non-UTF-8 input \"%s\""
" -- ignoring this line",
VSTREAM_PATH(fp), lineno, STR(line_buffer));
" is this an alias file?", path, lineno);
/*
- * Optionally treat the value as a filename, and replace the value
- * with the BASE64-encoded content of the named file.
+ * Optionally treat the value as a filename, and replace the
+ * value with the BASE64-encoded content of the named file.
*/
if (dict_flags & DICT_FLAG_SRC_RHS_IS_FILE) {
VSTRING *base64_buf;
/*
* Validate UTF-8 without casefolding.
*/
- if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+ if (!allascii(string) && valid_utf8_stringz(string) == 0) {
if (err)
*err = "malformed UTF-8 or invalid codepoint";
return (0);
static int dict_utf8_check(const char *string, CONST_CHAR_STAR *err)
{
- if (!allascii(string) && valid_utf8_string(string, strlen(string)) == 0) {
+ if (!allascii(string) && valid_utf8_stringz(string) == 0) {
if (err)
*err = "malformed UTF-8 or invalid codepoint";
return (0);
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
- if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+ if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s",
myname, name, "malformed UTF-8");
return (0);
/*
* Paranoia: do not expose uidna_*() to unfiltered network data.
*/
- if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) {
+ if (allascii(name) == 0 && valid_utf8_stringz(name) == 0) {
msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s",
myname, name, "malformed UTF-8");
return (0);
#include "stringops.h"
#include "parse_utf8_char.h"
-int util_utf8_enable = 0;
+int util_utf8_enable = 0;
/* printable - binary compatibility */
* Test cases for 1-, 2-, and 3-byte encodings. Originally contributed by
* Viktor Dukhovni, and annotated using translate.google.com.
*
+ * See valid_utf8_string.c for single-error tests.
+ *
* XXX Need a test for 4-byte encodings, preferably with strings that can be
* displayed.
*/
input = mystrdup(tp->input);
actual = printable(input, '?');
- if (strcmp(actual, tp->expected) == 0) {
- vstream_fprintf(VSTREAM_ERR, "input: >%s<, want and got: >%s<\n",
+ if (strcmp(actual, tp->expected) != 0) {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got: >%s<, want: >%s<\n",
+ tp->input, actual, tp->expected);
+ vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+ fail++;
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
tp->input, actual);
vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
pass++;
- } else {
- vstream_fprintf(VSTREAM_ERR, "input: >%s<, want: >%s<, got: >%s<\n",
- tp->input, tp->expected, actual);
- vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
- fail++;
}
myfree(input);
}
extern const char *WARN_UNUSED_RESULT split_nameval(char *, char **, char **);
extern const char *WARN_UNUSED_RESULT split_qnameval(char *, char **, char **);
extern int valid_utf8_string(const char *, ssize_t);
+extern int valid_utf8_stringz(const char *);
extern size_t balpar(const char *, const char *);
extern char *WARN_UNUSED_RESULT extpar(char **, const char *, int);
extern int strcasecmp_utf8x(int, const char *, const char *);
/* int valid_utf8_string(str, len)
/* const char *str;
/* ssize_t len;
+/*
+/* int valid_utf8_stringz(str)
+/* const char *str;
/* DESCRIPTION
/* valid_utf8_string() determines if all bytes in a string
/* satisfy parse_utf8_char(3h) checks. See there for any
/* implementation limitations.
/*
+/* valid_utf8_stringz() determines the same for zero-terminated
+/* strings.
+/*
/* A zero-length string is considered valid.
/* DIAGNOSTICS
/* The result value is zero when the caller specifies a negative
return (1);
}
+/* valid_utf8_stringz - validate string according to RFC 3629 */
+
+int valid_utf8_stringz(const char *str)
+{
+ const char *cp;
+ const char *last;
+
+ /*
+ * Walk the input up to, but not including, the first null byte. Bytes
+ * that follow an embedded null byte are therefore never examined. An
+ * empty string never enters the loop, and is reported as valid.
+ *
+ * parse_utf8_char() is called with a null end pointer. A null result
+ * rejects the bytes at cp, and with that the whole string. Otherwise,
+ * cp is moved to the returned position (presumably the last byte of
+ * the character just parsed; see parse_utf8_char(3h)), and the loop
+ * increment steps to the start of the next character.
+ *
+ * Ideally, the compiler will inline parse_utf8_char(), propagate the
+ * null pointer constant value, and eliminate code branches that test
+ * whether 0 != 0.
+ */
+ for (cp = str; *cp; cp++) {
+ if ((last = parse_utf8_char(cp, 0)) != 0)
+ cp = last;
+ else
+ return (0);
+ }
+ return (1);
+}
+
/*
* Stand-alone test program. Each string is a line without line terminator.
*/
#include <msg_vstream.h>
/*
- * Test cases for 1-, 2-, and 3-byte encodings. See printable() tests for
- * provenance.
+ * Test cases for 1-, 2-, and 3-byte encodings. See printable.c for UTF8
+ * parser resynchronization tests.
*
* XXX Need a test for 4-byte encodings, preferably with strings that can be
* displayed.
*
- * XXX Need tests for over-long encodings and surrogates.
+ * XXX Need tests with hand-crafted over-long encodings and surrogates.
*/
struct testcase {
const char *name;
util_utf8_enable = 1;
for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) {
- int actual;
+ int actual_l;
+ int actual_z;
/*
* Notes:
* inputs and outputs. Use vstream_fprintf() instead.
*/
vstream_fprintf(VSTREAM_ERR, "RUN %s\n", tp->name);
- actual = valid_utf8_string(tp->input, strlen(tp->input));
-
- if (actual == tp->expected) {
- vstream_fprintf(VSTREAM_ERR, "input: >%s<, want and got: >%s<\n",
- tp->input, valid_to_str(actual));
- vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
- pass++;
- } else {
- vstream_fprintf(VSTREAM_ERR, "input: >%s<, want: >%s<, got: >%s<\n",
- tp->input, valid_to_str(tp->expected),
- valid_to_str(actual));
+ actual_l = valid_utf8_string(tp->input, strlen(tp->input));
+ actual_z = valid_utf8_stringz(tp->input);
+
+ if (actual_l != tp->expected) {
+ vstream_fprintf(VSTREAM_ERR,
+ "input: >%s<, 'actual_l' got: >%s<, want: >%s<\n",
+ tp->input, valid_to_str(actual_l),
+ valid_to_str(tp->expected));
+ vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
+ fail++;
+ } else if (actual_z != tp->expected) {
+ vstream_fprintf(VSTREAM_ERR,
+ "input: >%s<, 'actual_z' got: >%s<, want: >%s<\n",
+ tp->input, valid_to_str(actual_z),
+ valid_to_str(tp->expected));
vstream_fprintf(VSTREAM_ERR, "FAIL %s\n", tp->name);
fail++;
+ } else {
+ vstream_fprintf(VSTREAM_ERR, "input: >%s<, got and want: >%s<\n",
+ tp->input, valid_to_str(actual_l));
+ vstream_fprintf(VSTREAM_ERR, "PASS %s\n", tp->name);
+ pass++;
}
}
msg_info("PASS=%d FAIL=%d", pass, fail);