From: Amos Jeffries Date: Sat, 5 Mar 2011 00:32:41 +0000 (+1300) Subject: RFC 1738 encoder upgraded action flags X-Git-Tag: take06~27^2~115 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=afbdbe91d34a09a1d777732e30b1d6f100d51afb;p=thirdparty%2Fsquid.git RFC 1738 encoder upgraded action flags This updates the encoder flags parameter to allow selection of individual character sets of CTRLS, UNSAFE, and RESERVED. Also, to permit selective omission of the % and space characters from the UNSAFE set. Also, an optimization is added to skip SAFE characters on a short-circuit. --- diff --git a/include/rfc1738.h b/include/rfc1738.h index 2d7b9a4b79..de7ade4a3c 100644 --- a/include/rfc1738.h +++ b/include/rfc1738.h @@ -6,33 +6,49 @@ extern "C" { #endif /* Encoder rfc1738_do_escape flag values. */ -#define RFC1738_ESCAPE_UNSAFE 0 -#define RFC1738_ESCAPE_RESERVED 1 -#define RFC1738_ESCAPE_UNESCAPED -1 - +#define RFC1738_ESCAPE_CTRLS 1 +#define RFC1738_ESCAPE_UNSAFE 2 +#define RFC1738_ESCAPE_RESERVED 4 +#define RFC1738_ESCAPE_ALL (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_RESERVED|RFC1738_ESCAPE_CTRLS) + // exclusions +#define RFC1738_ESCAPE_NOSPACE 128 +#define RFC1738_ESCAPE_NOPERCENT 256 + // Backward compatibility +#define RFC1738_ESCAPE_UNESCAPED (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT) /** * \group rfc1738 RFC 1738 URL-escaping library * * Public API is formed of a triplet of encode functions mapping to the rfc1738_do_encode() engine. * - * ASCII characters are split into three groups: + * ASCII characters are split into four groups: * \item SAFE Characters which are safe to occur in any URL. For example A,B,C - * \item UNSAFE Characters which are completely usafe to occur in any URL. For example; backspace, tab, space, newline + * \item CTRLS Binary control codes. Dangerous to include in URLs. + * \item UNSAFE Characters which are completely unsafe to occur in any URL. For example; backspace, tab, space, newline.
* \item RESERVED Characters which are reserved for special meaning and may only occur in certain parts of a URL. * * Returns a static buffer containing the RFC 1738 compliant, escaped version of the given url. * - * \param flags RFC1738_ESCAPE_UNSAFE Only encode unsafe characters. Ignore reserved. - * \param flags RFC1738_ESCAPE_RESERVED Encode all unsafe and reserved characters. - * \param flags RFC1738_ESCAPE_UNESCAPED Encode all unsafe characters which have not already been encoded. + * \param flags RFC1738_ESCAPE_CTRLS Encode the blatantly dangerous binary codes. + * \param flags RFC1738_ESCAPE_UNSAFE Encode printable unsafe characters (excluding CTRLs). + * \param flags RFC1738_ESCAPE_RESERVED Encode reserved characters. + * \param flags RFC1738_ESCAPE_ALL Encode all binary CTRL, unsafe and reserved characters. + * \param flags RFC1738_ESCAPE_NOSPACE Ignore the space whitespace character. + * \param flags RFC1738_ESCAPE_NOPERCENT Ignore the escaping delimiter '%'. */ extern char *rfc1738_do_escape(const char *url, int flags); /* Old API functions */ -#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE) -#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_RESERVED) -#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNESCAPED) + + /* Default RFC 1738 escaping. Escape all UNSAFE characters and binary CTRL codes */ +#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS) + + /* Escape a partial URL. Encoding every binary code, unsafe or reserved character. */ +#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_ALL) + + /* Escape a URL. Encoding every unsafe character but skipping reserved and already-encoded bytes. + * Suitable for safely encoding an absolute URL which may be encoded but is not trusted.
*/ +#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT) /** diff --git a/lib/rfc1738.c b/lib/rfc1738.c index 382a5c72a9..1c8be7705f 100644 --- a/lib/rfc1738.c +++ b/lib/rfc1738.c @@ -34,7 +34,6 @@ #include "config.h" #include "rfc1738.h" -//#include "util.h" #if HAVE_STDIO_H #include @@ -53,6 +52,7 @@ static char rfc1738_unsafe_chars[] = { (char) 0x22, /* " */ (char) 0x23, /* # */ #if 0 /* done in code */ + (char) 0x20, /* space */ (char) 0x25, /* % */ #endif (char) 0x7B, /* { */ @@ -64,8 +64,7 @@ static char rfc1738_unsafe_chars[] = { (char) 0x5B, /* [ */ (char) 0x5D, /* ] */ (char) 0x60, /* ` */ - (char) 0x27, /* ' */ - (char) 0x20 /* space */ + (char) 0x27 /* ' */ }; static char rfc1738_reserved_chars[] = { @@ -97,36 +96,49 @@ rfc1738_do_escape(const char *url, int flags) buf = (char*)xcalloc(bufsize, 1); } for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) { + + /* a-z, A-Z and 0-9 are SAFE. 
*/ + if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9')) { + *q = *p; + continue; + } + do_escape = 0; /* RFC 1738 defines these chars as unsafe */ - for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) { - if (*p == rfc1738_unsafe_chars[i]) { - do_escape = 1; - break; + if ((flags & RFC1738_ESCAPE_UNSAFE)) { + for (i = 0;i < sizeof(rfc1738_unsafe_chars); i++) { + if (*p == rfc1738_unsafe_chars[i]) { + do_escape = 1; + break; + } } + /* Handle % separately */ + if (!(flags & RFC1738_ESCAPE_NOPERCENT) && *p == '%') + do_escape = 1; + /* Handle space separately */ + else if (!(flags & RFC1738_ESCAPE_NOSPACE) && *p <= ' ') + do_escape = 1; } - /* Handle % separately */ - if (flags != RFC1738_ESCAPE_UNESCAPED && *p == '%') - do_escape = 1; /* RFC 1738 defines these chars as reserved */ - for (i = 0; i < sizeof(rfc1738_reserved_chars) && flags == RFC1738_ESCAPE_RESERVED; i++) { - if (*p == rfc1738_reserved_chars[i]) { - do_escape = 1; - break; + if ((flags & RFC1738_ESCAPE_RESERVED) && do_escape == 0) { + for (i = 0; i < sizeof(rfc1738_reserved_chars); i++) { + if (*p == rfc1738_reserved_chars[i]) { + do_escape = 1; + break; + } } } - /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ - if ((unsigned char) *p <= (unsigned char) 0x1F) { - do_escape = 1; - } - /* RFC 1738 says 0x7f is encoded */ - if (*p == (char) 0x7F) { - do_escape = 1; - } - /* RFC 1738 says any non-US-ASCII are encoded */ - if (((unsigned char) *p >= (unsigned char) 0x80)) { - do_escape = 1; + if ((flags & RFC1738_ESCAPE_CTRLS) && do_escape == 0) { + /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ + if ((unsigned char) *p <= (unsigned char) 0x1F) + do_escape = 1; + /* RFC 1738 says 0x7f is encoded */ + else if (*p == (char) 0x7F) + do_escape = 1; + /* RFC 1738 says any non-US-ASCII are encoded */ + else if (((unsigned char) *p >= (unsigned char) 0x80)) + do_escape = 1; } /* Do the triplet encoding, or just copy the char */ /* note: we do not 
need snprintf here as q is appropriately diff --git a/lib/tests/testRFC1738.cc b/lib/tests/testRFC1738.cc index c67a85a53b..d989bad82b 100644 --- a/lib/tests/testRFC1738.cc +++ b/lib/tests/testRFC1738.cc @@ -87,10 +87,6 @@ void testRFC1738::testUrlEncode() { char *result; -#define RFC1738_ESCAPE_UNSAFE 0 -#define RFC1738_ESCAPE_RESERVED 1 -#define RFC1738_ESCAPE_UNESCAPED -1 - /* TEST: Escaping only unsafe characters */ /* regular URL (no encoding needed) */