#endif
/* Encoder rfc1738_do_escape flag values. */
-#define RFC1738_ESCAPE_UNSAFE 0
-#define RFC1738_ESCAPE_RESERVED 1
-#define RFC1738_ESCAPE_UNESCAPED -1
-
+#define RFC1738_ESCAPE_CTRLS 1
+#define RFC1738_ESCAPE_UNSAFE 2
+#define RFC1738_ESCAPE_RESERVED 4
+#define RFC1738_ESCAPE_ALL (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_RESERVED|RFC1738_ESCAPE_CTRLS)
+ // exclusions
+#define RFC1738_ESCAPE_NOSPACE 128
+#define RFC1738_ESCAPE_NOPERCENT 256
+ // Backward compatibility
+#define RFC1738_ESCAPE_UNESCAPED (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT)
/**
* \group rfc1738 RFC 1738 URL-escaping library
*
* Public API is formed of a triplet of encode functions mapping to the rfc1738_do_encode() engine.
*
- * ASCII characters are split into three groups:
+ * ASCII characters are split into four groups:
* \item SAFE Characters which are safe to occur in any URL. For example A,B,C
- * \item UNSAFE Characters which are completely usafe to occur in any URL. For example; backspace, tab, space, newline
+ * \item CTRLS Binary control codes. Dangerous to include in URLs.
+ * \item UNSAFE Characters which are completely unsafe to occur in any URL. For example: backspace, tab, space, newline.
* \item RESERVED Characters which are reserved for special meaning and may only occur in certain parts of a URL.
*
* Returns a static buffer containing the RFC 1738 compliant, escaped version of the given url.
*
- * \param flags RFC1738_ESCAPE_UNSAFE Only encode unsafe characters. Ignore reserved.
- * \param flags RFC1738_ESCAPE_RESERVED Encode all unsafe and reserved characters.
- * \param flags RFC1738_ESCAPE_UNESCAPED Encode all unsafe characters which have not already been encoded.
+ * \param flags RFC1738_ESCAPE_CTRLS Encode the blatantly dangerous binary codes.
+ * \param flags RFC1738_ESCAPE_UNSAFE Encode printable unsafe characters (excluding CTRLs).
+ * \param flags RFC1738_ESCAPE_RESERVED Encode reserved characters.
+ * \param flags RFC1738_ESCAPE_ALL Encode all binary CTRL, unsafe and reserved characters.
+ * \param flags RFC1738_ESCAPE_NOSPACE Ignore the space whitespace character.
+ * \param flags RFC1738_ESCAPE_NOPERCENT Ignore the escaping delimiter '%'.
*/
extern char *rfc1738_do_escape(const char *url, int flags);
/* Old API functions */
-#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE)
-#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_RESERVED)
-#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNESCAPED)
+
+ /* Default RFC 1738 escaping. Escape all UNSAFE characters and binary CTRL codes */
+#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS)
+
+ /* Escape a partial URL, encoding every binary code, unsafe or reserved character. */
+#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_ALL)
+
+ /* Escape a URL, encoding every unsafe character but skipping reserved and already-encoded bytes.
+ * Suitable for safely encoding an absolute URL which may be encoded but is not trusted. */
+#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT)
/**
#include "config.h"
#include "rfc1738.h"
-//#include "util.h"
#if HAVE_STDIO_H
#include <stdio.h>
(char) 0x22, /* " */
(char) 0x23, /* # */
#if 0 /* done in code */
+ (char) 0x20, /* space */
(char) 0x25, /* % */
#endif
(char) 0x7B, /* { */
(char) 0x5B, /* [ */
(char) 0x5D, /* ] */
(char) 0x60, /* ` */
- (char) 0x27, /* ' */
- (char) 0x20 /* space */
+ (char) 0x27 /* ' */
};
static char rfc1738_reserved_chars[] = {
buf = (char*)xcalloc(bufsize, 1);
}
for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) {
+
+ /* a-z, A-Z and 0-9 are SAFE. */
+ if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9')) {
+ *q = *p;
+ continue;
+ }
+
do_escape = 0;
/* RFC 1738 defines these chars as unsafe */
- for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) {
- if (*p == rfc1738_unsafe_chars[i]) {
- do_escape = 1;
- break;
+ if ((flags & RFC1738_ESCAPE_UNSAFE)) {
+ for (i = 0;i < sizeof(rfc1738_unsafe_chars); i++) {
+ if (*p == rfc1738_unsafe_chars[i]) {
+ do_escape = 1;
+ break;
+ }
}
+ /* Handle % separately */
+ if (!(flags & RFC1738_ESCAPE_NOPERCENT) && *p == '%')
+ do_escape = 1;
+ /* Handle space separately */
+ else if (!(flags & RFC1738_ESCAPE_NOSPACE) && *p <= ' ')
+ do_escape = 1;
}
- /* Handle % separately */
- if (flags != RFC1738_ESCAPE_UNESCAPED && *p == '%')
- do_escape = 1;
/* RFC 1738 defines these chars as reserved */
- for (i = 0; i < sizeof(rfc1738_reserved_chars) && flags == RFC1738_ESCAPE_RESERVED; i++) {
- if (*p == rfc1738_reserved_chars[i]) {
- do_escape = 1;
- break;
+ if ((flags & RFC1738_ESCAPE_RESERVED) && do_escape == 0) {
+ for (i = 0; i < sizeof(rfc1738_reserved_chars); i++) {
+ if (*p == rfc1738_reserved_chars[i]) {
+ do_escape = 1;
+ break;
+ }
}
}
- /* RFC 1738 says any control chars (0x00-0x1F) are encoded */
- if ((unsigned char) *p <= (unsigned char) 0x1F) {
- do_escape = 1;
- }
- /* RFC 1738 says 0x7f is encoded */
- if (*p == (char) 0x7F) {
- do_escape = 1;
- }
- /* RFC 1738 says any non-US-ASCII are encoded */
- if (((unsigned char) *p >= (unsigned char) 0x80)) {
- do_escape = 1;
+ if ((flags & RFC1738_ESCAPE_CTRLS) && do_escape == 0) {
+ /* RFC 1738 says any control chars (0x00-0x1F) are encoded */
+ if ((unsigned char) *p <= (unsigned char) 0x1F)
+ do_escape = 1;
+ /* RFC 1738 says 0x7f is encoded */
+ else if (*p == (char) 0x7F)
+ do_escape = 1;
+ /* RFC 1738 says any non-US-ASCII are encoded */
+ else if (((unsigned char) *p >= (unsigned char) 0x80))
+ do_escape = 1;
}
/* Do the triplet encoding, or just copy the char */
/* note: we do not need snprintf here as q is appropriately