* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
*/
+/*
+ * is_dot() checks if the buffer at *str, holding *clen bytes, begins with a
+ * dot: either a literal '.' or its percent-encoded form "%2e" / "%2E".
+ *
+ * On a match, *str is advanced past the dot, *clen is reduced by the number
+ * of bytes consumed (1 or 3) and TRUE is returned. Otherwise both are left
+ * untouched and FALSE is returned.
+ */
+static bool is_dot(const char **str, size_t *clen)
+{
+ const char *p = *str;
+ if(!*clen)
+ /* nothing left: do not read past the counted buffer and do not let the
+ length wrap around below */
+ return FALSE;
+ if(*p == '.') {
+ (*str)++;
+ (*clen)--;
+ return TRUE;
+ }
+ else if((*clen >= 3) &&
+ /* OR-ing with 0x20 makes the compare match both 'e' and 'E' */
+ (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
+ *str += 3;
+ *clen -= 3;
+ return TRUE;
+ }
+ return FALSE;
+}
+
+#define ISSLASH(x) ((x) == '/')
+
/*
* dedotdotify()
* @unittest: 1395
* passed in and strips them off according to the rules in RFC 3986 section
* 5.2.4.
*
- * The function handles a query part ('?' + stuff) appended but it expects
- * that fragments ('#' + stuff) have already been cut off.
+ * The function handles a path only. The input must not contain a query or
+ * fragment part.
*
* RETURNS
*
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
- char *outptr;
- const char *endp = &input[clen];
- char *out;
+ struct dynbuf out;
+ CURLcode result = CURLE_OK;
*outp = NULL;
/* the path always starts with a slash, and a slash has not dot */
- if((clen < 2) || !memchr(input, '.', clen))
+ if(clen < 2)
return 0;
- out = malloc(clen + 1);
- if(!out)
- return 1; /* out of memory */
-
- *out = 0; /* null-terminates, for inputs like "./" */
- outptr = out;
-
- do {
- bool dotdot = TRUE;
- if(*input == '.') {
- /* A. If the input buffer begins with a prefix of "../" or "./", then
- remove that prefix from the input buffer; otherwise, */
-
- if(!strncmp("./", input, 2)) {
- input += 2;
- clen -= 2;
- }
- else if(!strncmp("../", input, 3)) {
- input += 3;
- clen -= 3;
- }
- /* D. if the input buffer consists only of "." or "..", then remove
- that from the input buffer; otherwise, */
+ Curl_dyn_init(&out, clen + 1);
+
+ /* A. If the input buffer begins with a prefix of "../" or "./", then
+ remove that prefix from the input buffer; otherwise, */
+ if(is_dot(&input, &clen)) {
+ const char *p = input;
+ size_t blen = clen;
+
+ if(!clen)
+ /* . [end] */
+ goto end;
+ else if(ISSLASH(*p)) {
+ /* one dot followed by a slash */
+ input = p + 1;
+ clen--;
+ }
- else if(!strcmp(".", input) || !strcmp("..", input) ||
- !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
- *out = 0;
- break;
+ /* D. if the input buffer consists only of "." or "..", then remove
+ that from the input buffer; otherwise, */
+ else if(is_dot(&p, &blen)) {
+ if(!blen)
+ /* .. [end] */
+ goto end;
+ else if(ISSLASH(*p)) {
+ /* ../ */
+ input = p + 1;
+ clen = blen - 1;
}
- else
- dotdot = FALSE;
}
- else if(*input == '/') {
+ }
+
+ while(clen && !result) { /* until end of path content */
+ if(ISSLASH(*input)) {
+ const char *p = &input[1];
+ size_t blen = clen - 1;
/* B. if the input buffer begins with a prefix of "/./" or "/.", where
"." is a complete path segment, then replace that prefix with "/" in
the input buffer; otherwise, */
- if(!strncmp("/./", input, 3)) {
- input += 2;
- clen -= 2;
- }
- else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
- *outptr++ = '/';
- *outptr = 0;
- break;
- }
-
- /* C. if the input buffer begins with a prefix of "/../" or "/..",
- where ".." is a complete path segment, then replace that prefix with
- "/" in the input buffer and remove the last segment and its
- preceding "/" (if any) from the output buffer; otherwise, */
-
- else if(!strncmp("/../", input, 4)) {
- input += 3;
- clen -= 3;
- /* remove the last segment from the output buffer */
- while(outptr > out) {
- outptr--;
- if(*outptr == '/')
- break;
+ if(is_dot(&p, &blen)) {
+ if(!blen) { /* /. */
+ result = Curl_dyn_addn(&out, "/", 1);
+ break;
}
- *outptr = 0; /* null-terminate where it stops */
- }
- else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
- /* remove the last segment from the output buffer */
- while(outptr > out) {
- outptr--;
- if(*outptr == '/')
- break;
+ else if(ISSLASH(*p)) { /* /./ */
+ input = p;
+ clen = blen;
+ continue;
+ }
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..",
+ where ".." is a complete path segment, then replace that prefix
+ with "/" in the input buffer and remove the last segment and its
+ preceding "/" (if any) from the output buffer; otherwise, */
+ else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
+ /* remove the last segment from the output buffer */
+ size_t len = Curl_dyn_len(&out);
+ if(len) {
+ char *ptr = Curl_dyn_ptr(&out);
+ char *last = memrchr(ptr, '/', len);
+ if(last)
+ /* trim the output at the slash */
+ Curl_dyn_setlen(&out, last - ptr);
+ }
+
+ if(blen) { /* /../ */
+ input = p;
+ clen = blen;
+ continue;
+ }
+ result = Curl_dyn_addn(&out, "/", 1);
+ break;
}
- *outptr++ = '/';
- *outptr = 0; /* null-terminate where it stops */
- break;
}
- else
- dotdot = FALSE;
- }
- else
- dotdot = FALSE;
-
- if(!dotdot) {
- /* E. move the first path segment in the input buffer to the end of
- the output buffer, including the initial "/" character (if any) and
- any subsequent characters up to, but not including, the next "/"
- character or the end of the input buffer. */
-
- do {
- *outptr++ = *input++;
- clen--;
- } while(*input && (*input != '/') && (*input != '?'));
- *outptr = 0;
}
- /* continue until end of path */
- } while(input < endp);
+ /* E. move the first path segment in the input buffer to the end of
+ the output buffer, including the initial "/" character (if any) and
+ any subsequent characters up to, but not including, the next "/"
+ character or the end of the input buffer. */
- *outp = out;
- return 0; /* success */
+ result = Curl_dyn_addn(&out, input, 1);
+ input++;
+ clen--;
+ }
+end:
+ if(!result) {
+ if(Curl_dyn_len(&out))
+ *outp = Curl_dyn_ptr(&out);
+ else {
+ *outp = strdup("");
+ if(!*outp)
+ return 1;
+ }
+ }
+ return result ? 1 : 0; /* success */
}
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
unsigned int i;
int fails = 0;
const struct dotdot pairs[] = {
+ { "%2f%2e%2e%2f/../a", "%2f%2e%2e%2f/a" },
+ { "%2f%2e%2e%2f/../", "%2f%2e%2e%2f/" },
+ { "%2f%2e%2e%2f/.", "%2f%2e%2e%2f/" },
+ { "%2f%2e%2e%2f/", "%2f%2e%2e%2f/" },
+ { "%2f%2e%2e%2f", "%2f%2e%2e%2f" },
+ { "%2f%2e%2e%2", "%2f%2e%2e%2" },
+ { "%2f%2e%2e%", "%2f%2e%2e%" },
+ { "%2f%2e%2e", "%2f%2e%2e" },
+ { "%2f%2e%2", "%2f%2e%2" },
+ { "%2f%2e%", "%2f%2e%" },
+ { "%2f%2e", "%2f%2e" },
+ { "%2f%2", "%2f%2" },
+ { "%2f%", "%2f%" },
+ { "%2f", "%2f" },
+ { "%2", "%2" },
+ { "%", NULL },
+ { "2", NULL },
+ { "e", NULL },
+ { ".", NULL },
+ { "./", "" },
+ { "..", "" },
+ { "../", "" },
+ { "../a", "a" },
+ { "///moo.", "///moo." },
+ { ".///moo.", "//moo." },
+ { "./moo..", "moo.." },
+ { "./moo../", "moo../" },
+ { "./moo../.m", "moo../.m" },
+ { "./moo", "moo" },
+ { "../moo", "moo" },
+ { "../moo?", "moo?" },
+ { "../moo?#", "moo?#" },
+ { "../moo?#?..", "moo?#?.." },
+ { "/../moo/..", "/" },
+ { "/a/c/%2e%2E/b", "/a/b" },
+ { "/a/%2e/g", "/a/g" },
+ { "/a/b/c/./g", "/a/b/c/g" },
+ { "/a/c/../b", "/a/b" },
{ "/a/b/c/./../../g", "/a/g" },
+ { "/a/b/c/./%2e%2E/../g", "/a/g" },
+ { "/a/b/c/./../%2e%2E/g", "/a/g" },
+ { "/a/b/c/%2E/%2e%2E/%2e%2E/g", "/a/g" },
{ "mid/content=5/../6", "mid/6" },
{ "/hello/../moo", "/moo" },
{ "/1/../1", "/1" },
{ "/1/./1", "/1/1" },
+ { "/1/%2e/1", "/1/1" },
+ { "/1/%2E/1", "/1/1" },
{ "/1/..", "/" },
{ "/1/.", "/1/" },
+ { "/1/%2e", "/1/" },
+ { "/1/%2E", "/1/" },
{ "/1/./..", "/" },
+ { "/1/%2e/.%2E", "/" },
+ { "/1/./%2e.", "/" },
{ "/1/./../2", "/2" },
{ "/hello/1/./../2", "/hello/2" },
- { "test/this", NULL },
+ { "test/this", "test/this" },
{ "test/this/../now", "test/now" },
{ "/1../moo../foo", "/1../moo../foo"},
{ "/../../moo", "/moo"},
{ "/../../moo?", "/moo?"},
- { "/123?", NULL},
- { "/../moo/..?", "/" },
+ { "/123?", "/123?" },
{ "/", NULL },
{ "", NULL },
{ "/.../", "/.../" },
- { "./moo", "moo" },
- { "../moo", "moo" },
{ "/.", "/" },
{ "/..", "/" },
{ "/moo/..", "/" },