]> git.ipfire.org Git - thirdparty/curl.git/commitdiff
URL parser: allow URLs to use one, two or three slashes
authorDaniel Stenberg <daniel@haxx.se>
Sun, 8 May 2016 13:11:10 +0000 (15:11 +0200)
committerDaniel Stenberg <daniel@haxx.se>
Mon, 30 May 2016 21:13:55 +0000 (23:13 +0200)
Mostly in order to support broken web sites that redirect to broken URLs
that are accepted by browsers.

Browsers are typically even more leniant than this as the WHATWG URL
spec they should allow an _infinite_ amount. I tested 8000 slashes with
Firefox and it just worked.

Added test case 1141, 1142 and 1143 to verify the new parser.

Closes #791

lib/url.c
tests/data/Makefile.inc
tests/data/test1141 [new file with mode: 0644]
tests/data/test1142 [new file with mode: 0644]
tests/data/test1143 [new file with mode: 0644]

index 2a3026650760e468f5d4dad86260161fdf5bc2e9..3f0bde258612cd43b3d6d15365dcec77576c3deb 100644 (file)
--- a/lib/url.c
+++ b/lib/url.c
@@ -4141,12 +4141,17 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
   }
   else {
     /* clear path */
+    char slashbuf[4];
     path[0]=0;
 
-    if(2 > sscanf(data->change.url,
-                   "%15[^\n:]://%[^\n/?]%[^\n]",
-                   protobuf,
-                   conn->host.name, path)) {
+    rc = sscanf(data->change.url,
+                "%15[^\n:]:%3[/]%[^\n/?]%[^\n]",
+                protobuf, slashbuf, conn->host.name, path);
+    if(2 == rc) {
+      failf(data, "Bad URL");
+      return CURLE_URL_MALFORMAT;
+    }
+    if(3 > rc) {
 
       /*
        * The URL was badly formatted, let's try the browser-style _without_
@@ -4197,8 +4202,23 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
 
       *prot_missing = TRUE; /* not given in URL */
     }
-    else
+    else {
+      size_t s = strlen(slashbuf);
       protop = protobuf;
+      if(s != 2) {
+        infof(data, "Unwillingly accepted illegal URL using %d slash%s!\n",
+              s, s>1?"es":"");
+
+        if(data->change.url_alloc)
+          free(data->change.url);
+        /* repair the URL to use two slashes */
+        data->change.url = aprintf("%s://%s%s",
+                                   protobuf, conn->host.name, path);
+        if(!data->change.url)
+          return CURLE_OUT_OF_MEMORY;
+        data->change.url_alloc = TRUE;
+      }
+    }
   }
 
   /* We search for '?' in the host name (but only on the right side of a
index e3b5a880f598ded38521aa87ba4cd9c04e2c5313..aa82227a6212a067f3129a6ff7fddcf823b22868 100644 (file)
@@ -119,7 +119,7 @@ test1104 test1105 test1106 test1107 test1108 test1109 test1110 test1111 \
 test1112 test1113 test1114 test1115 test1116 test1117 test1118 test1119 \
 test1120 test1121 test1122 test1123 test1124 test1125 test1126 test1127 \
 test1128 test1129 test1130 test1131 test1132 test1133 test1134 test1135 \
-test1136 test1137 test1138 test1139 test1140 \
+test1136 test1137 test1138 test1139 test1140 test1141 test1142 test1143 \
 \
 test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
 test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \
diff --git a/tests/data/test1141 b/tests/data/test1141
new file mode 100644 (file)
index 0000000..31c505f
--- /dev/null
@@ -0,0 +1,67 @@
+<testcase>
+<info>
+<keywords>
+HTTP
+HTTP GET
+followlocation
+</keywords>
+</info>
+# Server-side
+<reply>
+<data>
+HTTP/1.1 302 This is a weirdo text message
+Connection: close
+Location: http:///foo.example.com/want/11410001
+
+This server reply is for testing
+</data>
+<data1>
+HTTP/1.1 200 hello
+Connection: close
+Content-Length: 4
+
+hej
+</data1>
+<datacheck>
+HTTP/1.1 302 This is a weirdo text message
+Connection: close
+Location: http:///foo.example.com/want/11410001
+
+HTTP/1.1 200 hello
+Connection: close
+Content-Length: 4
+
+hej
+</datacheck>
+</reply>
+
+# Client-side
+<client>
+<server>
+http
+</server>
+ <name>
+HTTP redirect to http:/// (three slashes!)
+ </name>
+ <command>
+%HOSTIP:%HTTPPORT/want/1141 -L -x http://%HOSTIP:%HTTPPORT
+</command>
+</client>
+
+# Verify data after the test has been "shot"
+<verify>
+<strip>
+^User-Agent:.*
+</strip>
+<protocol>
+GET http://%HOSTIP:%HTTPPORT/want/1141 HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Accept: */*\r
+\r
+GET http://foo.example.com/want/11410001 HTTP/1.1\r
+Host: foo.example.com\r
+Accept: */*\r
+\r
+</protocol>
+</verify>
+</testcase>
diff --git a/tests/data/test1142 b/tests/data/test1142
new file mode 100644 (file)
index 0000000..ebb0891
--- /dev/null
@@ -0,0 +1,62 @@
+<testcase>
+<info>
+<keywords>
+HTTP
+HTTP GET
+followlocation
+</keywords>
+</info>
+# Server-side
+<reply>
+<data>
+HTTP/1.1 302 This is a weirdo text message
+Connection: close
+Location: http:////foo.example.com/want/11420001
+
+This server reply is for testing
+</data>
+<data1>
+HTTP/1.1 200 hello
+Connection: close
+Content-Length: 4
+
+hej
+</data1>
+<datacheck>
+HTTP/1.1 302 This is a weirdo text message
+Connection: close
+Location: http:////foo.example.com/want/11420001
+
+</datacheck>
+</reply>
+
+# Client-side
+<client>
+<server>
+http
+</server>
+ <name>
+HTTP redirect to http://// (four slashes!)
+ </name>
+ <command>
+%HOSTIP:%HTTPPORT/want/1142 -L -x http://%HOSTIP:%HTTPPORT
+</command>
+</client>
+
+# Verify data after the test has been "shot"
+<verify>
+<strip>
+^User-Agent:.*
+</strip>
+<protocol>
+GET http://%HOSTIP:%HTTPPORT/want/1142 HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Accept: */*\r
+\r
+</protocol>
+# 3, CURLE_URL_MALFORMAT for the four slashes
+<errorcode>
+3
+</errorcode>
+</verify>
+</testcase>
diff --git a/tests/data/test1143 b/tests/data/test1143
new file mode 100644 (file)
index 0000000..4f2f443
--- /dev/null
@@ -0,0 +1,45 @@
+<testcase>
+<info>
+<keywords>
+HTTP
+HTTP GET
+followlocation
+</keywords>
+</info>
+# Server-side
+<reply>
+<data>
+HTTP/1.1 200 hello
+Connection: close
+Content-Length: 4
+
+hej
+</data>
+</reply>
+
+# Client-side
+<client>
+<server>
+http
+</server>
+ <name>
+HTTP URL with http:/ (one slash!)
+ </name>
+ <command>
+http:/%HOSTIP:%HTTPPORT/want/1143
+</command>
+</client>
+
+# Verify data after the test has been "shot"
+<verify>
+<strip>
+^User-Agent:.*
+</strip>
+<protocol>
+GET /want/1143 HTTP/1.1\r
+Host: %HOSTIP:%HTTPPORT\r
+Accept: */*\r
+\r
+</protocol>
+</verify>
+</testcase>