From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 2 Feb 2021 21:37:36 +0000 (+0100)
Subject: import-util: tweak url patching helper
X-Git-Tag: v250-rc1~895^2~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=56ce4adafef11fb1fd3e11000a1c405bac17ca56;p=thirdparty%2Fsystemd.git

import-util: tweak url patching helper

let's share some code between import_url_last_component() and
import_url_change_last_component(), and make sure we never eat up the
hostname component of the URL when parsing out the last component.

Let's also make import_url_change_last_component() more generic so that
we can also use it for append components to paths, instead of replacing
suffixes.
---

diff --git a/src/shared/import-util.c b/src/shared/import-util.c
index 2a30e40686f..bb1e5136a73 100644
--- a/src/shared/import-util.c
+++ b/src/shared/import-util.c
@@ -14,53 +14,114 @@
 #include "string-table.h"
 #include "string-util.h"
 
-int import_url_last_component(const char *url, char **ret) {
-        const char *e, *p;
-        char *s;
+static const char *skip_protocol_and_hostname(const char *url) {
+        const char *d;
+        size_t n;
+
+        /* A very very lenient implementation of RFC3986 Section 3.2 */
+
+        /* Find colon separating protocol and hostname */
+        d = strchr(url, ':');
+        if (!d || url == d)
+                return NULL;
+        d++;
+
+        /* Skip slashes after colon */
+        d += strspn(d, "/");
+
+        /* Skip everything till next slash or end */
+        n = strcspn(d, "/?#");
+        if (n == 0)
+                return NULL;
+
+        return d + n;
+}
 
-        e = strchrnul(url, '?');
+int import_url_last_component(
+                const char *url,
+                char **ret) {
 
-        while (e > url && e[-1] == '/')
+        const char *e, *p, *h;
+
+        /* This extracts the last path component of the specified URI, i.e. the last non-empty substrings
+         * between two "/" characters. This ignores "Query" and "Fragment" suffixes (as per RFC3986). */
+
+        h = skip_protocol_and_hostname(url);
+        if (!h)
+                return -EINVAL;
+
+        e = h + strcspn(h, "?#"); /* Cut off "Query" and "Fragment" */
+
+        while (e > h && e[-1] == '/') /* Eat trailing slashes */
                 e--;
 
         p = e;
-        while (p > url && p[-1] != '/')
+        while (p > h && p[-1] != '/') /* Find component before that */
                 p--;
 
-        if (e <= p)
-                return -EINVAL;
+        if (e <= p) /* Empty component? */
+                return -EADDRNOTAVAIL;
 
-        s = strndup(p, e - p);
-        if (!s)
-                return -ENOMEM;
+        if (ret) {
+                char *s;
+
+                s = strndup(p, e - p);
+                if (!s)
+                        return -ENOMEM;
+
+                *ret = s;
+        }
 
-        *ret = s;
         return 0;
 }
 
-int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
-        const char *e;
+int import_url_change_suffix(
+                const char *url,
+                size_t n_drop_components,
+                const char *suffix,
+                char **ret) {
+
+        const char *e, *h;
         char *s;
 
         assert(url);
         assert(ret);
 
-        e = strchrnul(url, '?');
+        /* This drops the specified number of path components of the specified URI, i.e. the specified number
+         * of non-empty substring between two "/" characters from the end of the string, and then append the
+         * specified suffix instead. Before doing all this it chops off the "Query" and "Fragment" suffixes
+         * (they are *not* readded to the final URL). Note that n_drop_components may be 0 (in which case the
+         * component are simply added to the end). The suffix may be specified as NULL or empty string in
+         * which case nothing is appended, only the specified number of components chopped off. Note that the
+         * function may be called with n_drop_components == 0 and suffix == NULL, in which case the "Query"
+         * and "Fragment" is chopped off, and ensured the URL ends in a single "/", and that's it. */
+
+        h = skip_protocol_and_hostname(url);
+        if (!h)
+                return -EINVAL;
 
-        while (e > url && e[-1] == '/')
-                e--;
+        e = h + strcspn(h, "?#"); /* Cut off "Query" and "Fragment" */
 
-        while (e > url && e[-1] != '/')
+        while (e > h && e[-1] == '/') /* Eat trailing slashes */
                 e--;
 
-        if (e <= url)
-                return -EINVAL;
+        /* Drop the specified number of components from the end. Note that this is pretty lenient: if there
+         * are less component we silently drop those and then append the suffix to the top. */
+        while (n_drop_components > 0) {
+                while (e > h && e[-1] != '/') /* Eat last word (we don't mind if empty) */
+                        e--;
+
+                while (e > h && e[-1] == '/') /* Eat slashes before the last word */
+                        e--;
+
+                n_drop_components--;
+        }
 
-        s = new(char, (e - url) + strlen(suffix) + 1);
+        s = new(char, (e - url) + 1 + strlen_ptr(suffix) + 1);
         if (!s)
                 return -ENOMEM;
 
-        strcpy(mempcpy(s, url, e - url), suffix);
+        strcpy(stpcpy(mempcpy(s, url, e - url), "/"), strempty(suffix));
         *ret = s;
         return 0;
 }
diff --git a/src/shared/import-util.h b/src/shared/import-util.h
index c7ec3b4eabd..3b2425b9165 100644
--- a/src/shared/import-util.h
+++ b/src/shared/import-util.h
@@ -14,7 +14,16 @@ typedef enum ImportVerify {
 } ImportVerify;
 
 int import_url_last_component(const char *url, char **ret);
-int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+int import_url_change_suffix(const char *url, size_t n_drop_components, const char *suffix, char **ret);
+
+static inline int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+        return import_url_change_suffix(url, 1, suffix, ret);
+}
+
+static inline int import_url_append_component(const char *url, const char *suffix, char **ret) {
+        return import_url_change_suffix(url, 0, suffix, ret);
+}
 
 const char* import_verify_to_string(ImportVerify v) _const_;
 ImportVerify import_verify_from_string(const char *s) _pure_;