import-util: tweak url patching helper

author Lennart Poettering <lennart@poettering.net>

Tue, 2 Feb 2021 21:37:36 +0000 (22:37 +0100)

committer Lennart Poettering <lennart@poettering.net>

Fri, 30 Jul 2021 14:23:20 +0000 (16:23 +0200)
author Lennart Poettering <lennart@poettering.net>
Tue, 2 Feb 2021 21:37:36 +0000 (22:37 +0100)
committer Lennart Poettering <lennart@poettering.net>
Fri, 30 Jul 2021 14:23:20 +0000 (16:23 +0200)
diff --git a/src/shared/import-util.c b/src/shared/import-util.c

index 2a30e40686f4d58b1d2dfc1bf9aaaa20cda5b671..bb1e5136a736289d34ea3cc06d2b9a3504e763ed 100644 (file)
--- a/src/shared/import-util.c
+++ b/src/shared/import-util.c
@@ -14,53 +14,114 @@
  #include "string-table.h"
  #include "string-util.h"
  
-int import_url_last_component(const char *url, char **ret) {
-        const char *e, *p;
-        char *s;
+static const char *skip_protocol_and_hostname(const char *url) {
+        const char *d;
+        size_t n;
+
+        /* A very very lenient implementation of RFC3986 Section 3.2: returns what follows the hostname */
+
+        /* Find colon separating protocol and hostname; no colon or an empty protocol is refused */
+        d = strchr(url, ':');
+        if (!d || url == d)
+                return NULL;
+        d++;
+
+        /* Skip slashes after colon (any number of them, possibly none) */
+        d += strspn(d, "/");
+
+        /* Skip everything till next slash, "?", "#" or end; an empty hostname is refused */
+        n = strcspn(d, "/?#");
+        if (n == 0)
+                return NULL;
+
+        return d + n;
+}
  
-        e = strchrnul(url, '?');
+int import_url_last_component(
+                const char *url,
+                char **ret) {
  
-        while (e > url && e[-1] == '/')
+        const char *e, *p, *h;
+
+        /* This extracts the last path component of the specified URI, i.e. the last non-empty substring
+         * between two "/" characters. This ignores "Query" and "Fragment" suffixes (as per RFC3986). */
+
+        h = skip_protocol_and_hostname(url);
+        if (!h)
+                return -EINVAL; /* Protocol or hostname missing */
+
+        e = h + strcspn(h, "?#"); /* Cut off "Query" and "Fragment" */
+
+        while (e > h && e[-1] == '/') /* Eat trailing slashes */
                  e--;
  
          p = e;
-        while (p > url && p[-1] != '/')
+        while (p > h && p[-1] != '/') /* Find start of the component before that */
                  p--;
  
-        if (e <= p)
-                return -EINVAL;
+        if (e <= p) /* Empty component? */
+                return -EADDRNOTAVAIL;
  
-        s = strndup(p, e - p);
-        if (!s)
-                return -ENOMEM;
+        if (ret) { /* ret is optional: if NULL, nothing is allocated */
+                char *s;
+
+                s = strndup(p, e - p);
+                if (!s)
+                        return -ENOMEM;
+
+                *ret = s;
+        }
  
-        *ret = s;
          return 0;
  }
  
-int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
-        const char *e;
+int import_url_change_suffix(
+                const char *url,
+                size_t n_drop_components,
+                const char *suffix,
+                char **ret) {
+
+        const char *e, *h;
          char *s;
  
          assert(url);
          assert(ret);
  
-        e = strchrnul(url, '?');
+        /* This drops the specified number of path components of the specified URI, i.e. the specified number
+         * of non-empty substrings between two "/" characters from the end of the string, and then appends the
+         * specified suffix instead. Before doing all this it chops off the "Query" and "Fragment" suffixes
+         * (they are *not* readded to the final URL). Note that n_drop_components may be 0 (in which case the
+         * suffix is simply added to the end). The suffix may be specified as NULL or empty string in
+         * which case nothing is appended, only the specified number of components chopped off. Note that the
+         * function may be called with n_drop_components == 0 and suffix == NULL, in which case the "Query"
+         * and "Fragment" are chopped off, and it is ensured the URL ends in a single "/", and that's it. */
+
+        h = skip_protocol_and_hostname(url);
+        if (!h)
+                return -EINVAL; /* Protocol or hostname missing */
  
-        while (e > url && e[-1] == '/')
-                e--;
+        e = h + strcspn(h, "?#"); /* Cut off "Query" and "Fragment" */
  
-        while (e > url && e[-1] != '/')
+        while (e > h && e[-1] == '/') /* Eat trailing slashes */
                  e--;
  
-        if (e <= url)
-                return -EINVAL;
+        /* Drop the specified number of components from the end. Note that this is pretty lenient: if there
+         * are fewer components we silently drop those and then append the suffix to the top. */
+        while (n_drop_components > 0) {
+                while (e > h && e[-1] != '/') /* Eat last word (we don't mind if empty) */
+                        e--;
+
+                while (e > h && e[-1] == '/') /* Eat slashes before the last word */
+                        e--;
+
+                n_drop_components--;
+        }
  
-        s = new(char, (e - url) + strlen(suffix) + 1);
+        s = new(char, (e - url) + 1 + strlen_ptr(suffix) + 1); /* Kept prefix + "/" + suffix + NUL */
          if (!s)
                  return -ENOMEM;
  
-        strcpy(mempcpy(s, url, e - url), suffix);
+        strcpy(stpcpy(mempcpy(s, url, e - url), "/"), strempty(suffix));
          *ret = s;
          return 0;
  }
diff --git a/src/shared/import-util.h b/src/shared/import-util.h

index c7ec3b4eabda71281acf80d0ef76cc4fff7ec904..3b2425b916537cdb499375844c0aff39fab61e7a 100644 (file)
--- a/src/shared/import-util.h
+++ b/src/shared/import-util.h
@@ -14,7 +14,16 @@ typedef enum ImportVerify {
  } ImportVerify;
  
  int import_url_last_component(const char *url, char **ret);
-int import_url_change_last_component(const char *url, const char *suffix, char **ret);
+
+int import_url_change_suffix(const char *url, size_t n_drop_components, const char *suffix, char **ret);
+
+static inline int import_url_change_last_component(const char *url, const char *suffix, char **ret) {
+        return import_url_change_suffix(url, 1, suffix, ret); /* n_drop_components == 1: replace last one */
+}
+
+static inline int import_url_append_component(const char *url, const char *suffix, char **ret) {
+        return import_url_change_suffix(url, 0, suffix, ret); /* n_drop_components == 0: only append */
+}
  
  const char* import_verify_to_string(ImportVerify v) _const_;
  ImportVerify import_verify_from_string(const char *s) _pure_;
author	Lennart Poettering <lennart@poettering.net>
	Tue, 2 Feb 2021 21:37:36 +0000 (22:37 +0100)
committer	Lennart Poettering <lennart@poettering.net>
	Fri, 30 Jul 2021 14:23:20 +0000 (16:23 +0200)
src/shared/import-util.c		patch \| blob \| blame \| history
src/shared/import-util.h		patch \| blob \| blame \| history