]> git.ipfire.org Git - thirdparty/asterisk.git/commitdiff
res_http_media_cache.c: Parse media URLs to find extensions.
author Sean Bright <sean.bright@gmail.com>
Fri, 2 Jul 2021 15:15:05 +0000 (11:15 -0400)
committer Joshua Colp <jcolp@sangoma.com>
Mon, 19 Jul 2021 11:53:50 +0000 (06:53 -0500)
Use cURL's URL parsing API, falling back to the uriparser library, to
parse playback URLs in order to find their file extensions.

For backwards compatibility, we first look at the full URL, then at
any Content-Type header, and finally at just the path portion of the
URL.

ASTERISK-27871 #close

Change-Id: I16d0682f6d794be96539261b3e48f237909139cb

main/media_cache.c
res/res_http_media_cache.c
tests/test_http_media_cache.c

index b303643f5e7c71df43bb610e7ff3a79a7b03d460..1899fb453f943a324fe773f076d24bb303a0bea8 100644 (file)
@@ -124,24 +124,6 @@ static void media_cache_item_del_from_astdb(struct ast_bucket_file *bucket_file)
        ast_free(hash_value);
 }
 
-/*!
- * \internal
- * \brief Normalize the value of a Content-Type header
- *
- * This will trim off any optional parameters after the type/subtype.
- */
-static void normalize_content_type_header(char *content_type)
-{
-       char *params = strchr(content_type, ';');
-
-       if (params) {
-               *params-- = 0;
-               while (params > content_type && (*params == ' ' || *params == '\t')) {
-                       *params-- = 0;
-               }
-       }
-}
-
 /*!
  * \internal
  * \brief Update the name of the file backing a \c bucket_file
@@ -150,47 +132,24 @@ static void normalize_content_type_header(char *content_type)
 static void bucket_file_update_path(struct ast_bucket_file *bucket_file,
        const char *preferred_file_name)
 {
-       char *ext;
-
        if (!ast_strlen_zero(preferred_file_name) && strcmp(bucket_file->path, preferred_file_name)) {
                /* Use the preferred file name if available */
-
                rename(bucket_file->path, preferred_file_name);
                ast_copy_string(bucket_file->path, preferred_file_name,
                        sizeof(bucket_file->path));
-       } else if (!strchr(bucket_file->path, '.') && (ext = strrchr(ast_sorcery_object_get_id(bucket_file), '.'))) {
-               /* If we don't have a file extension and were provided one in the URI, use it */
-               char found_ext[32];
-               char new_path[PATH_MAX + sizeof(found_ext)];
-
-               ast_bucket_file_metadata_set(bucket_file, "ext", ext);
-
-               /* Don't pass '.' while checking for supported extension */
-               if (!ast_get_format_for_file_ext(ext + 1)) {
-                       /* If the file extension passed in the URI isn't supported check for the
-                        * extension based on the MIME type passed in the Content-Type header before
-                        * giving up.
-                        * If a match is found then retrieve the extension from the supported list
-                        * corresponding to the mime-type and use that to rename the file */
-                       struct ast_bucket_metadata *header = ast_bucket_file_metadata_get(bucket_file, "content-type");
-                       if (header) {
-                               char *mime_type = ast_strdup(header->value);
-                               if (mime_type) {
-                                       normalize_content_type_header(mime_type);
-                                       if (!ast_strlen_zero(mime_type)) {
-                                               if (ast_get_extension_for_mime_type(mime_type, found_ext, sizeof(found_ext))) {
-                                                       ext = found_ext;
-                                               }
-                                       }
-                                       ast_free(mime_type);
-                               }
-                               ao2_ref(header, -1);
+       } else if (!strchr(bucket_file->path, '.')) {
+               struct ast_bucket_metadata *ext =
+                       ast_bucket_file_metadata_get(bucket_file, "ext");
+
+               if (ext) {
+                       char *new_path;
+                       if (ast_asprintf(&new_path, "%s%s", bucket_file->path, ext->value) != -1) {
+                               rename(bucket_file->path, new_path);
+                               ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
+                               ast_free(new_path);
                        }
+                       ao2_ref(ext, -1);
                }
-
-               snprintf(new_path, sizeof(new_path), "%s%s", bucket_file->path, ext);
-               rename(bucket_file->path, new_path);
-               ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
        }
 }
 
index d761442b41ab4280c1ecf3090cce110956441838..ba9428d597ce19ce2a0560bbf6dc93108b492706 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <curl/curl.h>
 
+#include "asterisk/file.h"
 #include "asterisk/module.h"
 #include "asterisk/bucket.h"
 #include "asterisk/sorcery.h"
@@ -155,6 +156,176 @@ static void bucket_file_set_expiration(struct ast_bucket_file *bucket_file)
        ast_bucket_file_metadata_set(bucket_file, "__actual_expires", time_buf);
 }
 
+/*!
+ * \internal
+ * \brief Extract a supported file extension from the tail of a string
+ *
+ * \param str String to search for a trailing '.ext'
+ * \param buffer Destination for the extension, including its leading '.'
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer if the text after the last '.' is accepted by
+ *         ast_get_format_for_file_ext(), NULL otherwise
+ */
+static char *file_extension_from_string(const char *str, char *buffer, size_t capacity)
+{
+       const char *last_dot = strrchr(str, '.');
+
+       if (!last_dot) {
+               return NULL;
+       }
+
+       /* Skip the '.' itself when asking whether the format is known */
+       if (!ast_get_format_for_file_ext(last_dot + 1)) {
+               return NULL;
+       }
+
+       ast_debug(3, "Found extension '%s' at end of string\n", last_dot);
+       ast_copy_string(buffer, last_dot, capacity);
+
+       return buffer;
+}
+
+/*!
+ * \internal
+ * \brief Look for a supported extension at the very end of the bucket file's id
+ *
+ * The id returned by ast_sorcery_object_get_id() is searched as-is, with no
+ * URL parsing applied.
+ *
+ * \return \a buffer when an extension was found, NULL otherwise
+ */
+static char *file_extension_from_url(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       const char *url = ast_sorcery_object_get_id(bucket_file);
+
+       return file_extension_from_string(url, buffer, capacity);
+}
+
+/*!
+ * \internal
+ * \brief Normalize the value of a Content-Type header
+ *
+ * This will trim off any optional parameters after the type/subtype, along
+ * with any spaces or tabs that preceded the ';' separator. The string is
+ * modified in place; a value without a ';' is left untouched.
+ *
+ * \param content_type NUL-terminated, writable header value
+ */
+static void normalize_content_type_header(char *content_type)
+{
+       char *end = strchr(content_type, ';');
+
+       if (!end) {
+               return;
+       }
+
+       *end = '\0';
+
+       /* Inspect end[-1] rather than decrementing first, so the pointer never
+        * moves before the start of the buffer even when the value begins with
+        * ';' or holds only whitespace in front of it. */
+       while (end > content_type && (end[-1] == ' ' || end[-1] == '\t')) {
+               *--end = '\0';
+       }
+}
+
+/*!
+ * \internal
+ * \brief Derive a file extension from the Content-Type of a retrieved resource
+ *
+ * Reads the "content-type" metadata of \a bucket_file, strips any parameters
+ * from it, and asks ast_get_extension_for_mime_type() for the extension that
+ * corresponds to that MIME type.
+ *
+ * \param bucket_file Bucket file whose metadata is consulted
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL when the header is
+ *         missing, empty after normalization, or maps to no known extension
+ */
+static char *file_extension_from_content_type(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       struct ast_bucket_metadata *header;
+       char *mime_type;
+       char *result = NULL;
+
+       header = ast_bucket_file_metadata_get(bucket_file, "content-type");
+       if (!header) {
+               return NULL;
+       }
+
+       /* Work on a copy; normalization modifies the string in place */
+       mime_type = ast_strdup(header->value);
+       if (mime_type) {
+               normalize_content_type_header(mime_type);
+
+               /* Pass the caller's capacity: buffer is a pointer here, so
+                * sizeof(buffer) would only be the size of that pointer. */
+               if (!ast_strlen_zero(mime_type)
+                  && ast_get_extension_for_mime_type(mime_type, buffer, capacity)) {
+                       ast_debug(3, "Derived extension '%s' from MIME type %s\n",
+                               buffer,
+                               mime_type);
+                       result = buffer;
+               }
+
+               ast_free(mime_type);
+       }
+
+       ao2_ref(header, -1);
+
+       return result;
+}
+
+/* The URL parsing API was introduced in 7.62.0 */
+#if LIBCURL_VERSION_NUM >= 0x073e00
+
+/*!
+ * \internal
+ * \brief Find a supported extension at the end of the URL's path (cURL backend)
+ *
+ * Parses the bucket file's id with cURL's URL API and inspects only the path
+ * component for a trailing extension.
+ *
+ * \param bucket_file Bucket file whose id is the URL to parse
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL when the URL cannot be
+ *         parsed, its path cannot be extracted, or the path carries no
+ *         supported extension
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       const char *url = ast_sorcery_object_get_id(bucket_file);
+       char *path = NULL;
+       char *result = NULL;
+       CURLU *h;
+
+       h = curl_url();
+       if (!h) {
+               ast_log(LOG_ERROR, "Failed to allocate cURL URL handle\n");
+               return NULL;
+       }
+
+       if (curl_url_set(h, CURLUPART_URL, url, 0)) {
+               ast_log(LOG_ERROR, "Failed to parse URL: %s\n", url);
+               curl_url_cleanup(h);
+               return NULL;
+       }
+
+       /* curl_url_get() hands back nothing on error, so only look at the path
+        * when the call succeeded; path stays NULL otherwise. */
+       if (curl_url_get(h, CURLUPART_PATH, &path, 0) == CURLUE_OK && path) {
+               /* Just parse it as a string like before, but without the extra cruft */
+               result = file_extension_from_string(path, buffer, capacity);
+       } else {
+               ast_log(LOG_ERROR, "Failed to extract path from URL: %s\n", url);
+       }
+
+       /* curl_free(NULL) is a no-op */
+       curl_free(path);
+       curl_url_cleanup(h);
+
+       return result;
+}
+
+#elif defined(HAVE_URIPARSER)
+
+#include <uriparser/Uri.h>
+
+/*!
+ * \internal
+ * \brief Find a supported extension at the end of the URL's path (uriparser backend)
+ *
+ * Parses the bucket file's id with uriparser and inspects only the last path
+ * segment for a trailing extension.
+ *
+ * \param bucket_file Bucket file whose id is the URL to parse
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL when the URL cannot be
+ *         parsed, has no scheme or path, allocation fails, or the last path
+ *         segment carries no supported extension
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       UriParserStateA state;
+       UriUriA full_uri;
+       char *path;
+       char *result = NULL;
+
+       state.uri = &full_uri;
+       if (uriParseUriA(&state, ast_sorcery_object_get_id(bucket_file)) != URI_SUCCESS
+          || !full_uri.scheme.first
+          || !full_uri.scheme.afterLast
+          || !full_uri.pathTail) {
+               ast_log(LOG_ERROR, "Failed to parse URL: %s\n",
+                       ast_sorcery_object_get_id(bucket_file));
+               uriFreeUriMembersA(&full_uri);
+               return NULL;
+       }
+
+       /* The segment is a [first, afterLast) range inside the URL, so copy it
+        * into a NUL-terminated string before searching it. */
+       if (ast_asprintf(&path,
+                       "%.*s",
+                       (int) (full_uri.pathTail->text.afterLast - full_uri.pathTail->text.first),
+                       full_uri.pathTail->text.first) != -1) {
+               /* Just parse it as a string like before, but without the extra cruft.
+                * Keep the result: buffer is only filled in when a match is found. */
+               result = file_extension_from_string(path, buffer, capacity);
+               ast_free(path);
+       }
+
+       uriFreeUriMembersA(&full_uri);
+
+       return result;
+}
+
+#else
+
+/*!
+ * \internal
+ * \brief Stand-in used when no URL parsing backend was selected at compile time
+ *
+ * \return Always NULL
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       /* Nothing to parse with, so never report an extension */
+       return NULL;
+}
+
+#endif
+
+/*!
+ * \internal
+ * \brief Work out a file extension for a retrieved resource and record it
+ *
+ * Sources are consulted in this order, for backwards compatibility, and the
+ * first one that yields an extension wins:
+ *
+ * 1. The tail end of the full URL
+ * 2. The Content-Type header, if present
+ * 3. The tail of the path of the parsed URL (assuming we can parse it)
+ *
+ * When one of them succeeds the extension is stored as the "ext" metadata of
+ * \a bucket_file; otherwise no metadata is written.
+ */
+static void bucket_file_set_extension(struct ast_bucket_file *bucket_file)
+{
+       char ext[64];
+       char *found;
+
+       found = file_extension_from_url(bucket_file, ext, sizeof(ext));
+       if (!found) {
+               found = file_extension_from_content_type(bucket_file, ext, sizeof(ext));
+       }
+       if (!found) {
+               found = file_extension_from_url_path(bucket_file, ext, sizeof(ext));
+       }
+
+       if (found) {
+               ast_bucket_file_metadata_set(bucket_file, "ext", ext);
+       }
+}
+
 /*! \internal
  * \brief Return whether or not we should always revalidate against the server
  */
@@ -278,6 +449,7 @@ static int bucket_file_run_curl(struct ast_bucket_file *bucket_file)
 
        if (http_code / 100 == 2) {
                bucket_file_set_expiration(bucket_file);
+               bucket_file_set_extension(bucket_file);
                return 0;
        } else {
                ast_log(LOG_WARNING, "Failed to retrieve URL '%s': server returned %ld\n",
index c1975390a5e7704a6bd838d7f60a6f733e3e9bab..dfb28b703e0e0372d37e14be08ab8f5a688ea118 100644 (file)
 #include "asterisk/bucket.h"
 #include "asterisk/test.h"
 
+#undef INCLUDE_URI_PARSING_TESTS
+#if defined(HAVE_CURL)
+# include <curl/curl.h>
+#endif
+#if (defined(HAVE_CURL) && LIBCURL_VERSION_NUM >= 0x073e00) || defined(HAVE_URIPARSER)
+# define INCLUDE_URI_PARSING_TESTS 1
+#endif
+
 #define CATEGORY "/res/http_media_cache/"
 
 #define TEST_URI "test_media_cache"
@@ -57,6 +65,7 @@ struct test_options {
        struct timeval expires;
        const char *status_text;
        const char *etag;
+       const char *content_type;
 };
 
 static struct test_options options;
@@ -125,6 +134,10 @@ static int http_callback(struct ast_tcptls_session_instance *ser, const struct a
                }
        }
 
+       if (!ast_strlen_zero(options.content_type)) {
+               ast_str_append(&http_header, 0, "Content-Type: %s\r\n", options.content_type);
+       }
+
        if (options.cache_control.maxage) {
                SET_OR_APPEND_CACHE_CONTROL(cache_control);
                ast_str_append(&cache_control, 0, "max-age=%d", options.cache_control.maxage);
@@ -220,6 +233,77 @@ static void bucket_file_cleanup(void *obj)
        }
 }
 
+/* Retrieve a resource served with "Content-Type: audio/wav" and check that
+ * the bucket file ends up with ".wav" as its "ext" metadata. */
+AST_TEST_DEFINE(retrieve_content_type)
+{
+       RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+       char uri[1024];
+
+       if (cmd == TEST_INIT) {
+               info->name = __func__;
+               info->category = CATEGORY;
+               info->summary = "Test retrieval of a resource with a Content-Type header";
+               info->description =
+                       "This test covers retrieval of a resource whose URL does not end with\n"
+                       "a parseable extension and whose response includes a Content-Type\n"
+                       "header that we recognize.";
+               return AST_TEST_NOT_RUN;
+       }
+
+       /* Have the test HTTP server answer 200 OK with a WAV content type */
+       options.content_type = "audio/wav";
+       options.status_text = "OK";
+       options.status_code = 200;
+       options.send_file = 1;
+
+       /* The query string keeps the URL itself from ending in ".wav" */
+       snprintf(uri, sizeof(uri), "%s/foo.wav?account_id=1234", server_uri);
+
+       bucket_file = ast_bucket_file_retrieve(uri);
+       ast_test_validate(test, bucket_file != NULL);
+       ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+       ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+       VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+       return AST_TEST_PASS;
+}
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+/* Retrieve a resource whose extension is only visible once the URL has been
+ * parsed (the query string hides it from a plain tail-of-string check) and
+ * check that the bucket file ends up with ".wav" as its "ext" metadata. */
+AST_TEST_DEFINE(retrieve_parsed_uri)
+{
+       RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+       char uri[1024];
+
+       switch (cmd) {
+       case TEST_INIT:
+               info->name = __func__;
+               info->category = CATEGORY;
+               info->summary = "Test retrieval of a resource with a complex URI";
+               info->description =
+                       "This test covers retrieval of a resource whose URL does not end with\n"
+                       "a parseable extension, but the path portion of the URL does end with\n"
+                       "a parseable extension.";
+               return AST_TEST_NOT_RUN;
+       case TEST_EXECUTE:
+               break;
+       }
+
+       options.send_file = 1;
+       options.status_code = 200;
+       options.status_text = "OK";
+       /* options is shared between tests; make sure no Content-Type is sent so
+        * the extension can only come from parsing the URL.
+        * NOTE(review): redundant if the pre-test callback already resets
+        * options - confirm. */
+       options.content_type = NULL;
+
+       snprintf(uri, sizeof(uri), "%s/%s", server_uri, "foo.wav?account_id=1234");
+
+       bucket_file = ast_bucket_file_retrieve(uri);
+       ast_test_validate(test, bucket_file != NULL);
+       ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+       ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+       VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+       return AST_TEST_PASS;
+}
+#endif
+
 AST_TEST_DEFINE(retrieve_cache_control_directives)
 {
        RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
@@ -670,6 +754,11 @@ static int load_module(void)
        AST_TEST_REGISTER(retrieve_etag_expired);
        AST_TEST_REGISTER(retrieve_cache_control_age);
        AST_TEST_REGISTER(retrieve_cache_control_directives);
+       AST_TEST_REGISTER(retrieve_content_type);
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+       AST_TEST_REGISTER(retrieve_parsed_uri);
+#endif
 
        ast_test_register_init(CATEGORY, pre_test_cb);
 
@@ -688,6 +777,11 @@ static int unload_module(void)
        AST_TEST_UNREGISTER(retrieve_etag_expired);
        AST_TEST_UNREGISTER(retrieve_cache_control_age);
        AST_TEST_UNREGISTER(retrieve_cache_control_directives);
+       AST_TEST_UNREGISTER(retrieve_content_type);
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+       AST_TEST_UNREGISTER(retrieve_parsed_uri);
+#endif
 
        return 0;
 }