res_http_media_cache.c: Parse media URLs to find extensions.

author Sean Bright <sean.bright@gmail.com>

Fri, 2 Jul 2021 15:15:05 +0000 (11:15 -0400)

committer Joshua Colp <jcolp@sangoma.com>

Mon, 19 Jul 2021 11:53:50 +0000 (06:53 -0500)
author Sean Bright <sean.bright@gmail.com>
Fri, 2 Jul 2021 15:15:05 +0000 (11:15 -0400)
committer Joshua Colp <jcolp@sangoma.com>
Mon, 19 Jul 2021 11:53:50 +0000 (06:53 -0500)
diff --git a/main/media_cache.c b/main/media_cache.c

index b303643f5e7c71df43bb610e7ff3a79a7b03d460..1899fb453f943a324fe773f076d24bb303a0bea8 100644 (file)
--- a/main/media_cache.c
+++ b/main/media_cache.c
@@ -124,24 +124,6 @@ static void media_cache_item_del_from_astdb(struct ast_bucket_file *bucket_file)
         ast_free(hash_value);
  }
  
-/*!
- * \internal
- * \brief Normalize the value of a Content-Type header
- *
- * This will trim off any optional parameters after the type/subtype.
- */
-static void normalize_content_type_header(char *content_type)
-{
-       char *params = strchr(content_type, ';');
-
-       if (params) {
-               *params-- = 0;
-               while (params > content_type && (*params == ' ' || *params == '\t')) {
-                       *params-- = 0;
-               }
-       }
-}
-
  /*!
   * \internal
   * \brief Update the name of the file backing a \c bucket_file
@@ -150,47 +132,24 @@ static void normalize_content_type_header(char *content_type)
  static void bucket_file_update_path(struct ast_bucket_file *bucket_file,
         const char *preferred_file_name)
  {
-       char *ext;
-
         if (!ast_strlen_zero(preferred_file_name) && strcmp(bucket_file->path, preferred_file_name)) {
                 /* Use the preferred file name if available */
-
                 rename(bucket_file->path, preferred_file_name);
                 ast_copy_string(bucket_file->path, preferred_file_name,
                         sizeof(bucket_file->path));
-       } else if (!strchr(bucket_file->path, '.') && (ext = strrchr(ast_sorcery_object_get_id(bucket_file), '.'))) {
-               /* If we don't have a file extension and were provided one in the URI, use it */
-               char found_ext[32];
-               char new_path[PATH_MAX + sizeof(found_ext)];
-
-               ast_bucket_file_metadata_set(bucket_file, "ext", ext);
-
-               /* Don't pass '.' while checking for supported extension */
-               if (!ast_get_format_for_file_ext(ext + 1)) {
-                       /* If the file extension passed in the URI isn't supported check for the
-                        * extension based on the MIME type passed in the Content-Type header before
-                        * giving up.
-                        * If a match is found then retrieve the extension from the supported list
-                        * corresponding to the mime-type and use that to rename the file */
-                       struct ast_bucket_metadata *header = ast_bucket_file_metadata_get(bucket_file, "content-type");
-                       if (header) {
-                               char *mime_type = ast_strdup(header->value);
-                               if (mime_type) {
-                                       normalize_content_type_header(mime_type);
-                                       if (!ast_strlen_zero(mime_type)) {
-                                               if (ast_get_extension_for_mime_type(mime_type, found_ext, sizeof(found_ext))) {
-                                                       ext = found_ext;
-                                               }
-                                       }
-                                       ast_free(mime_type);
-                               }
-                               ao2_ref(header, -1);
+       } else if (!strchr(bucket_file->path, '.')) {
+               struct ast_bucket_metadata *ext =
+                       ast_bucket_file_metadata_get(bucket_file, "ext");
+
+               if (ext) {
+                       char *new_path;
+                       if (ast_asprintf(&new_path, "%s%s", bucket_file->path, ext->value) != -1) {
+                               rename(bucket_file->path, new_path);
+                               ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
+                               ast_free(new_path);
                         }
+                       ao2_ref(ext, -1);
                 }
-
-               snprintf(new_path, sizeof(new_path), "%s%s", bucket_file->path, ext);
-               rename(bucket_file->path, new_path);
-               ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
         }
  }
  
diff --git a/res/res_http_media_cache.c b/res/res_http_media_cache.c

index d761442b41ab4280c1ecf3090cce110956441838..ba9428d597ce19ce2a0560bbf6dc93108b492706 100644 (file)
--- a/res/res_http_media_cache.c
+++ b/res/res_http_media_cache.c
@@ -35,6 +35,7 @@
  
  #include <curl/curl.h>
  
+#include "asterisk/file.h"
  #include "asterisk/module.h"
  #include "asterisk/bucket.h"
  #include "asterisk/sorcery.h"
@@ -155,6 +156,176 @@ static void bucket_file_set_expiration(struct ast_bucket_file *bucket_file)
         ast_bucket_file_metadata_set(bucket_file, "__actual_expires", time_buf);
  }
  
+/*!
+ * \internal
+ * \brief Extract a supported file extension from the tail of a string
+ *
+ * Locates the last '.' in \a str and, when the text following it is accepted
+ * by ast_get_format_for_file_ext(), copies the extension (leading dot
+ * included) into \a buffer.
+ *
+ * \param str String to search
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was copied, NULL otherwise
+ */
+static char *file_extension_from_string(const char *str, char *buffer, size_t capacity)
+{
+       const char *last_dot = strrchr(str, '.');
+
+       if (!last_dot) {
+               return NULL;
+       }
+
+       /* Skip the '.' itself when asking whether the extension is known */
+       if (!ast_get_format_for_file_ext(last_dot + 1)) {
+               return NULL;
+       }
+
+       ast_debug(3, "Found extension '%s' at end of string\n", last_dot);
+       ast_copy_string(buffer, last_dot, capacity);
+
+       return buffer;
+}
+
+/*!
+ * \internal
+ * \brief Look for a supported extension at the very end of a bucket file's id
+ *
+ * The id reported by ast_sorcery_object_get_id() is searched as-is by
+ * file_extension_from_string(); no URL parsing is performed.
+ *
+ * \param bucket_file Bucket file whose id is inspected
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL otherwise
+ */
+static char *file_extension_from_url(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       const char *url = ast_sorcery_object_get_id(bucket_file);
+
+       return file_extension_from_string(url, buffer, capacity);
+}
+
+/*!
+ * \internal
+ * \brief Normalize the value of a Content-Type header
+ *
+ * This will trim off any optional parameters after the type/subtype, along
+ * with any spaces or tabs immediately preceding the ';' that introduces them.
+ * The string is modified in place; a value without a ';' is left untouched.
+ *
+ * \param content_type NUL-terminated header value to normalize
+ */
+static void normalize_content_type_header(char *content_type)
+{
+       char *end = strchr(content_type, ';');
+
+       if (!end) {
+               return;
+       }
+
+       /* Walk back over trailing blanks by peeking at end[-1] so that the
+        * pointer never moves before the start of the string, even when the
+        * ';' is the very first character. */
+       while (end > content_type && (end[-1] == ' ' || end[-1] == '\t')) {
+               end--;
+       }
+
+       *end = '\0';
+}
+
+/*!
+ * \internal
+ * \brief Derive a file extension from a bucket file's "content-type" metadata
+ *
+ * Check for the extension based on the MIME type passed in the Content-Type
+ * header. The header value is copied, stripped of any parameters, and handed
+ * to ast_get_extension_for_mime_type(), which fills in \a buffer on a match.
+ *
+ * \param bucket_file Bucket file whose "content-type" metadata is consulted
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was derived, NULL when there is no
+ *         "content-type" metadata, the value could not be copied, the
+ *         normalized value is empty, or no extension matched
+ */
+static char *file_extension_from_content_type(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       struct ast_bucket_metadata *header;
+       char *mime_type;
+       char *result = NULL;
+
+       header = ast_bucket_file_metadata_get(bucket_file, "content-type");
+       if (!header) {
+               return NULL;
+       }
+
+       /* Work on a copy so the stored metadata value is not modified */
+       mime_type = ast_strdup(header->value);
+       if (mime_type) {
+               normalize_content_type_header(mime_type);
+               /* Pass the caller-supplied capacity: buffer is a pointer here, so
+                * sizeof(buffer) would be the size of a pointer rather than the
+                * size of the destination array. */
+               if (!ast_strlen_zero(mime_type)
+                  && ast_get_extension_for_mime_type(mime_type, buffer, capacity)) {
+                       ast_debug(3, "Derived extension '%s' from MIME type %s\n",
+                               buffer,
+                               mime_type);
+                       result = buffer;
+               }
+               ast_free(mime_type);
+       }
+
+       /* Release the reference taken on the metadata object above */
+       ao2_ref(header, -1);
+
+       return result;
+}
+
+/* The URL parsing API was introduced in 7.62.0 */
+#if LIBCURL_VERSION_NUM >= 0x073e00
+
+/*!
+ * \internal
+ * \brief Find a supported extension at the end of the URL's path component
+ *
+ * Uses the libcurl URL API to parse the bucket file's id and isolate its path
+ * part, then searches that path with file_extension_from_string().
+ *
+ * \param bucket_file Bucket file whose id is parsed as a URL
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL when the handle could
+ *         not be allocated, the URL could not be parsed, the path could not
+ *         be extracted, or the path has no supported extension
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       const char *url = ast_sorcery_object_get_id(bucket_file);
+       char *path = NULL;
+       CURLU *h;
+
+       h = curl_url();
+       if (!h) {
+               ast_log(LOG_ERROR, "Failed to allocate cURL URL handle\n");
+               return NULL;
+       }
+
+       if (curl_url_set(h, CURLUPART_URL, url, 0)) {
+               ast_log(LOG_ERROR, "Failed to parse URL: %s\n", url);
+               curl_url_cleanup(h);
+               return NULL;
+       }
+
+       /* Do not search or free path unless curl_url_get() reported success
+        * and actually handed back a string. */
+       if (curl_url_get(h, CURLUPART_PATH, &path, 0) || !path) {
+               ast_log(LOG_ERROR, "Failed to extract path from URL: %s\n", url);
+               curl_url_cleanup(h);
+               return NULL;
+       }
+
+       /* Just parse it as a string like before, but without the extra cruft */
+       buffer = file_extension_from_string(path, buffer, capacity);
+
+       curl_free(path);
+       curl_url_cleanup(h);
+
+       return buffer;
+}
+
+#elif defined(HAVE_URIPARSER)
+
+#include <uriparser/Uri.h>
+
+/*!
+ * \internal
+ * \brief Find a supported extension at the end of the URL's path component
+ *
+ * Uses uriparser to parse the bucket file's id, copies the text span
+ * referenced by the parsed URI's pathTail, and searches that copy with
+ * file_extension_from_string().
+ *
+ * \param bucket_file Bucket file whose id is parsed as a URL
+ * \param buffer Destination for the extension
+ * \param capacity Size of \a buffer in bytes
+ *
+ * \return \a buffer when an extension was found, NULL when the URL could not
+ *         be parsed (or lacks a scheme or path), the path could not be
+ *         copied, or the path has no supported extension
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       UriParserStateA state;
+       UriUriA full_uri;
+       char *path;
+
+       state.uri = &full_uri;
+       if (uriParseUriA(&state, ast_sorcery_object_get_id(bucket_file)) != URI_SUCCESS
+          || !full_uri.scheme.first
+          || !full_uri.scheme.afterLast
+          || !full_uri.pathTail) {
+               ast_log(LOG_ERROR, "Failed to parse URL: %s\n",
+                       ast_sorcery_object_get_id(bucket_file));
+               uriFreeUriMembersA(&full_uri);
+               return NULL;
+       }
+
+       /* The segment text is delimited by first/afterLast rather than being
+        * NUL-terminated, so make a terminated copy to search. */
+       if (ast_asprintf(&path,
+                       "%.*s",
+                       (int) (full_uri.pathTail->text.afterLast - full_uri.pathTail->text.first),
+                       full_uri.pathTail->text.first) == -1) {
+               uriFreeUriMembersA(&full_uri);
+               return NULL;
+       }
+
+       /* Just parse it as a string like before, but without the extra cruft.
+        * Propagate the helper's result so that NULL is returned, and buffer is
+        * not reported as filled in, when no supported extension was found. */
+       buffer = file_extension_from_string(path, buffer, capacity);
+
+       ast_free(path);
+       uriFreeUriMembersA(&full_uri);
+
+       return buffer;
+}
+
+#else
+
+/*!
+ * \internal
+ * \brief Fallback used when neither URL parsing implementation is compiled in
+ *
+ * \param bucket_file Unused
+ * \param buffer Unused; never written
+ * \param capacity Unused
+ *
+ * \return Always NULL
+ */
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+       /* Without a URL parser there is nothing to inspect */
+       char *no_extension = NULL;
+
+       return no_extension;
+}
+
+#endif
+
+/*!
+ * \internal
+ * \brief Work out an extension for a bucket file and store it as "ext" metadata
+ *
+ * For backwards compatibility the sources below are consulted in order, and
+ * the first one that yields an extension wins:
+ *
+ * 1. The tail end of the URL
+ * 2. The Content-Type header, if present
+ * 3. The tail of the path of the parsed URL (assuming we can parse it)
+ *
+ * Nothing is stored when none of them produce an extension.
+ *
+ * \param bucket_file Bucket file to update
+ */
+static void bucket_file_set_extension(struct ast_bucket_file *bucket_file)
+{
+       char ext[64];
+       char *found;
+
+       found = file_extension_from_url(bucket_file, ext, sizeof(ext));
+       if (!found) {
+               found = file_extension_from_content_type(bucket_file, ext, sizeof(ext));
+       }
+       if (!found) {
+               found = file_extension_from_url_path(bucket_file, ext, sizeof(ext));
+       }
+
+       if (found) {
+               ast_bucket_file_metadata_set(bucket_file, "ext", ext);
+       }
+}
+
  /*! \internal
   * \brief Return whether or not we should always revalidate against the server
   */
@@ -278,6 +449,7 @@ static int bucket_file_run_curl(struct ast_bucket_file *bucket_file)
  
         if (http_code / 100 == 2) {
                 bucket_file_set_expiration(bucket_file);
+               bucket_file_set_extension(bucket_file);
                 return 0;
         } else {
                 ast_log(LOG_WARNING, "Failed to retrieve URL '%s': server returned %ld\n",
diff --git a/tests/test_http_media_cache.c b/tests/test_http_media_cache.c

index c1975390a5e7704a6bd838d7f60a6f733e3e9bab..dfb28b703e0e0372d37e14be08ab8f5a688ea118 100644 (file)
--- a/tests/test_http_media_cache.c
+++ b/tests/test_http_media_cache.c
@@ -41,6 +41,14 @@
  #include "asterisk/bucket.h"
  #include "asterisk/test.h"
  
+/* INCLUDE_URI_PARSING_TESTS is defined only when a URL parser is available:
+ * either libcurl whose LIBCURL_VERSION_NUM is at least 0x073e00, or uriparser.
+ * curl/curl.h is included first (when HAVE_CURL is set) so that
+ * LIBCURL_VERSION_NUM is available to the check below. */
+#undef INCLUDE_URI_PARSING_TESTS
+#if defined(HAVE_CURL)
+# include <curl/curl.h>
+#endif
+#if (defined(HAVE_CURL) && LIBCURL_VERSION_NUM >= 0x073e00) || defined(HAVE_URIPARSER)
+# define INCLUDE_URI_PARSING_TESTS 1
+#endif
+
  #define CATEGORY "/res/http_media_cache/"
  
  #define TEST_URI "test_media_cache"
@@ -57,6 +65,7 @@ struct test_options {
         struct timeval expires;
         const char *status_text;
         const char *etag;
+       const char *content_type;
  };
  
  static struct test_options options;
@@ -125,6 +134,10 @@ static int http_callback(struct ast_tcptls_session_instance *ser, const struct a
                 }
         }
  
+       if (!ast_strlen_zero(options.content_type)) {
+               ast_str_append(&http_header, 0, "Content-Type: %s\r\n", options.content_type);
+       }
+
         if (options.cache_control.maxage) {
                 SET_OR_APPEND_CACHE_CONTROL(cache_control);
                 ast_str_append(&cache_control, 0, "max-age=%d", options.cache_control.maxage);
@@ -220,6 +233,77 @@ static void bucket_file_cleanup(void *obj)
         }
  }
  
+/*!
+ * \brief Retrieve a resource whose response carries a Content-Type header and
+ *        check the "ext" metadata recorded for it
+ */
+AST_TEST_DEFINE(retrieve_content_type)
+{
+       RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+       char uri[1024];
+
+       switch (cmd) {
+       case TEST_INIT:
+               info->name = __func__;
+               info->category = CATEGORY;
+               info->summary = "Test retrieval of a resource with a Content-Type header";
+               info->description =
+                       "This test covers retrieval of a resource whose URL does not end with\n"
+                       "a parseable extension and whose response includes a Content-Type\n"
+                       "header that we recognize.";
+               return AST_TEST_NOT_RUN;
+       case TEST_EXECUTE:
+               break;
+       }
+
+       /* Describe the response for the test HTTP callback; a non-empty
+        * content_type is sent by that callback as a "Content-Type" header */
+       options.send_file = 1;
+       options.status_code = 200;
+       options.status_text = "OK";
+       options.content_type = "audio/wav";
+
+       /* The trailing query string means the URL as a whole does not end in ".wav" */
+       snprintf(uri, sizeof(uri), "%s/%s", server_uri, "foo.wav?account_id=1234");
+
+       bucket_file = ast_bucket_file_retrieve(uri);
+       ast_test_validate(test, bucket_file != NULL);
+       ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+       ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+       /* The extension recorded for the retrieved file is expected to be ".wav" */
+       VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+       return AST_TEST_PASS;
+}
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+/*!
+ * \brief Retrieve a resource whose URL path ends in an extension that is
+ *        followed by a query string, and check the "ext" metadata recorded for it
+ */
+AST_TEST_DEFINE(retrieve_parsed_uri)
+{
+       RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+       char uri[1024];
+
+       switch (cmd) {
+       case TEST_INIT:
+               info->name = __func__;
+               info->category = CATEGORY;
+               info->summary = "Test retrieval of a resource with a complex URI";
+               info->description =
+                       "This test covers retrieval of a resource whose URL does not end with\n"
+                       "a parseable extension, but the path portion of the URL does end with\n"
+                       "parseable extension.";
+               return AST_TEST_NOT_RUN;
+       case TEST_EXECUTE:
+               break;
+       }
+
+       /* Unlike retrieve_content_type, no content_type is set here.
+        * NOTE(review): this presumably relies on options being reset before
+        * each test so no Content-Type header is sent - confirm against the
+        * registered test init callback. */
+       options.send_file = 1;
+       options.status_code = 200;
+       options.status_text = "OK";
+
+       /* The trailing query string means the URL as a whole does not end in ".wav" */
+       snprintf(uri, sizeof(uri), "%s/%s", server_uri, "foo.wav?account_id=1234");
+
+       bucket_file = ast_bucket_file_retrieve(uri);
+       ast_test_validate(test, bucket_file != NULL);
+       ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+       ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+       /* The extension recorded for the retrieved file is expected to be ".wav" */
+       VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+       return AST_TEST_PASS;
+}
+#endif
+
  AST_TEST_DEFINE(retrieve_cache_control_directives)
  {
         RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
@@ -670,6 +754,11 @@ static int load_module(void)
         AST_TEST_REGISTER(retrieve_etag_expired);
         AST_TEST_REGISTER(retrieve_cache_control_age);
         AST_TEST_REGISTER(retrieve_cache_control_directives);
+       AST_TEST_REGISTER(retrieve_content_type);
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+       AST_TEST_REGISTER(retrieve_parsed_uri);
+#endif
  
         ast_test_register_init(CATEGORY, pre_test_cb);
  
@@ -688,6 +777,11 @@ static int unload_module(void)
         AST_TEST_UNREGISTER(retrieve_etag_expired);
         AST_TEST_UNREGISTER(retrieve_cache_control_age);
         AST_TEST_UNREGISTER(retrieve_cache_control_directives);
+       AST_TEST_UNREGISTER(retrieve_content_type);
+
+#ifdef INCLUDE_URI_PARSING_TESTS
+       AST_TEST_UNREGISTER(retrieve_parsed_uri);
+#endif
  
         return 0;
  }
author	Sean Bright <sean.bright@gmail.com>
	Fri, 2 Jul 2021 15:15:05 +0000 (11:15 -0400)
committer	Joshua Colp <jcolp@sangoma.com>
	Mon, 19 Jul 2021 11:53:50 +0000 (06:53 -0500)
main/media_cache.c		patch \| blob \| blame \| history
res/res_http_media_cache.c		patch \| blob \| blame \| history
tests/test_http_media_cache.c		patch \| blob \| blame \| history