From: Mike Brady <4265913+mikebrady@users.noreply.github.com>
Date: Tue, 1 Jun 2021 07:56:47 +0000 (+0100)
Subject: Rough and hacky but works with the bit depth of the output DAC to automatically choose the correct AAC decoding sample size.
X-Git-Tag: 4.1-dev~100
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d4ea91aa6532105aa80647831437c269c7c96068;p=thirdparty%2Fshairport-sync.git

Rough and hacky, but it works: the bit depth of the output DAC is used to automatically choose the correct AAC decoding sample size. Allocation of the rotating buffers is fixed-size and profligate.
---

diff --git a/audio_alsa.c b/audio_alsa.c
index f0c10987..d5757bef 100644
--- a/audio_alsa.c
+++ b/audio_alsa.c
@@ -525,7 +525,7 @@ int actual_open_alsa_device(int do_auto_setup) {
     }
     if (ret == 0) {
       config.output_format = trial_format;
-      debug(2, "alsa: output format chosen is \"%s\".",
+      debug(1, "alsa: output format chosen is \"%s\".",
             sps_format_description_string(config.output_format));
     } else {
       warn("audio_alsa: Could not automatically set the output format for device \"%s\": %s",
@@ -566,7 +566,7 @@ int actual_open_alsa_device(int do_auto_setup) {
     }
     if (ret == 0) {
       config.output_rate = actual_sample_rate;
-      debug(2, "alsa: output speed chosen is %d.", config.output_rate);
+      debug(1, "alsa: output speed chosen is %d.", config.output_rate);
     } else {
       warn("audio_alsa: Could not automatically set the output rate for device \"%s\": %s",
            alsa_out_dev, snd_strerror(ret));
diff --git a/player.c b/player.c
index c307a9cc..a0e10d94 100644
--- a/player.c
+++ b/player.c
@@ -384,10 +384,12 @@ static void terminate_decoders(rtsp_conn_info *conn) {
 }
 
 static void init_buffer(rtsp_conn_info *conn) {
+  debug(1,"input_bytes_per_frame: %d.", conn->input_bytes_per_frame);
+  debug(1,"input_bit_depth: %d.", conn->input_bit_depth);
   int i;
   for (i = 0; i < BUFFER_FRAMES; i++)
-    conn->audio_buffer[i].data = malloc(conn->input_bytes_per_frame * conn->max_frames_per_packet);
-  ab_resync(conn);
+    // Previously each buffer was sized as conn->input_bytes_per_frame * conn->max_frames_per_packet.
+    conn->audio_buffer[i].data = malloc(8 * conn->max_frames_per_packet); // todo: fixed at 8 bytes per frame regardless of conn->input_bytes_per_frame, so smaller input formats over-allocate
 }
 
 static void free_audio_buffers(rtsp_conn_info *conn) {
@@ -1692,7 +1694,6 @@ void *player_thread_func(void *arg) {
   conn->packet_count = 0;
   conn->packet_count_since_flush = 0;
   conn->previous_random_number = 0;
-  conn->input_bytes_per_frame = 4;
   conn->decoder_in_use = 0;
   conn->ab_buffering = 1;
   conn->ab_synced = 0;
@@ -1711,6 +1712,7 @@ void *player_thread_func(void *arg) {
                              // No pthread cancellation point in here
   // This must be after init_alac_decoder
   init_buffer(conn); // will need a corresponding deallocation. No cancellation points in here
+  ab_resync(conn);
 
   if (conn->stream.encrypted) {
 #ifdef CONFIG_MBEDTLS
@@ -2114,10 +2116,56 @@ void *player_thread_func(void *arg) {
                 *outpl++ = rl;
               }
             }
+          } break;
+          case 32: {
+            int i, j;
+            int32_t ls, rs;
+            int32_t ll = 0, rl = 0;
+            int32_t *inps = (int32_t*) inbuf;
+            int32_t *outpl = (int32_t *)conn->tbuf;
+            for (i = 0; i < inbuflength; i++) {
+              ls = *inps++;
+              rs = *inps++;
+
+              // here, do the mode stuff -- mono / reverse stereo / leftonly / rightonly
+
+              switch (config.playback_mode) {
+              case ST_mono: {
+                int64_t both = (int64_t)ls + rs; // widen before adding: int32_t + int32_t can overflow
+                both = both >> 1;                // halve the sum to get the average of the two channels
+                uint32_t both32 = both;
+                ll = both32;
+                rl = both32;
+              } break;
+              case ST_reverse_stereo: {
+                ll = rs;
+                rl = ls;
+              } break;
+              case ST_left_only:
+                rl = ls;
+                ll = ls;
+                break;
+              case ST_right_only:
+                ll = rs;
+                rl = rs;
+                break;
+              case ST_stereo:
+                ll = ls;
+                rl = rs;
+                break; // nothing extra to do
+              }
 
+              // here, replicate the samples if you're upsampling
+
+              for (j = 0; j < conn->output_sample_ratio; j++) {
+                *outpl++ = ll;
+                *outpl++ = rl;
+              }
+            }
           } break;
+
           default:
-            die("Shairport Sync only supports 16 bit input");
+            die("Shairport Sync only supports 16 or 32 bit input");
           }
 
           inbuflength *= conn->output_sample_ratio;
@@ -3085,6 +3133,7 @@ int player_prepare_to_play(rtsp_conn_info *conn) {
   activity_monitor_signify_activity(
       1); // active, and should be before play's command hook, command_start()
   command_start();
+  conn->input_bytes_per_frame = 4; // default -- may be changed later
   // call on the output device to prepare itself
   if ((config.output) && (config.output->prepare))
     config.output->prepare();
diff --git a/rtp.c b/rtp.c
index 0540306b..dc3ba0aa 100644
--- a/rtp.c
+++ b/rtp.c
@@ -290,8 +290,8 @@ void *rtp_control_receiver(void *arg) {
                                                                 obfp += 2;
                                                               };
                                                               *obfp = 0;
-                                             
-                                             
+
+
                                                               // get raw timestamp information
                                                               // I think that a good way to understand these timestamps is that
                                                               // (1) the rtlt below is the timestamp of the frame that should be playing at the
@@ -302,19 +302,19 @@ void *rtp_control_receiver(void *arg) {
                                                               // Thus, (3) the latency can be calculated by subtracting the second from the
                                                               // first.
                                                               // There must be more to it -- there something missing.
-                                             
+
                                                               // In addition, it seems that if the value of the short represented by the second
                                                               // pair of bytes in the packet is 7
                                                               // then an extra time lag is expected to be added, presumably by
                                                               // the AirPort Express.
-                                             
+
                                                               // Best guess is that this delay is 11,025 frames.
-                                             
+
                                                               uint32_t rtlt = nctohl(&packet[4]); // raw timestamp less latency
                                                               uint32_t rt = nctohl(&packet[16]);  // raw timestamp
-                                             
+
                                                               uint32_t fl = nctohs(&packet[2]); //
-                                             
+
                                                               debug(1,"Sync Packet of %d bytes received: \"%s\", flags: %d, timestamps %u and %u,
                                                           giving a latency of %d frames.",plen,obf,fl,rt,rtlt,rt-rtlt);
                                                               //debug(1,"Monotonic timestamps are: %" PRId64 " and %" PRId64 "
@@ -2082,9 +2082,45 @@ void *rtp_buffered_audio_processor(void *arg) {
   av_opt_set_int(swr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
   av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
   av_opt_set_int(swr, "in_sample_rate", 44100, 0);
-  av_opt_set_int(swr, "out_sample_rate", 44100, 0);
+  av_opt_set_int(swr, "out_sample_rate", config.output_rate, 0);
   av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
-  av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+
+  enum AVSampleFormat av_format;
+  switch (config.output_format) {
+    case SPS_FORMAT_S32:
+    case SPS_FORMAT_S32_LE:
+    case SPS_FORMAT_S32_BE:
+    case SPS_FORMAT_S24:
+    case SPS_FORMAT_S24_LE:
+    case SPS_FORMAT_S24_BE:
+    case SPS_FORMAT_S24_3LE:
+    case SPS_FORMAT_S24_3BE:
+     av_format = AV_SAMPLE_FMT_S32;
+      conn->input_bytes_per_frame = 8; // the output from the decoder will be input to the player
+      conn->input_bit_depth = 32;
+       debug(1,"32-bit output format chosen");
+      break;
+    case SPS_FORMAT_S16:
+    case SPS_FORMAT_S16_LE:
+    case SPS_FORMAT_S16_BE:
+      av_format = AV_SAMPLE_FMT_S16;
+      conn->input_bytes_per_frame = 4;
+      conn->input_bit_depth = 16;
+      break;
+    case SPS_FORMAT_U8:
+      av_format = AV_SAMPLE_FMT_U8;
+      conn->input_bytes_per_frame = 2;
+      conn->input_bit_depth = 8;
+      break;
+    default:
+      debug(1,"Unsupported DAC output format %u. AV_SAMPLE_FMT_S16 decoding chosen. Good luck!", config.output_format);
+      av_format = AV_SAMPLE_FMT_S16;
+      conn->input_bytes_per_frame = 4; // the output from the decoder will be input to the player
+      conn->input_bit_depth = 16;
+      break;
+  };
+
+  av_opt_set_sample_fmt(swr, "out_sample_fmt", av_format, 0);
   swr_init(swr);
 
   uint8_t packet[16 * 1024];
@@ -2099,7 +2135,7 @@ void *rtp_buffered_audio_processor(void *arg) {
 
   int finished = 0;
   int pcm_buffer_size =
-      (1024 + 352) * 8; // This seems to be right. 8 is for 2 * 32-bit samples per frame
+      (1024 + 352) * conn->input_bytes_per_frame;
   uint8_t pcm_buffer[pcm_buffer_size];
 
   int pcm_buffer_occupancy = 0;
@@ -2220,7 +2256,7 @@ void *rtp_buffered_audio_processor(void *arg) {
         // debug(1,"sleep for 20 ms");
         usleep(20000); // wait for a while
       } else {
-        if ((pcm_buffer_occupancy - pcm_buffer_read_point) >= (352 * 4)) {
+        if ((pcm_buffer_occupancy - pcm_buffer_read_point) >= (352 * conn->input_bytes_per_frame)) {
           new_buffer_needed = 0;
           // send a frame to the player if allowed
           // it it's way too late, it means that a new anchor time is needed
@@ -2238,7 +2274,7 @@ void *rtp_buffered_audio_processor(void *arg) {
                 0) {
               int64_t lead_time = buffer_should_be_time - get_absolute_time_in_ns();
               // debug(1,"lead time in buffered_audio is %f milliseconds.", lead_time * 0.000001);
-              if (blocks_read > 2) {
+              if (blocks_read > 3) {
                 if ((lead_time >= (int64_t)(requested_lead_time * 1000000000)) ||
                     (streaming_has_started != 0)) {
                   if (streaming_has_started == 0)
@@ -2261,7 +2297,7 @@ void *rtp_buffered_audio_processor(void *arg) {
               }
 
               pcm_buffer_read_point_rtptime += 352;
-              pcm_buffer_read_point += 352 * 4;
+              pcm_buffer_read_point += 352 * conn->input_bytes_per_frame;
             }
             // usleep(2000); // let other stuff happens
           } else {
@@ -2271,7 +2307,7 @@ void *rtp_buffered_audio_processor(void *arg) {
           new_buffer_needed = 1;
           if (pcm_buffer_read_point != 0) {
             // debug(1,"pcm_buffer_read_point (frames): %u, pcm_buffer_occupancy (frames): %u",
-            // pcm_buffer_read_point/4, pcm_buffer_occupancy/4);
+            // pcm_buffer_read_point/conn->input_bytes_per_frame, pcm_buffer_occupancy/conn->input_bytes_per_frame);
             // if there is anything to move down to the front of the buffer, do it now;
             if ((pcm_buffer_occupancy - pcm_buffer_read_point) > 0) {
               // move the remaining frames down to the start of the buffer
@@ -2314,8 +2350,6 @@ void *rtp_buffered_audio_processor(void *arg) {
         strerror_r(errno, (char *)errorstring, sizeof(errorstring));
         debug(1, "error in rtp_buffered_audio_processor %d: \"%s\". Could not recv a data packet.",
               errno, errorstring);
-        // if ((config.diagnostic_drop_packet_fraction == 0.0) ||
-        //     (drand48() > config.diagnostic_drop_packet_fraction)) {
       } else if (nread > 0) {
         blocks_read++; // note, this doesn't mean they are valid audio blocks
         // debug(1, "Realtime Audio Receiver Packet of length %d received.", nread);
@@ -2445,13 +2479,13 @@ void *rtp_buffered_audio_processor(void *arg) {
                           debug(1, "error %d during decoding", ret);
                         } else {
                           av_samples_alloc(&pcm_audio, &dst_linesize, codec_context->channels,
-                                           decoded_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
+                                           decoded_frame->nb_samples, av_format, 1);
                           // remember to free pcm_audio
                           ret = swr_convert(swr, &pcm_audio, decoded_frame->nb_samples,
                                             (const uint8_t **)decoded_frame->extended_data,
                                             decoded_frame->nb_samples);
                           dst_bufsize = av_samples_get_buffer_size(
-                              &dst_linesize, codec_context->channels, ret, AV_SAMPLE_FMT_S16, 1);
+                              &dst_linesize, codec_context->channels, ret, av_format, 1);
                           // debug(1,"generated %d bytes of PCM", dst_bufsize);
                           // copy the PCM audio into the PCM buffer.
                           // make sure it's big enough first
@@ -2463,20 +2497,20 @@ void *rtp_buffered_audio_processor(void *arg) {
                             int32_t samples_remaining =
                                 (flush_from_timestamp - pcm_buffer_read_point_rtptime);
                             if ((samples_remaining > 0) &&
-                                ((samples_remaining * 4) < dst_bufsize)) {
+                                ((samples_remaining * conn->input_bytes_per_frame) < dst_bufsize)) {
                               debug(2,
                                     "samples remaining before flush: %d, number of samples %d. "
                                     "flushFromTS: %u, pcm_buffer_read_point_rtptime: %u.",
-                                    samples_remaining, dst_bufsize / 4, flush_from_timestamp,
+                                    samples_remaining, dst_bufsize / conn->input_bytes_per_frame, flush_from_timestamp,
                                     pcm_buffer_read_point_rtptime);
-                              dst_bufsize = samples_remaining * 4;
+                              dst_bufsize = samples_remaining * conn->input_bytes_per_frame;
                             }
                           }
                           if ((pcm_buffer_size - pcm_buffer_occupancy) < dst_bufsize) {
                             debug(1,
                                   "pcm_buffer_read_point (frames): %u, pcm_buffer_occupancy "
                                   "(frames): %u",
-                                  pcm_buffer_read_point / 4, pcm_buffer_occupancy / 4);
+                                  pcm_buffer_read_point / conn->input_bytes_per_frame, pcm_buffer_occupancy / conn->input_bytes_per_frame);
                             pcm_buffer_size = dst_bufsize + pcm_buffer_occupancy;
                             debug(1, "fatal error! pcm buffer too small at %d bytes.",
                                   pcm_buffer_size);
diff --git a/rtsp.c b/rtsp.c
index 079d6df6..c50b7679 100644
--- a/rtsp.c
+++ b/rtsp.c
@@ -2070,6 +2070,15 @@ void handle_setup_2(rtsp_conn_info *conn, rtsp_message *req, rtsp_message *resp)
         die("Error %d: could not find a TCP port to use as a buffered_audio port", err);
       }
 
+      // hack.
+      conn->max_frames_per_packet = 352; // number of audio frames per packet.
+      conn->input_rate = 44100;
+      conn->input_num_channels = 2;
+      conn->input_bit_depth = 16;
+      conn->input_bytes_per_frame = conn->input_num_channels * ((conn->input_bit_depth + 7) / 8);
+
+      player_prepare_to_play(conn); // get capabilities of DAC before creating the buffered audio thread
+
       pthread_create(&conn->rtp_buffered_audio_thread, NULL, &rtp_buffered_audio_processor,
                      (void *)conn);
 
@@ -2078,14 +2087,6 @@ void handle_setup_2(rtsp_conn_info *conn, rtsp_message *req, rtsp_message *resp)
       plist_dict_set_item(stream0dict, "audioBufferSize",
                           plist_new_uint(conn->ap2_audio_buffer_size));
 
-      // hack.
-      conn->max_frames_per_packet = 352; // number of audio frames per packet.
-      conn->input_rate = 44100;
-      conn->input_num_channels = 2;
-      conn->input_bit_depth = 16;
-      conn->input_bytes_per_frame = conn->input_num_channels * ((conn->input_bit_depth + 7) / 8);
-
-      player_prepare_to_play(conn);
       player_play(conn);
 
       conn->rtp_running = 1; // hack!