git.ipfire.org Git - thirdparty/shairport-sync.git/commitdiff
Rough and hacky but works with the bit depth of the output DAC to automatically choos...
author Mike Brady <4265913+mikebrady@users.noreply.github.com>
Tue, 1 Jun 2021 07:56:47 +0000 (08:56 +0100)
committer Mike Brady <4265913+mikebrady@users.noreply.github.com>
Tue, 1 Jun 2021 07:56:47 +0000 (08:56 +0100)
audio_alsa.c
player.c
rtp.c
rtsp.c

diff --git a/audio_alsa.c b/audio_alsa.c
index f0c109876895e03716a611a1972f42fa4c29ec7e..d5757befd550e0078efff3a19508304b995c270f 100644 (file)
--- a/audio_alsa.c
+++ b/audio_alsa.c
@@ -525,7 +525,7 @@ int actual_open_alsa_device(int do_auto_setup) {
     }
     if (ret == 0) {
       config.output_format = trial_format;
-      debug(2, "alsa: output format chosen is \"%s\".",
+      debug(1, "alsa: output format chosen is \"%s\".",
             sps_format_description_string(config.output_format));
     } else {
       warn("audio_alsa: Could not automatically set the output format for device \"%s\": %s",
@@ -566,7 +566,7 @@ int actual_open_alsa_device(int do_auto_setup) {
     }
     if (ret == 0) {
       config.output_rate = actual_sample_rate;
-      debug(2, "alsa: output speed chosen is %d.", config.output_rate);
+      debug(1, "alsa: output speed chosen is %d.", config.output_rate);
     } else {
       warn("audio_alsa: Could not automatically set the output rate for device \"%s\": %s",
            alsa_out_dev, snd_strerror(ret));
diff --git a/player.c b/player.c
index c307a9cc232d945e45155eaa919b4fc519f23d99..a0e10d943472627ef54e138ee2e5bd8d400beff5 100644 (file)
--- a/player.c
+++ b/player.c
@@ -384,10 +384,12 @@ static void terminate_decoders(rtsp_conn_info *conn) {
 }
 
 static void init_buffer(rtsp_conn_info *conn) {
+  debug(1,"input_bytes_per_frame: %d.", conn->input_bytes_per_frame);
+  debug(1,"input_bit_depth: %d.", conn->input_bit_depth);
   int i;
   for (i = 0; i < BUFFER_FRAMES; i++)
-    conn->audio_buffer[i].data = malloc(conn->input_bytes_per_frame * conn->max_frames_per_packet);
-  ab_resync(conn);
+//    conn->audio_buffer[i].data = malloc(conn->input_bytes_per_frame * conn->max_frames_per_packet);
+    conn->audio_buffer[i].data = malloc(8 * conn->max_frames_per_packet); // todo
 }
 
 static void free_audio_buffers(rtsp_conn_info *conn) {
@@ -1692,7 +1694,6 @@ void *player_thread_func(void *arg) {
   conn->packet_count = 0;
   conn->packet_count_since_flush = 0;
   conn->previous_random_number = 0;
-  conn->input_bytes_per_frame = 4;
   conn->decoder_in_use = 0;
   conn->ab_buffering = 1;
   conn->ab_synced = 0;
@@ -1711,6 +1712,7 @@ void *player_thread_func(void *arg) {
                              // No pthread cancellation point in here
   // This must be after init_alac_decoder
   init_buffer(conn); // will need a corresponding deallocation. No cancellation points in here
+  ab_resync(conn);
 
   if (conn->stream.encrypted) {
 #ifdef CONFIG_MBEDTLS
@@ -2114,10 +2116,56 @@ void *player_thread_func(void *arg) {
                 *outpl++ = rl;
               }
             }
+          } break;
+          case 32: {
+            int i, j;
+            int32_t ls, rs;
+            int32_t ll = 0, rl = 0;
+            int32_t *inps = (int32_t*) inbuf;
+            int32_t *outpl = (int32_t *)conn->tbuf;
+            for (i = 0; i < inbuflength; i++) {
+              ls = *inps++;
+              rs = *inps++;
+
+              // here, do the mode stuff -- mono / reverse stereo / leftonly / rightonly
+
+              switch (config.playback_mode) {
+              case ST_mono: {
+                int64_t both = ls + rs;
+                both = both >> 1;
+                uint32_t both32 = both;
+                ll = both32;
+                rl = both32;
+              } break;
+              case ST_reverse_stereo: {
+                ll = rs;
+                rl = ls;
+              } break;
+              case ST_left_only:
+                rl = ls;
+                ll = ls;
+                break;
+              case ST_right_only:
+                ll = rs;
+                rl = rs;
+                break;
+              case ST_stereo:
+                ll = ls;
+                rl = rs;
+                break; // nothing extra to do
+              }
 
+              // here, replicate the samples if you're upsampling
+
+              for (j = 0; j < conn->output_sample_ratio; j++) {
+                *outpl++ = ll;
+                *outpl++ = rl;
+              }
+            }
           } break;
+
           default:
-            die("Shairport Sync only supports 16 bit input");
+            die("Shairport Sync only supports 16 or 32 bit input");
           }
 
           inbuflength *= conn->output_sample_ratio;
@@ -3085,6 +3133,7 @@ int player_prepare_to_play(rtsp_conn_info *conn) {
   activity_monitor_signify_activity(
       1); // active, and should be before play's command hook, command_start()
   command_start();
+  conn->input_bytes_per_frame = 4; // default -- may be changed later
   // call on the output device to prepare itself
   if ((config.output) && (config.output->prepare))
     config.output->prepare();
diff --git a/rtp.c b/rtp.c
index 0540306b59676ce51cf4be010698680c0372581b..dc3ba0aa4c5a6018f00e910072c3bcdcd78dbe5f 100644 (file)
--- a/rtp.c
+++ b/rtp.c
@@ -290,8 +290,8 @@ void *rtp_control_receiver(void *arg) {
                                                                 obfp += 2;
                                                               };
                                                               *obfp = 0;
-                                             
-                                             
+
+
                                                               // get raw timestamp information
                                                               // I think that a good way to understand these timestamps is that
                                                               // (1) the rtlt below is the timestamp of the frame that should be playing at the
@@ -302,19 +302,19 @@ void *rtp_control_receiver(void *arg) {
                                                               // Thus, (3) the latency can be calculated by subtracting the second from the
                                                               // first.
                                                               // There must be more to it -- there something missing.
-                                             
+
                                                               // In addition, it seems that if the value of the short represented by the second
                                                               // pair of bytes in the packet is 7
                                                               // then an extra time lag is expected to be added, presumably by
                                                               // the AirPort Express.
-                                             
+
                                                               // Best guess is that this delay is 11,025 frames.
-                                             
+
                                                               uint32_t rtlt = nctohl(&packet[4]); // raw timestamp less latency
                                                               uint32_t rt = nctohl(&packet[16]);  // raw timestamp
-                                             
+
                                                               uint32_t fl = nctohs(&packet[2]); //
-                                             
+
                                                               debug(1,"Sync Packet of %d bytes received: \"%s\", flags: %d, timestamps %u and %u,
                                                           giving a latency of %d frames.",plen,obf,fl,rt,rtlt,rt-rtlt);
                                                               //debug(1,"Monotonic timestamps are: %" PRId64 " and %" PRId64 "
@@ -2082,9 +2082,45 @@ void *rtp_buffered_audio_processor(void *arg) {
   av_opt_set_int(swr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
   av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
   av_opt_set_int(swr, "in_sample_rate", 44100, 0);
-  av_opt_set_int(swr, "out_sample_rate", 44100, 0);
+  av_opt_set_int(swr, "out_sample_rate", config.output_rate, 0);
   av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
-  av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
+
+  enum AVSampleFormat av_format;
+  switch (config.output_format) {
+    case SPS_FORMAT_S32:
+    case SPS_FORMAT_S32_LE:
+    case SPS_FORMAT_S32_BE:
+    case SPS_FORMAT_S24:
+    case SPS_FORMAT_S24_LE:
+    case SPS_FORMAT_S24_BE:
+    case SPS_FORMAT_S24_3LE:
+    case SPS_FORMAT_S24_3BE:
+     av_format = AV_SAMPLE_FMT_S32;
+      conn->input_bytes_per_frame = 8; // the output from the decoder will be input to the player
+      conn->input_bit_depth = 32;
+       debug(1,"32-bit output format chosen");
+      break;
+    case SPS_FORMAT_S16:
+    case SPS_FORMAT_S16_LE:
+    case SPS_FORMAT_S16_BE:
+      av_format = AV_SAMPLE_FMT_S16;
+      conn->input_bytes_per_frame = 4;
+      conn->input_bit_depth = 16;
+      break;
+    case SPS_FORMAT_U8:
+      av_format = AV_SAMPLE_FMT_U8;
+      conn->input_bytes_per_frame = 2;
+      conn->input_bit_depth = 8;
+      break;
+    default:
+      debug(1,"Unsupported DAC output format %u. AV_SAMPLE_FMT_S16 decoding chosen. Good luck!", config.output_format);
+      av_format = AV_SAMPLE_FMT_S16;
+      conn->input_bytes_per_frame = 4; // the output from the decoder will be input to the player
+      conn->input_bit_depth = 16;
+      break;
+  };
+
+  av_opt_set_sample_fmt(swr, "out_sample_fmt", av_format, 0);
   swr_init(swr);
 
   uint8_t packet[16 * 1024];
@@ -2099,7 +2135,7 @@ void *rtp_buffered_audio_processor(void *arg) {
 
   int finished = 0;
   int pcm_buffer_size =
-      (1024 + 352) * 8; // This seems to be right. 8 is for 2 * 32-bit samples per frame
+      (1024 + 352) * conn->input_bytes_per_frame;
   uint8_t pcm_buffer[pcm_buffer_size];
 
   int pcm_buffer_occupancy = 0;
@@ -2220,7 +2256,7 @@ void *rtp_buffered_audio_processor(void *arg) {
         // debug(1,"sleep for 20 ms");
         usleep(20000); // wait for a while
       } else {
-        if ((pcm_buffer_occupancy - pcm_buffer_read_point) >= (352 * 4)) {
+        if ((pcm_buffer_occupancy - pcm_buffer_read_point) >= (352 * conn->input_bytes_per_frame)) {
           new_buffer_needed = 0;
           // send a frame to the player if allowed
           // it it's way too late, it means that a new anchor time is needed
@@ -2238,7 +2274,7 @@ void *rtp_buffered_audio_processor(void *arg) {
                 0) {
               int64_t lead_time = buffer_should_be_time - get_absolute_time_in_ns();
               // debug(1,"lead time in buffered_audio is %f milliseconds.", lead_time * 0.000001);
-              if (blocks_read > 2) {
+              if (blocks_read > 3) {
                 if ((lead_time >= (int64_t)(requested_lead_time * 1000000000)) ||
                     (streaming_has_started != 0)) {
                   if (streaming_has_started == 0)
@@ -2261,7 +2297,7 @@ void *rtp_buffered_audio_processor(void *arg) {
               }
 
               pcm_buffer_read_point_rtptime += 352;
-              pcm_buffer_read_point += 352 * 4;
+              pcm_buffer_read_point += 352 * conn->input_bytes_per_frame;
             }
             // usleep(2000); // let other stuff happens
           } else {
@@ -2271,7 +2307,7 @@ void *rtp_buffered_audio_processor(void *arg) {
           new_buffer_needed = 1;
           if (pcm_buffer_read_point != 0) {
             // debug(1,"pcm_buffer_read_point (frames): %u, pcm_buffer_occupancy (frames): %u",
-            // pcm_buffer_read_point/4, pcm_buffer_occupancy/4);
+            // pcm_buffer_read_point/conn->input_bytes_per_frame, pcm_buffer_occupancy/conn->input_bytes_per_frame);
             // if there is anything to move down to the front of the buffer, do it now;
             if ((pcm_buffer_occupancy - pcm_buffer_read_point) > 0) {
               // move the remaining frames down to the start of the buffer
@@ -2314,8 +2350,6 @@ void *rtp_buffered_audio_processor(void *arg) {
         strerror_r(errno, (char *)errorstring, sizeof(errorstring));
         debug(1, "error in rtp_buffered_audio_processor %d: \"%s\". Could not recv a data packet.",
               errno, errorstring);
-        // if ((config.diagnostic_drop_packet_fraction == 0.0) ||
-        //     (drand48() > config.diagnostic_drop_packet_fraction)) {
       } else if (nread > 0) {
         blocks_read++; // note, this doesn't mean they are valid audio blocks
         // debug(1, "Realtime Audio Receiver Packet of length %d received.", nread);
@@ -2445,13 +2479,13 @@ void *rtp_buffered_audio_processor(void *arg) {
                           debug(1, "error %d during decoding", ret);
                         } else {
                           av_samples_alloc(&pcm_audio, &dst_linesize, codec_context->channels,
-                                           decoded_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
+                                           decoded_frame->nb_samples, av_format, 1);
                           // remember to free pcm_audio
                           ret = swr_convert(swr, &pcm_audio, decoded_frame->nb_samples,
                                             (const uint8_t **)decoded_frame->extended_data,
                                             decoded_frame->nb_samples);
                           dst_bufsize = av_samples_get_buffer_size(
-                              &dst_linesize, codec_context->channels, ret, AV_SAMPLE_FMT_S16, 1);
+                              &dst_linesize, codec_context->channels, ret, av_format, 1);
                           // debug(1,"generated %d bytes of PCM", dst_bufsize);
                           // copy the PCM audio into the PCM buffer.
                           // make sure it's big enough first
@@ -2463,20 +2497,20 @@ void *rtp_buffered_audio_processor(void *arg) {
                             int32_t samples_remaining =
                                 (flush_from_timestamp - pcm_buffer_read_point_rtptime);
                             if ((samples_remaining > 0) &&
-                                ((samples_remaining * 4) < dst_bufsize)) {
+                                ((samples_remaining * conn->input_bytes_per_frame) < dst_bufsize)) {
                               debug(2,
                                     "samples remaining before flush: %d, number of samples %d. "
                                     "flushFromTS: %u, pcm_buffer_read_point_rtptime: %u.",
-                                    samples_remaining, dst_bufsize / 4, flush_from_timestamp,
+                                    samples_remaining, dst_bufsize / conn->input_bytes_per_frame, flush_from_timestamp,
                                     pcm_buffer_read_point_rtptime);
-                              dst_bufsize = samples_remaining * 4;
+                              dst_bufsize = samples_remaining * conn->input_bytes_per_frame;
                             }
                           }
                           if ((pcm_buffer_size - pcm_buffer_occupancy) < dst_bufsize) {
                             debug(1,
                                   "pcm_buffer_read_point (frames): %u, pcm_buffer_occupancy "
                                   "(frames): %u",
-                                  pcm_buffer_read_point / 4, pcm_buffer_occupancy / 4);
+                                  pcm_buffer_read_point / conn->input_bytes_per_frame, pcm_buffer_occupancy / conn->input_bytes_per_frame);
                             pcm_buffer_size = dst_bufsize + pcm_buffer_occupancy;
                             debug(1, "fatal error! pcm buffer too small at %d bytes.",
                                   pcm_buffer_size);
diff --git a/rtsp.c b/rtsp.c
index 079d6df6d35354740bf98e0332b898b7a4c4ce9d..c50b76793cbd4db16d6e128eb98acd559d649ad5 100644 (file)
--- a/rtsp.c
+++ b/rtsp.c
@@ -2070,6 +2070,15 @@ void handle_setup_2(rtsp_conn_info *conn, rtsp_message *req, rtsp_message *resp)
         die("Error %d: could not find a TCP port to use as a buffered_audio port", err);
       }
 
+      // hack.
+      conn->max_frames_per_packet = 352; // number of audio frames per packet.
+      conn->input_rate = 44100;
+      conn->input_num_channels = 2;
+      conn->input_bit_depth = 16;
+      conn->input_bytes_per_frame = conn->input_num_channels * ((conn->input_bit_depth + 7) / 8);
+
+      player_prepare_to_play(conn); // get capabilities of DAC before creating the buffered audio thread
+
       pthread_create(&conn->rtp_buffered_audio_thread, NULL, &rtp_buffered_audio_processor,
                      (void *)conn);
 
@@ -2078,14 +2087,6 @@ void handle_setup_2(rtsp_conn_info *conn, rtsp_message *req, rtsp_message *resp)
       plist_dict_set_item(stream0dict, "audioBufferSize",
                           plist_new_uint(conn->ap2_audio_buffer_size));
 
-      // hack.
-      conn->max_frames_per_packet = 352; // number of audio frames per packet.
-      conn->input_rate = 44100;
-      conn->input_num_channels = 2;
-      conn->input_bit_depth = 16;
-      conn->input_bytes_per_frame = conn->input_num_channels * ((conn->input_bit_depth + 7) / 8);
-
-      player_prepare_to_play(conn);
       player_play(conn);
 
       conn->rtp_running = 1; // hack!