player.c

   1 /*
   2  * Slave-clocked ALAC stream player. This file is part of Shairport.
   3  * Copyright (c) James Laird 2011, 2013
   4  * All rights reserved.
   5  *
   6  * Modifications for audio synchronisation, AirPlay 2
   7  * and related work, copyright (c) Mike Brady 2014 -- 2023
   8  * All rights reserved.
   9  *
  10  * Permission is hereby granted, free of charge, to any person
  11  * obtaining a copy of this software and associated documentation
  12  * files (the "Software"), to deal in the Software without
  13  * restriction, including without limitation the rights to use,
  14  * copy, modify, merge, publish, distribute, sublicense, and/or
  15  * sell copies of the Software, and to permit persons to whom the
  16  * Software is furnished to do so, subject to the following conditions:
  17  *
  18  * The above copyright notice and this permission notice shall be
  19  * included in all copies or substantial portions of the Software.
  20  *
  21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  23  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  25  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  26  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28  * OTHER DEALINGS IN THE SOFTWARE.
  29  */
  30
  31 #include <assert.h>
  32 #include <errno.h>
  33 #include <fcntl.h>
  34 #include <inttypes.h>
  35 #include <limits.h>
  36 #include <math.h>
  37 #include <pthread.h>
  38 #include <stdarg.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42 #include <sys/stat.h>
  43 #include <sys/syslog.h>
  44 #include <sys/types.h>
  45 #include <unistd.h>
  46
  47 #include "config.h"
  48
  49 #ifdef CONFIG_MBEDTLS
  50 #include <mbedtls/aes.h>
  51 #include <mbedtls/havege.h>
  52 #endif
  53
  54 #ifdef CONFIG_POLARSSL
  55 #include <polarssl/aes.h>
  56 #include <polarssl/havege.h>
  57 #endif
  58
  59 #ifdef CONFIG_OPENSSL
  60 #include <openssl/aes.h> // needed for older AES stuff
  61 #include <openssl/bio.h> // needed for BIO_new_mem_buf
  62 #include <openssl/err.h> // needed for ERR_error_string, ERR_get_error
  63 #include <openssl/evp.h> // needed for EVP_PKEY_CTX_new, EVP_PKEY_sign_init, EVP_PKEY_sign
  64 #include <openssl/pem.h> // needed for PEM_read_bio_RSAPrivateKey, EVP_PKEY_CTX_set_rsa_padding
  65 #include <openssl/rsa.h> // needed for EVP_PKEY_CTX_set_rsa_padding
  66 #endif
  67
  68 #ifdef CONFIG_SOXR
  69 #include <soxr.h>
  70 #endif
  71
  72 #ifdef CONFIG_CONVOLUTION
  73 #include <FFTConvolver/convolver.h>
  74 #endif
  75
  76 #ifdef CONFIG_METADATA_HUB
  77 #include "metadata_hub.h"
  78 #endif
  79
  80 #ifdef CONFIG_DACP_CLIENT
  81 #include "dacp.h"
  82 #endif
  83
  84 #include "common.h"
  85 #include "mdns.h"
  86 #include "player.h"
  87 #include "rtp.h"
  88 #include "rtsp.h"
  89
  90 #include "alac.h"
  91
  92 #ifdef CONFIG_APPLE_ALAC
  93 #include "apple_alac.h"
  94 #endif
  95
  96 #ifdef CONFIG_AIRPLAY_2
  97 #include "ptp-utilities.h"
  98 #endif
  99
 100 #include "loudness.h"
 101
 102 #include "activity_monitor.h"
 103
 104 // make the first audio packet deliberately early to bias the sync error of
 105 // the very first packet, making the error more likely to be too early
 106 // rather than too late. If it's too early,
 107 // a delay exactly compensating for it can be sent just before the
 108 // first packet. This should exactly compensate for the error.
 109
 110 int64_t first_frame_early_bias = 8; // NOTE(review): units are not stated in this part of the file -- confirm at the point of use
 111
 112 // default buffer size
 113 // needs to be a power of 2 because of the way BUFIDX(seqno) works
 114 // #define BUFFER_FRAMES 512
 // (BUFFER_FRAMES is not defined in this part of the file; the definition above is commented out.)
 // MAX_PACKET is the largest incoming audio packet, in bytes, that audio_packet_decode() will
 // accept; it is also the size of that function's on-stack scratch buffer.
 115 #define MAX_PACKET 2048
 116
 117 // DAC buffer occupancy stuff
 // NOTE(review): not referenced in the visible part of this file; presumably a frame count -- confirm.
 118 #define DAC_BUFFER_QUEUE_MINIMUM_LENGTH 2500
 119
 120 // static abuf_t audio_buffer[BUFFER_FRAMES];
 // BUFIDX maps a sequence number to a slot index in conn->audio_buffer by reducing it,
 // as a seq_t, modulo BUFFER_FRAMES.
 121 #define BUFIDX(seqno) ((seq_t)(seqno) % BUFFER_FRAMES)
 122
 // Signed offset from `from` to `to`: the difference is taken in unsigned 32-bit
 // (wrapping) arithmetic and then handed back as an int32_t.
 int32_t modulo_32_offset(uint32_t from, uint32_t to) {
   uint32_t wrapped_difference = to - from;
   return wrapped_difference;
 }
 124
 125 void do_flush(uint32_t timestamp, rtsp_conn_info *conn);
 126
 127 void ab_resync(rtsp_conn_info *conn) {
 128   int i;
 129   for (i = 0; i < BUFFER_FRAMES; i++) {
 130     conn->audio_buffer[i].ready = 0;
 131     conn->audio_buffer[i].resend_request_number = 0;
 132     conn->audio_buffer[i].resend_time =
 133         0; // this is either zero or the time the last resend was requested.
 134     conn->audio_buffer[i].initialisation_time =
 135         0; // this is either the time the packet was received or the time it was noticed the packet
 136            // was missing.
 137     conn->audio_buffer[i].sequence_number = 0;
 138   }
 139   conn->ab_synced = 0;
 140   conn->last_seqno_read = -1;
 141   conn->ab_buffering = 1;
 142 }
 143
 144 // the sequence numbers will wrap pretty often.
 145 // this returns true if the second arg is strictly after the first
 146 static inline int is_after(seq_t a, seq_t b) {
 147   int16_t d = b - a;
 148   return d > 0;
 149 }
 150
 151 void reset_input_flow_metrics(rtsp_conn_info *conn) {
 152   conn->play_number_after_flush = 0;
 153   conn->packet_count_since_flush = 0;
 154   conn->input_frame_rate_starting_point_is_valid = 0;
 155   conn->initial_reference_time = 0;
 156   conn->initial_reference_timestamp = 0;
 157 }
 158
 159 void unencrypted_packet_decode(unsigned char *packet, int length, short *dest, int *outsize,
 160                                int size_limit, rtsp_conn_info *conn) {
 161   if (conn->stream.type == ast_apple_lossless) {
 162 #ifdef CONFIG_APPLE_ALAC
 163     if (config.use_apple_decoder) {
 164       if (conn->decoder_in_use != 1 << decoder_apple_alac) {
 165         debug(2, "Apple ALAC Decoder used on encrypted audio.");
 166         conn->decoder_in_use = 1 << decoder_apple_alac;
 167       }
 168       apple_alac_decode_frame(packet, length, (unsigned char *)dest, outsize);
 169       *outsize = *outsize * 4; // bring the size to bytes
 170     } else
 171 #endif
 172     {
 173       if (conn->decoder_in_use != 1 << decoder_hammerton) {
 174         debug(2, "Hammerton Decoder used on encrypted audio.");
 175         conn->decoder_in_use = 1 << decoder_hammerton;
 176       }
 177       alac_decode_frame(conn->decoder_info, packet, (unsigned char *)dest, outsize);
 178     }
 179   } else if (conn->stream.type == ast_uncompressed) {
 180     int length_to_use = length;
 181     if (length_to_use > size_limit) {
 182       warn("unencrypted_packet_decode: uncompressed audio packet too long (size: %d bytes) to "
 183            "process -- truncated",
 184            length);
 185       length_to_use = size_limit;
 186     }
 187     int i;
 188     short *source = (short *)packet;
 189     for (i = 0; i < (length_to_use / 2); i++) {
 190       *dest = ntohs(*source);
 191       dest++;
 192       source++;
 193     }
 194     *outsize = length_to_use;
 195   }
 196 }
 197
 198 #ifdef CONFIG_OPENSSL
 199 // Thanks to
 200 // https://stackoverflow.com/questions/27558625/how-do-i-use-aes-cbc-encrypt-128-openssl-properly-in-ubuntu
 201 // for inspiration. Changed to a 128-bit key and no padding.
 202
 203 int openssl_aes_decrypt_cbc(unsigned char *ciphertext, int ciphertext_len, unsigned char *key,
 204                             unsigned char *iv, unsigned char *plaintext) {
 205   EVP_CIPHER_CTX *ctx;
 206   int len;
 207   int plaintext_len = 0;
 208   ctx = EVP_CIPHER_CTX_new();
 209   if (ctx != NULL) {
 210     if (EVP_DecryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv) == 1) {
 211       EVP_CIPHER_CTX_set_padding(ctx, 0); // no padding -- always returns 1
 212       // no need to allow space for padding in the output, as padding is disabled
 213       if (EVP_DecryptUpdate(ctx, plaintext, &len, ciphertext, ciphertext_len) == 1) {
 214         plaintext_len = len;
 215         if (EVP_DecryptFinal_ex(ctx, plaintext + len, &len) == 1) {
 216           plaintext_len += len;
 217         } else {
 218           debug(1, "EVP_DecryptFinal_ex error \"%s\".", ERR_error_string(ERR_get_error(), NULL));
 219         }
 220       } else {
 221         debug(1, "EVP_DecryptUpdate error \"%s\".", ERR_error_string(ERR_get_error(), NULL));
 222       }
 223     } else {
 224       debug(1, "EVP_DecryptInit_ex error \"%s\".", ERR_error_string(ERR_get_error(), NULL));
 225     }
 226     EVP_CIPHER_CTX_free(ctx);
 227   } else {
 228     debug(1, "EVP_CIPHER_CTX_new error \"%s\".", ERR_error_string(ERR_get_error(), NULL));
 229   }
 230   return plaintext_len;
 231 }
 232 #endif
 233 int audio_packet_decode(short *dest, int *destlen, uint8_t *buf, int len, rtsp_conn_info *conn) {
 234   // parameters: where the decoded stuff goes, its length in samples,
 235   // the incoming packet, the length of the incoming packet in bytes
 236   // destlen should contain the allowed max number of samples on entry
 237
 238   if (len > MAX_PACKET) {
 239     warn("Incoming audio packet size is too large at %d; it should not exceed %d.", len,
 240          MAX_PACKET);
 241     return -1;
 242   }
 243   unsigned char packet[MAX_PACKET];
 244   // unsigned char packetp[MAX_PACKET];
 245   assert(len <= MAX_PACKET);
 246   int reply = 0;                                          // everything okay
 247   int outsize = conn->input_bytes_per_frame * (*destlen); // the size the output should be, in bytes
 248   int maximum_possible_outsize = outsize;
 249
 250   if (conn->stream.encrypted) {
 251     unsigned char iv[16];
 252     int aeslen = len & ~0xf;
 253     memcpy(iv, conn->stream.aesiv, sizeof(iv));
 254 #ifdef CONFIG_MBEDTLS
 255     mbedtls_aes_crypt_cbc(&conn->dctx, MBEDTLS_AES_DECRYPT, aeslen, iv, buf, packet);
 256 #endif
 257 #ifdef CONFIG_POLARSSL
 258     aes_crypt_cbc(&conn->dctx, AES_DECRYPT, aeslen, iv, buf, packet);
 259 #endif
 260 #ifdef CONFIG_OPENSSL
 261     openssl_aes_decrypt_cbc(buf, aeslen, conn->stream.aeskey, iv, packet);
 262 #endif
 263     memcpy(packet + aeslen, buf + aeslen, len - aeslen);
 264     unencrypted_packet_decode(packet, len, dest, &outsize, maximum_possible_outsize, conn);
 265   } else {
 266     // not encrypted
 267     unencrypted_packet_decode(buf, len, dest, &outsize, maximum_possible_outsize, conn);
 268   }
 269
 270   if (outsize > maximum_possible_outsize) {
 271     debug(2,
 272           "Output from alac_decode larger (%d bytes, not frames) than expected (%d bytes) -- "
 273           "truncated, but buffer overflow possible! Encrypted = %d.",
 274           outsize, maximum_possible_outsize, conn->stream.encrypted);
 275     reply = -1; // output packet is the wrong size
 276   }
 277
 278   if (conn->input_bytes_per_frame != 0)
 279     *destlen = outsize / conn->input_bytes_per_frame;
 280   else
 281     die("Unexpectedly, conn->input_bytes_per_frame is zero.");
 282   if ((outsize % conn->input_bytes_per_frame) != 0)
 283     debug(1,
 284           "Number of audio frames (%d) does not correspond exactly to the number of bytes (%d) "
 285           "and the audio frame size (%d).",
 286           *destlen, outsize, conn->input_bytes_per_frame);
 287   return reply;
 288 }
 289
 290 static int init_alac_decoder(int32_t fmtp[12], rtsp_conn_info *conn) {
 291
 292   // clang-format off
 293
 294   // This is a guess, but the format of the fmtp looks identical to the format of an
 295   // ALACSpecificCOnfig which is detailed in the file ALACMagicCookieDescription.txt
 296   // in the Apple ALAC sample implementation
 297   // Here it is:
 298
 299   /*
 300
 301     * ALAC Specific Info (24 bytes) (mandatory)
 302     __________________________________________________________________________________________________________________________________
 303
 304     The Apple Lossless codec stores specific information about the encoded stream in the ALACSpecificConfig. This
 305     info is vended by the encoder and is used to setup the decoder for a given encoded bitstream.
 306
 307     When read from and written to a file, the fields of this struct must be in big-endian order.
 308     When vended by the encoder (and received by the decoder) the struct values will be in big-endian order.
 309
 310
 311         struct      ALACSpecificConfig (defined in ALACAudioTypes.h)
 312         abstract    This struct is used to describe codec provided information about the encoded Apple Lossless bitstream.
 313                     It must accompany the encoded stream in the containing audio file and be provided to the decoder.
 314
 315         field       frameLength             uint32_t        indicating the frames per packet when no explicit frames per packet setting is
 316                                                             present in the packet header. The encoder frames per packet can be explicitly set
 317                                                             but for maximum compatibility, the default encoder setting of 4096 should be used.
 318
 319         field       compatibleVersion       uint8_t         indicating compatible version,
 320                                                             value must be set to 0
 321
 322         field       bitDepth                uint8_t         describes the bit depth of the source PCM data (maximum value = 32)
 323
 324         field       pb                      uint8_t         currently unused tuning parameter.
 325                                                             value should be set to 40
 326
 327         field       mb                      uint8_t         currently unused tuning parameter.
 328                                                             value should be set to 10
 329
 330         field       kb                      uint8_t         currently unused tuning parameter.
 331                                                             value should be set to 14
 332
 333         field       numChannels             uint8_t         describes the channel count (1 = mono, 2 = stereo, etc...)
 334                                                             when channel layout info is not provided in the 'magic cookie', a channel count > 2
 335                                                             describes a set of discreet channels with no specific ordering
 336
 337         field       maxRun                  uint16_t        currently unused.
 338                                                             value should be set to 255
 339
 340         field       maxFrameBytes           uint32_t        the maximum size of an Apple Lossless packet within the encoded stream.
 341                                                             value of 0 indicates unknown
 342
 343         field       avgBitRate              uint32_t        the average bit rate in bits per second of the Apple Lossless stream.
 344                                                             value of 0 indicates unknown
 345
 346         field       sampleRate              uint32_t        sample rate of the encoded stream
 347
 348
 349     typedef struct ALACSpecificConfig
 350     {
 351             uint32_t        frameLength;
 352             uint8_t         compatibleVersion;
 353             uint8_t         bitDepth;
 354             uint8_t         pb;
 355             uint8_t         mb;
 356             uint8_t         kb;
 357             uint8_t         numChannels;
 358             uint16_t        maxRun;
 359             uint32_t        maxFrameBytes;
 360             uint32_t        avgBitRate;
 361             uint32_t        sampleRate;
 362
 363     } ALACSpecificConfig;
 364
 365    */
 366
 367    // We are going to go on that basis
 368
 369   // clang-format on
 370
 371   alac_file *alac;
 372
 373   alac = alac_create(conn->input_bit_depth,
 374                      conn->input_num_channels); // no pthread cancellation point in here
 375   if (!alac)
 376     return 1;
 377   conn->decoder_info = alac;
 378
 379   alac->setinfo_max_samples_per_frame = conn->max_frames_per_packet;
 380   alac->setinfo_7a = fmtp[2];
 381   alac->setinfo_sample_size = conn->input_bit_depth;
 382   alac->setinfo_rice_historymult = fmtp[4];
 383   alac->setinfo_rice_initialhistory = fmtp[5];
 384   alac->setinfo_rice_kmodifier = fmtp[6];
 385   alac->setinfo_7f = fmtp[7];
 386   alac->setinfo_80 = fmtp[8];
 387   alac->setinfo_82 = fmtp[9];
 388   alac->setinfo_86 = fmtp[10];
 389   alac->setinfo_8a_rate = fmtp[11];
 390   alac_allocate_buffers(alac); // no pthread cancellation point in here
 391
 392 #ifdef CONFIG_APPLE_ALAC
 393   apple_alac_init(fmtp); // no pthread cancellation point in here
 394 #endif
 395
 396   return 0;
 397 }
 398
 399 static void terminate_decoders(rtsp_conn_info *conn) {
 400   alac_free(conn->decoder_info);
 401 #ifdef CONFIG_APPLE_ALAC
 402   apple_alac_terminate();
 403 #endif
 404 }
 405
 406 uint64_t buffers_allocated = 0;
 407 uint64_t buffers_released = 0;
 408 static void init_buffer(rtsp_conn_info *conn) {
 409   // debug(1,"input_bytes_per_frame: %d.", conn->input_bytes_per_frame);
 410   // debug(1,"input_bit_depth: %d.", conn->input_bit_depth);
 411   int i;
 412   for (i = 0; i < BUFFER_FRAMES; i++) {
 413     //    conn->audio_buffer[i].data = malloc(conn->input_bytes_per_frame *
 414     //    conn->max_frames_per_packet);
 415     void *allocation = malloc(8 * conn->max_frames_per_packet);
 416     if (allocation == NULL) {
 417       die("could not allocate memory for audio buffers. %" PRId64 " buffers allocated, %" PRId64
 418           " buffers released.",
 419           buffers_allocated, buffers_released);
 420     } else {
 421       conn->audio_buffer[i].data = allocation;
 422       buffers_allocated++;
 423     }
 424   }
 425 }
 426
 427 static void free_audio_buffers(rtsp_conn_info *conn) {
 428   int i;
 429   for (i = 0; i < BUFFER_FRAMES; i++) {
 430     free(conn->audio_buffer[i].data);
 431     buffers_released++;
 432   }
 433   debug(2, "%" PRId64 " buffers allocated, %" PRId64 " buffers released.", buffers_allocated,
 434         buffers_released);
 435 }
 436
 // Sequence number at which the resend scan in player_put_packet() may begin, or -1 when unset.
 // NOTE(review): this is file-scope rather than per-connection state -- confirm that is intended.
 437 int first_possibly_missing_frame = -1;
 438
 439 void reset_buffer(rtsp_conn_info *conn) {
 440   debug_mutex_lock(&conn->ab_mutex, 30000, 0);
 441   ab_resync(conn);
 442   debug_mutex_unlock(&conn->ab_mutex, 0);
 443   if (config.output->flush) {
 444     config.output->flush(); // no cancellation points
 445                             //            debug(1, "reset_buffer: flush output device.");
 446   }
 447 }
 448
 449 void get_audio_buffer_size_and_occupancy(unsigned int *size, unsigned int *occupancy,
 450                                          rtsp_conn_info *conn) {
 451   debug_mutex_lock(&conn->ab_mutex, 30000, 0);
 452   *size = BUFFER_FRAMES;
 453   if (conn->ab_synced) {
 454     int16_t occ =
 455         conn->ab_write - conn->ab_read; // will be zero or positive if read and write are within
 456                                         // 2^15 of each other and write is at or after read
 457     *occupancy = occ;
 458   } else {
 459     *occupancy = 0;
 460   }
 461   debug_mutex_unlock(&conn->ab_mutex, 0);
 462 }
 463
 464 void player_put_packet(int original_format, seq_t seqno, uint32_t actual_timestamp, uint8_t *data,
 465                        int len, rtsp_conn_info *conn) {
 466
 467   // if it's original format, it has a valid seqno and must be decoded
 468   // otherwise, it can take the next seqno and doesn't need decoding.
 469
 470   // ignore a request to flush that has been made before the first packet...
 471   if (conn->packet_count == 0) {
 472     debug_mutex_lock(&conn->flush_mutex, 1000, 1);
 473     conn->flush_requested = 0;
 474     conn->flush_rtp_timestamp = 0;
 475     debug_mutex_unlock(&conn->flush_mutex, 3);
 476   }
 477
 478   debug_mutex_lock(&conn->ab_mutex, 30000, 0);
 479   uint64_t time_now = get_absolute_time_in_ns();
 480   conn->packet_count++;
 481   conn->packet_count_since_flush++;
 482   conn->time_of_last_audio_packet = time_now;
 483   if (conn->connection_state_to_output) { // if we are supposed to be processing these packets
 484     abuf_t *abuf = 0;
 485     if (!conn->ab_synced) {
 486       conn->ab_write = seqno;
 487       conn->ab_read = seqno;
 488       conn->ab_synced = 1;
 489       conn->first_packet_timestamp = 0;
 490       debug(2, "Connection %d: synced by first packet, seqno %u.", conn->connection_number, seqno);
 491     } else if (original_format == 0) {
 492       // if the packet is coming in original format, the sequence number is important
 493       // otherwise, ignore is by setting it equal to the expected sequence number in ab_write
 494       seqno = conn->ab_write;
 495     }
 496     if (conn->ab_write ==
 497         seqno) { // if this is the expected packet (which could be the first packet...)
 498       if (conn->input_frame_rate_starting_point_is_valid == 0) {
 499         if ((conn->packet_count_since_flush >= 500) && (conn->packet_count_since_flush <= 510)) {
 500           conn->frames_inward_measurement_start_time = time_now;
 501           conn->frames_inward_frames_received_at_measurement_start_time = actual_timestamp;
 502           conn->input_frame_rate_starting_point_is_valid = 1; // valid now
 503         }
 504       }
 505       conn->frames_inward_measurement_time = time_now;
 506       conn->frames_inward_frames_received_at_measurement_time = actual_timestamp;
 507       abuf = conn->audio_buffer + BUFIDX(seqno);
 508       conn->ab_write = seqno + 1;                 // move the write pointer to the next free space
 509     } else if (is_after(conn->ab_write, seqno)) { // newer than expected
 510       int32_t gap = seqno - conn->ab_write;
 511       if (gap <= 0)
 512         debug(1, "Unexpected gap size: %d.", gap);
 513       int i;
 514       for (i = 0; i < gap; i++) {
 515         abuf = conn->audio_buffer + BUFIDX(conn->ab_write + i);
 516         abuf->ready = 0; // to be sure, to be sure
 517         abuf->resend_request_number = 0;
 518         abuf->initialisation_time =
 519             time_now;          // this represents when the packet was noticed to be missing
 520         abuf->status = 1 << 0; // signifying missing
 521         abuf->resend_time = 0;
 522         abuf->given_timestamp = 0;
 523         abuf->sequence_number = 0;
 524       }
 525       abuf = conn->audio_buffer + BUFIDX(seqno);
 526       //        rtp_request_resend(ab_write, gap);
 527       //        resend_requests++;
 528       conn->ab_write = seqno + 1;
 529     } else if (is_after(conn->ab_read, seqno)) { // older than expected but not too late
 530       conn->late_packets++;
 531       abuf = conn->audio_buffer + BUFIDX(seqno);
 532     } else { // too late.
 533       conn->too_late_packets++;
 534     }
 535
 536     if (abuf) {
 537       int datalen = conn->max_frames_per_packet;
 538       abuf->initialisation_time = time_now;
 539       abuf->resend_time = 0;
 540       if ((original_format != 0) &&
 541           (audio_packet_decode(abuf->data, &datalen, data, len, conn) == 0)) {
 542         abuf->ready = 1;
 543         abuf->status = 0; // signifying that it was received
 544         abuf->length = datalen;
 545         abuf->given_timestamp = actual_timestamp;
 546         abuf->sequence_number = seqno;
 547       } else if (original_format == 0) {
 548         memcpy(abuf->data, data, len * conn->input_bytes_per_frame);
 549         abuf->ready = 1;
 550         abuf->status = 0; // signifying that it was received
 551         abuf->length = len;
 552         abuf->given_timestamp = actual_timestamp;
 553         abuf->sequence_number = seqno;
 554       } else {
 555         debug(1, "Bad audio packet detected and discarded.");
 556         abuf->ready = 0;
 557         abuf->status = 1 << 1; // bad packet, discarded
 558         abuf->resend_request_number = 0;
 559         abuf->given_timestamp = 0;
 560         abuf->sequence_number = 0;
 561       }
 562     }
 563
 564     int rc = pthread_cond_signal(&conn->flowcontrol);
 565     if (rc)
 566       debug(1, "Error signalling flowcontrol.");
 567
 568     // resend checks
 569     {
 570       uint64_t minimum_wait_time =
 571           (uint64_t)(config.resend_control_first_check_time * (uint64_t)1000000000);
 572       uint64_t resend_repeat_interval =
 573           (uint64_t)(config.resend_control_check_interval_time * (uint64_t)1000000000);
 574       uint64_t minimum_remaining_time = (uint64_t)(
 575           (config.resend_control_last_check_time + config.audio_backend_buffer_desired_length) *
 576           (uint64_t)1000000000);
 577       uint64_t latency_time = (uint64_t)(conn->latency * (uint64_t)1000000000);
 578       latency_time = latency_time / (uint64_t)conn->input_rate;
 579
 580       // find the first frame that is missing, if known
 581       int x = conn->ab_read;
 582       if (first_possibly_missing_frame >= 0) {
 583         // if it's within the range
 584         int16_t buffer_size = conn->ab_write - conn->ab_read; // must be positive
 585         if (buffer_size >= 0) {
 586           int16_t position_in_buffer = first_possibly_missing_frame - conn->ab_read;
 587           if ((position_in_buffer >= 0) && (position_in_buffer < buffer_size))
 588             x = first_possibly_missing_frame;
 589         }
 590       }
 591
 592       first_possibly_missing_frame = -1; // has not been set
 593
 594       int missing_frame_run_count = 0;
 595       int start_of_missing_frame_run = -1;
 596       int number_of_missing_frames = 0;
 597       while (x != conn->ab_write) {
 598         abuf_t *check_buf = conn->audio_buffer + BUFIDX(x);
 599         if (!check_buf->ready) {
 600           if (first_possibly_missing_frame < 0)
 601             first_possibly_missing_frame = x;
 602           number_of_missing_frames++;
 603           // debug(1, "frame %u's initialisation_time is 0x%" PRIx64 ", latency_time is 0x%"
 604           // PRIx64 ", time_now is 0x%" PRIx64 ", minimum_remaining_time is 0x%" PRIx64 ".", x,
 605           // check_buf->initialisation_time, latency_time, time_now, minimum_remaining_time);
 606           int too_late = ((check_buf->initialisation_time < (time_now - latency_time)) ||
 607                           ((check_buf->initialisation_time - (time_now - latency_time)) <
 608                            minimum_remaining_time));
 609           int too_early = ((time_now - check_buf->initialisation_time) < minimum_wait_time);
 610           int too_soon_after_last_request =
 611               ((check_buf->resend_time != 0) &&
 612                ((time_now - check_buf->resend_time) <
 613                 resend_repeat_interval)); // time_now can never be less than the time_tag
 614
 615           if (too_late)
 616             check_buf->status |= 1 << 2; // too late
 617           else
 618             check_buf->status &= 0xFF - (1 << 2); // not too late
 619           if (too_early)
 620             check_buf->status |= 1 << 3; // too early
 621           else
 622             check_buf->status &= 0xFF - (1 << 3); // not too early
 623           if (too_soon_after_last_request)
 624             check_buf->status |= 1 << 4; // too soon after last request
 625           else
 626             check_buf->status &= 0xFF - (1 << 4); // not too soon after last request
 627
 628           if ((!too_soon_after_last_request) && (!too_late) && (!too_early)) {
 629             if (start_of_missing_frame_run == -1) {
 630               start_of_missing_frame_run = x;
 631               missing_frame_run_count = 1;
 632             } else {
 633               missing_frame_run_count++;
 634             }
 635             check_buf->resend_time = time_now; // setting the time to now because we are
 636                                                // definitely going to take action
 637             check_buf->resend_request_number++;
 638             debug(3, "Frame %d is missing with ab_read of %u and ab_write of %u.", x, conn->ab_read,
 639                   conn->ab_write);
 640           }
 641           // if (too_late) {
 642           //   debug(1,"too late to get missing frame %u.", x);
 643           // }
 644         }
 645         // if (number_of_missing_frames != 0)
 646         //  debug(1,"check with x = %u, ab_read = %u, ab_write = %u, first_possibly_missing_frame
 647         //  = %d.", x, conn->ab_read, conn->ab_write, first_possibly_missing_frame);
 648         x = (x + 1) & 0xffff;
 649         if (((check_buf->ready) || (x == conn->ab_write)) && (missing_frame_run_count > 0)) {
 650           // send a resend request
 651           if (missing_frame_run_count > 1)
 652             debug(3, "request resend of %d packets starting at seqno %u.", missing_frame_run_count,
 653                   start_of_missing_frame_run);
 654           if (config.disable_resend_requests == 0) {
 655             debug_mutex_unlock(&conn->ab_mutex, 3);
 656             rtp_request_resend(start_of_missing_frame_run, missing_frame_run_count, conn);
 657             debug_mutex_lock(&conn->ab_mutex, 20000, 1);
 658             conn->resend_requests++;
 659           }
 660           start_of_missing_frame_run = -1;
 661           missing_frame_run_count = 0;
 662         }
 663       }
 664       if (number_of_missing_frames == 0)
 665         first_possibly_missing_frame = conn->ab_write;
 666     }
 667   }
 668   debug_mutex_unlock(&conn->ab_mutex, 0);
 669 }
 670
 // Returns a pseudo random integer in the range 0 to (exclusive_range_limit-1) inclusive.
 // The generator state is a single static 32-bit value advanced on every call.
 int32_t rand_in_range(int32_t exclusive_range_limit) {
   static uint32_t lcg_state = 12345;
   // 64 bit calculation on the current state; the interesting part of the
   // product is above the 32 rightmost bits
   const int64_t scaled = (int64_t)lcg_state * (int64_t)exclusive_range_limit;
   // step the (crude) linear congruential generator for the next call
   lcg_state = lcg_state * 69069 + 3;
   return scaled >> 32;
 }
 680
 681 static inline void process_sample(int32_t sample, char **outp, sps_format_t format, int volume,
 682                                   int dither, rtsp_conn_info *conn) {
 683   /*
 684   {
 685                 static int old_volume = 0;
 686                 if (volume != old_volume) {
 687                         debug(1,"Volume is now %d.",volume);
 688                         old_volume = volume;
 689                 }
 690   }
 691   */
 692
 693   int64_t hyper_sample = sample;
 694   int result = 0;
 695
 696   if (config.loudness) {
 697     hyper_sample <<=
 698         32; // Do not apply volume as it has already been done with the Loudness DSP filter
 699   } else {
 700     int64_t hyper_volume = (int64_t)volume << 16;
 701     hyper_sample = hyper_sample * hyper_volume; // this is 64 bit bit multiplication -- we may need
 702                                                 // to dither it down to its target resolution
 703   }
 704
 705   // next, do dither, if necessary
 706   if (dither) {
 707
 708     // add a TPDF dither -- see
 709     // http://educypedia.karadimov.info/library/DitherExplained.pdf
 710     // and the discussion around https://www.hydrogenaud.io/forums/index.php?showtopic=16963&st=25
 711
 712     // I think, for a 32 --> 16 bits, the range of
 713     // random numbers needs to be from -2^16 to 2^16, i.e. from -65536 to 65536 inclusive, not from
 714     // -32768 to +32767
 715
 716     // Actually, what would be generated here is from -65535 to 65535, i.e. one less on the limits.
 717
 718     // See the original paper at
 719     // http://www.ece.rochester.edu/courses/ECE472/resources/Papers/Lipshitz_1992.pdf
 720     // by Lipshitz, Wannamaker and Vanderkooy, 1992.
 721
 722     int64_t dither_mask = 0;
 723     switch (format) {
 724     case SPS_FORMAT_S32:
 725     case SPS_FORMAT_S32_LE:
 726     case SPS_FORMAT_S32_BE:
 727       dither_mask = (int64_t)1 << (64 - 32);
 728       break;
 729     case SPS_FORMAT_S24:
 730     case SPS_FORMAT_S24_LE:
 731     case SPS_FORMAT_S24_BE:
 732     case SPS_FORMAT_S24_3LE:
 733     case SPS_FORMAT_S24_3BE:
 734       dither_mask = (int64_t)1 << (64 - 24);
 735       break;
 736     case SPS_FORMAT_S16:
 737     case SPS_FORMAT_S16_LE:
 738     case SPS_FORMAT_S16_BE:
 739       dither_mask = (int64_t)1 << (64 - 16);
 740       break;
 741     case SPS_FORMAT_S8:
 742     case SPS_FORMAT_U8:
 743       dither_mask = (int64_t)1 << (64 - 8);
 744       break;
 745     case SPS_FORMAT_UNKNOWN:
 746       die("Unexpected SPS_FORMAT_UNKNOWN while calculating dither mask.");
 747       break;
 748     case SPS_FORMAT_AUTO:
 749       die("Unexpected SPS_FORMAT_AUTO while calculating dither mask.");
 750       break;
 751     case SPS_FORMAT_INVALID:
 752       die("Unexpected SPS_FORMAT_INVALID while calculating dither mask.");
 753       break;
 754     }
 755     dither_mask -= 1;
 756     int64_t r = r64i();
 757
 758     int64_t tpdf = (r & dither_mask) - (conn->previous_random_number & dither_mask);
 759     conn->previous_random_number = r;
 760     // add dither, allowing for clipping
 761     if (tpdf >= 0) {
 762       if (INT64_MAX - tpdf >= hyper_sample)
 763         hyper_sample += tpdf;
 764       else
 765         hyper_sample = INT64_MAX;
 766     } else {
 767       if (INT64_MIN - tpdf <= hyper_sample)
 768         hyper_sample += tpdf;
 769       else
 770         hyper_sample = INT64_MIN;
 771     }
 772     // dither is complete here
 773   }
 774
 775   // move the result to the desired position in the int64_t
 776   char *op = *outp;
 777   uint8_t byt;
 778   switch (format) {
 779   case SPS_FORMAT_S32_LE:
 780     hyper_sample >>= (64 - 32);
 781     byt = (uint8_t)hyper_sample;
 782     *op++ = byt;
 783     byt = (uint8_t)(hyper_sample >> 8);
 784     *op++ = byt;
 785     byt = (uint8_t)(hyper_sample >> 16);
 786     *op++ = byt;
 787     byt = (uint8_t)(hyper_sample >> 24);
 788     *op++ = byt;
 789     result = 4;
 790     break;
 791   case SPS_FORMAT_S32_BE:
 792     hyper_sample >>= (64 - 32);
 793     byt = (uint8_t)(hyper_sample >> 24);
 794     *op++ = byt;
 795     byt = (uint8_t)(hyper_sample >> 16);
 796     *op++ = byt;
 797     byt = (uint8_t)(hyper_sample >> 8);
 798     *op++ = byt;
 799     byt = (uint8_t)hyper_sample;
 800     *op++ = byt;
 801     result = 4;
 802     break;
 803   case SPS_FORMAT_S32:
 804     hyper_sample >>= (64 - 32);
 805     *(int32_t *)op = hyper_sample;
 806     result = 4;
 807     break;
 808   case SPS_FORMAT_S24_3LE:
 809     hyper_sample >>= (64 - 24);
 810     byt = (uint8_t)hyper_sample;
 811     *op++ = byt;
 812     byt = (uint8_t)(hyper_sample >> 8);
 813     *op++ = byt;
 814     byt = (uint8_t)(hyper_sample >> 16);
 815     *op++ = byt;
 816     result = 3;
 817     break;
 818   case SPS_FORMAT_S24_3BE:
 819     hyper_sample >>= (64 - 24);
 820     byt = (uint8_t)(hyper_sample >> 16);
 821     *op++ = byt;
 822     byt = (uint8_t)(hyper_sample >> 8);
 823     *op++ = byt;
 824     byt = (uint8_t)hyper_sample;
 825     *op++ = byt;
 826     result = 3;
 827     break;
 828   case SPS_FORMAT_S24_LE:
 829     hyper_sample >>= (64 - 24);
 830     byt = (uint8_t)hyper_sample;
 831     *op++ = byt;
 832     byt = (uint8_t)(hyper_sample >> 8);
 833     *op++ = byt;
 834     byt = (uint8_t)(hyper_sample >> 16);
 835     *op++ = byt;
 836     *op++ = 0;
 837     result = 4;
 838     break;
 839   case SPS_FORMAT_S24_BE:
 840     hyper_sample >>= (64 - 24);
 841     *op++ = 0;
 842     byt = (uint8_t)(hyper_sample >> 16);
 843     *op++ = byt;
 844     byt = (uint8_t)(hyper_sample >> 8);
 845     *op++ = byt;
 846     byt = (uint8_t)hyper_sample;
 847     *op++ = byt;
 848     result = 4;
 849     break;
 850   case SPS_FORMAT_S24:
 851     hyper_sample >>= (64 - 24);
 852     *(int32_t *)op = hyper_sample;
 853     result = 4;
 854     break;
 855   case SPS_FORMAT_S16_LE:
 856     hyper_sample >>= (64 - 16);
 857     byt = (uint8_t)hyper_sample;
 858     *op++ = byt;
 859     byt = (uint8_t)(hyper_sample >> 8);
 860     *op++ = byt;
 861     result = 2;
 862     break;
 863   case SPS_FORMAT_S16_BE:
 864     hyper_sample >>= (64 - 16);
 865     byt = (uint8_t)(hyper_sample >> 8);
 866     *op++ = byt;
 867     byt = (uint8_t)hyper_sample;
 868     *op++ = byt;
 869     result = 2;
 870     break;
 871   case SPS_FORMAT_S16:
 872     hyper_sample >>= (64 - 16);
 873     *(int16_t *)op = (int16_t)hyper_sample;
 874     result = 2;
 875     break;
 876   case SPS_FORMAT_S8:
 877     hyper_sample >>= (int8_t)(64 - 8);
 878     *op = hyper_sample;
 879     result = 1;
 880     break;
 881   case SPS_FORMAT_U8:
 882     hyper_sample >>= (uint8_t)(64 - 8);
 883     hyper_sample += 128;
 884     *op = hyper_sample;
 885     result = 1;
 886     break;
 887   case SPS_FORMAT_UNKNOWN:
 888     die("Unexpected SPS_FORMAT_UNKNOWN while outputting samples");
 889     break;
 890   case SPS_FORMAT_AUTO:
 891     die("Unexpected SPS_FORMAT_AUTO while outputting samples");
 892     break;
 893   case SPS_FORMAT_INVALID:
 894     die("Unexpected SPS_FORMAT_INVALID while outputting samples");
 895     break;
 896   }
 897
 898   *outp += result;
 899 }
 900
 901 void buffer_get_frame_cleanup_handler(void *arg) {
 902   rtsp_conn_info *conn = (rtsp_conn_info *)arg;
 903   debug_mutex_unlock(&conn->ab_mutex, 0);
 904 }
 905
 906 // get the next frame, when available. return 0 if underrun/stream reset.
 907 static abuf_t *buffer_get_frame(rtsp_conn_info *conn) {
 908   // int16_t buf_fill;
 909   uint64_t local_time_now;
 910   // struct timespec tn;
 911   abuf_t *curframe = NULL;
 912   int notified_buffer_empty = 0; // diagnostic only
 913
 914   debug_mutex_lock(&conn->ab_mutex, 30000, 0);
 915
 916   int wait;
 917   long dac_delay = 0; // long because alsa returns a long
 918
 919   int have_sent_prefiller_silence =
 920       0; // set to true when we have sent at least one silent frame to the DAC
 921
 922   pthread_cleanup_push(buffer_get_frame_cleanup_handler,
 923                        (void *)conn); // undo what's been done so far
 924   do {
 925
 926     // get the time
 927     local_time_now = get_absolute_time_in_ns(); // type okay
 928     // debug(3, "buffer_get_frame is iterating");
 929
 930     // we must have timing information before we can do anything here
 931     if (have_timestamp_timing_information(conn)) {
 932
 933       int rco = get_requested_connection_state_to_output();
 934
 935       if (conn->connection_state_to_output != rco) {
 936         conn->connection_state_to_output = rco;
 937         // change happening
 938         if (conn->connection_state_to_output == 0) { // going off
 939           debug(2, "request flush because connection_state_to_output is off");
 940           debug_mutex_lock(&conn->flush_mutex, 1000, 1);
 941           conn->flush_requested = 1;
 942           conn->flush_rtp_timestamp = 0;
 943           debug_mutex_unlock(&conn->flush_mutex, 3);
 944         }
 945       }
 946
 947       if (config.output->is_running)
 948         if (config.output->is_running() != 0) { // if the back end isn't running for any reason
 949           debug(2, "request flush because back end is not running");
 950           debug_mutex_lock(&conn->flush_mutex, 1000, 0);
 951           conn->flush_requested = 1;
 952           conn->flush_rtp_timestamp = 0;
 953           debug_mutex_unlock(&conn->flush_mutex, 0);
 954         }
 955
 956       debug_mutex_lock(&conn->flush_mutex, 1000, 0);
 957       pthread_cleanup_push(mutex_unlock, &conn->flush_mutex);
 958       if (conn->flush_requested == 1) {
 959         if (conn->flush_output_flushed == 0)
 960           if (config.output->flush) {
 961             config.output->flush(); // no cancellation points
 962             debug(2, "flush request: flush output device.");
 963           }
 964         conn->flush_output_flushed = 1;
 965       }
 966       // now check to see it the flush request is for frames in the buffer or not
 967       // if the first_packet_timestamp is zero, don't check
 968       int flush_needed = 0;
 969       int drop_request = 0;
 970       if (conn->flush_requested == 1) {
 971         if (conn->flush_rtp_timestamp == 0) {
 972           debug(1, "flush request: flush frame 0 -- flush assumed to be needed.");
 973           flush_needed = 1;
 974           drop_request = 1;
 975         } else {
 976           if ((conn->ab_synced) && ((conn->ab_write - conn->ab_read) > 0)) {
 977             abuf_t *firstPacket = conn->audio_buffer + BUFIDX(conn->ab_read);
 978             abuf_t *lastPacket = conn->audio_buffer + BUFIDX(conn->ab_write - 1);
 979             if ((firstPacket != NULL) && (firstPacket->ready)) {
 980               uint32_t first_frame_in_buffer = firstPacket->given_timestamp;
 981               int32_t offset_from_first_frame = conn->flush_rtp_timestamp - first_frame_in_buffer;
 982               if ((lastPacket != NULL) && (lastPacket->ready)) {
 983                 // we have enough information to check if the flush is needed or can be discarded
 984                 uint32_t last_frame_in_buffer =
 985                     lastPacket->given_timestamp + lastPacket->length - 1;
 986
 987                 // clang-format off
 988                 // Now we have to work out if the flush frame is in the buffer.
 989
 990                 // If it is later than the end of the buffer, flush everything and keep the
 991                 // request active.
 992
 993                 // If it is in the buffer, we need to flush part of the buffer.
 994                 // (Actually we flush the entire buffer and drop the request.)
 995
 996                 // If it is before the buffer, no flush is needed. Drop the request.
 997                 // clang-format on
 998
 999                 if (offset_from_first_frame > 0) {
1000                   int32_t offset_to_last_frame = last_frame_in_buffer - conn->flush_rtp_timestamp;
1001                   if (offset_to_last_frame >= 0) {
1002                     debug(2,
1003                           "flush request: flush frame %u active -- buffer contains %u frames, from "
1004                           "%u to %u.",
1005                           conn->flush_rtp_timestamp,
1006                           last_frame_in_buffer - first_frame_in_buffer + 1, first_frame_in_buffer,
1007                           last_frame_in_buffer);
1008
1009                     // We need to drop all complete frames leading up to the frame containing
1010                     // the flush request frame.
1011                     int32_t offset_to_flush_frame = 0;
1012                     abuf_t *current_packet = NULL;
1013                     do {
1014                       current_packet = conn->audio_buffer + BUFIDX(conn->ab_read);
1015                       if (current_packet != NULL) {
1016                         uint32_t last_frame_in_current_packet =
1017                             current_packet->given_timestamp + current_packet->length - 1;
1018                         offset_to_flush_frame =
1019                             conn->flush_rtp_timestamp - last_frame_in_current_packet;
1020                         if (offset_to_flush_frame > 0) {
1021                           debug(2,
1022                                 "flush to %u request: flush buffer %u, from "
1023                                 "%u to %u. ab_write is: %u.",
1024                                 conn->flush_rtp_timestamp, conn->ab_read,
1025                                 current_packet->given_timestamp,
1026                                 current_packet->given_timestamp + current_packet->length - 1,
1027                                 conn->ab_write);
1028                           conn->ab_read++;
1029                         }
1030                       } else {
1031                         debug(1, "NULL current_packet");
1032                       }
1033                     } while ((current_packet == NULL) || (offset_to_flush_frame > 0));
1034                     // now remove any frames from the buffer that are before the flush frame itself.
1035                     int32_t frames_to_remove =
1036                         conn->flush_rtp_timestamp - current_packet->given_timestamp;
1037                     if (frames_to_remove > 0) {
1038                       debug(2, "%u frames to remove from current buffer", frames_to_remove);
1039                       void *dest = (void *)current_packet->data;
1040                       void *source = dest + conn->input_bytes_per_frame * frames_to_remove;
1041                       size_t frames_remaining = (current_packet->length - frames_to_remove);
1042                       memmove(dest, source, frames_remaining * conn->input_bytes_per_frame);
1043                       current_packet->given_timestamp = conn->flush_rtp_timestamp;
1044                       current_packet->length = frames_remaining;
1045                     }
1046                     debug(
1047                         2,
1048                         "flush request: flush frame %u complete -- buffer contains %u frames, from "
1049                         "%u to %u -- flushed to %u in buffer %u, with %u frames remaining.",
1050                         conn->flush_rtp_timestamp, last_frame_in_buffer - first_frame_in_buffer + 1,
1051                         first_frame_in_buffer, last_frame_in_buffer,
1052                         current_packet->given_timestamp, conn->ab_read,
1053                         last_frame_in_buffer - current_packet->given_timestamp + 1);
1054                     drop_request = 1;
1055                   } else {
1056                     if (conn->flush_rtp_timestamp == last_frame_in_buffer + 1) {
1057                       debug(
1058                           2,
1059                           "flush request: flush frame %u completed -- buffer contained %u frames, "
1060                           "from "
1061                           "%u to %u",
1062                           conn->flush_rtp_timestamp,
1063                           last_frame_in_buffer - first_frame_in_buffer + 1, first_frame_in_buffer,
1064                           last_frame_in_buffer);
1065                       drop_request = 1;
1066                     } else {
1067                       debug(2,
1068                             "flush request: flush frame %u pending -- buffer contains %u frames, "
1069                             "from "
1070                             "%u to %u",
1071                             conn->flush_rtp_timestamp,
1072                             last_frame_in_buffer - first_frame_in_buffer + 1, first_frame_in_buffer,
1073                             last_frame_in_buffer);
1074                     }
1075                     flush_needed = 1;
1076                   }
1077                 } else {
1078                   debug(2,
1079                         "flush request: flush frame %u expired -- buffer contains %u frames, "
1080                         "from %u "
1081                         "to %u",
1082                         conn->flush_rtp_timestamp, last_frame_in_buffer - first_frame_in_buffer + 1,
1083                         first_frame_in_buffer, last_frame_in_buffer);
1084                   drop_request = 1;
1085                 }
1086               }
1087             }
1088           } else {
1089             debug(3,
1090                   "flush request: flush frame %u  -- buffer not synced or empty: synced: %d, "
1091                   "ab_read: "
1092                   "%u, ab_write: %u",
1093                   conn->flush_rtp_timestamp, conn->ab_synced, conn->ab_read, conn->ab_write);
1094             conn->flush_requested = 0; // remove the request
1095             // leave flush request pending and don't do a buffer flush, because there isn't one
1096           }
1097         }
1098       }
1099       if (flush_needed) {
1100         debug(2, "flush request: flush done.");
1101         ab_resync(conn); // no cancellation points
1102         conn->first_packet_timestamp = 0;
1103         conn->first_packet_time_to_play = 0;
1104         conn->time_since_play_started = 0;
1105         have_sent_prefiller_silence = 0;
1106         dac_delay = 0;
1107       }
1108       if (drop_request) {
1109         conn->flush_requested = 0;
1110         conn->flush_rtp_timestamp = 0;
1111         conn->flush_output_flushed = 0;
1112       }
1113       pthread_cleanup_pop(1); // unlock the conn->flush_mutex
1114
1115       // skip out-of-date frames, and even more if we haven't seen the first frame
1116       int out_of_date = 1;
1117       uint32_t should_be_frame;
1118
1119       uint64_t time_to_aim_for = local_time_now;
1120       uint64_t desired_lead_time = 120000000;
1121       if (conn->first_packet_timestamp == 0)
1122         time_to_aim_for = time_to_aim_for + desired_lead_time;
1123
1124       while ((conn->ab_synced) && ((conn->ab_write - conn->ab_read) > 0) && (out_of_date != 0)) {
1125         abuf_t *thePacket = conn->audio_buffer + BUFIDX(conn->ab_read);
1126         if ((thePacket != NULL) && (thePacket->ready)) {
1127           local_time_to_frame(time_to_aim_for, &should_be_frame, conn);
1128           // debug(1,"should_be frame is %u.",should_be_frame);
1129           int32_t frame_difference = thePacket->given_timestamp - should_be_frame;
1130           if (frame_difference < 0) {
1131             debug(2, "Dropping out of date packet %u with timestamp %u. Lead time is %f seconds.",
1132                   conn->ab_read, thePacket->given_timestamp,
1133                   frame_difference * 1.0 / 44100.0 + desired_lead_time * 0.000000001);
1134             conn->ab_read++;
1135           } else {
1136             if (conn->first_packet_timestamp == 0)
1137               debug(2, "Accepting packet %u with timestamp %u. Lead time is %f seconds.",
1138                     conn->ab_read, thePacket->given_timestamp,
1139                     frame_difference * 1.0 / 44100.0 + desired_lead_time * 0.000000001);
1140             out_of_date = 0;
1141           }
1142         } else {
1143           debug(2, "Packet %u empty or not ready.", conn->ab_read);
1144           conn->ab_read++;
1145         }
1146       }
1147
1148       if (conn->ab_synced) {
1149         curframe = conn->audio_buffer + BUFIDX(conn->ab_read);
1150         if (curframe != NULL) {
1151           uint64_t should_be_time;
1152           frame_to_local_time(curframe->given_timestamp, &should_be_time, conn);
1153           int64_t time_difference = should_be_time - local_time_now;
1154           debug(3, "Check packet from buffer %u, timestamp %u, %f seconds ahead.", conn->ab_read,
1155                 curframe->given_timestamp, 0.000000001 * time_difference);
1156         } else {
1157           debug(3, "Check packet from buffer %u, empty.", conn->ab_read);
1158         }
1159
1160         if ((conn->ab_read != conn->ab_write) &&
1161             (curframe->ready)) { // it could be synced and empty, under
1162                                  // exceptional circumstances, with the
1163                                  // frame unused, thus apparently ready
1164
1165           if (curframe->sequence_number != conn->ab_read) {
1166             // some kind of sync problem has occurred.
1167             if (BUFIDX(curframe->sequence_number) == BUFIDX(conn->ab_read)) {
1168               // it looks like aliasing has happened
1169               // jump to the new incoming stuff...
1170               conn->ab_read = curframe->sequence_number;
1171               debug(1, "Aliasing of buffer index -- reset.");
1172             } else {
1173               debug(1, "Inconsistent sequence numbers detected");
1174             }
1175           }
1176         }
1177
1178         if ((curframe) && (curframe->ready)) {
1179           notified_buffer_empty = 0; // at least one buffer now -- diagnostic only.
1180           if (conn->ab_buffering) {  // if we are getting packets but not yet forwarding them to the
1181                                      // player
1182             if (conn->first_packet_timestamp == 0) { // if this is the very first packet
1183               conn->first_packet_timestamp =
1184                   curframe->given_timestamp; // we will keep buffering until we are
1185                                              // supposed to start playing this
1186
1187               // Here, calculate when we should start playing. We need to know when to allow the
1188               // packets to be sent to the player.
1189
1190               // every second or so, we get a reference on when a particular packet should be
1191               // played.
1192
1193               // It probably won't  be the timestamp of our first packet, however, so we might
1194               // have to do some calculations.
1195
1196               // To calculate when the first packet will be played, we figure out the exact time
1197               // the packet should be played according to its timestamp and the reference time.
1198               // The desired latency, typically 88200 frames, will be calculated for in rtp.c,
1199               // and any desired backend latency offset included in it there.
1200
1201               uint64_t should_be_time;
1202
1203               frame_to_local_time(conn->first_packet_timestamp, // this will go modulo 2^32
1204                                   &should_be_time, conn);
1205
1206               conn->first_packet_time_to_play = should_be_time;
1207
1208               int64_t lt = conn->first_packet_time_to_play - local_time_now;
1209
1210               // can't be too late because we skipped late packets already, FLW.
1211               debug(2, "Connection %d: Lead time for first frame %" PRId64 ": %f seconds.",
1212                     conn->connection_number, conn->first_packet_timestamp, lt * 0.000000001);
1213 #ifdef CONFIG_METADATA
1214               // say we have started receiving frames here
1215               send_ssnc_metadata(
1216                   'pffr', NULL, 0,
1217                   0); // "first frame received", but don't wait if the queue is locked
1218 #endif
1219             }
1220
1221             if (conn->first_packet_time_to_play != 0) {
1222               // Now that we know the timing of the first packet...
1223               if (config.output->delay) {
1224                 // and that the output device is capable of synchronization...
1225
1226                 // We may send packets of
1227                 // silence from now until the time the first audio packet should be sent
1228                 // and then we will send the first packet, which will be followed by
1229                 // the subsequent packets.
1230                 // here, we figure out whether and what silence to send.
1231
1232                 uint64_t should_be_time;
1233
1234                 // readjust first packet time to play
1235                 frame_to_local_time(conn->first_packet_timestamp, // this will go modulo 2^32
1236                                     &should_be_time, conn);
1237
1238                 int64_t change_in_should_be_time =
1239                     (int64_t)(should_be_time - conn->first_packet_time_to_play);
1240
1241                 if (fabs(0.000001 * change_in_should_be_time) >
1242                     0.001) // the clock drift estimation might be nudging the estimate, and we can
1243                            // ignore this unless if's more than a microsecond
1244                   debug(2,
1245                         "Change in estimated first_packet_time: %f milliseconds for first_packet.",
1246                         0.000001 * change_in_should_be_time);
1247
1248                 conn->first_packet_time_to_play = should_be_time;
1249
1250                 int64_t lead_time =
1251                     conn->first_packet_time_to_play - local_time_now; // negative means late
1252                 if (lead_time < 0) {
1253                   debug(1, "Gone past starting time for %u by %" PRId64 " nanoseconds.",
1254                         conn->first_packet_timestamp, -lead_time);
1255                   conn->ab_buffering = 0;
1256                 } else {
1257                   // do some calculations
1258                   if ((config.audio_backend_silent_lead_in_time_auto == 1) ||
1259                       (lead_time <=
1260                        (int64_t)(config.audio_backend_silent_lead_in_time * (int64_t)1000000000))) {
1261                     // debug(1, "Lead time: %" PRId64 " nanoseconds.", lead_time);
1262                     int resp = 0;
1263                     dac_delay = 0;
1264                     if (have_sent_prefiller_silence != 0)
1265                       resp = config.output->delay(
1266                           &dac_delay); // we know the output device must have a delay function
1267                     if (resp == 0) {
1268                       int64_t gross_frame_gap =
1269                           ((conn->first_packet_time_to_play - local_time_now) *
1270                            config.output_rate) /
1271                           1000000000;
1272                       int64_t exact_frame_gap = gross_frame_gap - dac_delay;
1273                       int64_t frames_needed_to_maintain_desired_buffer =
1274                           (int64_t)(config.audio_backend_buffer_desired_length *
1275                                     config.output_rate) -
1276                           dac_delay;
1277                       // below, remember that exact_frame_gap and
1278                       // frames_needed_to_maintain_desired_buffer could both be negative
1279                       int64_t fs = frames_needed_to_maintain_desired_buffer;
1280
1281                       // if there isn't enough time to have the desired buffer size
1282                       if (exact_frame_gap <= frames_needed_to_maintain_desired_buffer) {
1283                         fs = conn->max_frames_per_packet * 2;
1284                       }
1285                       // if we are very close to the end of buffering, i.e. within two
1286                       // frame-lengths, add the remaining silence needed and end buffering
1287                       if (exact_frame_gap <= conn->max_frames_per_packet * 2) {
1288                         fs = exact_frame_gap;
1289                         if (fs > first_frame_early_bias)
1290                           fs = fs - first_frame_early_bias; // deliberately make the first packet a
1291                                                             // tiny bit early so that the player may
1292                                                             // compensate for it at the last minute
1293                         conn->ab_buffering = 0;
1294                       }
1295                       void *silence;
1296                       if (fs > 0) {
1297                         silence = malloc(conn->output_bytes_per_frame * fs);
1298                         if (silence == NULL)
1299                           debug(1, "Failed to allocate %d byte silence buffer.", fs);
1300                         else {
1301                           // generate frames of silence with dither if necessary
1302                           conn->previous_random_number = generate_zero_frames(
1303                               silence, fs, config.output_format, conn->enable_dither,
1304                               conn->previous_random_number);
1305                           config.output->play(silence, fs, play_samples_are_untimed, 0, 0);
1306                           debug(3, "Sent %" PRId64 " frames of silence", fs);
1307                           free(silence);
1308                           have_sent_prefiller_silence = 1;
1309                         }
1310                       }
1311                     } else {
1312
1313                       if (resp == sps_extra_code_output_stalled) {
1314                         if (config.unfixable_error_reported == 0) {
1315                           config.unfixable_error_reported = 1;
1316                           if (config.cmd_unfixable) {
1317                             command_execute(config.cmd_unfixable, "output_device_stalled", 1);
1318                           } else {
1319                             die("an unrecoverable error, \"output_device_stalled\", has been "
1320                                 "detected.");
1321                           }
1322                         }
1323                       } else {
1324                         debug(3, "Unexpected response to getting dac delay: %d.", resp);
1325                       }
1326                     }
1327                   }
1328                 }
1329               } else {
1330                 // if the output device doesn't have a delay, we simply send the lead-in
1331                 int64_t lead_time =
1332                     conn->first_packet_time_to_play - local_time_now; // negative if we are late
1333                 void *silence;
1334                 int64_t frame_gap = (lead_time * config.output_rate) / 1000000000;
1335                 // debug(1,"%d frames needed.",frame_gap);
1336                 while (frame_gap > 0) {
1337                   ssize_t fs = config.output_rate / 10;
1338                   if (fs > frame_gap)
1339                     fs = frame_gap;
1340
1341                   silence = malloc(conn->output_bytes_per_frame * fs);
1342                   if (silence == NULL)
1343                     debug(1, "Failed to allocate %d frame silence buffer.", fs);
1344                   else {
1345                     // debug(1, "No delay function -- outputting %d frames of silence.", fs);
1346                     conn->previous_random_number =
1347                         generate_zero_frames(silence, fs, config.output_format, conn->enable_dither,
1348                                              conn->previous_random_number);
1349                     config.output->play(silence, fs, play_samples_are_untimed, 0, 0);
1350                     free(silence);
1351                   }
1352                   frame_gap -= fs;
1353                 }
1354                 conn->ab_buffering = 0;
1355               }
1356             }
1357 #ifdef CONFIG_METADATA
1358             if (conn->ab_buffering == 0) {
1359               send_ssnc_metadata('prsm', NULL, 0,
1360                                  0); // "resume", but don't wait if the queue is locked
1361             }
1362 #endif
1363           }
1364         }
1365       }
1366
1367       // Here, we work out whether to release a packet or wait
1368       // We release a packet when the time is right.
1369
1370       // To work out when the time is right, we need to take account of (1) the actual time the
1371       // packet should be released, (2) the latency requested, (3) the audio backend latency offset
1372       // and (4) the desired length of the audio backend's buffer
1373
1374       // The time is right if the current time is later or the same as
1375       // The packet time + (latency + latency offset - backend_buffer_length).
1376       // Note: the last three items are expressed in frames and must be converted to time.
1377
1378       int do_wait = 0; // don't wait unless we can really prove we must
1379       if ((conn->ab_synced) && (curframe) && (curframe->ready) && (curframe->given_timestamp)) {
1380         do_wait = 1; // if the current frame exists and is ready, then wait unless it's time to let
1381                      // it go...
1382
1383         // here, get the time to play the current frame.
1384
1385         if (have_timestamp_timing_information(conn)) { // if we have a reference time
1386
1387           uint64_t time_to_play;
1388
1389           // we must enable packets to be released early enough for the
1390           // audio buffer to be filled to the desired length
1391
1392           uint32_t buffer_latency_offset =
1393               (uint32_t)(config.audio_backend_buffer_desired_length * conn->input_rate);
1394           frame_to_local_time(curframe->given_timestamp -
1395                                   buffer_latency_offset, // this will go modulo 2^32
1396                               &time_to_play, conn);
1397
1398           if (local_time_now >= time_to_play) {
1399             do_wait = 0;
1400           }
1401           // here, do a sanity check. if the time_to_play is not within a few seconds of the
1402           // time now, the frame is probably not meant to be there, so let it go.
1403           if (do_wait != 0) {
1404             // this is a hack.
1405             // we subtract two 2^n unsigned numbers and get a signed 2^n result.
1406             // If we think of the calculation as occurring in modulo 2^n arithmetic
1407             // then the signed result's magnitude represents the shorter distance around
1408             // the modulo wheel of values from one number to the other.
1409             // The sign indicates the direction: positive means clockwise (upwards) from the
1410             // second number to the first (i.e. the first number comes "after" the second).
1411
1412             int64_t time_difference = local_time_now - time_to_play;
1413             if ((time_difference > 10000000000) || (time_difference < -10000000000)) {
1414               debug(2,
1415                     "crazy time interval of %f seconds between time now: 0x%" PRIx64
1416                     " and time of packet: %" PRIx64 ".",
1417                     0.000000001 * time_difference, local_time_now, time_to_play);
1418               debug(2, "packet rtptime: %u, reference_timestamp: %u", curframe->given_timestamp,
1419                     conn->anchor_rtptime);
1420
1421               do_wait = 0; // let it go
1422             }
1423           }
1424         }
1425       }
1426       if (do_wait == 0)
1427         if ((conn->ab_synced != 0) && (conn->ab_read == conn->ab_write)) { // the buffer is empty!
1428           if (notified_buffer_empty == 0) {
1429             debug(2, "Connection %d: Buffer Empty", conn->connection_number);
1430             notified_buffer_empty = 1;
1431             // reset_input_flow_metrics(conn); // don't do a full flush parameters reset
1432             conn->initial_reference_time = 0;
1433             conn->initial_reference_timestamp = 0;
1434             conn->first_packet_timestamp = 0; // make sure the first packet isn't late
1435           }
1436           do_wait = 1;
1437         }
1438       wait = (conn->ab_buffering || (do_wait != 0) || (!conn->ab_synced));
1439     } else {
1440       wait = 1; // keep waiting until the timing information becomes available
1441     }
1442     if (wait) {
1443       if (conn->input_rate == 0)
1444         die("input_rate is zero -- should never happen!");
1445       uint64_t time_to_wait_for_wakeup_ns =
1446           1000000000 / conn->input_rate;      // this is time period of one frame
1447       time_to_wait_for_wakeup_ns *= 12 * 352; // two full 352-frame packets
1448       time_to_wait_for_wakeup_ns /= 3;        // two thirds of a packet time
1449
1450 #ifdef COMPILE_FOR_LINUX_AND_FREEBSD_AND_CYGWIN_AND_OPENBSD
1451       uint64_t time_of_wakeup_ns = get_realtime_in_ns() + time_to_wait_for_wakeup_ns;
1452       uint64_t sec = time_of_wakeup_ns / 1000000000;
1453       uint64_t nsec = time_of_wakeup_ns % 1000000000;
1454
1455       struct timespec time_of_wakeup;
1456       time_of_wakeup.tv_sec = sec;
1457       time_of_wakeup.tv_nsec = nsec;
1458
1459       int rc = pthread_cond_timedwait(&conn->flowcontrol, &conn->ab_mutex,
1460                                       &time_of_wakeup); // this is a pthread cancellation point
1461       if ((rc != 0) && (rc != ETIMEDOUT))
1462         debug(3, "pthread_cond_timedwait returned error code %d.", rc);
1463 #endif
1464 #ifdef COMPILE_FOR_OSX
1465       uint64_t sec = time_to_wait_for_wakeup_ns / 1000000000;
1466       uint64_t nsec = time_to_wait_for_wakeup_ns % 1000000000;
1467       struct timespec time_to_wait;
1468       time_to_wait.tv_sec = sec;
1469       time_to_wait.tv_nsec = nsec;
1470       pthread_cond_timedwait_relative_np(&conn->flowcontrol, &conn->ab_mutex, &time_to_wait);
1471 #endif
1472     }
1473   } while (wait);
1474
1475   // seq_t read = conn->ab_read;
1476   if (curframe) {
1477     if (!curframe->ready) {
1478       // debug(1, "Supplying a silent frame for frame %u", read);
1479       conn->missing_packets++;
1480       curframe->given_timestamp = 0; // indicate a silent frame should be substituted
1481     }
1482     curframe->ready = 0;
1483   }
1484   conn->ab_read++;
1485   pthread_cleanup_pop(1);
1486   return curframe;
1487 }
1488
// Returns the arithmetic mean of two signed 32-bit samples, truncated towards zero.
// The sum is formed in 64 bits, so it cannot overflow. The mean of two int32_t values
// always lies between them, so narrowing the result back to int32_t is lossless and
// needs no range check.
static inline int32_t mean_32(int32_t a, int32_t b) {
  int64_t sum = (int64_t)a + (int64_t)b;
  return (int32_t)(sum / 2);
}
1498
1499 // this takes an array of signed 32-bit integers and (a) removes or inserts a frame as specified in
1500 // stuff,
1501 // (b) multiplies each sample by the fixedvolume (a 16-bit quantity)
1502 // (c) dithers the result to the output size 32/24/16/8 bits
// (d) outputs the result in the appropriate format
1504 // formats accepted so far include U8, S8, S16, S24, S24_3LE, S24_3BE and S32
1505
1506 // stuff: 1 means add 1; 0 means do nothing; -1 means remove 1
1507 static int stuff_buffer_basic_32(int32_t *inptr, int length, sps_format_t l_output_format,
1508                                  char *outptr, int stuff, int dither, rtsp_conn_info *conn) {
1509   if (length < 3)
1510     die("buffer length expected to be 3 or more, but is %d!", length);
1511   int tstuff = stuff;
1512   char *l_outptr = outptr;
1513   if ((stuff > 1) || (stuff < -1) || (length < 100)) {
1514     // debug(1, "Stuff argument to stuff_buffer must be from -1 to +1 and length >100.");
1515     tstuff = 0; // if any of these conditions hold, don't stuff anything/
1516   }
1517
1518   int i;
1519   int stuffsamp = length;
1520   if (tstuff)
1521     //      stuffsamp = rand() % (length - 1);
1522     stuffsamp =
1523         (rand() % (length - 2)) + 1; // ensure there's always a sample before and after the item
1524
1525   for (i = 0; i < stuffsamp; i++) { // the whole frame, if no stuffing
1526     process_sample(*inptr++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1527     process_sample(*inptr++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1528   };
1529   if (tstuff) {
1530     if (tstuff == 1) {
1531       // debug(3, "+++++++++");
1532       // interpolate one sample
1533       process_sample(mean_32(inptr[-2], inptr[0]), &l_outptr, l_output_format, conn->fix_volume,
1534                      dither, conn);
1535       process_sample(mean_32(inptr[-1], inptr[1]), &l_outptr, l_output_format, conn->fix_volume,
1536                      dither, conn);
1537     } else if (stuff == -1) {
1538       // debug(3, "---------");
1539       inptr++;
1540       inptr++;
1541     }
1542
1543     // if you're removing, i.e. stuff < 0, copy that much less over. If you're adding, do all the
1544     // rest.
1545     int remainder = length;
1546     if (tstuff < 0)
1547       remainder = remainder + tstuff; // don't run over the correct end of the output buffer
1548
1549     for (i = stuffsamp; i < remainder; i++) {
1550       process_sample(*inptr++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1551       process_sample(*inptr++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1552     }
1553   }
1554   conn->amountStuffed = tstuff;
1555   return length + tstuff;
1556 }
1557
1558 #ifdef CONFIG_SOXR
1559 // this takes an array of signed 32-bit integers and
1560 // (a) uses libsoxr to
1561 // resample the array to have one more or one less frame, as specified in
1562 // stuff,
1563 // (b) multiplies each sample by the fixedvolume (a 16-bit quantity)
1564 // (c) dithers the result to the output size 32/24/16/8 bits
// (d) outputs the result in the appropriate format
1566 // formats accepted so far include U8, S8, S16, S24, S24_3LE, S24_3BE and S32
1567
// Running statistics on the execution time of soxr_oneshot(), maintained by
// stuff_buffer_soxr_32(). Times are held in seconds. All except packets_processed are reset
// each time the statistics are reported.
int32_t stat_n = 0;    // number of timed soxr_oneshot() calls since the last report
double stat_mean = 0.0; // running mean of the execution time
double stat_M2 = 0.0;   // running sum of squared differences from the mean, used for the variance
double longest_soxr_execution_time = 0.0; // longest single execution time since the last report
int64_t packets_processed = 0; // number of calls made to stuff_buffer_soxr_32()
1573
1574 int stuff_buffer_soxr_32(int32_t *inptr, int32_t *scratchBuffer, int length,
1575                          sps_format_t l_output_format, char *outptr, int stuff, int dither,
1576                          rtsp_conn_info *conn) {
1577   if (scratchBuffer == NULL) {
1578     die("soxr scratchBuffer not initialised.");
1579   }
1580   packets_processed++;
1581   int tstuff = stuff;
1582   if ((stuff > 1) || (stuff < -1) || (length < 100)) {
1583     // debug(1, "Stuff argument to stuff_buffer must be from -1 to +1 and length >100.");
1584     tstuff = 0; // if any of these conditions hold, don't stuff anything/
1585   }
1586
1587   if (tstuff) {
1588     // debug(1,"Stuff %d.",stuff);
1589
1590     soxr_io_spec_t io_spec;
1591     io_spec.itype = SOXR_INT32_I;
1592     io_spec.otype = SOXR_INT32_I;
1593     io_spec.scale = 1.0; // this seems to crash if not = 1.0
1594     io_spec.e = NULL;
1595     io_spec.flags = 0;
1596
1597     size_t odone;
1598
1599     uint64_t soxr_start_time = get_absolute_time_in_ns();
1600
1601     soxr_error_t error = soxr_oneshot(length, length + tstuff, 2, // Rates and # of chans.
1602                                       inptr, length, NULL,        // Input.
1603                                       scratchBuffer, length + tstuff, &odone, // Output.
1604                                       &io_spec,    // Input, output and transfer spec.
1605                                       NULL, NULL); // Default configuration.
1606
1607     if (error)
1608       die("soxr error: %s\n", "error: %s\n", soxr_strerror(error));
1609
1610     if (odone > (size_t)(length + 1))
1611       die("odone = %u!\n", odone);
1612
1613     // mean and variance calculations from "online_variance" algorithm at
1614     // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
1615
1616     double soxr_execution_time = (get_absolute_time_in_ns() - soxr_start_time) * 0.000000001;
1617     // debug(1,"soxr_execution_time_us: %10.1f",soxr_execution_time_us);
1618     if (soxr_execution_time > longest_soxr_execution_time)
1619       longest_soxr_execution_time = soxr_execution_time;
1620     stat_n += 1;
1621     double stat_delta = soxr_execution_time - stat_mean;
1622     if (stat_n != 0)
1623       stat_mean += stat_delta / stat_n;
1624     else
1625       warn("calculation error for stat_n");
1626     stat_M2 += stat_delta * (soxr_execution_time - stat_mean);
1627
1628     int i;
1629     int32_t *ip, *op;
1630     ip = inptr;
1631     op = scratchBuffer;
1632
1633     const int gpm = 5;
1634     // keep the first (dpm) samples, to mitigate the Gibbs phenomenon
1635     for (i = 0; i < gpm; i++) {
1636       *op++ = *ip++;
1637       *op++ = *ip++;
1638     }
1639
1640     // keep the last (dpm) samples, to mitigate the Gibbs phenomenon
1641
1642     // pointer arithmetic, baby -- it's da bomb.
1643     op = scratchBuffer + (length + tstuff - gpm) * 2;
1644     ip = inptr + (length - gpm) * 2;
1645     for (i = 0; i < gpm; i++) {
1646       *op++ = *ip++;
1647       *op++ = *ip++;
1648     }
1649
1650     // now, do the volume, dither and formatting processing
1651     ip = scratchBuffer;
1652     char *l_outptr = outptr;
1653     for (i = 0; i < length + tstuff; i++) {
1654       process_sample(*ip++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1655       process_sample(*ip++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1656     };
1657
1658   } else { // the whole frame, if no stuffing
1659
1660     // now, do the volume, dither and formatting processing
1661     int32_t *ip = inptr;
1662     char *l_outptr = outptr;
1663     int i;
1664
1665     for (i = 0; i < length; i++) {
1666       process_sample(*ip++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1667       process_sample(*ip++, &l_outptr, l_output_format, conn->fix_volume, dither, conn);
1668     };
1669   }
1670
1671   if (packets_processed % 1250 == 0) {
1672     debug(3,
1673           "soxr_oneshot execution time in nanoseconds: mean, standard deviation and max "
1674           "for %" PRId32 " interpolations in the last "
1675           "1250 packets. %10.6f, %10.6f, %10.6f.",
1676           stat_n, stat_mean, stat_n <= 1 ? 0.0 : sqrtf(stat_M2 / (stat_n - 1)),
1677           longest_soxr_execution_time);
1678     stat_n = 0;
1679     stat_mean = 0.0;
1680     stat_M2 = 0.0;
1681     longest_soxr_execution_time = 0.0;
1682   }
1683
1684   conn->amountStuffed = tstuff;
1685   return length + tstuff;
1686 }
1687 #endif
1688
1689 void player_thread_initial_cleanup_handler(__attribute__((unused)) void *arg) {
1690   rtsp_conn_info *conn = (rtsp_conn_info *)arg;
1691   debug(3, "Connection %d: player thread main loop exit via player_thread_initial_cleanup_handler.",
1692         conn->connection_number);
1693 }
1694
// State shared by the statistics_item() calls that build up one line of statistics output.

char line_of_stats[1024]; // the line being assembled; statistics_item() appends to it
int statistics_row; // 0 means statistics_item() adds the column headings; anything else means it
                    // adds the formatted values. Set to 0 the first time out.
int statistics_column; // index into the statistics_print_profile array of the item currently being
                       // considered; incremented by every statistics_item() call
int was_a_previous_column; // non-zero once a column has been added to the line, so that a
                           // separator is placed before the next one
int *statistics_print_profile; // the print profile (one of the arrays below) currently in use

// These arrays specify which of the statistics specified by the statistics_item calls will actually
// be printed -- 2 means print, 1 means print only in a debug mode (i.e. when debuglev is non-zero),
// 0 means skip. Each array has one entry per statistics_item call, in calling order.

// clang-format off
int ap1_synced_statistics_print_profile[] =                  {2, 2, 2, 0, 2, 1, 1, 2, 1, 1, 1, 0, 1, 1, 2, 2, 1, 1};
int ap1_nosync_statistics_print_profile[] =                  {2, 0, 0, 0, 2, 1, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0};
int ap1_nodelay_statistics_print_profile[] =                 {0, 0, 0, 0, 2, 1, 1, 2, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0};

int ap2_realtime_synced_stream_statistics_print_profile[] =  {2, 2, 2, 0, 2, 1, 1, 2, 1, 1, 1, 0, 0, 1, 2, 2, 0, 0};
int ap2_realtime_nosync_stream_statistics_print_profile[] =  {2, 0, 0, 0, 2, 1, 1, 2, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0};
int ap2_realtime_nodelay_stream_statistics_print_profile[] = {0, 0, 0, 0, 2, 1, 1, 2, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0};

int ap2_buffered_synced_stream_statistics_print_profile[] =  {2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 2, 2, 0, 0};
int ap2_buffered_nosync_stream_statistics_print_profile[] =  {2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0};
int ap2_buffered_nodelay_stream_statistics_print_profile[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0};
// clang-format on
1719
1720 void statistics_item(const char *heading, const char *format, ...) {
1721   if (((statistics_print_profile[statistics_column] == 1) && (debuglev != 0)) ||
1722       (statistics_print_profile[statistics_column] == 2)) { // include this column?
1723     if (was_a_previous_column != 0) {
1724       if (statistics_row == 0)
1725         strcat(line_of_stats, " | ");
1726       else
1727         strcat(line_of_stats, "   ");
1728     }
1729     if (statistics_row == 0) {
1730       strcat(line_of_stats, heading);
1731     } else {
1732       char b[1024];
1733       b[0] = 0;
1734       va_list args;
1735       va_start(args, format);
1736       vsnprintf(b, sizeof(b), format, args);
1737       va_end(args);
1738       strcat(line_of_stats, b);
1739     }
1740     was_a_previous_column = 1;
1741   }
1742   statistics_column++;
1743 }
1744
1745 double suggested_volume(rtsp_conn_info *conn) {
1746   double response = config.airplay_volume;
1747   if ((conn != NULL) && (conn->own_airplay_volume_set != 0)) {
1748     response = conn->own_airplay_volume;
1749   } else if (config.airplay_volume > config.high_threshold_airplay_volume) {
1750     int64_t volume_validity_time = config.limit_to_high_volume_threshold_time_in_minutes;
1751     // zero means never check the volume
1752     if (volume_validity_time != 0) {
1753       // If the volume is higher than the high volume threshold
1754       // and enough time has gone past, suggest the default volume.
1755       uint64_t time_now = get_absolute_time_in_ns();
1756       int64_t time_since_last_access_to_volume_info =
1757           time_now - config.last_access_to_volume_info_time;
1758
1759       volume_validity_time = volume_validity_time * 60;         // to seconds
1760       volume_validity_time = volume_validity_time * 1000000000; // to nanoseconds
1761
1762       if ((config.airplay_volume > config.high_threshold_airplay_volume) &&
1763           ((config.last_access_to_volume_info_time == 0) ||
1764            (time_since_last_access_to_volume_info > volume_validity_time))) {
1765
1766         debug(2,
1767               "the current volume %.6f is higher than the high volume threshold %.6f, so the "
1768               "default volume %.6f is suggested.",
1769               config.airplay_volume, config.high_threshold_airplay_volume,
1770               config.default_airplay_volume);
1771         response = config.default_airplay_volume;
1772       }
1773     }
1774   }
1775   return response;
1776 }
1777
1778 void player_thread_cleanup_handler(void *arg) {
1779   rtsp_conn_info *conn = (rtsp_conn_info *)arg;
1780
1781   if ((principal_conn == conn) && (conn != NULL)) {
1782     if (config.output->stop) {
1783       debug(2, "Connection %d: Stop the output backend.", conn->connection_number);
1784       config.output->stop();
1785     }
1786   } else {
1787     if (conn != NULL)
1788       debug(1, "Connection %d: this conn is not the principal_conn.", conn->connection_number);
1789   }
1790
1791   int oldState;
1792   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldState);
1793   debug(3, "Connection %d: player thread main loop exit via player_thread_cleanup_handler.",
1794         conn->connection_number);
1795
1796   if (config.statistics_requested) {
1797     int64_t time_playing = get_absolute_time_in_ns() - conn->playstart;
1798     time_playing = time_playing / 1000000000;
1799     int64_t elapsedHours = time_playing / 3600;
1800     int64_t elapsedMin = (time_playing / 60) % 60;
1801     int64_t elapsedSec = time_playing % 60;
1802     if (conn->frame_rate_valid)
1803       inform("Connection %d: Playback stopped. Total playing time %02" PRId64 ":%02" PRId64
1804              ":%02" PRId64 ". "
1805              "Output: %0.2f (raw), %0.2f (corrected) "
1806              "frames per second.",
1807              conn->connection_number, elapsedHours, elapsedMin, elapsedSec, conn->raw_frame_rate,
1808              conn->corrected_frame_rate);
1809     else
1810       inform("Connection %d: Playback stopped. Total playing time %02" PRId64 ":%02" PRId64
1811              ":%02" PRId64 ".",
1812              conn->connection_number, elapsedHours, elapsedMin, elapsedSec);
1813   }
1814
1815 #ifdef CONFIG_DACP_CLIENT
1816   relinquish_dacp_server_information(
1817       conn); // say it doesn't belong to this conversation thread any more...
1818 #else
1819   mdns_dacp_monitor_set_id(NULL); // say we're not interested in following that DACP id any more
1820 #endif
1821
1822   // four possibilities
1823   // 1 -- Classic Airplay -- "AirPlay 1"
1824   // 2 -- AirPlay 2 in Classic Airplay mode
1825   // 3 -- AirPlay 2 in Buffered Audio Mode
1826   // 4 -- AirPlay 3 in Realtime Audio Mode.
1827
1828 #ifdef CONFIG_AIRPLAY_2
1829   if (conn->airplay_type == ap_2) {
1830     debug(2, "Cancelling AP2 timing, control and audio threads...");
1831
1832     if (conn->airplay_stream_type == realtime_stream) {
1833       debug(2, "Connection %d: Delete Realtime Audio Stream thread", conn->connection_number);
1834       pthread_cancel(conn->rtp_realtime_audio_thread);
1835       pthread_join(conn->rtp_realtime_audio_thread, NULL);
1836
1837     } else if (conn->airplay_stream_type == buffered_stream) {
1838
1839       debug(2, "Connection %d: Delete Buffered Audio Stream thread", conn->connection_number);
1840       pthread_cancel(conn->rtp_buffered_audio_thread);
1841       pthread_join(conn->rtp_buffered_audio_thread, NULL);
1842
1843     } else {
1844       die("Unrecognised Stream Type");
1845     }
1846
1847     debug(2, "Connection %d: Delete AirPlay 2 Control thread");
1848     pthread_cancel(conn->rtp_ap2_control_thread);
1849     pthread_join(conn->rtp_ap2_control_thread, NULL);
1850
1851   } else {
1852     debug(2, "Cancelling AP1-compatible timing, control and audio threads...");
1853 #else
1854   debug(2, "Cancelling AP1 timing, control and audio threads...");
1855 #endif
1856     debug(3, "Cancel timing thread.");
1857     pthread_cancel(conn->rtp_timing_thread);
1858     debug(3, "Join timing thread.");
1859     pthread_join(conn->rtp_timing_thread, NULL);
1860     debug(3, "Timing thread terminated.");
1861     debug(3, "Cancel control thread.");
1862     pthread_cancel(conn->rtp_control_thread);
1863     debug(3, "Join control thread.");
1864     pthread_join(conn->rtp_control_thread, NULL);
1865     debug(3, "Control thread terminated.");
1866     debug(3, "Cancel audio thread.");
1867     pthread_cancel(conn->rtp_audio_thread);
1868     debug(3, "Join audio thread.");
1869     pthread_join(conn->rtp_audio_thread, NULL);
1870     debug(3, "Audio thread terminated.");
1871 #ifdef CONFIG_AIRPLAY_2
1872   }
1873   // ptp_send_control_message_string("T"); // remove all timing peers to force the master to 0
1874   reset_anchor_info(conn);
1875 #endif
1876
1877   if (conn->outbuf) {
1878     free(conn->outbuf);
1879     conn->outbuf = NULL;
1880   }
1881   if (conn->sbuf) {
1882     free(conn->sbuf);
1883     conn->sbuf = NULL;
1884   }
1885   if (conn->tbuf) {
1886     free(conn->tbuf);
1887     conn->tbuf = NULL;
1888   }
1889
1890   if (conn->statistics) {
1891     free(conn->statistics);
1892     conn->statistics = NULL;
1893   }
1894   free_audio_buffers(conn);
1895   if (conn->stream.type == ast_apple_lossless)
1896     terminate_decoders(conn);
1897
1898   conn->rtp_running = 0;
1899   pthread_setcancelstate(oldState, NULL);
1900   debug(2, "Connection %d: player terminated.", conn->connection_number);
1901 }
1902
1903 void *player_thread_func(void *arg) {
1904   rtsp_conn_info *conn = (rtsp_conn_info *)arg;
1905 #ifdef CONFIG_METADATA
1906   uint64_t time_of_last_metadata_progress_update =
1907       0; // the assignment is to stop a compiler warning...
1908 #endif
1909   uint64_t previous_frames_played = 0; // initialised to avoid a "possibly uninitialised" warning
1910   uint64_t previous_raw_measurement_time =
1911       0; // initialised to avoid a "possibly uninitialised" warning
1912   uint64_t previous_corrected_measurement_time =
1913       0; // initialised to avoid a "possibly uninitialised" warning
1914   int previous_frames_played_valid = 0;
1915
1916   // pthread_cleanup_push(player_thread_initial_cleanup_handler, arg);
1917   conn->latency_warning_issued =
1918       0; // be permitted to generate a warning each time a play is attempted
1919   conn->packet_count = 0;
1920   conn->packet_count_since_flush = 0;
1921   conn->previous_random_number = 0;
1922   conn->decoder_in_use = 0;
1923   conn->ab_buffering = 1;
1924   conn->ab_synced = 0;
1925   conn->first_packet_timestamp = 0;
1926   conn->flush_requested = 0;
1927   conn->flush_output_flushed = 0; // only send a flush command to the output device once
1928   conn->flush_rtp_timestamp = 0;  // it seems this number has a special significance -- it seems to
1929                                   // be used as a null operand, so we'll use it like that too
1930   conn->fix_volume = 0x10000;
1931
1932 #ifdef CONFIG_AIRPLAY_2
1933   conn->ap2_flush_requested = 0;
1934   conn->ap2_flush_from_valid = 0;
1935   conn->ap2_rate = 0;
1936   conn->ap2_play_enabled = 0;
1937 #endif
1938
1939   // reset_anchor_info(conn);
1940
1941   if (conn->stream.type == ast_apple_lossless)
1942     init_alac_decoder((int32_t *)&conn->stream.fmtp,
1943                       conn); // this sets up incoming rate, bit depth, channels.
1944                              // No pthread cancellation point in here
1945   // This must be after init_alac_decoder
1946   init_buffer(conn); // will need a corresponding deallocation. No cancellation points in here
1947   ab_resync(conn);
1948
1949   if (conn->stream.encrypted) {
1950 #ifdef CONFIG_MBEDTLS
1951     memset(&conn->dctx, 0, sizeof(mbedtls_aes_context));
1952     mbedtls_aes_setkey_dec(&conn->dctx, conn->stream.aeskey, 128);
1953 #endif
1954
1955 #ifdef CONFIG_POLARSSL
1956     memset(&conn->dctx, 0, sizeof(aes_context));
1957     aes_setkey_dec(&conn->dctx, conn->stream.aeskey, 128);
1958 #endif
1959   }
1960
1961   conn->timestamp_epoch = 0; // indicate that the next timestamp will be the first one.
1962   conn->maximum_timestamp_interval = conn->input_rate * 60; // actually there shouldn't be more than
1963                                                             // about 13 seconds of a gap between
1964                                                             // successive rtptimes, at worst
1965
1966   conn->output_sample_ratio = config.output_rate / conn->input_rate;
1967
1968   // Sign extending rtptime calculations to 64 bit is needed from time to time.
1969
1970   // The standard rtptime is unsigned 32 bits,
1971   // so you can do modulo 2^32 difference calculations
1972   // and get a signed result simply by typing the result as a signed 32-bit number.
1973
1974   // So long as you can be sure the numbers are within 2^31 of each other,
1975   // the sign of the result calculated in this way indicates the order of the operands.
1976   // For example, if you subtract a from b and the result is positive, you can conclude
1977   // b is the same as or comes after a in module 2^32 order.
1978
1979   // We want to do the same with the rtptime calculations for multiples of
1980   // the rtptimes (1, 2, 4 or 8 times), and we want to do this in signed 64-bit/
1981   // Therefore we need to sign extend these modulo 2^32, 2^33, 2^34, or 2^35 bit unsigned
1982   // numbers on the same basis.
1983
1984   // That is what the output_rtptime_sign_bit, output_rtptime_mask, output_rtptime_mask_not and
1985   // output_rtptime_sign_mask are for -- see later, calculating the sync error.
1986
1987   int output_rtptime_sign_bit;
1988   switch (conn->output_sample_ratio) {
1989   case 1:
1990     output_rtptime_sign_bit = 31;
1991     break;
1992   case 2:
1993     output_rtptime_sign_bit = 32;
1994     break;
1995   case 4:
1996     output_rtptime_sign_bit = 33;
1997     break;
1998   case 8:
1999     output_rtptime_sign_bit = 34;
2000     break;
2001   default:
2002     debug(1, "error with output ratio -- can't calculate sign bit number");
2003     output_rtptime_sign_bit = 31;
2004     break;
2005   }
2006
2007   //  debug(1, "Output sample ratio is %d.", conn->output_sample_ratio);
2008   //  debug(1, "Output output_rtptime_sign_bit: %d.", output_rtptime_sign_bit);
2009
2010   int64_t output_rtptime_mask = 1;
2011   output_rtptime_mask = output_rtptime_mask << (output_rtptime_sign_bit + 1);
2012   output_rtptime_mask = output_rtptime_mask - 1;
2013
2014   int64_t output_rtptime_mask_not = output_rtptime_mask;
2015   output_rtptime_mask_not = ~output_rtptime_mask;
2016
2017   int64_t output_rtptime_sign_mask = 1;
2018   output_rtptime_sign_mask = output_rtptime_sign_mask << output_rtptime_sign_bit;
2019
2020   conn->max_frame_size_change =
2021       1 * conn->output_sample_ratio; // we add or subtract one frame at the nominal
2022                                      // rate, multiply it by the frame ratio.
2023                                      // but, on some occasions, more than one frame could be added
2024
2025   switch (config.output_format) {
2026   case SPS_FORMAT_S24_3LE:
2027   case SPS_FORMAT_S24_3BE:
2028     conn->output_bytes_per_frame = 6;
2029     break;
2030
2031   case SPS_FORMAT_S24:
2032   case SPS_FORMAT_S24_LE:
2033   case SPS_FORMAT_S24_BE:
2034     conn->output_bytes_per_frame = 8;
2035     break;
2036   case SPS_FORMAT_S32:
2037   case SPS_FORMAT_S32_LE:
2038   case SPS_FORMAT_S32_BE:
2039     conn->output_bytes_per_frame = 8;
2040     break;
2041   default:
2042     conn->output_bytes_per_frame = 4;
2043   }
2044
2045   debug(3, "Output frame bytes is %d.", conn->output_bytes_per_frame);
2046
2047   conn->dac_buffer_queue_minimum_length = (uint64_t)(
2048       config.audio_backend_buffer_interpolation_threshold_in_seconds * config.output_rate);
2049   debug(3, "dac_buffer_queue_minimum_length is %" PRIu64 " frames.",
2050         conn->dac_buffer_queue_minimum_length);
2051
2052   conn->session_corrections = 0;
2053   conn->connection_state_to_output = get_requested_connection_state_to_output();
2054 // this is about half a minute
2055 // #define trend_interval 3758
2056
2057 // this is about 8 seconds
2058 #define trend_interval 1003
2059
2060   int number_of_statistics, oldest_statistic, newest_statistic;
2061   int frames_seen_in_this_logging_interval = 0;
2062   int at_least_one_frame_seen_this_session = 0;
2063   int64_t tsum_of_sync_errors, tsum_of_corrections, tsum_of_insertions_and_deletions,
2064       tsum_of_drifts;
2065   int64_t previous_sync_error = 0, previous_correction = 0;
2066   uint64_t minimum_dac_queue_size;
2067   int32_t minimum_buffer_occupancy;
2068   int32_t maximum_buffer_occupancy;
2069
2070 #ifdef CONFIG_AIRPLAY_2
2071   conn->ap2_audio_buffer_minimum_size = -1;
2072 #endif
2073
2074   conn->raw_frame_rate = 0.0;
2075   conn->corrected_frame_rate = 0.0;
2076   conn->frame_rate_valid = 0;
2077
2078   conn->input_frame_rate = 0.0;
2079   conn->input_frame_rate_starting_point_is_valid = 0;
2080
2081   conn->buffer_occupancy = 0;
2082
2083   int play_samples = 0;
2084   uint64_t current_delay;
2085   int play_number = 0;
2086   conn->play_number_after_flush = 0;
2087   //  int last_timestamp = 0; // for debugging only
2088   conn->time_of_last_audio_packet = 0;
2089   // conn->shutdown_requested = 0;
2090   number_of_statistics = oldest_statistic = newest_statistic = 0;
2091   tsum_of_sync_errors = tsum_of_corrections = tsum_of_insertions_and_deletions = tsum_of_drifts = 0;
2092
2093   const int print_interval = trend_interval; // don't ask...
2094   // I think it's useful to keep this prime to prevent it from falling into a pattern with some
2095   // other process.
2096
2097   static char rnstate[256];
2098   initstate(time(NULL), rnstate, 256);
2099
2100   signed short *inbuf;
2101   int inbuflength;
2102
2103   unsigned int output_bit_depth = 16; // default;
2104
2105   switch (config.output_format) {
2106   case SPS_FORMAT_S8:
2107   case SPS_FORMAT_U8:
2108     output_bit_depth = 8;
2109     break;
2110   case SPS_FORMAT_S16:
2111   case SPS_FORMAT_S16_LE:
2112   case SPS_FORMAT_S16_BE:
2113     output_bit_depth = 16;
2114     break;
2115   case SPS_FORMAT_S24:
2116   case SPS_FORMAT_S24_LE:
2117   case SPS_FORMAT_S24_BE:
2118   case SPS_FORMAT_S24_3LE:
2119   case SPS_FORMAT_S24_3BE:
2120     output_bit_depth = 24;
2121     break;
2122   case SPS_FORMAT_S32:
2123   case SPS_FORMAT_S32_LE:
2124   case SPS_FORMAT_S32_BE:
2125     output_bit_depth = 32;
2126     break;
2127   case SPS_FORMAT_UNKNOWN:
2128     die("Unknown format choosing output bit depth");
2129     break;
2130   case SPS_FORMAT_AUTO:
2131     die("Invalid format -- SPS_FORMAT_AUTO -- choosing output bit depth");
2132     break;
2133   case SPS_FORMAT_INVALID:
2134     die("Invalid format -- SPS_FORMAT_INVALID -- choosing output bit depth");
2135     break;
2136   }
2137
2138   debug(3, "Output bit depth is %d.", output_bit_depth);
2139
2140   if (conn->input_bit_depth > output_bit_depth) {
2141     debug(3, "Dithering will be enabled because the input bit depth is greater than the output bit "
2142              "depth");
2143   }
2144   if (config.output->parameters == NULL) {
2145     debug(3, "Dithering will be enabled because the output volume is being altered in software");
2146   }
2147
2148   if ((config.output->parameters == NULL) || (conn->input_bit_depth > output_bit_depth) ||
2149       (config.playback_mode == ST_mono))
2150     conn->enable_dither = 1;
2151
2152   // remember, the output device may never have been initialised prior to this call
2153   config.output->start(config.output_rate, config.output_format); // will need a corresponding stop
2154
2155   // we need an intermediate "transition" buffer
2156
2157   conn->tbuf = malloc(
2158       sizeof(int32_t) * 2 *
2159       (conn->max_frames_per_packet * conn->output_sample_ratio + conn->max_frame_size_change));
2160   if (conn->tbuf == NULL)
2161     die("Failed to allocate memory for the transition buffer.");
2162
2163   // initialise this, because soxr stuffing might be chosen later
2164
2165   conn->sbuf = malloc(
2166       sizeof(int32_t) * 2 *
2167       (conn->max_frames_per_packet * conn->output_sample_ratio + conn->max_frame_size_change));
2168   if (conn->sbuf == NULL)
2169     die("Failed to allocate memory for the sbuf buffer.");
2170
2171   // The size of these depends on the number of frames, the size of each frame and the maximum
2172   // size change
2173   conn->outbuf = malloc(
2174       conn->output_bytes_per_frame *
2175       (conn->max_frames_per_packet * conn->output_sample_ratio + conn->max_frame_size_change));
2176   if (conn->outbuf == NULL)
2177     die("Failed to allocate memory for an output buffer.");
2178   conn->first_packet_timestamp = 0;
2179   conn->missing_packets = conn->late_packets = conn->too_late_packets = conn->resend_requests = 0;
2180   int sync_error_out_of_bounds =
2181       0; // number of times in a row that there's been a serious sync error
2182
2183   conn->statistics = malloc(sizeof(stats_t) * trend_interval);
2184   if (conn->statistics == NULL)
2185     die("Failed to allocate a statistics buffer");
2186
2187   conn->framesProcessedInThisEpoch = 0;
2188   conn->framesGeneratedInThisEpoch = 0;
2189   conn->correctionsRequestedInThisEpoch = 0;
2190   statistics_row = 0; // statistics_line 0 means print the headings; anything else 1 means print the
2191                       // values. Set to 0 the first time out.
2192
2193   // decide on what statistics profile to use, if requested
2194 #ifdef CONFIG_AIRPLAY_2
2195   if (conn->airplay_type == ap_2) {
2196     if (conn->airplay_stream_type == realtime_stream) {
2197       if (config.output->delay) {
2198         if (config.no_sync == 0)
2199           statistics_print_profile = ap2_realtime_synced_stream_statistics_print_profile;
2200         else
2201           statistics_print_profile = ap2_realtime_nosync_stream_statistics_print_profile;
2202       } else {
2203         statistics_print_profile = ap2_realtime_nodelay_stream_statistics_print_profile;
2204       }
2205     } else {
2206       if (config.output->delay) {
2207         if (config.no_sync == 0)
2208           statistics_print_profile = ap2_buffered_synced_stream_statistics_print_profile;
2209         else
2210           statistics_print_profile = ap2_buffered_nosync_stream_statistics_print_profile;
2211       } else {
2212         statistics_print_profile = ap2_buffered_nodelay_stream_statistics_print_profile;
2213       }
2214     }
2215   } else {
2216 #endif
2217     if (config.output->delay) {
2218       if (config.no_sync == 0)
2219         statistics_print_profile = ap1_synced_statistics_print_profile;
2220       else
2221         statistics_print_profile = ap1_nosync_statistics_print_profile;
2222     } else {
2223       statistics_print_profile = ap1_nodelay_statistics_print_profile;
2224     }
2225 // airplay 1 stuff here
2226 #ifdef CONFIG_AIRPLAY_2
2227   }
2228 #endif
2229
2230 #ifdef CONFIG_AIRPLAY_2
2231   if (conn->timing_type == ts_ntp) {
2232 #endif
2233
2234     // create and start the timing, control and audio receiver threads
2235     pthread_create(&conn->rtp_audio_thread, NULL, &rtp_audio_receiver, (void *)conn);
2236     pthread_create(&conn->rtp_control_thread, NULL, &rtp_control_receiver, (void *)conn);
2237     pthread_create(&conn->rtp_timing_thread, NULL, &rtp_timing_receiver, (void *)conn);
2238
2239 #ifdef CONFIG_AIRPLAY_2
2240   }
2241 #endif
2242
2243   pthread_cleanup_push(player_thread_cleanup_handler, arg); // undo what's been done so far
2244
2245   // stop looking elsewhere for DACP stuff
2246   int oldState;
2247   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldState);
2248
2249 #ifdef CONFIG_DACP_CLIENT
2250   set_dacp_server_information(conn);
2251 #else
2252   mdns_dacp_monitor_set_id(conn->dacp_id);
2253 #endif
2254
2255   pthread_setcancelstate(oldState, NULL);
2256
2257   // if not already set, set the volume to the pending_airplay_volume, if any, or otherwise to the
2258   // suggested volume.
2259
2260   double initial_volume = suggested_volume(conn);
2261   debug(2, "Set initial volume to %.6f.", initial_volume);
2262   player_volume(initial_volume, conn); // will contain a cancellation point if asked to wait
2263
2264   debug(2, "Play begin");
2265   while (1) {
2266 #ifdef CONFIG_METADATA
2267     int this_is_the_first_frame = 0; // will be set if it is
2268 #endif
2269     // check a few parameters to ensure they are non-zero
2270     if (config.output_rate == 0)
2271       debug(1, "config.output_rate is zero!");
2272     if (conn->output_sample_ratio == 0)
2273       debug(1, "conn->output_sample_ratio is zero!");
2274     if (conn->input_rate == 0)
2275       debug(1, "conn->input_rate is zero!");
2276     if (conn->input_bytes_per_frame == 0)
2277       debug(1, "conn->input_bytes_per_frame is zero!");
2278
2279     pthread_testcancel();                     // allow a pthread_cancel request to take effect.
2280     abuf_t *inframe = buffer_get_frame(conn); // this has cancellation point(s), but it's not
2281                                               // guaranteed that they'll always be executed
2282     uint64_t local_time_now = get_absolute_time_in_ns(); // types okay
2283     config.last_access_to_volume_info_time =
2284         local_time_now; // ensure volume info remains seen as valid
2285
2286     if (inframe) {
2287       inbuf = inframe->data;
2288       inbuflength = inframe->length;
2289       if (inbuf) {
2290         if (play_number == 0)
2291           conn->playstart = get_absolute_time_in_ns();
2292         play_number++;
2293         //        if (play_number % 100 == 0)
2294         //          debug(3, "Play frame %d.", play_number);
2295         conn->play_number_after_flush++;
2296         if (inframe->given_timestamp == 0) {
2297           debug(2,
2298                 "Player has supplied a silent frame, (possibly frame %u) for play number %d, "
2299                 "status 0x%X after %u resend requests.",
2300                 conn->last_seqno_read + 1, play_number, inframe->status,
2301                 inframe->resend_request_number);
2302           conn->last_seqno_read =
2303               ((conn->last_seqno_read + 1) & 0xffff); // manage the packet out of sequence minder
2304
2305           void *silence = malloc(conn->output_bytes_per_frame * conn->max_frames_per_packet *
2306                                  conn->output_sample_ratio);
2307           if (silence == NULL) {
2308             debug(1, "Failed to allocate memory for a silent frame silence buffer.");
2309           } else {
2310             // the player may change the contents of the buffer, so it has to be zeroed each time;
2311             // might as well malloc and free it locally
2312             conn->previous_random_number = generate_zero_frames(
2313                 silence, conn->max_frames_per_packet * conn->output_sample_ratio,
2314                 config.output_format, conn->enable_dither, conn->previous_random_number);
2315             config.output->play(silence, conn->max_frames_per_packet * conn->output_sample_ratio,
2316                                 play_samples_are_untimed, 0, 0);
2317             free(silence);
2318           }
2319         } else if (conn->play_number_after_flush < 10) {
2320           /*
2321           int64_t difference = 0;
2322           if (last_timestamp)
2323             difference = inframe->timestamp - last_timestamp;
2324           last_timestamp = inframe->timestamp;
2325           debug(1, "Play number %d, monotonic timestamp %llx, difference
2326           %lld.",conn->play_number_after_flush,inframe->timestamp,difference);
2327           */
2328           void *silence = malloc(conn->output_bytes_per_frame * conn->max_frames_per_packet *
2329                                  conn->output_sample_ratio);
2330           if (silence == NULL) {
2331             debug(1, "Failed to allocate memory for a flush silence buffer.");
2332           } else {
2333             // the player may change the contents of the buffer, so it has to be zeroed each time;
2334             // might as well malloc and free it locally
2335             conn->previous_random_number = generate_zero_frames(
2336                 silence, conn->max_frames_per_packet * conn->output_sample_ratio,
2337                 config.output_format, conn->enable_dither, conn->previous_random_number);
2338             config.output->play(silence, conn->max_frames_per_packet * conn->output_sample_ratio,
2339                                 play_samples_are_untimed, 0, 0);
2340             free(silence);
2341           }
2342         } else {
2343
2344           if (((config.output->parameters == NULL) && (config.ignore_volume_control == 0) &&
2345                (config.airplay_volume != 0.0)) ||
2346               (conn->input_bit_depth > output_bit_depth) || (config.playback_mode == ST_mono))
2347             conn->enable_dither = 1;
2348           else
2349             conn->enable_dither = 0;
2350
2351           // here, let's transform the frame of data, if necessary
2352
2353           switch (conn->input_bit_depth) {
2354           case 16: {
2355             int i, j;
2356             int16_t ls, rs;
2357             int32_t ll = 0, rl = 0;
2358             int16_t *inps = inbuf;
2359             // int16_t *outps = tbuf;
2360             int32_t *outpl = (int32_t *)conn->tbuf;
2361             for (i = 0; i < inbuflength; i++) {
2362               ls = *inps++;
2363               rs = *inps++;
2364
2365               // here, do the mode stuff -- mono / reverse stereo / leftonly / rightonly
2366               // also, raise the 16-bit samples to 32 bits.
2367
2368               switch (config.playback_mode) {
2369               case ST_mono: {
2370                 int32_t lsl = ls;
2371                 int32_t rsl = rs;
2372                 int32_t both = lsl + rsl;
2373                 both = both << (16 - 1); // keep all 17 bits of the sum of the 16bit left and right
2374                                          // -- the 17th bit will influence dithering later
2375                 ll = both;
2376                 rl = both;
2377               } break;
2378               case ST_reverse_stereo: {
2379                 ll = rs;
2380                 rl = ls;
2381                 ll = ll << 16;
2382                 rl = rl << 16;
2383               } break;
2384               case ST_left_only:
2385                 rl = ls;
2386                 ll = ls;
2387                 ll = ll << 16;
2388                 rl = rl << 16;
2389                 break;
2390               case ST_right_only:
2391                 ll = rs;
2392                 rl = rs;
2393                 ll = ll << 16;
2394                 rl = rl << 16;
2395                 break;
2396               case ST_stereo:
2397                 ll = ls;
2398                 rl = rs;
2399                 ll = ll << 16;
2400                 rl = rl << 16;
2401                 break; // nothing extra to do
2402               }
2403
2404               // here, replicate the samples if you're upsampling
2405
2406               for (j = 0; j < conn->output_sample_ratio; j++) {
2407                 *outpl++ = ll;
2408                 *outpl++ = rl;
2409               }
2410             }
2411           } break;
2412           case 32: {
2413             int i, j;
2414             int32_t ls, rs;
2415             int32_t ll = 0, rl = 0;
2416             int32_t *inps = (int32_t *)inbuf;
2417             int32_t *outpl = (int32_t *)conn->tbuf;
2418             for (i = 0; i < inbuflength; i++) {
2419               ls = *inps++;
2420               rs = *inps++;
2421
2422               // here, do the mode stuff -- mono / reverse stereo / leftonly / rightonly
2423
2424               switch (config.playback_mode) {
2425               case ST_mono: {
2426                 int64_t lsl = ls;
2427                 int64_t rsl = rs;
2428                 int64_t both = lsl + rsl;
2429                 both = both >> 1;
2430                 uint32_t both32 = both;
2431                 ll = both32;
2432                 rl = both32;
2433               } break;
2434               case ST_reverse_stereo: {
2435                 ll = rs;
2436                 rl = ls;
2437               } break;
2438               case ST_left_only:
2439                 rl = ls;
2440                 ll = ls;
2441                 break;
2442               case ST_right_only:
2443                 ll = rs;
2444                 rl = rs;
2445                 break;
2446               case ST_stereo:
2447                 ll = ls;
2448                 rl = rs;
2449                 break; // nothing extra to do
2450               }
2451
2452               // here, replicate the samples if you're upsampling
2453
2454               for (j = 0; j < conn->output_sample_ratio; j++) {
2455                 *outpl++ = ll;
2456                 *outpl++ = rl;
2457               }
2458             }
2459           } break;
2460
2461           default:
2462             die("Shairport Sync only supports 16 or 32 bit input");
2463           }
2464
2465           inbuflength *= conn->output_sample_ratio;
2466
2467           // We have a frame of data. We need to see if we want to add or remove a frame from it to
2468           // keep in sync.
2469           // So we calculate the timing error for the first frame in the DAC.
2470           // If it's ahead of time, we add one audio frame to this frame to delay a subsequent frame
2471           // If it's late, we remove an audio frame from this frame to bring a subsequent frame
2472           // forward in time
2473
2474           // now, go back as far as the total latency less, say, 100 ms, and check the presence of
2475           // frames from then onwards
2476
2477           frames_seen_in_this_logging_interval++;
2478
2479           // This is the timing error for the next audio frame in the DAC, if applicable
2480           int64_t sync_error = 0;
2481
2482           int amount_to_stuff = 0;
2483
2484           // check sequencing
2485           if (conn->last_seqno_read == -1)
2486             conn->last_seqno_read =
2487                 inframe->sequence_number; // int32_t from seq_t, i.e. uint16_t, so okay.
2488           else {
2489             conn->last_seqno_read =
2490                 (conn->last_seqno_read + 1) & 0xffff; // int32_t from seq_t, i.e. uint16_t, so okay.
2491             if (inframe->sequence_number !=
2492                 conn->last_seqno_read) { // seq_t, ei.e. uint16_t and int32_t, so okay
2493               debug(2,
2494                     "Player: packets out of sequence: expected: %u, got: %u, with ab_read: %u "
2495                     "and ab_write: %u.",
2496                     conn->last_seqno_read, inframe->sequence_number, conn->ab_read, conn->ab_write);
2497               conn->last_seqno_read = inframe->sequence_number; // reset warning...
2498             }
2499           }
2500
2501           int16_t bo = conn->ab_write - conn->ab_read; // do this in 16 bits
2502           conn->buffer_occupancy = bo;                 // 32 bits
2503
2504           if ((frames_seen_in_this_logging_interval == 1) ||
2505               (conn->buffer_occupancy < minimum_buffer_occupancy))
2506             minimum_buffer_occupancy = conn->buffer_occupancy;
2507
2508           if ((frames_seen_in_this_logging_interval == 1) ||
2509               (conn->buffer_occupancy > maximum_buffer_occupancy))
2510             maximum_buffer_occupancy = conn->buffer_occupancy;
2511
2512           // now, before outputting anything to the output device, check the stats
2513
2514           if (play_number % print_interval == 0) {
2515
2516             // here, calculate the input and output frame rates, where possible, even if statistics
2517             // have not been requested
2518             // this is to calculate them in case they are needed by the D-Bus interface or
2519             // elsewhere.
2520
2521             if (conn->input_frame_rate_starting_point_is_valid) {
2522               uint64_t elapsed_reception_time, frames_received;
2523               elapsed_reception_time =
2524                   conn->frames_inward_measurement_time - conn->frames_inward_measurement_start_time;
2525               frames_received = conn->frames_inward_frames_received_at_measurement_time -
2526                                 conn->frames_inward_frames_received_at_measurement_start_time;
2527               conn->input_frame_rate =
2528                   (1.0E9 * frames_received) /
2529                   elapsed_reception_time; // an IEEE double calculation with two 64-bit integers
2530             } else {
2531               conn->input_frame_rate = 0.0;
2532             }
2533
2534             int stats_status = 0;
2535             if ((config.output->delay) && (config.no_sync == 0) && (config.output->stats)) {
2536               uint64_t frames_sent_for_play;
2537               uint64_t raw_measurement_time;
2538               uint64_t corrected_measurement_time;
2539               uint64_t actual_delay;
2540               stats_status =
2541                   config.output->stats(&raw_measurement_time, &corrected_measurement_time,
2542                                        &actual_delay, &frames_sent_for_play);
2543               // debug(1,"status: %d, actual_delay: %" PRIu64 ", frames_sent_for_play: %" PRIu64 ",
2544               // frames_played: %" PRIu64 ".", stats_status, actual_delay, frames_sent_for_play,
2545               // frames_sent_for_play - actual_delay);
2546               uint64_t frames_played = frames_sent_for_play - actual_delay;
2547               // If the status is zero, it means that there were no output problems since the
2548               // last time the stats call was made. Thus, the frame rate should be valid.
2549               if ((stats_status == 0) && (previous_frames_played_valid != 0)) {
2550                 uint64_t frames_played_in_this_interval = frames_played - previous_frames_played;
2551                 int64_t raw_interval = raw_measurement_time - previous_raw_measurement_time;
2552                 int64_t corrected_interval =
2553                     corrected_measurement_time - previous_corrected_measurement_time;
2554                 if (raw_interval != 0) {
2555                   conn->raw_frame_rate = (1e9 * frames_played_in_this_interval) / raw_interval;
2556                   conn->corrected_frame_rate =
2557                       (1e9 * frames_played_in_this_interval) / corrected_interval;
2558                   conn->frame_rate_valid = 1;
2559                   // debug(1,"frames_played_in_this_interval: %" PRIu64 ", interval: %" PRId64 ",
2560                   // rate: %f.",
2561                   //  frames_played_in_this_interval, interval, conn->frame_rate);
2562                 }
2563               }
2564
2565               // uncomment the if statement if you want to get as long a period for
2566               // calculating the frame rate as possible without an output break or error
2567               if ((stats_status != 0) || (previous_frames_played_valid == 0)) {
2568                 // if we have just detected an outputting error, or if we have no
2569                 // starting information
2570                 if (stats_status != 0)
2571                   conn->frame_rate_valid = 0;
2572                 previous_frames_played = frames_played;
2573                 previous_raw_measurement_time = raw_measurement_time;
2574                 previous_corrected_measurement_time = corrected_measurement_time;
2575                 previous_frames_played_valid = 1;
2576               }
2577             }
2578
2579             // we can now calculate running averages for sync error (frames), corrections (ppm),
2580             // insertions plus deletions (ppm), drift (ppm)
2581             double moving_average_sync_error = 0.0;
2582             double moving_average_correction = 0.0;
2583             double moving_average_insertions_plus_deletions = 0.0;
2584             if (number_of_statistics == 0) {
2585               debug(2, "number_of_statistics is zero!");
2586             } else {
2587               moving_average_sync_error = (1.0 * tsum_of_sync_errors) / number_of_statistics;
2588               moving_average_correction = (1.0 * tsum_of_corrections) / number_of_statistics;
2589               moving_average_insertions_plus_deletions =
2590                   (1.0 * tsum_of_insertions_and_deletions) / number_of_statistics;
2591               // double moving_average_drift = (1.0 * tsum_of_drifts) / number_of_statistics;
2592             }
2593             // if ((play_number/print_interval)%20==0)
2594             // figure out which statistics profile to use, depending on the kind of stream
2595
2596             if (config.statistics_requested) {
2597
2598               if (frames_seen_in_this_logging_interval) {
2599                 do {
2600                   line_of_stats[0] = '\0';
2601                   statistics_column = 0;
2602                   was_a_previous_column = 0;
2603                   statistics_item("Sync Error ms", "%*.2f", 13,
2604                                   1000 * moving_average_sync_error / config.output_rate);
2605                   statistics_item("Net Sync PPM", "%*.1f", 12,
2606                                   moving_average_correction * 1000000 /
2607                                       (352 * conn->output_sample_ratio));
2608                   statistics_item("All Sync PPM", "%*.1f", 12,
2609                                   moving_average_insertions_plus_deletions * 1000000 /
2610                                       (352 * conn->output_sample_ratio));
2611                   statistics_item("    Packets", "%*d", 11, play_number);
2612                   statistics_item("Missing", "%*" PRIu64 "", 7, conn->missing_packets);
2613                   statistics_item("  Late", "%*" PRIu64 "", 6, conn->late_packets);
2614                   statistics_item("Too Late", "%*" PRIu64 "", 8, conn->too_late_packets);
2615                   statistics_item("Resend Reqs", "%*" PRIu64 "", 11, conn->resend_requests);
2616                   statistics_item("Min DAC Queue", "%*" PRIu64 "", 13, minimum_dac_queue_size);
2617                   statistics_item("Min Buffers", "%*" PRIu32 "", 11, minimum_buffer_occupancy);
2618                   statistics_item("Max Buffers", "%*" PRIu32 "", 11, maximum_buffer_occupancy);
2619 #ifdef CONFIG_AIRPLAY_2
2620                   if (conn->ap2_audio_buffer_minimum_size > 10 * 1024)
2621                     statistics_item("Min Buffer Size", "%*" PRIu32 "k", 14,
2622                                     conn->ap2_audio_buffer_minimum_size / 1024);
2623                   else
2624                     statistics_item("Min Buffer Size", "%*" PRIu32 "", 15,
2625                                     conn->ap2_audio_buffer_minimum_size);
2626 #else
2627                   statistics_item("N/A", "   "); // dummy -- should never be visible
2628 #endif
2629                   statistics_item("Nominal FPS", "%*.2f", 11, conn->remote_frame_rate);
2630                   statistics_item("Received FPS", "%*.2f", 12, conn->input_frame_rate);
2631                   // only make the next two columns appear if we are getting stats information from
2632                   // the back end
2633                   if (config.output->stats) {
2634                     if (conn->frame_rate_valid) {
2635                       statistics_item("Output FPS (r)", "%*.2f", 14, conn->raw_frame_rate);
2636                       statistics_item("Output FPS (c)", "%*.2f", 14, conn->corrected_frame_rate);
2637                     } else {
2638                       statistics_item("Output FPS (r)", "           N/A");
2639                       statistics_item("Output FPS (c)", "           N/A");
2640                     }
2641                   } else {
2642                     statistics_column = statistics_column + 2;
2643                   }
2644                   statistics_item("Source Drift PPM", "%*.2f", 16,
2645                                   (conn->local_to_remote_time_gradient - 1.0) * 1000000);
2646                   statistics_item("Drift Samples", "%*d", 13,
2647                                   conn->local_to_remote_time_gradient_sample_count);
2648                   /*
2649                   statistics_item("estimated (unused) correction ppm", "%*.2f",
2650                                   strlen("estimated (unused) correction ppm"),
2651                                   (conn->frame_rate_valid != 0)
2652                                       ? ((conn->frame_rate -
2653                                           conn->remote_frame_rate * conn->output_sample_ratio *
2654                                               conn->local_to_remote_time_gradient) *
2655                                          1000000) /
2656                                             conn->frame_rate
2657                                       : 0.0);
2658                   */
2659                   statistics_row++;
2660                   inform(line_of_stats);
2661                 } while (statistics_row < 2);
2662               } else {
2663                 inform("No frames received in the last sampling interval.");
2664               }
2665             }
2666 #ifdef CONFIG_AIRPLAY_2
2667             conn->ap2_audio_buffer_minimum_size = -1;
2668 #endif
2669           }
2670
2671           // here, we want to check (a) if we are meant to do synchronisation,
2672           // (b) if we have a delay procedure, (c) if we can get the delay.
2673
2674           // If any of these are false, we don't do any synchronisation stuff
2675
2676           int resp = -1; // use this as a flag -- if negative, we can't rely on a real known delay
2677           current_delay = -1; // use this as a failure flag
2678
2679           if (config.output->delay) {
2680             long l_delay;
2681             resp = config.output->delay(&l_delay);
2682             if (resp == 0) { // no error
2683               current_delay = l_delay;
2684               if (l_delay >= 0)
2685                 current_delay = l_delay;
2686               else {
2687                 debug(2, "Underrun of %ld frames reported, but ignored.", l_delay);
2688                 current_delay =
2689                     0; // could get a negative value if there was underrun, but ignore it.
2690               }
2691               if ((frames_seen_in_this_logging_interval == 1) ||
2692                   (current_delay < minimum_dac_queue_size)) {
2693                 minimum_dac_queue_size = current_delay; // update for display later
2694               }
2695             } else {
2696               current_delay = 0;
2697               if ((resp == sps_extra_code_output_stalled) &&
2698                   (config.unfixable_error_reported == 0)) {
2699                 config.unfixable_error_reported = 1;
2700                 if (config.cmd_unfixable) {
2701                   warn("Connection %d: An unfixable error has been detected -- output device is "
2702                        "stalled. Executing the "
2703                        "\"run_this_if_an_unfixable_error_is_detected\" command.",
2704                        conn->connection_number);
2705                   command_execute(config.cmd_unfixable, "output_device_stalled", 1);
2706                 } else {
2707                   warn("Connection %d: An unfixable error has been detected -- output device is "
2708                        "stalled. \"No "
2709                        "run_this_if_an_unfixable_error_is_detected\" command provided -- nothing "
2710                        "done.",
2711                        conn->connection_number);
2712                 }
2713               } else {
2714                 if ((resp != -EBUSY) &&
2715                     (resp != -ENODEV)) // delay and not-there errors can be reported if the device
2716                                        // is (hopefully temporarily) busy or unavailable
2717                   debug(1, "Delay error %d when checking running latency.", resp);
2718               }
2719             }
2720           }
2721
2722           if (resp == 0) {
2723
2724             uint32_t should_be_frame_32;
2725             // this is denominated in the frame rate of the incoming stream
2726             local_time_to_frame(local_time_now, &should_be_frame_32, conn);
2727
2728             int64_t should_be_frame = should_be_frame_32;
2729             should_be_frame = should_be_frame * conn->output_sample_ratio;
2730
2731             // current_delay is denominated in the frame rate of the outgoing stream
2732             int64_t will_be_frame = inframe->given_timestamp;
2733             will_be_frame = will_be_frame * conn->output_sample_ratio;
2734             will_be_frame = (will_be_frame - current_delay) &
2735                             output_rtptime_mask; // this is to make sure it's unsigned modulo 2^bits
2736                                                  // for the rtptime
2737
2738             // Now we have a tricky piece of calculation to perform.
2739             // We know the rtptimes are unsigned in 32 or more bits -- call it r bits. We have to
2740             // calculate the difference between them, on the basis that they should be within
2741             // 2^(r-1) of one another, so that the unsigned subtraction result, modulo 2^r, if
2742             // interpreted as a signed number, should yield the difference _and_ the ordering.
2743
2744             sync_error = should_be_frame - will_be_frame; // this is done in int64_t form
2745
2746             // int64_t t_ping = should_be_frame - conn->anchor_rtptime;
2747             // if (t_ping < 0)
2748             //   debug(1, "Frame %" PRIu64 " is %" PRId64 " frames before anchor time %" PRIu64 ".",
2749             //   should_be_frame, -t_ping, conn->anchor_rtptime);
2750
2751             // sign-extend the r-bit unsigned int calculation by treating it as an r-bit signed
2752             // integer
2753             if ((sync_error & output_rtptime_sign_mask) !=
2754                 0) { // check what would be the sign bit in "r" bit unsigned arithmetic
2755                      // result is negative
2756               sync_error = sync_error | output_rtptime_mask_not;
2757             } else {
2758               // result is positive
2759               sync_error = sync_error & output_rtptime_mask;
2760             }
2761
2762             if (at_least_one_frame_seen_this_session == 0) {
2763               at_least_one_frame_seen_this_session = 1;
2764 #ifdef CONFIG_METADATA
2765               this_is_the_first_frame = 1;
2766 #endif
2767
2768               // debug(2,"first frame real sync error (positive --> late): %" PRId64 " frames.",
2769               // sync_error);
2770
2771               // this is a sneaky attempt to make a final adjustment to the timing of the first
2772               // packet
2773
2774               // the very first packet generally has a first_frame_early_bias subtracted from its
2775               // timing to make it more likely that it will be early than late, making it possible
2776               // to compensate for it by adding a few frames of silence.
2777
2778               // debug(2,"first frame real sync error (positive --> late): %" PRId64 " frames.",
2779               // sync_error);
2780
2781               // remove the bias when reporting the error to make it the true error
2782               debug(2,
2783                     "first frame sync error (positive --> late): %" PRId64
2784                     " frames, %.3f mS at %d frames per second output.",
2785                     sync_error + first_frame_early_bias,
2786                     (1000.0 * (sync_error + first_frame_early_bias)) / config.output_rate,
2787                     config.output_rate);
2788
2789               // if the packet is early, add the frames needed to put it in sync.
2790               if (sync_error < 0) {
2791                 size_t final_adjustment_length_sized = -sync_error;
2792                 char *final_adjustment_silence =
2793                     malloc(conn->output_bytes_per_frame * final_adjustment_length_sized);
2794                 if (final_adjustment_silence) {
2795
2796                   conn->previous_random_number = generate_zero_frames(
2797                       final_adjustment_silence, final_adjustment_length_sized, config.output_format,
2798                       conn->enable_dither, conn->previous_random_number);
2799                   int final_adjustment = -sync_error;
2800                   final_adjustment = final_adjustment - first_frame_early_bias;
2801                   debug(2,
2802                         "final sync adjustment: %" PRId64
2803                         " silent frames added with a bias of %" PRId64 " frames.",
2804                         -sync_error, first_frame_early_bias);
2805                   config.output->play(final_adjustment_silence, final_adjustment_length_sized,
2806                                       play_samples_are_untimed, 0, 0);
2807                   free(final_adjustment_silence);
2808                 } else {
2809                   warn("Failed to allocate memory for a final_adjustment_silence buffer of %d "
2810                        "frames for a "
2811                        "sync error of %d frames.",
2812                        final_adjustment_length_sized, sync_error);
2813                 }
2814                 sync_error = 0; // say the error was fixed!
2815               }
2816               // since this is the first frame of audio, inform the user if requested...
2817 #ifdef CONFIG_AIRPLAY_2
2818               if (conn->airplay_stream_type == realtime_stream) {
2819                 if (conn->airplay_type == ap_1) {
2820 #ifdef CONFIG_METADATA
2821                   send_ssnc_metadata('styp', "Classic", strlen("Classic"), 1);
2822 #endif
2823                   if (config.statistics_requested)
2824                     inform("Connection %d: Playback started at frame %" PRId64
2825                            " -- Classic AirPlay (\"AirPlay 1\") Compatible.",
2826                            conn->connection_number, inframe->given_timestamp);
2827                 } else {
2828 #ifdef CONFIG_METADATA
2829                   send_ssnc_metadata('styp', "Realtime", strlen("Realtime"), 1);
2830 #endif
2831                   if (config.statistics_requested)
2832                     inform("Connection %d: Playback started at frame %" PRId64
2833                            " -- AirPlay 2 Realtime.",
2834                            conn->connection_number, inframe->given_timestamp);
2835                 }
2836               } else {
2837 #ifdef CONFIG_METADATA
2838                 send_ssnc_metadata('styp', "Buffered", strlen("Buffered"), 1);
2839 #endif
2840                 if (config.statistics_requested)
2841                   inform("Connection %d: Playback started at frame %" PRId64
2842                          " -- AirPlay 2 Buffered.",
2843                          conn->connection_number, inframe->given_timestamp);
2844               }
2845 #else
2846 #ifdef CONFIG_METADATA
2847               send_ssnc_metadata('styp', "Classic", strlen("Classic"), 1);
2848 #endif
2849               if (config.statistics_requested)
2850                 inform("Connection %d: Playback started at frame %" PRId64
2851                        " -- Classic AirPlay (\"AirPlay 1\").",
2852                        conn->connection_number, inframe->given_timestamp);
2853 #endif
2854             }
2855             // not too sure if abs() is implemented for int64_t, so we'll do it manually
2856             int64_t abs_sync_error = sync_error;
2857             if (abs_sync_error < 0)
2858               abs_sync_error = -abs_sync_error;
2859
2860             if ((config.no_sync == 0) && (inframe->given_timestamp != 0) &&
2861                 (config.resync_threshold > 0.0) &&
2862                 (abs_sync_error > config.resync_threshold * config.output_rate)) {
2863               sync_error_out_of_bounds++;
2864             } else {
2865               sync_error_out_of_bounds = 0;
2866             }
2867
2868             if (sync_error_out_of_bounds > 3) {
2869               // debug(1, "lost sync with source for %d consecutive packets -- flushing and "
2870               //          "resyncing. Error: %lld.",
2871               //        sync_error_out_of_bounds, sync_error);
2872               sync_error_out_of_bounds = 0;
2873
2874               uint64_t frames_sent_for_play = 0;
2875               uint64_t actual_delay = 0;
2876
2877               if ((config.output->delay) && (config.no_sync == 0) && (config.output->stats)) {
2878                 uint64_t raw_measurement_time;
2879                 uint64_t corrected_measurement_time;
2880                 config.output->stats(&raw_measurement_time, &corrected_measurement_time,
2881                                      &actual_delay, &frames_sent_for_play);
2882               }
2883
2884               int64_t filler_length =
2885                   (int64_t)(config.resync_threshold * config.output_rate); // number of samples
2886               if ((sync_error > 0) && (sync_error > filler_length)) {
2887                 debug(1,
2888                       "Large positive (i.e. late) sync error of %" PRId64
2889                       " frames (%f seconds), at frame: %" PRIu32 ".",
2890                       sync_error, (sync_error * 1.0) / config.output_rate,
2891                       inframe->given_timestamp);
2892                 // debug(1, "%" PRId64 " frames sent to DAC. DAC buffer contains %" PRId64 "
2893                 // frames.",
2894                 //      frames_sent_for_play, actual_delay);
2895                 // the sync error is in output frames, but we have to work out how many source
2896                 // frames to drop -- there may be a multiple (the conn->output_sample_ratio) of
2897                 // output frames per input frame...
2898                 int64_t source_frames_to_drop = sync_error;
2899                 source_frames_to_drop = source_frames_to_drop / conn->output_sample_ratio;
2900
2901                 // drop some extra frames to give the pipeline a chance to recover
2902                 int64_t extra_frames_to_drop =
2903                     (int64_t)(conn->input_rate * config.resync_recovery_time);
2904                 source_frames_to_drop += extra_frames_to_drop;
2905
2906                 uint32_t frames_to_drop = source_frames_to_drop;
2907                 uint32_t flush_to_frame = inframe->given_timestamp + frames_to_drop;
2908
2909                 do_flush(flush_to_frame, conn);
2910
2911               } else if ((sync_error < 0) && ((-sync_error) > filler_length)) {
2912                 debug(1,
2913                       "Large negative (i.e. early) sync error of %" PRId64
2914                       " frames (%f seconds), at frame: %" PRIu32 ".",
2915                       sync_error, (sync_error * 1.0) / config.output_rate,
2916                       inframe->given_timestamp);
2917                 debug(3, "%" PRId64 " frames sent to DAC. DAC buffer contains %" PRId64 " frames.",
2918                       frames_sent_for_play, actual_delay);
2919                 int64_t silence_length = -sync_error;
2920                 if (silence_length > (filler_length * 5))
2921                   silence_length = filler_length * 5;
2922                 size_t silence_length_sized = silence_length;
2923                 char *long_silence = malloc(conn->output_bytes_per_frame * silence_length_sized);
2924                 if (long_silence) {
2925
2926                   conn->previous_random_number =
2927                       generate_zero_frames(long_silence, silence_length_sized, config.output_format,
2928                                            conn->enable_dither, conn->previous_random_number);
2929
2930                   debug(2, "Play a silence of %d frames.", silence_length_sized);
2931                   config.output->play(long_silence, silence_length_sized, play_samples_are_untimed,
2932                                       0, 0);
2933                   free(long_silence);
2934                 } else {
2935                   warn("Failed to allocate memory for a long_silence buffer of %d frames for a "
2936                        "sync error of %d frames.",
2937                        silence_length_sized, sync_error);
2938                 }
2939                 reset_input_flow_metrics(conn);
2940               }
2941             } else {
2942
2943               /*
2944               // before we finally commit to this frame, check its sequencing and timing
2945               // require a certain error before bothering to fix it...
2946               if (sync_error > config.tolerance * config.output_rate) { // int64_t > int, okay
2947                 amount_to_stuff = -1;
2948               }
2949               if (sync_error < -config.tolerance * config.output_rate) {
2950                 amount_to_stuff = 1;
2951               }
2952               */
2953
2954               if (amount_to_stuff == 0) {
2955                 // use a "V" shaped function to decide if stuffing should occur
2956                 int64_t s = r64i();
2957                 s = s >> 31;
2958                 s = s * config.tolerance * config.output_rate;
2959                 s = (s >> 32) + config.tolerance * config.output_rate; // should be a number from 0
2960                                                                        // to config.tolerance *
2961                                                                        // config.output_rate;
2962                 if ((sync_error > 0) && (sync_error > s)) {
2963                   // debug(1,"Extra stuff -1");
2964                   amount_to_stuff = -1;
2965                 }
2966                 if ((sync_error < 0) && (sync_error < (-s))) {
2967                   // debug(1,"Extra stuff +1");
2968                   amount_to_stuff = 1;
2969                 }
2970               }
2971
2972               // try to keep the corrections definitely below 1 in 1000 audio frames
2973
2974               // calculate the time elapsed since the play session started.
2975
2976               if (amount_to_stuff) {
2977                 if ((local_time_now) && (conn->first_packet_time_to_play) &&
2978                     (local_time_now >= conn->first_packet_time_to_play)) {
2979
2980                   int64_t tp =
2981                       (local_time_now - conn->first_packet_time_to_play) /
2982                       1000000000; // seconds int64_t from uint64_t which is always positive, so ok
2983
2984                   if (tp < 5)
2985                     amount_to_stuff = 0; // wait at least five seconds
2986                   /*
2987                   else if (tp < 30) {
2988                     if ((random() % 1000) >
2989                         352) // keep it to about 1:1000 for the first thirty seconds
2990                       amount_to_stuff = 0;
2991                   }
2992                   */
2993                 }
2994               }
2995
2996               if (config.no_sync != 0)
2997                 amount_to_stuff = 0; // no stuffing if it's been disabled
2998
2999               // Apply DSP here
3000
3001               // check the state of loudness and convolution flags here and don't change them for
3002               // the frame
3003
3004               int do_loudness = config.loudness;
3005
3006 #ifdef CONFIG_CONVOLUTION
3007               int do_convolution = 0;
3008               if ((config.convolution) && (config.convolver_valid))
3009                 do_convolution = 1;
3010
3011               // we will apply the convolution gain if convolution is enabled, even if there is no
3012               // valid convolution happening
3013
3014               int convolution_is_enabled = 0;
3015               if (config.convolution)
3016                 convolution_is_enabled = 1;
3017 #endif
3018
3019               if (do_loudness
3020 #ifdef CONFIG_CONVOLUTION
3021                   || convolution_is_enabled
3022 #endif
3023               ) {
3024                 int32_t *tbuf32 = (int32_t *)conn->tbuf;
3025                 float fbuf_l[inbuflength];
3026                 float fbuf_r[inbuflength];
3027
3028                 // Deinterleave, and convert to float
3029                 int i;
3030                 for (i = 0; i < inbuflength; ++i) {
3031                   fbuf_l[i] = tbuf32[2 * i];
3032                   fbuf_r[i] = tbuf32[2 * i + 1];
3033                 }
3034
3035 #ifdef CONFIG_CONVOLUTION
3036                 // Apply convolution
3037                 if (do_convolution) {
3038                   convolver_process_l(fbuf_l, inbuflength);
3039                   convolver_process_r(fbuf_r, inbuflength);
3040                 }
3041                 if (convolution_is_enabled) {
3042                   float gain = pow(10.0, config.convolution_gain / 20.0);
3043                   for (i = 0; i < inbuflength; ++i) {
3044                     fbuf_l[i] *= gain;
3045                     fbuf_r[i] *= gain;
3046                   }
3047                 }
3048 #endif
3049
3050                 if (do_loudness) {
3051                   // Apply volume and loudness
3052                   // Volume must be applied here because the loudness filter will increase the
3053                   // signal level and it would saturate the int32_t otherwise
3054                   float gain = conn->fix_volume / 65536.0f;
3055                   // float gain_db = 20 * log10(gain);
3056                   // debug(1, "Applying soft volume dB: %f k: %f", gain_db, gain);
3057
3058                   for (i = 0; i < inbuflength; ++i) {
3059                     fbuf_l[i] = loudness_process(&loudness_l, fbuf_l[i] * gain);
3060                     fbuf_r[i] = loudness_process(&loudness_r, fbuf_r[i] * gain);
3061                   }
3062                 }
3063
3064                 // Interleave and convert back to int32_t
3065                 for (i = 0; i < inbuflength; ++i) {
3066                   tbuf32[2 * i] = fbuf_l[i];
3067                   tbuf32[2 * i + 1] = fbuf_r[i];
3068                 }
3069               }
3070
3071 #ifdef CONFIG_SOXR
3072               if ((current_delay < conn->dac_buffer_queue_minimum_length) ||
3073                   (config.packet_stuffing == ST_basic) ||
3074                   (config.soxr_delay_index == 0) || // not computed
3075                   ((config.packet_stuffing == ST_auto) &&
3076                    (config.soxr_delay_index >
3077                     config.soxr_delay_threshold)) // if the CPU is deemed too slow
3078               ) {
3079 #endif
3080                 play_samples =
3081                     stuff_buffer_basic_32((int32_t *)conn->tbuf, inbuflength, config.output_format,
3082                                           conn->outbuf, amount_to_stuff, conn->enable_dither, conn);
3083 #ifdef CONFIG_SOXR
3084               } else { // soxr requested or auto requested with the index less or equal to the
3085                        // threshold
3086                 play_samples = stuff_buffer_soxr_32((int32_t *)conn->tbuf, (int32_t *)conn->sbuf,
3087                                                     inbuflength, config.output_format, conn->outbuf,
3088                                                     amount_to_stuff, conn->enable_dither, conn);
3089               }
3090 #endif
3091
3092               /*
3093               {
3094                 int co;
3095                 int is_silent=1;
3096                 short *p = outbuf;
3097                 for (co=0;co<play_samples;co++) {
3098                   if (*p!=0)
3099                     is_silent=0;
3100                   p++;
3101                 }
3102                 if (is_silent)
3103                   debug(1,"Silence!");
3104               }
3105               */
3106
3107               if (conn->outbuf == NULL)
3108                 debug(1, "NULL outbuf to play -- skipping it.");
3109               else {
3110                 if (play_samples == 0)
3111                   debug(1, "play_samples==0 skipping it (1).");
3112                 else {
3113                   if (conn->software_mute_enabled) {
3114                     generate_zero_frames(conn->outbuf, play_samples, config.output_format,
3115                                          conn->enable_dither, conn->previous_random_number);
3116                   }
3117                   uint64_t should_be_time;
3118                   frame_to_local_time(inframe->given_timestamp, &should_be_time, conn);
3119
3120                   config.output->play(conn->outbuf, play_samples, play_samples_are_timed,
3121                                       inframe->given_timestamp, should_be_time);
3122 #ifdef CONFIG_METADATA
3123                   // debug(1,"config.metadata_progress_interval is %f.",
3124                   // config.metadata_progress_interval);
3125                   if (config.metadata_progress_interval != 0.0) {
3126                     char hb[128];
3127                     if (this_is_the_first_frame != 0) {
3128                       memset(hb, 0, 128);
3129                       snprintf(hb, 127, "%" PRIu32 "/%" PRId64 "", inframe->given_timestamp,
3130                                should_be_time);
3131                       send_ssnc_metadata('phb0', hb, strlen(hb), 1);
3132                       send_ssnc_metadata('phbt', hb, strlen(hb), 1);
3133                       time_of_last_metadata_progress_update = local_time_now;
3134                     } else {
3135                       uint64_t mx = 1000000000;
3136                       uint64_t iv = config.metadata_progress_interval * mx;
3137                       iv = iv + time_of_last_metadata_progress_update;
3138                       int64_t delta = iv - local_time_now;
3139                       if (delta <= 0) {
3140                         memset(hb, 0, 128);
3141                         snprintf(hb, 127, "%" PRIu32 "/%" PRId64 "", inframe->given_timestamp,
3142                                  should_be_time);
3143                         send_ssnc_metadata('phbt', hb, strlen(hb), 1);
3144                         time_of_last_metadata_progress_update = local_time_now;
3145                       }
3146                     }
3147                   }
3148 #endif
3149                 }
3150               }
3151
3152               // check for loss of sync
3153               // timestamp of zero means an inserted silent frame in place of a missing frame
3154               /*
3155               if ((config.no_sync == 0) && (inframe->timestamp != 0) &&
3156                   && (config.resync_threshold > 0.0) &&
3157                   (abs_sync_error > config.resync_threshold * config.output_rate)) {
3158                 sync_error_out_of_bounds++;
3159                 // debug(1,"Sync error out of bounds: Error: %lld; previous error: %lld; DAC: %lld;
3160                 // timestamp: %llx, time now
3161                 //
3162               %llx",sync_error,previous_sync_error,current_delay,inframe->timestamp,local_time_now);
3163                 if (sync_error_out_of_bounds > 3) {
3164                   debug(1, "Lost sync with source for %d consecutive packets -- flushing and "
3165                            "resyncing. Error: %lld.",
3166                         sync_error_out_of_bounds, sync_error);
3167                   sync_error_out_of_bounds = 0;
3168                   player_flush(nt, conn);
3169                 }
3170               } else {
3171                 sync_error_out_of_bounds = 0;
3172               }
3173               */
3174             }
3175           } else {
3176
3177             // if this is the first frame, see if it's close to when it's supposed to be
3178             // released, which will be its time plus latency and any offset_time
3179             if (at_least_one_frame_seen_this_session == 0) {
3180 #ifdef CONFIG_METADATA
3181               this_is_the_first_frame = 1;
3182 #endif
3183               at_least_one_frame_seen_this_session = 1;
3184             }
3185
3186             play_samples =
3187                 stuff_buffer_basic_32((int32_t *)conn->tbuf, inbuflength, config.output_format,
3188                                       conn->outbuf, 0, conn->enable_dither, conn);
3189             if (conn->outbuf == NULL)
3190               debug(1, "NULL outbuf to play -- skipping it.");
3191             else {
3192               if (conn->software_mute_enabled) {
3193                 generate_zero_frames(conn->outbuf, play_samples, config.output_format,
3194                                      conn->enable_dither, conn->previous_random_number);
3195               }
3196               uint64_t should_be_time;
3197               frame_to_local_time(inframe->given_timestamp, &should_be_time, conn);
3198               config.output->play(conn->outbuf, play_samples, play_samples_are_timed,
3199                                   inframe->given_timestamp, should_be_time);
3200 #ifdef CONFIG_METADATA
3201               // debug(1,"config.metadata_progress_interval is %f.",
3202               // config.metadata_progress_interval);
3203               if (config.metadata_progress_interval != 0.0) {
3204                 char hb[128];
3205                 if (this_is_the_first_frame != 0) {
3206                   memset(hb, 0, 128);
3207                   snprintf(hb, 127, "%" PRIu32 "/%" PRId64 "", inframe->given_timestamp,
3208                            should_be_time);
3209                   send_ssnc_metadata('phb0', hb, strlen(hb), 1);
3210                   send_ssnc_metadata('phbt', hb, strlen(hb), 1);
3211                   time_of_last_metadata_progress_update = local_time_now;
3212                 } else {
3213                   uint64_t mx = 1000000000;
3214                   uint64_t iv = config.metadata_progress_interval * mx;
3215                   iv = iv + time_of_last_metadata_progress_update;
3216                   int64_t delta = iv - local_time_now;
3217                   if (delta <= 0) {
3218                     memset(hb, 0, 128);
3219                     snprintf(hb, 127, "%" PRIu32 "/%" PRId64 "", inframe->given_timestamp,
3220                              should_be_time);
3221                     send_ssnc_metadata('phbt', hb, strlen(hb), 1);
3222                     time_of_last_metadata_progress_update = local_time_now;
3223                   }
3224                 }
3225               }
3226 #endif
3227             }
3228           }
3229
3230           // mark the frame as finished
3231           inframe->given_timestamp = 0;
3232           inframe->sequence_number = 0;
3233           inframe->resend_time = 0;
3234           inframe->initialisation_time = 0;
3235
3236           // if we've just printed out statistics, note that in the next interval
3237           // we haven't seen any frames yet
3238
3239           if (play_number % print_interval == 0) {
3240             frames_seen_in_this_logging_interval = 0;
3241           }
3242
3243           // update the watchdog
3244           if ((config.dont_check_timeout == 0) && (config.timeout != 0)) {
3245             uint64_t time_now = get_absolute_time_in_ns();
3246             debug_mutex_lock(&conn->watchdog_mutex, 1000, 0);
3247             conn->watchdog_bark_time = time_now;
3248             debug_mutex_unlock(&conn->watchdog_mutex, 0);
3249           }
3250
3251           // debug(1,"Sync error %lld frames. Amount to stuff %d." ,sync_error,amount_to_stuff);
3252
3253           // new stats calculation. We want a running average of sync error, drift, adjustment,
3254           // number of additions+subtractions
3255
3256           // this is a misleading hack -- the statistics should include some data on the number of
3257           // valid samples and the number of times sync wasn't checked due to non availability of a
3258           // delay figure.
3259           // for the present, stats are only updated when sync has been checked
3260           if (config.output->delay != NULL) {
3261             if (number_of_statistics == trend_interval) {
3262               // here we remove the oldest statistical data and take it from the summaries as well
3263               tsum_of_sync_errors -= conn->statistics[oldest_statistic].sync_error;
3264               tsum_of_drifts -= conn->statistics[oldest_statistic].drift;
3265               if (conn->statistics[oldest_statistic].correction > 0)
3266                 tsum_of_insertions_and_deletions -= conn->statistics[oldest_statistic].correction;
3267               else
3268                 tsum_of_insertions_and_deletions += conn->statistics[oldest_statistic].correction;
3269               tsum_of_corrections -= conn->statistics[oldest_statistic].correction;
3270               oldest_statistic = (oldest_statistic + 1) % trend_interval;
3271               number_of_statistics--;
3272             }
3273
3274             conn->statistics[newest_statistic].sync_error = sync_error;
3275             conn->statistics[newest_statistic].correction = conn->amountStuffed;
3276
3277             if (number_of_statistics == 0)
3278               conn->statistics[newest_statistic].drift = 0;
3279             else
3280               conn->statistics[newest_statistic].drift =
3281                   sync_error - previous_sync_error - previous_correction;
3282
3283             previous_sync_error = sync_error;
3284             previous_correction = conn->amountStuffed;
3285
3286             tsum_of_sync_errors += sync_error;
3287             tsum_of_drifts += conn->statistics[newest_statistic].drift;
3288             if (conn->amountStuffed > 0) {
3289               tsum_of_insertions_and_deletions += conn->amountStuffed;
3290             } else {
3291               tsum_of_insertions_and_deletions -= conn->amountStuffed;
3292             }
3293             tsum_of_corrections += conn->amountStuffed;
3294             conn->session_corrections += conn->amountStuffed;
3295
3296             newest_statistic = (newest_statistic + 1) % trend_interval;
3297             number_of_statistics++;
3298           }
3299         }
3300       }
3301     }
3302   }
3303
3304   debug(1, "This should never be called.");
3305   pthread_cleanup_pop(1); // pop the cleanup handler
3306                           //  debug(1, "This should never be called either.");
3307                           //  pthread_cleanup_pop(1); // pop the initial cleanup handler
3308   pthread_exit(NULL);
3309 }
3310
3311 void player_volume_without_notification(double airplay_volume, rtsp_conn_info *conn) {
3312   debug_mutex_lock(&conn->volume_control_mutex, 5000, 1);
3313   // first, see if we are hw only, sw only, both with hw attenuation on the top or both with sw
3314   // attenuation on top
3315
3316   enum volume_mode_type { vol_sw_only, vol_hw_only, vol_both } volume_mode;
3317
3318   // take account of whether there is a hardware mixer, if a max volume has been specified and if a
3319   // range has been specified
3320   // the range might imply that both hw and software mixers are needed, so calculate this
3321
3322   int32_t hw_max_db = 0, hw_min_db = 0; // zeroed to quieten an incorrect uninitialised warning
3323   int32_t sw_max_db = 0, sw_min_db = -9630;
3324
3325   if (config.output->parameters) {
3326     volume_mode = vol_hw_only;
3327     audio_parameters audio_information;
3328     config.output->parameters(&audio_information);
3329     hw_max_db = audio_information.maximum_volume_dB;
3330     hw_min_db = audio_information.minimum_volume_dB;
3331     if (config.volume_max_db_set) {
3332       if (((config.volume_max_db * 100) <= hw_max_db) &&
3333           ((config.volume_max_db * 100) >= hw_min_db))
3334         hw_max_db = (int32_t)config.volume_max_db * 100;
3335       else if (config.volume_range_db) {
3336         hw_max_db = hw_min_db;
3337         sw_max_db = (config.volume_max_db * 100) - hw_min_db;
3338       } else {
3339         warn("The maximum output level is outside the range of the hardware mixer -- ignored");
3340       }
3341     }
3342
3343     // here, we have set limits on the hw_max_db and the sw_max_db
3344     // but we haven't actually decided whether we need both hw and software attenuation
3345     // only if a range is specified could we need both
3346     if (config.volume_range_db) {
3347       // see if the range requested exceeds the hardware range available
3348       int32_t desired_range_db = (int32_t)trunc(config.volume_range_db * 100);
3349       if ((desired_range_db) > (hw_max_db - hw_min_db)) {
3350         volume_mode = vol_both;
3351         int32_t desired_sw_range = desired_range_db - (hw_max_db - hw_min_db);
3352         if ((sw_max_db - desired_sw_range) < sw_min_db)
3353           warn("The range requested is too large to accommodate -- ignored.");
3354         else
3355           sw_min_db = (sw_max_db - desired_sw_range);
3356       } else {
3357         hw_min_db = hw_max_db - desired_range_db;
3358       }
3359     }
3360   } else {
3361     // debug(1,"has no hardware mixer");
3362     volume_mode = vol_sw_only;
3363     if (config.volume_max_db_set) {
3364       if (((config.volume_max_db * 100) <= sw_max_db) &&
3365           ((config.volume_max_db * 100) >= sw_min_db))
3366         sw_max_db = (int32_t)config.volume_max_db * 100;
3367     }
3368     if (config.volume_range_db) {
3369       // see if the range requested exceeds the software range available
3370       int32_t desired_range_db = (int32_t)trunc(config.volume_range_db * 100);
3371       if ((desired_range_db) > (sw_max_db - sw_min_db))
3372         warn("The range requested is too large to accommodate -- ignored.");
3373       else
3374         sw_min_db = (sw_max_db - desired_range_db);
3375     }
3376   }
3377
3378   // here, we know whether it's hw volume control only, sw only or both, and we have the hw and sw
3379   // limits.
3380   // if it's both, we haven't decided whether hw or sw should be on top
3381   // we have to consider the settings ignore_volume_control and mute.
3382
3383   if (airplay_volume == -144.0) {
3384
3385     if ((config.output->mute) && (config.output->mute(1) == 0))
3386       debug(2,
3387             "player_volume_without_notification: volume mode is %d, airplay_volume is %f, "
3388             "hardware mute is enabled.",
3389             volume_mode, airplay_volume);
3390     else {
3391       conn->software_mute_enabled = 1;
3392       debug(2,
3393             "player_volume_without_notification: volume mode is %d, airplay_volume is %f, "
3394             "software mute is enabled.",
3395             volume_mode, airplay_volume);
3396     }
3397
3398   } else {
3399     int32_t max_db = 0, min_db = 0;
3400     switch (volume_mode) {
3401     case vol_hw_only:
3402       max_db = hw_max_db;
3403       min_db = hw_min_db;
3404       break;
3405     case vol_sw_only:
3406       max_db = sw_max_db;
3407       min_db = sw_min_db;
3408       break;
3409     case vol_both:
3410       // debug(1, "dB range passed is hw: %d, sw: %d, total: %d", hw_max_db - hw_min_db,
3411       //      sw_max_db - sw_min_db, (hw_max_db - hw_min_db) + (sw_max_db - sw_min_db));
3412       max_db =
3413           (hw_max_db - hw_min_db) + (sw_max_db - sw_min_db); // this should be the range requested
3414       min_db = 0;
3415       break;
3416     default:
3417       debug(1, "player_volume_without_notification: error: not in a volume mode");
3418       break;
3419     }
3420     double scaled_attenuation = max_db;
3421     if (config.ignore_volume_control == 0) {
3422
3423       if (config.volume_control_profile == VCP_standard)
3424         scaled_attenuation = vol2attn(airplay_volume, max_db, min_db); // no cancellation points
3425       else if (config.volume_control_profile == VCP_flat)
3426         scaled_attenuation =
3427             flat_vol2attn(airplay_volume, max_db, min_db); // no cancellation points
3428       else if (config.volume_control_profile == VCP_dasl_tapered)
3429         scaled_attenuation =
3430             dasl_tapered_vol2attn(airplay_volume, max_db, min_db); // no cancellation points
3431       else
3432         debug(1, "player_volume_without_notification: unrecognised volume control profile");
3433     }
3434     // so here we have the scaled attenuation. If it's for hw or sw only, it's straightforward.
3435     double hardware_attenuation = 0.0;
3436     double software_attenuation = 0.0;
3437
3438     switch (volume_mode) {
3439     case vol_hw_only:
3440       hardware_attenuation = scaled_attenuation;
3441       break;
3442     case vol_sw_only:
3443       software_attenuation = scaled_attenuation;
3444       break;
3445     case vol_both:
3446       // here, we now the attenuation required, so we have to apportion it to the sw and hw mixers
3447       // if we give the hw priority, that means when lowering the volume, set the hw volume to its
3448       // lowest
3449       // before using the sw attenuation.
3450       // similarly, if we give the sw priority, that means when lowering the volume, set the sw
3451       // volume to its lowest
3452       // before using the hw attenuation.
3453       // one imagines that hw priority is likely to be much better
3454       // if (config.volume_range_hw_priority) {
3455       if (config.volume_range_hw_priority != 0) {
3456         // hw priority
3457         if ((sw_max_db - sw_min_db) > scaled_attenuation) {
3458           software_attenuation = sw_min_db + scaled_attenuation;
3459           hardware_attenuation = hw_min_db;
3460         } else {
3461           software_attenuation = sw_max_db;
3462           hardware_attenuation = hw_min_db + scaled_attenuation - (sw_max_db - sw_min_db);
3463         }
3464       } else {
3465         // sw priority
3466         if ((hw_max_db - hw_min_db) > scaled_attenuation) {
3467           hardware_attenuation = hw_min_db + scaled_attenuation;
3468           software_attenuation = sw_min_db;
3469         } else {
3470           hardware_attenuation = hw_max_db;
3471           software_attenuation = sw_min_db + scaled_attenuation - (hw_max_db - hw_min_db);
3472         }
3473       }
3474       break;
3475     default:
3476       debug(1, "player_volume_without_notification: error: not in a volume mode");
3477       break;
3478     }
3479
3480     if (((volume_mode == vol_hw_only) || (volume_mode == vol_both)) && (config.output->volume)) {
3481       config.output->volume(hardware_attenuation); // otherwise set the output to the lowest value
3482       // debug(1,"Hardware attenuation set to %f for airplay volume of
3483       // %f.",hardware_attenuation,airplay_volume);
3484       if (volume_mode == vol_hw_only)
3485         conn->fix_volume = 0x10000;
3486     }
3487
3488     if ((volume_mode == vol_sw_only) || (volume_mode == vol_both)) {
3489       double temp_fix_volume = 65536.0 * pow(10, software_attenuation / 2000);
3490
3491       if (config.ignore_volume_control == 0)
3492         debug(2, "Software attenuation set to %f, i.e %f out of 65,536, for airplay volume of %f",
3493               software_attenuation, temp_fix_volume, airplay_volume);
3494       else
3495         debug(2, "Software attenuation set to %f, i.e %f out of 65,536. Volume control is ignored.",
3496               software_attenuation, temp_fix_volume);
3497
3498       conn->fix_volume = temp_fix_volume;
3499
3500       // if (config.loudness)
3501       loudness_set_volume(software_attenuation / 100);
3502     }
3503
3504     if (config.logOutputLevel) {
3505       inform("Output Level set to: %.2f dB.", scaled_attenuation / 100.0);
3506     }
3507
3508 #ifdef CONFIG_METADATA
3509     // here, send the 'pvol' metadata message when the airplay volume information
3510     // is being used by shairport sync to control the output volume
3511     char dv[128];
3512     memset(dv, 0, 128);
3513     if (config.ignore_volume_control == 0) {
3514       if (volume_mode == vol_both) {
3515         // normalise the maximum output to the hardware device's max output
3516         snprintf(dv, 127, "%.2f,%.2f,%.2f,%.2f", airplay_volume,
3517                  (scaled_attenuation - max_db + hw_max_db) / 100.0,
3518                  (min_db - max_db + hw_max_db) / 100.0, (max_db - max_db + hw_max_db) / 100.0);
3519       } else {
3520         snprintf(dv, 127, "%.2f,%.2f,%.2f,%.2f", airplay_volume, scaled_attenuation / 100.0,
3521                  min_db / 100.0, max_db / 100.0);
3522       }
3523     } else {
3524       snprintf(dv, 127, "%.2f,%.2f,%.2f,%.2f", airplay_volume, 0.0, 0.0, 0.0);
3525     }
3526     send_ssnc_metadata('pvol', dv, strlen(dv), 1);
3527 #endif
3528
3529     if (config.output->mute)
3530       config.output->mute(0);
3531     conn->software_mute_enabled = 0;
3532
3533     debug(2,
3534           "player_volume_without_notification: volume mode is %d, airplay volume is %.2f, "
3535           "software_attenuation dB: %.2f, hardware_attenuation dB: %.2f, muting "
3536           "is disabled.",
3537           volume_mode, airplay_volume, software_attenuation / 100.0, hardware_attenuation / 100.0);
3538   }
3539   // here, store the volume for possible use in the future
3540   config.airplay_volume = airplay_volume;
3541   conn->own_airplay_volume = airplay_volume;
3542   debug_mutex_unlock(&conn->volume_control_mutex, 3);
3543 }
3544
// Set the volume for a connection: the new airplay_volume is first passed to
// command_set_volume() and then applied through player_volume_without_notification().
void player_volume(double airplay_volume, rtsp_conn_info *conn) {
  command_set_volume(airplay_volume);
  player_volume_without_notification(airplay_volume, conn);
}
3549
3550 void do_flush(uint32_t timestamp, rtsp_conn_info *conn) {
3551
3552   debug(3, "do_flush: flush to %u.", timestamp);
3553   debug_mutex_lock(&conn->flush_mutex, 1000, 1);
3554   conn->flush_requested = 1;
3555   conn->flush_rtp_timestamp = timestamp; // flush all packets up to, but not including, this one.
3556   reset_input_flow_metrics(conn);
3557   debug_mutex_unlock(&conn->flush_mutex, 3);
3558 }
3559
3560 void player_flush(uint32_t timestamp, rtsp_conn_info *conn) {
3561   debug(3, "player_flush");
3562   do_flush(timestamp, conn);
3563 #ifdef CONFIG_METADATA
3564   // only send a flush metadata message if the first packet has been seen -- it's a bogus message
3565   // otherwise
3566   if (conn->first_packet_timestamp) {
3567     char numbuf[32];
3568     snprintf(numbuf, sizeof(numbuf), "%u", timestamp);
3569     send_ssnc_metadata('pfls', numbuf, strlen(numbuf), 1); // contains cancellation points
3570   }
3571 #endif
3572 }
3573
3574 /*
3575 void player_full_flush(rtsp_conn_info *conn) {
3576   debug(3, "player_full_flush");
3577   // this basically flushes everything from the player
  // here, find the rtptime of the last frame in the buffer and add 1 to it
3579   // so as to ask to flush everything
3580   int flush_needed = 0;
3581   uint32_t rtpTimestamp;
3582   debug_mutex_lock(&conn->ab_mutex, 30000, 0);
3583   if ((conn->ab_synced != 0) && (conn->ab_write != conn->ab_read)) {
3584     abuf_t *abuf = NULL;
3585     seq_t last_seqno_written;
3586     do {
3587       last_seqno_written = conn->ab_write - 1;
3588       abuf = conn->audio_buffer + BUFIDX(last_seqno_written);
3589     } while ((abuf->ready == 0) && (last_seqno_written != conn->ab_read));
3590     if ((abuf != NULL) && (abuf->ready != 0)) {
3591       rtpTimestamp = abuf->given_timestamp + abuf->length + 1;
3592       debug(2, "full flush needed to %u", rtpTimestamp);
3593       flush_needed = 1;
3594     } else {
3595       debug(2, "full flush not needed");
3596     }
3597   } else {
3598     debug(2, "full flush not needed -- buffers empty or not synced");
3599   }
3600   debug_mutex_unlock(&conn->ab_mutex, 0);
3601   if (flush_needed)
3602     player_flush(rtpTimestamp, conn);
3603 }
3604 */
3605
// prepare_to_play and play are split so that we can get the capabilities of the
3607 // dac etc. before initialising any decoders etc.
3608 // for example, if we have 32-bit DACs, we can ask for 32 bit decodes
3609
3610 int player_prepare_to_play(rtsp_conn_info *conn) {
3611   // need to use conn in place of stream below. Need to put the stream as a parameter to he
3612   if (conn->player_thread != NULL)
3613     die("Trying to create a second player thread for this RTSP session");
3614   if (config.buffer_start_fill > BUFFER_FRAMES)
3615     die("specified buffer starting fill %d > buffer size %d", config.buffer_start_fill,
3616         BUFFER_FRAMES);
3617   // active, and should be before play's command hook, command_start()
3618   command_start();
3619   conn->input_bytes_per_frame = 4; // default -- may be changed later
3620   // call on the output device to prepare itself
3621   if ((config.output) && (config.output->prepare))
3622     config.output->prepare();
3623   return 0;
3624 }
3625
3626 int player_play(rtsp_conn_info *conn) {
3627   debug(2, "Connection %d: player_play.", conn->connection_number);
3628   pthread_cleanup_debug_mutex_lock(&conn->player_create_delete_mutex, 5000, 1);
3629   if (conn->player_thread == NULL) {
3630     pthread_t *pt = malloc(sizeof(pthread_t));
3631     if (pt == NULL)
3632       die("Couldn't allocate space for pthread_t");
3633     int rc = pthread_create(pt, NULL, player_thread_func, (void *)conn);
3634     if (rc)
3635       debug(1, "Connection %d: error creating player_thread: %s", conn->connection_number,
3636             strerror(errno));
3637     conn->player_thread = pt; // set _after_ creation of thread
3638   } else {
3639     debug(1, "Connection %d: player thread already exists.", conn->connection_number);
3640   }
3641   pthread_cleanup_pop(1); // release the player_create_delete_mutex
3642 #ifdef CONFIG_METADATA
3643   send_ssnc_metadata('pbeg', NULL, 0, 1); // contains cancellation points
3644 #endif
3645   return 0;
3646 }
3647
3648 int player_stop(rtsp_conn_info *conn) {
3649   // note -- this may be called from another connection thread.
3650   debug(2, "Connection %d: player_stop.", conn->connection_number);
3651   int response = 0; // okay
3652   pthread_cleanup_debug_mutex_lock(&conn->player_create_delete_mutex, 5000, 1);
3653   pthread_t *pt = conn->player_thread;
3654   if (pt) {
3655     debug(3, "player_thread cancel...");
3656     conn->player_thread = NULL; // cleared _before_ cancelling of thread
3657     pthread_cancel(*pt);
3658     debug(3, "player_thread join...");
3659     if (pthread_join(*pt, NULL) == -1) {
3660       char errorstring[1024];
3661       strerror_r(errno, (char *)errorstring, sizeof(errorstring));
3662       debug(1, "Connection %d: error %d joining player thread: \"%s\".", conn->connection_number,
3663             errno, (char *)errorstring);
3664     } else {
3665       debug(2, "Connection %d: player_stop successful.", conn->connection_number);
3666     }
3667     free(pt);
3668     response = 0; // deleted
3669   } else {
3670     debug(2, "Connection %d: no player thread.", conn->connection_number);
3671     response = -1; // already deleted or never created...
3672   }
3673   pthread_cleanup_pop(1); // release the player_create_delete_mutex
3674   if (response == 0) {    // if the thread was just stopped and deleted...
3675 #ifdef CONFIG_AIRPLAY_2
3676     ptp_send_control_message_string("E"); // signify play is "E"nding
3677 #endif
3678 #ifdef CONFIG_METADATA
3679     send_ssnc_metadata('pend', NULL, 0, 1); // contains cancellation points
3680 #endif
3681     command_stop();
3682   }
3683   return response;
3684 }