From 7b9cd28e9050e82ab2bce42405a628f230cb8b11 Mon Sep 17 00:00:00 2001 From: Yann Pomarede Date: Mon, 27 Feb 2017 22:43:23 +0100 Subject: [PATCH] 2 audio filters added, a volume-dependent-loudness and a convolution filter. --- .gitignore | 6 +- FFTConvolver/AudioFFT.cpp | 1018 +++++++++++++++++++++++++++++++++ FFTConvolver/AudioFFT.h | 176 ++++++ FFTConvolver/FFTConvolver.cpp | 202 +++++++ FFTConvolver/FFTConvolver.h | 100 ++++ FFTConvolver/Utilities.cpp | 113 ++++ FFTConvolver/Utilities.h | 351 ++++++++++++ FFTConvolver/convolver.cpp | 67 +++ FFTConvolver/convolver.h | 14 + Makefile.am | 6 +- common.h | 13 +- configure.ac | 11 +- loudness.c | 55 ++ loudness.h | 16 + player.c | 85 ++- scripts/shairport-sync.conf | 30 + shairport.c | 70 ++- 17 files changed, 2322 insertions(+), 11 deletions(-) create mode 100755 FFTConvolver/AudioFFT.cpp create mode 100755 FFTConvolver/AudioFFT.h create mode 100755 FFTConvolver/FFTConvolver.cpp create mode 100755 FFTConvolver/FFTConvolver.h create mode 100644 FFTConvolver/Utilities.cpp create mode 100644 FFTConvolver/Utilities.h create mode 100644 FFTConvolver/convolver.cpp create mode 100644 FFTConvolver/convolver.h create mode 100644 loudness.c create mode 100644 loudness.h diff --git a/.gitignore b/.gitignore index 9e055931..d98a86df 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ /INSTALL /shairport-sync /shairport-sync.exe -/*.o +*.o /*~ *.xml~ /config.mk @@ -26,3 +26,7 @@ scripts/shairport-sync # Some eclipse project files .cproject .project + +# macOS stuff +.DS_Store +shairport-sync.xcodeproj diff --git a/FFTConvolver/AudioFFT.cpp b/FFTConvolver/AudioFFT.cpp new file mode 100755 index 00000000..32b3ddaa --- /dev/null +++ b/FFTConvolver/AudioFFT.cpp @@ -0,0 +1,1018 @@ +// ================================================================================== +// Copyright (c) 2016 HiFi-LoFi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ================================================================================== + +#include "AudioFFT.h" + +#include +#include +#include + + +#if defined(AUDIOFFT_APPLE_ACCELERATE) + #define AUDIOFFT_APPLE_ACCELERATE_USED + #include + #include +#elif defined (AUDIOFFT_FFTW3) + #define AUDIOFFT_FFTW3_USED + #include +#else + #if !defined(AUDIOFFT_OOURA) + #define AUDIOFFT_OOURA + #endif + #define AUDIOFFT_OOURA_USED + #include +#endif + + +namespace audiofft +{ + + namespace details + { + + static bool IsPowerOf2(size_t val) + { + return (val == 1 || (val & (val-1)) == 0); + } + + + template + void ConvertBuffer(TypeDest* dest, const TypeSrc* src, size_t len) + { + for (size_t i=0; i(src[i]); + } + } + + + template + void ScaleBuffer(TypeDest* dest, const TypeSrc* src, const TypeFactor factor, size_t len) + { + for (size_t i=0; i(static_cast(src[i]) * factor); + } + } + + + // ================================================================ + + +#ifdef AUDIOFFT_OOURA_USED + + /** + * @internal + * @class OouraFFT + * @brief FFT implementation based on the great radix-4 routines by Takuya Ooura + */ + class OouraFFT : public AudioFFTImpl + { + public: + OouraFFT() : + AudioFFTImpl(), + _size(0), + _ip(), + _w(), + _buffer() + { + } + + virtual void init(size_t size) override + { + if (_size != size) + { + _ip.resize(2 + static_cast(std::sqrt(static_cast(size)))); + _w.resize(size / 2); + _buffer.resize(size); + _size = size; + + const int size4 = static_cast(_size) / 4; + makewt(size4, _ip.data(), _w.data()); + makect(size4, _ip.data(), _w.data() + size4); + } + } + + virtual void fft(const float* data, float* re, float* im) override + { + // Convert into the format as required by the Ooura FFT + ConvertBuffer(&_buffer[0], data, _size); + + rdft(static_cast(_size), +1, _buffer.data(), _ip.data(), _w.data()); + + // Convert back to split-complex + { + double* b = &_buffer[0]; + double* bEnd = b + _size; + float *r = re; + float *i = im; + while (b != bEnd) + { + *(r++) = static_cast(*(b++)); + *(i++) = static_cast(-(*(b++))); + } + } + const size_t size2 = _size / 2; + re[size2] = -im[0]; + im[0] = 0.0; + im[size2] = 0.0; + } + + virtual void ifft(float* data, const float* re, const float* im) override + { + // Convert into the format as required by the Ooura FFT + { + double* b = &_buffer[0]; + double* bEnd = b + _size; + const float *r = re; + const float *i = im; + while (b != bEnd) + { + *(b++) = static_cast(*(r++)); + *(b++) = -static_cast(*(i++)); + } + _buffer[1] = re[_size / 2]; + } + + rdft(static_cast(_size), -1, _buffer.data(), _ip.data(), _w.data()); + + // Convert back to split-complex + ScaleBuffer(data, &_buffer[0], 2.0 / static_cast(_size), _size); + } + + private: + size_t _size; + std::vector _ip; + std::vector _w; + std::vector _buffer; + + void rdft(int n, int isgn, double *a, int *ip, double *w) + { + int nw = ip[0]; + int nc = ip[1]; + + if (isgn >= 0) + { + if (n > 4) + { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } + else if (n == 4) + { + cftfsub(n, a, w); + } + double xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } + else + { + a[1] = 0.5 * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) + { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } + else if (n == 4) + { + cftfsub(n, a, w); + } + } + } + + + /* -------- initializing routines -------- */ + + void makewt(int nw, int *ip, double *w) + { + int j, nwh; + double delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atan(1.0) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = cos(delta * nwh); + w[nwh + 1] = w[nwh]; + if (nwh > 2) { + for (j = 2; j < nwh; j += 2) { + x = cos(delta * j); + y = sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } + } + } + + + void makect(int nc, int *ip, double *c) + { + int j, nch; + double delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atan(1.0) / nch; + c[0] = cos(delta * nch); + c[nch] = 0.5 * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5 * cos(delta * j); + c[nc - j] = 0.5 * sin(delta * j); + } + } + } + + + /* -------- child routines -------- */ + + + void bitrv2(int n, int *ip, double *a) + { + int j, j1, k, k1, l, m, m2; + double xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } + } + + + void cftfsub(int n, double *a, double *w) + { + int j, j1, j2, j3, l; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = a[j + 1] - a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] += a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } + } + + + void cftbsub(int n, double *a, double *w) + { + int j, j1, j2, j3, l; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = -a[j + 1] + a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] = -a[j + 1] - a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } + } + + + void cft1st(int n, double *a, double *w) + { + int j, k1, k2; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } + } + + + void cftmdl(int n, int l, double *a, double *w) + { + int j, j1, j2, j3, k, k1, k2, m, m2; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j < l + k; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + for (j = k + m; j < l + (k + m); j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } + } + + + void rftfsub(int n, double *a, int nc, double *c) + { + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } + } + + + void rftbsub(int n, double *a, int nc, double *c) + { + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] = yi - a[j + 1]; + a[k] += yr; + a[k + 1] = yi - a[k + 1]; + } + a[m + 1] = -a[m + 1]; + } + + OouraFFT(const OouraFFT&) = delete; + OouraFFT& operator=(const OouraFFT&) = delete; + }; + + std::unique_ptr MakeAudioFFTImpl() + { + return std::unique_ptr(new OouraFFT()); + } + + +#endif // AUDIOFFT_OOURA_USED + + + // ================================================================ + + +#ifdef AUDIOFFT_APPLE_ACCELERATE_USED + + + /** + * @internal + * @class AppleAccelerateFFT + * @brief FFT implementation using the Apple Accelerate framework internally + */ + class AppleAccelerateFFT : public AudioFFTImpl + { + public: + AppleAccelerateFFT() : + AudioFFTImpl(), + _size(0), + _powerOf2(0), + _fftSetup(0), + _re(), + _im() + { + } + + virtual ~AppleAccelerateFFT() + { + init(0); + } + + virtual void init(size_t size) override + { + if (_fftSetup) + { + vDSP_destroy_fftsetup(_fftSetup); + _size = 0; + _powerOf2 = 0; + _fftSetup = 0; + _re.clear(); + _im.clear(); + } + + if (size > 0) + { + _size = size; + _powerOf2 = 0; + while ((1 << _powerOf2) < _size) + { + ++_powerOf2; + } + _fftSetup = vDSP_create_fftsetup(_powerOf2, FFT_RADIX2); + _re.resize(_size / 2); + _im.resize(_size / 2); + } + } + + virtual void fft(const float* data, float* re, float* im) override + { + const size_t size2 = _size / 2; + DSPSplitComplex splitComplex; + splitComplex.realp = re; + splitComplex.imagp = im; + vDSP_ctoz(reinterpret_cast(data), 2, &splitComplex, 1, size2); + vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_FORWARD); + const float factor = 0.5f; + vDSP_vsmul(re, 1, &factor, re, 1, size2); + vDSP_vsmul(im, 1, &factor, im, 1, size2); + re[size2] = im[0]; + im[0] = 0.0f; + im[size2] = 0.0f; + } + + virtual void ifft(float* data, const float* re, const float* im) override + { + const size_t size2 = _size / 2; + ::memcpy(_re.data(), re, size2 * sizeof(float)); + ::memcpy(_im.data(), im, size2 * sizeof(float)); + _im[0] = re[size2]; + DSPSplitComplex splitComplex; + splitComplex.realp = _re.data(); + splitComplex.imagp = _im.data(); + vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_INVERSE); + vDSP_ztoc(&splitComplex, 1, reinterpret_cast(data), 2, size2); + const float factor = 1.0f / static_cast(_size); + vDSP_vsmul(data, 1, &factor, data, 1, _size); + } + + private: + size_t _size; + size_t _powerOf2; + FFTSetup _fftSetup; + std::vector _re; + std::vector _im; + + AppleAccelerateFFT(const AppleAccelerateFFT&) = delete; + AppleAccelerateFFT& operator=(const AppleAccelerateFFT&) = delete; + }; + + + std::unique_ptr MakeAudioFFTImpl() + { + return std::unique_ptr(new AppleAccelerateFFT()); + } + + +#endif // AUDIOFFT_APPLE_ACCELERATE_USED + + + // ================================================================ + + +#ifdef AUDIOFFT_FFTW3_USED + + + /** + * @internal + * @class FFTW3FFT + * @brief FFT implementation using FFTW3 internally (see fftw.org) + */ + class FFTW3FFT : public AudioFFTImpl + { + public: + FFTW3FFT() : + AudioFFTImpl(), + _size(0), + _complexSize(0), + _planForward(0), + _planBackward(0), + _data(0), + _re(0), + _im(0) + { + } + + virtual ~FFTW3FFT() + { + init(0); + } + + virtual void init(size_t size) override + { + if (_size != size) + { + if (_size > 0) + { + fftwf_destroy_plan(_planForward); + fftwf_destroy_plan(_planBackward); + _planForward = 0; + _planBackward = 0; + _size = 0; + _complexSize = 0; + + if (_data) + { + fftwf_free(_data); + _data = 0; + } + + if (_re) + { + fftwf_free(_re); + _re = 0; + } + + if (_im) + { + fftwf_free(_im); + _im = 0; + } + } + + if (size > 0) + { + _size = size; + _complexSize = AudioFFT::ComplexSize(_size); + const size_t complexSize = AudioFFT::ComplexSize(_size); + _data = reinterpret_cast(fftwf_malloc(_size * sizeof(float))); + _re = reinterpret_cast(fftwf_malloc(complexSize * sizeof(float))); + _im = reinterpret_cast(fftwf_malloc(complexSize * sizeof(float))); + + fftw_iodim dim; + dim.n = static_cast(size); + dim.is = 1; + dim.os = 1; + _planForward = fftwf_plan_guru_split_dft_r2c(1, &dim, 0, 0, _data, _re, _im, FFTW_MEASURE); + _planBackward = fftwf_plan_guru_split_dft_c2r(1, &dim, 0, 0, _re, _im, _data, FFTW_MEASURE); + } + } + } + + virtual void fft(const float* data, float* re, float* im) override + { + ::memcpy(_data, data, _size * sizeof(float)); + fftwf_execute_split_dft_r2c(_planForward, _data, _re, _im); + ::memcpy(re, _re, _complexSize * sizeof(float)); + ::memcpy(im, _im, _complexSize * sizeof(float)); + } + + void ifft(float* data, const float* re, const float* im) + { + ::memcpy(_re, re, _complexSize * sizeof(float)); + ::memcpy(_im, im, _complexSize * sizeof(float)); + fftwf_execute_split_dft_c2r(_planBackward, _re, _im, _data); + ScaleBuffer(data, _data, 1.0f / static_cast(_size), _size); + } + + private: + size_t _size; + size_t _complexSize; + fftwf_plan _planForward; + fftwf_plan _planBackward; + float* _data; + float* _re; + float* _im; + + FFTW3FFT(const FFTW3FFT&) = delete; + FFTW3FFT& operator=(const FFTW3FFT&) = delete; + }; + + + std::unique_ptr MakeAudioFFTImpl() + { + return std::unique_ptr(new FFTW3FFT()); + } + + +#endif // AUDIOFFT_FFTW3_USED + + } // End of namespace details + + + // ============================================================= + + + AudioFFT::AudioFFT() : + _impl(details::MakeAudioFFTImpl()) + { + } + + + void AudioFFT::init(size_t size) + { + assert(details::IsPowerOf2(size)); + _impl->init(size); + } + + + void AudioFFT::fft(const float* data, float* re, float* im) + { + _impl->fft(data, re, im); + } + + + void AudioFFT::ifft(float* data, const float* re, const float* im) + { + _impl->ifft(data, re, im); + } + + + size_t AudioFFT::ComplexSize(size_t size) + { + return (size / 2) + 1; + } + +} // End of namespace diff --git a/FFTConvolver/AudioFFT.h b/FFTConvolver/AudioFFT.h new file mode 100755 index 00000000..80cd2edc --- /dev/null +++ b/FFTConvolver/AudioFFT.h @@ -0,0 +1,176 @@ +// ================================================================================== +// Copyright (c) 2016 HiFi-LoFi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ================================================================================== + +#ifndef _AUDIOFFT_H +#define _AUDIOFFT_H + + +/** +* AudioFFT provides real-to-complex/complex-to-real FFT routines. +* +* Features: +* +* - Real-complex FFT and complex-real inverse FFT for power-of-2-sized real data. +* +* - Uniform interface to different FFT implementations (currently Ooura, FFTW3 and Apple Accelerate). +* +* - Complex data is handled in "split-complex" format, i.e. there are separate +* arrays for the real and imaginary parts which can be useful for SIMD optimizations +* (split-complex arrays have to be of length (size/2+1) representing bins from DC +* to Nyquist frequency). +* +* - Output is "ready to use" (all scaling etc. is already handled internally). +* +* - No allocations/deallocations after the initialization which makes it usable +* for real-time audio applications (that's what I wrote it for and using it). +* +* +* How to use it in your project: +* +* - Add the .h and .cpp file to your project - that's all. +* +* - To get extra speed, you can link FFTW3 to your project and define +* AUDIOFFT_FFTW3 (however, please check whether your project suits the +* according license). +* +* - To get the best speed on Apple platforms, you can link the Apple +* Accelerate framework to your project and define +* AUDIOFFT_APPLE_ACCELERATE (however, please check whether your +* project suits the according license). +* +* +* Remarks: +* +* - AudioFFT is not intended to be the fastest FFT, but to be a fast-enough +* FFT suitable for most audio applications. +* +* - AudioFFT uses the quite liberal MIT license. +* +* +* Example usage: +* @code +* #include "AudioFFT.h" +* +* void Example() +* { +* const size_t fftSize = 1024; // Needs to be power of 2! +* +* std::vector input(fftSize, 0.0f); +* std::vector re(audiofft::AudioFFT::ComplexSize(fftSize)); +* std::vector im(audiofft::AudioFFT::ComplexSize(fftSize)); +* std::vector output(fftSize); +* +* audiofft::AudioFFT fft; +* fft.init(1024); +* fft.fft(input.data(), re.data(), im.data()); +* fft.ifft(output.data(), re.data(), im.data()); +* } +* @endcode +*/ + + +#include +#include + + +namespace audiofft +{ + + namespace details + { + + class AudioFFTImpl + { + public: + AudioFFTImpl() = default; + virtual ~AudioFFTImpl() = default; + virtual void init(size_t size) = 0; + virtual void fft(const float* data, float* re, float* im) = 0; + virtual void ifft(float* data, const float* re, const float* im) = 0; + + private: + AudioFFTImpl(const AudioFFTImpl&) = delete; + AudioFFTImpl& operator=(const AudioFFTImpl&) = delete; + }; + } + + + // ====================================================== + + + /** + * @class AudioFFT + * @brief Performs 1D FFTs + */ + class AudioFFT + { + public: + /** + * @brief Constructor + */ + AudioFFT(); + + /** + * @brief Initializes the FFT object + * @param size Size of the real input (must be power 2) + */ + void init(size_t size); + + /** + * @brief Performs the forward FFT + * @param data The real input data (has to be of the length as specified in init()) + * @param re The real part of the complex output (has to be of length as returned by ComplexSize()) + * @param im The imaginary part of the complex output (has to be of length as returned by ComplexSize()) + */ + void fft(const float* data, float* re, float* im); + + /** + * @brief Performs the inverse FFT + * @param data The real output data (has to be of the length as specified in init()) + * @param re The real part of the complex input (has to be of length as returned by ComplexSize()) + * @param im The imaginary part of the complex input (has to be of length as returned by ComplexSize()) + */ + void ifft(float* data, const float* re, const float* im); + + /** + * @brief Calculates the necessary size of the real/imaginary complex arrays + * @param size The size of the real data + * @return The size of the real/imaginary complex arrays + */ + static size_t ComplexSize(size_t size); + + private: + std::unique_ptr _impl; + + AudioFFT(const AudioFFT&) = delete; + AudioFFT& operator=(const AudioFFT&) = delete; + }; + + + /** + * @deprecated + * @brief Let's keep an AudioFFTBase type around for now because it has been here already in the 1st version in order to avoid breaking existing code. + */ + typedef AudioFFT AudioFFTBase; + +} // End of namespace + +#endif // Header guard diff --git a/FFTConvolver/FFTConvolver.cpp b/FFTConvolver/FFTConvolver.cpp new file mode 100755 index 00000000..049dbed4 --- /dev/null +++ b/FFTConvolver/FFTConvolver.cpp @@ -0,0 +1,202 @@ +// ================================================================================== +// Copyright (c) 2012 HiFi-LoFi +// +// This is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// ================================================================================== + +#include "FFTConvolver.h" + +#include +#include + +#if defined (FFTCONVOLVER_USE_SSE) + #include +#endif + + +namespace fftconvolver +{ + +FFTConvolver::FFTConvolver() : + _blockSize(0), + _segSize(0), + _segCount(0), + _fftComplexSize(0), + _segments(), + _segmentsIR(), + _fftBuffer(), + _fft(), + _preMultiplied(), + _conv(), + _overlap(), + _current(0), + _inputBuffer(), + _inputBufferFill(0) +{ +} + + +FFTConvolver::~FFTConvolver() +{ + reset(); +} + + +void FFTConvolver::reset() +{ + for (size_t i=0; i<_segCount; ++i) + { + delete _segments[i]; + delete _segmentsIR[i]; + } + + _blockSize = 0; + _segSize = 0; + _segCount = 0; + _fftComplexSize = 0; + _segments.clear(); + _segmentsIR.clear(); + _fftBuffer.clear(); + _fft.init(0); + _preMultiplied.clear(); + _conv.clear(); + _overlap.clear(); + _current = 0; + _inputBuffer.clear(); + _inputBufferFill = 0; +} + + +bool FFTConvolver::init(size_t blockSize, const Sample* ir, size_t irLen) +{ + reset(); + + if (blockSize == 0) + { + return false; + } + + // Ignore zeros at the end of the impulse response because they only waste computation time + while (irLen > 0 && ::fabs(ir[irLen-1]) < 0.000001f) + { + --irLen; + } + + if (irLen == 0) + { + return true; + } + + _blockSize = NextPowerOf2(blockSize); + _segSize = 2 * _blockSize; + _segCount = static_cast(::ceil(static_cast(irLen) / static_cast(_blockSize))); + _fftComplexSize = audiofft::AudioFFT::ComplexSize(_segSize); + + // FFT + _fft.init(_segSize); + _fftBuffer.resize(_segSize); + + // Prepare segments + for (size_t i=0; i<_segCount; ++i) + { + _segments.push_back(new SplitComplex(_fftComplexSize)); + } + + // Prepare IR + for (size_t i=0; i<_segCount; ++i) + { + SplitComplex* segment = new SplitComplex(_fftComplexSize); + const size_t remaining = irLen - (i * _blockSize); + const size_t sizeCopy = (remaining >= _blockSize) ? _blockSize : remaining; + CopyAndPad(_fftBuffer, &ir[i*_blockSize], sizeCopy); + _fft.fft(_fftBuffer.data(), segment->re(), segment->im()); + _segmentsIR.push_back(segment); + } + + // Prepare convolution buffers + _preMultiplied.resize(_fftComplexSize); + _conv.resize(_fftComplexSize); + _overlap.resize(_blockSize); + + // Prepare input buffer + _inputBuffer.resize(_blockSize); + _inputBufferFill = 0; + + // Reset current position + _current = 0; + + return true; +} + + +void FFTConvolver::process(const Sample* input, Sample* output, size_t len) +{ + if (_segCount == 0) + { + ::memset(output, 0, len * sizeof(Sample)); + return; + } + + size_t processed = 0; + while (processed < len) + { + const bool inputBufferWasEmpty = (_inputBufferFill == 0); + const size_t processing = std::min(len-processed, _blockSize-_inputBufferFill); + const size_t inputBufferPos = _inputBufferFill; + ::memcpy(_inputBuffer.data()+inputBufferPos, input+processed, processing * sizeof(Sample)); + + // Forward FFT + CopyAndPad(_fftBuffer, &_inputBuffer[0], _blockSize); + _fft.fft(_fftBuffer.data(), _segments[_current]->re(), _segments[_current]->im()); + + // Complex multiplication + if (inputBufferWasEmpty) + { + _preMultiplied.setZero(); + for (size_t i=1; i<_segCount; ++i) + { + const size_t indexIr = i; + const size_t indexAudio = (_current + i) % _segCount; + ComplexMultiplyAccumulate(_preMultiplied, *_segmentsIR[indexIr], *_segments[indexAudio]); + } + } + _conv.copyFrom(_preMultiplied); + ComplexMultiplyAccumulate(_conv, *_segments[_current], *_segmentsIR[0]); + + // Backward FFT + _fft.ifft(_fftBuffer.data(), _conv.re(), _conv.im()); + + // Add overlap + Sum(output+processed, _fftBuffer.data()+inputBufferPos, _overlap.data()+inputBufferPos, processing); + + // Input buffer full => Next block + _inputBufferFill += processing; + if (_inputBufferFill == _blockSize) + { + // Input buffer is empty again now + _inputBuffer.setZero(); + _inputBufferFill = 0; + + // Save the overlap + ::memcpy(_overlap.data(), _fftBuffer.data()+_blockSize, _blockSize * sizeof(Sample)); + + // Update current segment + _current = (_current > 0) ? (_current - 1) : (_segCount - 1); + } + + processed += processing; + } +} + +} // End of namespace fftconvolver diff --git a/FFTConvolver/FFTConvolver.h b/FFTConvolver/FFTConvolver.h new file mode 100755 index 00000000..0c446088 --- /dev/null +++ b/FFTConvolver/FFTConvolver.h @@ -0,0 +1,100 @@ +// ================================================================================== +// Copyright (c) 2012 HiFi-LoFi +// +// This is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// ================================================================================== + +#ifndef _FFTCONVOLVER_FFTCONVOLVER_H +#define _FFTCONVOLVER_FFTCONVOLVER_H + +#include "AudioFFT.h" +#include "Utilities.h" + +#include + + +namespace fftconvolver +{ + +/** +* @class FFTConvolver +* @brief Implementation of a partitioned FFT convolution algorithm with uniform block size +* +* Some notes on how to use it: +* +* - After initialization with an impulse response, subsequent data portions of +* arbitrary length can be convolved. The convolver internally can handle +* this by using appropriate buffering. +* +* - The convolver works without "latency" (except for the required +* processing time, of course), i.e. the output always is the convolved +* input for each processing call. +* +* - The convolver is suitable for real-time processing which means that no +* "unpredictable" operations like allocations, locking, API calls, etc. are +* performed during processing (all necessary allocations and preparations take +* place during initialization). +*/ +class FFTConvolver +{ +public: + FFTConvolver(); + virtual ~FFTConvolver(); + + /** + * @brief Initializes the convolver + * @param blockSize Block size internally used by the convolver (partition size) + * @param ir The impulse response + * @param irLen Length of the impulse response + * @return true: Success - false: Failed + */ + bool init(size_t blockSize, const Sample* ir, size_t irLen); + + /** + * @brief Convolves the the given input samples and immediately outputs the result + * @param input The input samples + * @param output The convolution result + * @param len Number of input/output samples + */ + void process(const Sample* input, Sample* output, size_t len); + + /** + * @brief Resets the convolver and discards the set impulse response + */ + void reset(); + +private: + size_t _blockSize; + size_t _segSize; + size_t _segCount; + size_t _fftComplexSize; + std::vector _segments; + std::vector _segmentsIR; + SampleBuffer _fftBuffer; + audiofft::AudioFFT _fft; + SplitComplex _preMultiplied; + SplitComplex _conv; + SampleBuffer _overlap; + size_t _current; + SampleBuffer _inputBuffer; + size_t _inputBufferFill; + + // Prevent uncontrolled usage + FFTConvolver(const FFTConvolver&); + FFTConvolver& operator=(const FFTConvolver&); +}; + +} // End of namespace fftconvolver + +#endif // Header guard diff --git a/FFTConvolver/Utilities.cpp b/FFTConvolver/Utilities.cpp new file mode 100644 index 00000000..a30ff148 --- /dev/null +++ b/FFTConvolver/Utilities.cpp @@ -0,0 +1,113 @@ +// ================================================================================== +// Copyright (c) 2012 HiFi-LoFi +// +// This is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// ================================================================================== + +#include "Utilities.h" + + +namespace fftconvolver +{ + +bool SSEEnabled() +{ +#if defined(FFTCONVOLVER_USE_SSE) + return true; +#else + return false; +#endif +} + + +void Sum(Sample* FFTCONVOLVER_RESTRICT result, + const Sample* FFTCONVOLVER_RESTRICT a, + const Sample* FFTCONVOLVER_RESTRICT b, + size_t len) +{ + const size_t end4 = 4 * (len / 4); + for (size_t i=0; i. +// ================================================================================== + +#ifndef _FFTCONVOLVER_UTILITIES_H +#define _FFTCONVOLVER_UTILITIES_H + +#include +#include +#include +#include +#include + + +namespace fftconvolver +{ + +#if defined(__SSE__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) + #if !defined(FFTCONVOLVER_USE_SSE) && !defined(FFTCONVOLVER_DONT_USE_SSE) + #define FFTCONVOLVER_USE_SSE + #endif +#endif + + +#if defined (FFTCONVOLVER_USE_SSE) + #include +#endif + + +#if defined(__GNUC__) + #define FFTCONVOLVER_RESTRICT __restrict__ +#else + #define FFTCONVOLVER_RESTRICT +#endif + + +/** +* @brief Returns whether SSE optimization for the convolver is enabled +* @return true: Enabled - false: Disabled +*/ +bool SSEEnabled(); + + +/** +* @class Buffer +* @brief Simple buffer implementation (uses 16-byte alignment if SSE optimization is enabled) +*/ +template +class Buffer +{ +public: + explicit Buffer(size_t initialSize = 0) : + _data(0), + _size(0) + { + resize(initialSize); + } + + virtual ~Buffer() + { + clear(); + } + + void clear() + { + deallocate(_data); + _data = 0; + _size = 0; + } + + void resize(size_t size) + { + if (_size != size) + { + clear(); + + if (size > 0) + { + assert(!_data && _size == 0); + _data = allocate(size); + _size = size; + } + } + setZero(); + } + + size_t size() const + { + return _size; + } + + void setZero() + { + ::memset(_data, 0, _size * sizeof(T)); + } + + void copyFrom(const Buffer& other) + { + assert(_size == other._size); + if (this != &other) + { + ::memcpy(_data, other._data, _size * sizeof(T)); + } + } + + T& operator[](size_t index) + { + assert(_data && index < _size); + return _data[index]; + } + + const T& operator[](size_t index) const + { + assert(_data && index < _size); + return _data[index]; + } + + operator bool() const + { + return (_data != 0 && _size > 0); + } + + T* data() + { + return _data; + } + + const T* data() const + { + return _data; + } + + static void Swap(Buffer& a, Buffer& b) + { + std::swap(a._data, b._data); + std::swap(a._size, b._size); + } + +private: + T* allocate(size_t size) + { +#if defined(FFTCONVOLVER_USE_SSE) + return static_cast(_mm_malloc(size * sizeof(T), 16)); +#else + return new T[size]; +#endif + } + + void deallocate(T* ptr) + { +#if defined(FFTCONVOLVER_USE_SSE) + _mm_free(ptr); +#else + delete [] ptr; +#endif + } + + T* _data; + size_t _size; + + // Prevent uncontrolled usage + Buffer(const Buffer&); + Buffer& operator=(const Buffer&); +}; + + +/** +* @brief Type of one sample +*/ +typedef float Sample; + + +/** +* @brief Buffer for samples +*/ +typedef Buffer SampleBuffer; + + +/** +* @class SplitComplex +* @brief Buffer for split-complex representation of FFT results +* +* The split-complex representation stores the real and imaginary parts +* of FFT results in two different memory buffers which is useful e.g. for +* SIMD optimizations. +*/ +class SplitComplex +{ +public: + explicit SplitComplex(size_t initialSize = 0) : + _size(0), + _re(), + _im() + { + resize(initialSize); + } + + ~SplitComplex() + { + clear(); + } + + void clear() + { + _re.clear(); + _im.clear(); + _size = 0; + } + + void resize(size_t newSize) + { + _re.resize(newSize); + _im.resize(newSize); + _size = newSize; + } + + void setZero() + { + _re.setZero(); + _im.setZero(); + } + + void copyFrom(const SplitComplex& other) + { + _re.copyFrom(other._re); + _im.copyFrom(other._im); + } + + Sample* re() + { + return _re.data(); + } + + const Sample* re() const + { + return _re.data(); + } + + Sample* im() + { + return _im.data(); + } + + const Sample* im() const + { + return _im.data(); + } + + size_t size() const + { + return _size; + } + +private: + size_t _size; + SampleBuffer _re; + SampleBuffer _im; + + // Prevent uncontrolled usage + SplitComplex(const SplitComplex&); + SplitComplex& operator=(const SplitComplex&); +}; + + +/** +* @brief Returns the next power of 2 of a given number +* @param val The number +* @return The next power of 2 +*/ +template +T NextPowerOf2(const T& val) +{ + T nextPowerOf2 = 1; + while (nextPowerOf2 < val) + { + nextPowerOf2 *= 2; + } + return nextPowerOf2; +} + + +/** +* @brief Sums two given sample arrays +* @param result The result array +* @param a The 1st array +* @param b The 2nd array +* @param len The length of the arrays +*/ +void Sum(Sample* FFTCONVOLVER_RESTRICT result, + const Sample* FFTCONVOLVER_RESTRICT a, + const Sample* FFTCONVOLVER_RESTRICT b, + size_t len); + + +/** +* @brief Copies a source array into a destination buffer and pads the destination buffer with zeros +* @param dest The destination buffer +* @param src The source array +* @param srcSize The size of the source array +*/ +template +void CopyAndPad(Buffer& dest, const T* src, size_t srcSize) +{ + assert(dest.size() >= srcSize); + ::memcpy(dest.data(), src, srcSize * sizeof(T)); + ::memset(dest.data() + srcSize, 0, (dest.size()-srcSize) * sizeof(T)); +} + + +/** +* @brief Adds the complex product of two split-complex buffers to a result buffer +* @param result The result buffer +* @param a The 1st factor of the complex product +* @param b The 2nd factor of the complex product +*/ +void ComplexMultiplyAccumulate(SplitComplex& result, const SplitComplex& a, const SplitComplex& b); + + +/** +* @brief Adds the complex product of two split-complex arrays to a result array +* @param re The real part of the result buffer +* @param im The imaginary part of the result buffer +* @param reA The real part of the 1st factor of the complex product +* @param imA The imaginary part of the 1st factor of the complex product +* @param reB The real part of the 2nd factor of the complex product +* @param imB The imaginary part of the 2nd factor of the complex product +*/ +void ComplexMultiplyAccumulate(Sample* FFTCONVOLVER_RESTRICT re, + Sample* FFTCONVOLVER_RESTRICT im, + const Sample* FFTCONVOLVER_RESTRICT reA, + const Sample* FFTCONVOLVER_RESTRICT imA, + const Sample* FFTCONVOLVER_RESTRICT reB, + const Sample* FFTCONVOLVER_RESTRICT imB, + const size_t len); + +} // End of namespace fftconvolver + +#endif // Header guard diff --git a/FFTConvolver/convolver.cpp b/FFTConvolver/convolver.cpp new file mode 100644 index 00000000..4589c8ed --- /dev/null +++ b/FFTConvolver/convolver.cpp @@ -0,0 +1,67 @@ + + +#include "convolver.h" +#include +#include "FFTConvolver.h" +#include "Utilities.h" +extern "C" { +#include "../common.h" +} + + +static fftconvolver::FFTConvolver convolver_l; +static fftconvolver::FFTConvolver convolver_r; + + +void convolver_init(const char* filename, int max_length) +{ + SF_INFO info; + assert(filename); + SNDFILE* file = sf_open(filename, SFM_READ, &info); + assert(file); + + if (info.samplerate != 44100) + die("Impulse file \"%s\" sample rate is %d Hz. Only 44100 Hz is supported", filename, info.samplerate); + + if (info.channels != 1 && info.channels != 2) + die("Impulse file \"%s\" contains %d channels. Only 1 or 2 is supported.", filename, info.channels); + + const size_t size = info.frames > max_length ? max_length : info.frames; + float buffer[size*info.channels]; + + size_t l = sf_readf_float(file, buffer, size); + assert(l == size); + + if (info.channels == 1) { + convolver_l.init(352, buffer, size); + convolver_r.init(352, buffer, size); + } else { + // deinterleave + float buffer_l[size]; + float buffer_r[size]; + + int i; + for (i=0; i>Including convolution support) + HAS_CONVOLUTION=1 + AM_INIT_AUTOMAKE([subdir-objects]) + AC_DEFINE([CONFIG_CONVOLUTION], 1, [Needed by the compiler.]) + AC_CHECK_LIB([sndfile], [sf_open], , AC_MSG_ERROR(Convolution support requires the sndfile library!))], ) +AM_CONDITIONAL([USE_CONVOLUTION], [test "x$HAS_CONVOLUTION" = "x1"]) + # Look for dns_sd flag AC_ARG_WITH(dns_sd, [ --with-dns_sd = choose dns_sd mDNS support], [ AC_MSG_RESULT(>>Including dns_sd for mDNS support) @@ -295,8 +304,6 @@ AC_TYPE_UINT8_T AC_FUNC_ALLOCA AC_FUNC_ERROR_AT_LINE AC_FUNC_FORK -AC_FUNC_MALLOC -AC_FUNC_REALLOC AC_CHECK_FUNCS([atexit clock_gettime gethostname inet_ntoa memchr memmove memset mkfifo pow select socket stpcpy strcasecmp strchr strdup strerror strstr strtol strtoul]) AC_CONFIG_FILES([Makefile man/Makefile scripts/shairport-sync.service]) diff --git a/loudness.c b/loudness.c new file mode 100644 index 00000000..5db17a8c --- /dev/null +++ b/loudness.c @@ -0,0 +1,55 @@ +#include "loudness.h" +#include +#include "common.h" + +loudness_processor loudness_r; +loudness_processor loudness_l; + + +void _loudness_set_volume(loudness_processor *p, float volume) +{ + float gain = -(volume-config.loudness_reference_volume_db)*0.5; + if (gain < 0) + gain = 0; + + float Fc = 10.0; + float Q = 0.5; + + // Formula from http://www.earlevel.com/main/2011/01/02/biquad-formulas/ + float Fs = 44100.0; + + float K = tan(M_PI * Fc / Fs); + float V = pow(10.0, gain / 20.0); + + float norm = 1 / (1 + 1/Q * K + K * K); + p->a0 = (1 + V/Q * K + K * K) * norm; + p->a1 = 2 * (K * K - 1) * norm; + p->a2 = (1 - V/Q * K + K * K) * norm; + p->b1 = p->a1; + p->b2 = (1 - 1/Q * K + K * K) * norm; +} + +float loudness_process(loudness_processor *p, float i0) +{ + float o0 = p->a0*i0 + p->a1*p->i1 + p->a2*p->i2 - p->b1*p->o1 - p->b2*p->o2; + + p->o2 = p->o1; + p->o1 = o0; + + p->i2 = p->i1; + p->i1 = i0; + + return o0; +} + + +void loudness_set_volume(float volume) +{ + float gain = -(volume-config.loudness_reference_volume_db)*0.5; + if (gain < 0) + gain = 0; + + inform("Volume: %.1f dB - Loudness gain @10Hz: %.1f dB", volume, gain); + _loudness_set_volume(&loudness_l, volume); + _loudness_set_volume(&loudness_r, volume); +} diff --git a/loudness.h b/loudness.h new file mode 100644 index 00000000..c1a219a2 --- /dev/null +++ b/loudness.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +typedef struct { + float a0, a1, a2, b1, b2; + float i1, i2, o1, o2; +} loudness_processor; + + +extern loudness_processor loudness_r; +extern loudness_processor loudness_l; + +void loudness_set_volume(float volume); +float loudness_process(loudness_processor *p, float sample); + diff --git a/player.c b/player.c index f035989d..ebc98843 100644 --- a/player.c +++ b/player.c @@ -65,6 +65,10 @@ #include #endif +#ifdef CONFIG_CONVOLUTION +#include +#endif + #include "common.h" #include "player.h" #include "rtp.h" @@ -76,6 +80,8 @@ #include "apple_alac.h" #endif +#include "loudness.h" + // parameters from the source static unsigned char *aesiv; #ifdef HAVE_LIBSSL @@ -639,11 +645,16 @@ static inline void process_sample(int32_t sample, char **outp, enum sps_format_t int dither) { int64_t hyper_sample = sample; int result; - int64_t hyper_volume = (int64_t)volume << 16; - hyper_sample = hyper_sample * hyper_volume; // this is 64 bit bit multiplication -- we may need to - // dither it down to its - // target resolution - + + if (config.loudness) { + hyper_sample <<= 32; // Do not apply volume as it has already been done with the Loudness DSP filter + } else { + int64_t hyper_volume = (int64_t)volume << 16; + hyper_sample = hyper_sample * hyper_volume; // this is 64 bit bit multiplication -- we may need to + // dither it down to its + // target resolution + } + // next, do dither, if necessary if (dither) { @@ -1815,6 +1826,67 @@ static void *player_thread_func(void *arg) { if (config.no_sync != 0) amount_to_stuff = 0; // no stuffing if it's been disabled + + // Apply DSP here + if (config.loudness +#ifdef CONFIG_CONVOLUTION + || config.convolution +#endif + ) + { + int32_t *tbuf32 = (int32_t*)tbuf; + float fbuf_l[inbuflength]; + float fbuf_r[inbuflength]; + + // Deinterleave, and convert to float + int i; + for (i=0; i #include +#ifdef CONFIG_CONVOLUTION +#include +#endif + static int shutting_down = 0; static char *appName = NULL; char configuration_file_path[4096 + 1]; @@ -159,6 +163,9 @@ char *get_version_string() { #ifdef HAVE_LIBSOXR strcat(version_string, "-soxr"); #endif +#ifdef CONFIG_CONVOLUTION + strcat(version_string, "-convolution"); +#endif #ifdef CONFIG_METADATA strcat(version_string, "-metadata"); #endif @@ -661,7 +668,59 @@ int parse_options(int argc, char **argv) { config.timeout = value; config.dont_check_timeout = 0; // this is for legacy -- only set by -t 0 } - + +#ifdef CONFIG_CONVOLUTION + + if (config_lookup_string(config.cfg, "dsp.convolution", &str)) { + if (strcasecmp(str, "no") == 0) + config.convolution = 0; + else if (strcasecmp(str, "yes") == 0) + config.convolution = 1; + else + die("Invalid dsp.convolution. It should be \"yes\" or \"no\""); + + } + + if (config_lookup_float(config.cfg, "dsp.convolution_gain", &dvalue)) { + config.convolution_gain = dvalue; + if (dvalue > 10 || dvalue < -50) + die("Invalid value \"%f\" for dsp.convolution_gain. It should be between -50 and +10 dB", dvalue); + } + + config.convolution_max_length = 8192; + if (config_lookup_int(config.cfg, "dsp.convolution_max_length", &value)) { + config.convolution_max_length = value; + + if (value < 1 || value > 200000) + die("dsp.convolution_max_length must be within 1 and 200000"); + } + + if (config_lookup_string(config.cfg, "dsp.convolution_ir_file", &str)) { + config.convolution_ir_file = str; + convolver_init(config.convolution_ir_file, config.convolution_max_length); + } + + if (config.convolution && config.convolution_ir_file == NULL) { + die("Convolution enabled but no convolution_ir_file provided"); + } +#endif + + if (config_lookup_string(config.cfg, "dsp.loudness", &str)) { + if (strcasecmp(str, "no") == 0) + config.loudness = 0; + else if (strcasecmp(str, "yes") == 0) + config.loudness = 1; + else + die("Invalid dsp.convolution. It should be \"yes\" or \"no\""); + } + + config.loudness_reference_volume_db = -20; + if (config_lookup_float(config.cfg, "dsp.loudness_reference_volume_db", &dvalue)) { + config.loudness_reference_volume_db = dvalue; + if (dvalue > 0 || dvalue < -100) + die("Invalid value \"%f\" for dsp.loudness_reference_volume_db. It should be between -100 and 0", dvalue); + } + } else { if (config_error_type(&config_file_stuff) == CONFIG_ERR_FILE_IO) debug(1, "Error reading configuration file \"%s\": \"%s\".", @@ -1223,6 +1282,15 @@ int main(int argc, char **argv) { debug(1, "get-coverart is %d.", config.get_coverart); #endif +#ifdef CONFIG_CONVOLUTION + debug(1, "convolution is %d.", config.convolution); + debug(1, "convolution IR file is \"%s\"", config.convolution_ir_file); + debug(1, "convolution max length %d", config.convolution_max_length); + debug(1, "convolution gain is %f", config.convolution_gain); +#endif + debug(1, "loudness is %d.", config.loudness); + debug(1, "loudness reference level is %f", config.loudness_reference_volume_db); + uint8_t ap_md5[16]; #ifdef HAVE_LIBSSL -- 2.39.2