From 7b9cd28e9050e82ab2bce42405a628f230cb8b11 Mon Sep 17 00:00:00 2001
From: Yann Pomarede <yann.pomarede@gmail.com>
Date: Mon, 27 Feb 2017 22:43:23 +0100
Subject: [PATCH] 2 audio filters added, a volume-dependent-loudness and a
 convolution filter.

---
 .gitignore                    |    6 +-
 FFTConvolver/AudioFFT.cpp     | 1018 +++++++++++++++++++++++++++++++++
 FFTConvolver/AudioFFT.h       |  176 ++++++
 FFTConvolver/FFTConvolver.cpp |  202 +++++++
 FFTConvolver/FFTConvolver.h   |  100 ++++
 FFTConvolver/Utilities.cpp    |  113 ++++
 FFTConvolver/Utilities.h      |  351 ++++++++++++
 FFTConvolver/convolver.cpp    |   67 +++
 FFTConvolver/convolver.h      |   14 +
 Makefile.am                   |    6 +-
 common.h                      |   13 +-
 configure.ac                  |   11 +-
 loudness.c                    |   55 ++
 loudness.h                    |   16 +
 player.c                      |   85 ++-
 scripts/shairport-sync.conf   |   30 +
 shairport.c                   |   70 ++-
 17 files changed, 2322 insertions(+), 11 deletions(-)
 create mode 100755 FFTConvolver/AudioFFT.cpp
 create mode 100755 FFTConvolver/AudioFFT.h
 create mode 100755 FFTConvolver/FFTConvolver.cpp
 create mode 100755 FFTConvolver/FFTConvolver.h
 create mode 100644 FFTConvolver/Utilities.cpp
 create mode 100644 FFTConvolver/Utilities.h
 create mode 100644 FFTConvolver/convolver.cpp
 create mode 100644 FFTConvolver/convolver.h
 create mode 100644 loudness.c
 create mode 100644 loudness.h
diff --git a/.gitignore b/.gitignore
index 9e055931..d98a86df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 /INSTALL
 /shairport-sync
 /shairport-sync.exe
-/*.o
+*.o
 /*~
 *.xml~
 /config.mk
@@ -26,3 +26,7 @@ scripts/shairport-sync
 # Some eclipse project files
 .cproject
 .project
+
+# macOS stuff
+.DS_Store
+shairport-sync.xcodeproj
diff --git a/FFTConvolver/AudioFFT.cpp b/FFTConvolver/AudioFFT.cpp
new file mode 100755
index 00000000..32b3ddaa
--- /dev/null
+++ b/FFTConvolver/AudioFFT.cpp
@@ -0,0 +1,1018 @@
+// ==================================================================================
+// Copyright (c) 2016 HiFi-LoFi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is furnished
+// to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ==================================================================================
+
+#include "AudioFFT.h"
+
+#include <cassert>
+#include <cmath>
+#include <cstring>
+
+
+#if defined(AUDIOFFT_APPLE_ACCELERATE)
+  #define AUDIOFFT_APPLE_ACCELERATE_USED
+  #include <Accelerate/Accelerate.h>
+  #include <vector>
+#elif defined (AUDIOFFT_FFTW3)
+  #define AUDIOFFT_FFTW3_USED
+  #include <fftw3.h>
+#else
+  #if !defined(AUDIOFFT_OOURA)
+    #define AUDIOFFT_OOURA
+  #endif
+  #define AUDIOFFT_OOURA_USED
+  #include <vector>
+#endif
+
+
+namespace audiofft
+{
+
+  namespace details
+  {
+
+    static bool IsPowerOf2(size_t val)
+    {
+      return (val == 1 || (val & (val-1)) == 0);
+    }
+
+
+    template<typename TypeDest, typename TypeSrc>
+    void ConvertBuffer(TypeDest* dest, const TypeSrc* src, size_t len)
+    {
+      for (size_t i=0; i<len; ++i)
+      {
+        dest[i] = static_cast<TypeDest>(src[i]);
+      }
+    }
+
+
+    template<typename TypeDest, typename TypeSrc, typename TypeFactor>
+    void ScaleBuffer(TypeDest* dest, const TypeSrc* src, const TypeFactor factor, size_t len)
+    {
+      for (size_t i=0; i<len; ++i)
+      {
+        dest[i] = static_cast<TypeDest>(static_cast<TypeFactor>(src[i]) * factor);
+      }
+    }
+
+
+    // ================================================================
+
+
+#ifdef AUDIOFFT_OOURA_USED
+
+    /**
+     * @internal
+     * @class OouraFFT
+     * @brief FFT implementation based on the great radix-4 routines by Takuya Ooura
+     */
+    class OouraFFT : public AudioFFTImpl
+    {
+    public:
+      OouraFFT() :
+        AudioFFTImpl(),
+        _size(0),
+        _ip(),
+        _w(),
+        _buffer()
+      {
+      }
+
+      virtual void init(size_t size) override
+      {
+        if (_size != size)
+        {
+          _ip.resize(2 + static_cast<int>(std::sqrt(static_cast<double>(size))));
+          _w.resize(size / 2);
+          _buffer.resize(size);
+          _size = size;
+
+          const int size4 = static_cast<int>(_size) / 4;
+          makewt(size4, _ip.data(), _w.data());
+          makect(size4, _ip.data(), _w.data() + size4);
+        }
+      }
+
+      virtual void fft(const float* data, float* re, float* im) override
+      {
+        // Convert into the format as required by the Ooura FFT
+        ConvertBuffer(&_buffer[0], data, _size);
+
+        rdft(static_cast<int>(_size), +1, _buffer.data(), _ip.data(), _w.data());
+
+        // Convert back to split-complex
+        {
+          double* b = &_buffer[0];
+          double* bEnd = b + _size;
+          float *r = re;
+          float *i = im;
+          while (b != bEnd)
+          {
+            *(r++) = static_cast<float>(*(b++));
+            *(i++) = static_cast<float>(-(*(b++)));
+          }
+        }
+        const size_t size2 = _size / 2;
+        re[size2] = -im[0];
+        im[0] = 0.0;
+        im[size2] = 0.0;
+      }
+
+      virtual void ifft(float* data, const float* re, const float* im) override
+      {
+        // Convert into the format as required by the Ooura FFT
+        {
+          double* b = &_buffer[0];
+          double* bEnd = b + _size;
+          const float *r = re;
+          const float *i = im;
+          while (b != bEnd)
+          {
+            *(b++) = static_cast<double>(*(r++));
+            *(b++) = -static_cast<double>(*(i++));
+          }
+          _buffer[1] = re[_size / 2];
+        }
+
+        rdft(static_cast<int>(_size), -1, _buffer.data(), _ip.data(), _w.data());
+
+        // Convert back to split-complex
+        ScaleBuffer(data, &_buffer[0], 2.0 / static_cast<double>(_size), _size);
+      }
+
+    private:
+      size_t _size;
+      std::vector<int> _ip;
+      std::vector<double> _w;
+      std::vector<double> _buffer;
+
+      void rdft(int n, int isgn, double *a, int *ip, double *w)
+      {
+        int nw = ip[0];
+        int nc = ip[1];
+
+        if (isgn >= 0)
+        {
+          if (n > 4)
+          {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+          }
+          else if (n == 4)
+          {
+            cftfsub(n, a, w);
+          }
+          double xi = a[0] - a[1];
+          a[0] += a[1];
+          a[1] = xi;
+        }
+        else
+        {
+          a[1] = 0.5 * (a[0] - a[1]);
+          a[0] -= a[1];
+          if (n > 4)
+          {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+          }
+          else if (n == 4)
+          {
+            cftfsub(n, a, w);
+          }
+        }
+      }
+
+
+      /* -------- initializing routines -------- */
+
+      void makewt(int nw, int *ip, double *w)
+      {
+        int j, nwh;
+        double delta, x, y;
+
+        ip[0] = nw;
+        ip[1] = 1;
+        if (nw > 2) {
+          nwh = nw >> 1;
+          delta = atan(1.0) / nwh;
+          w[0] = 1;
+          w[1] = 0;
+          w[nwh] = cos(delta * nwh);
+          w[nwh + 1] = w[nwh];
+          if (nwh > 2) {
+            for (j = 2; j < nwh; j += 2) {
+              x = cos(delta * j);
+              y = sin(delta * j);
+              w[j] = x;
+              w[j + 1] = y;
+              w[nw - j] = y;
+              w[nw - j + 1] = x;
+            }
+            bitrv2(nw, ip + 2, w);
+          }
+        }
+      }
+
+
+      void makect(int nc, int *ip, double *c)
+      {
+        int j, nch;
+        double delta;
+
+        ip[1] = nc;
+        if (nc > 1) {
+          nch = nc >> 1;
+          delta = atan(1.0) / nch;
+          c[0] = cos(delta * nch);
+          c[nch] = 0.5 * c[0];
+          for (j = 1; j < nch; j++) {
+            c[j] = 0.5 * cos(delta * j);
+            c[nc - j] = 0.5 * sin(delta * j);
+          }
+        }
+      }
+
+
+      /* -------- child routines -------- */
+
+
+      void bitrv2(int n, int *ip, double *a)
+      {
+        int j, j1, k, k1, l, m, m2;
+        double xr, xi, yr, yi;
+
+        ip[0] = 0;
+        l = n;
+        m = 1;
+        while ((m << 3) < l) {
+          l >>= 1;
+          for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+          }
+          m <<= 1;
+        }
+        m2 = 2 * m;
+        if ((m << 3) == l) {
+          for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+              j1 = 2 * j + ip[k];
+              k1 = 2 * k + ip[j];
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+              j1 += m2;
+              k1 += 2 * m2;
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+              j1 += m2;
+              k1 -= m2;
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+              j1 += m2;
+              k1 += 2 * m2;
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+            }
+            j1 = 2 * k + m2 + ip[k];
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+          }
+        } else {
+          for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+              j1 = 2 * j + ip[k];
+              k1 = 2 * k + ip[j];
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+              j1 += m2;
+              k1 += m2;
+              xr = a[j1];
+              xi = a[j1 + 1];
+              yr = a[k1];
+              yi = a[k1 + 1];
+              a[j1] = yr;
+              a[j1 + 1] = yi;
+              a[k1] = xr;
+              a[k1 + 1] = xi;
+            }
+          }
+        }
+      }
+
+
+      void cftfsub(int n, double *a, double *w)
+      {
+        int j, j1, j2, j3, l;
+        double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+        l = 2;
+        if (n > 8) {
+          cft1st(n, a, w);
+          l = 8;
+          while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+          }
+        }
+        if ((l << 2) == n) {
+          for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+          }
+        } else {
+          for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+          }
+        }
+      }
+
+
+      void cftbsub(int n, double *a, double *w)
+      {
+        int j, j1, j2, j3, l;
+        double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+        l = 2;
+        if (n > 8) {
+          cft1st(n, a, w);
+          l = 8;
+          while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+          }
+        }
+        if ((l << 2) == n) {
+          for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+          }
+        } else {
+          for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+          }
+        }
+      }
+
+
+      void cft1st(int n, double *a, double *w)
+      {
+        int j, k1, k2;
+        double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+        double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+        x0r = a[0] + a[2];
+        x0i = a[1] + a[3];
+        x1r = a[0] - a[2];
+        x1i = a[1] - a[3];
+        x2r = a[4] + a[6];
+        x2i = a[5] + a[7];
+        x3r = a[4] - a[6];
+        x3i = a[5] - a[7];
+        a[0] = x0r + x2r;
+        a[1] = x0i + x2i;
+        a[4] = x0r - x2r;
+        a[5] = x0i - x2i;
+        a[2] = x1r - x3i;
+        a[3] = x1i + x3r;
+        a[6] = x1r + x3i;
+        a[7] = x1i - x3r;
+        wk1r = w[2];
+        x0r = a[8] + a[10];
+        x0i = a[9] + a[11];
+        x1r = a[8] - a[10];
+        x1i = a[9] - a[11];
+        x2r = a[12] + a[14];
+        x2i = a[13] + a[15];
+        x3r = a[12] - a[14];
+        x3i = a[13] - a[15];
+        a[8] = x0r + x2r;
+        a[9] = x0i + x2i;
+        a[12] = x2i - x0i;
+        a[13] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[10] = wk1r * (x0r - x0i);
+        a[11] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[14] = wk1r * (x0i - x0r);
+        a[15] = wk1r * (x0i + x0r);
+        k1 = 0;
+        for (j = 16; j < n; j += 16) {
+          k1 += 2;
+          k2 = 2 * k1;
+          wk2r = w[k1];
+          wk2i = w[k1 + 1];
+          wk1r = w[k2];
+          wk1i = w[k2 + 1];
+          wk3r = wk1r - 2 * wk2i * wk1i;
+          wk3i = 2 * wk2i * wk1r - wk1i;
+          x0r = a[j] + a[j + 2];
+          x0i = a[j + 1] + a[j + 3];
+          x1r = a[j] - a[j + 2];
+          x1i = a[j + 1] - a[j + 3];
+          x2r = a[j + 4] + a[j + 6];
+          x2i = a[j + 5] + a[j + 7];
+          x3r = a[j + 4] - a[j + 6];
+          x3i = a[j + 5] - a[j + 7];
+          a[j] = x0r + x2r;
+          a[j + 1] = x0i + x2i;
+          x0r -= x2r;
+          x0i -= x2i;
+          a[j + 4] = wk2r * x0r - wk2i * x0i;
+          a[j + 5] = wk2r * x0i + wk2i * x0r;
+          x0r = x1r - x3i;
+          x0i = x1i + x3r;
+          a[j + 2] = wk1r * x0r - wk1i * x0i;
+          a[j + 3] = wk1r * x0i + wk1i * x0r;
+          x0r = x1r + x3i;
+          x0i = x1i - x3r;
+          a[j + 6] = wk3r * x0r - wk3i * x0i;
+          a[j + 7] = wk3r * x0i + wk3i * x0r;
+          wk1r = w[k2 + 2];
+          wk1i = w[k2 + 3];
+          wk3r = wk1r - 2 * wk2r * wk1i;
+          wk3i = 2 * wk2r * wk1r - wk1i;
+          x0r = a[j + 8] + a[j + 10];
+          x0i = a[j + 9] + a[j + 11];
+          x1r = a[j + 8] - a[j + 10];
+          x1i = a[j + 9] - a[j + 11];
+          x2r = a[j + 12] + a[j + 14];
+          x2i = a[j + 13] + a[j + 15];
+          x3r = a[j + 12] - a[j + 14];
+          x3i = a[j + 13] - a[j + 15];
+          a[j + 8] = x0r + x2r;
+          a[j + 9] = x0i + x2i;
+          x0r -= x2r;
+          x0i -= x2i;
+          a[j + 12] = -wk2i * x0r - wk2r * x0i;
+          a[j + 13] = -wk2i * x0i + wk2r * x0r;
+          x0r = x1r - x3i;
+          x0i = x1i + x3r;
+          a[j + 10] = wk1r * x0r - wk1i * x0i;
+          a[j + 11] = wk1r * x0i + wk1i * x0r;
+          x0r = x1r + x3i;
+          x0i = x1i - x3r;
+          a[j + 14] = wk3r * x0r - wk3i * x0i;
+          a[j + 15] = wk3r * x0i + wk3i * x0r;
+        }
+      }
+
+
+      void cftmdl(int n, int l, double *a, double *w)
+      {
+        int j, j1, j2, j3, k, k1, k2, m, m2;
+        double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+        double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+        m = l << 2;
+        for (j = 0; j < l; j += 2) {
+          j1 = j + l;
+          j2 = j1 + l;
+          j3 = j2 + l;
+          x0r = a[j] + a[j1];
+          x0i = a[j + 1] + a[j1 + 1];
+          x1r = a[j] - a[j1];
+          x1i = a[j + 1] - a[j1 + 1];
+          x2r = a[j2] + a[j3];
+          x2i = a[j2 + 1] + a[j3 + 1];
+          x3r = a[j2] - a[j3];
+          x3i = a[j2 + 1] - a[j3 + 1];
+          a[j] = x0r + x2r;
+          a[j + 1] = x0i + x2i;
+          a[j2] = x0r - x2r;
+          a[j2 + 1] = x0i - x2i;
+          a[j1] = x1r - x3i;
+          a[j1 + 1] = x1i + x3r;
+          a[j3] = x1r + x3i;
+          a[j3 + 1] = x1i - x3r;
+        }
+        wk1r = w[2];
+        for (j = m; j < l + m; j += 2) {
+          j1 = j + l;
+          j2 = j1 + l;
+          j3 = j2 + l;
+          x0r = a[j] + a[j1];
+          x0i = a[j + 1] + a[j1 + 1];
+          x1r = a[j] - a[j1];
+          x1i = a[j + 1] - a[j1 + 1];
+          x2r = a[j2] + a[j3];
+          x2i = a[j2 + 1] + a[j3 + 1];
+          x3r = a[j2] - a[j3];
+          x3i = a[j2 + 1] - a[j3 + 1];
+          a[j] = x0r + x2r;
+          a[j + 1] = x0i + x2i;
+          a[j2] = x2i - x0i;
+          a[j2 + 1] = x0r - x2r;
+          x0r = x1r - x3i;
+          x0i = x1i + x3r;
+          a[j1] = wk1r * (x0r - x0i);
+          a[j1 + 1] = wk1r * (x0r + x0i);
+          x0r = x3i + x1r;
+          x0i = x3r - x1i;
+          a[j3] = wk1r * (x0i - x0r);
+          a[j3 + 1] = wk1r * (x0i + x0r);
+        }
+        k1 = 0;
+        m2 = 2 * m;
+        for (k = m2; k < n; k += m2) {
+          k1 += 2;
+          k2 = 2 * k1;
+          wk2r = w[k1];
+          wk2i = w[k1 + 1];
+          wk1r = w[k2];
+          wk1i = w[k2 + 1];
+          wk3r = wk1r - 2 * wk2i * wk1i;
+          wk3i = 2 * wk2i * wk1r - wk1i;
+          for (j = k; j < l + k; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i;
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+          }
+          wk1r = w[k2 + 2];
+          wk1i = w[k2 + 3];
+          wk3r = wk1r - 2 * wk2r * wk1i;
+          wk3i = 2 * wk2r * wk1r - wk1i;
+          for (j = k + m; j < l + (k + m); j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i;
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+          }
+        }
+      }
+
+
+      void rftfsub(int n, double *a, int nc, double *c)
+      {
+        int j, k, kk, ks, m;
+        double wkr, wki, xr, xi, yr, yi;
+
+        m = n >> 1;
+        ks = 2 * nc / m;
+        kk = 0;
+        for (j = 2; j < m; j += 2) {
+          k = n - j;
+          kk += ks;
+          wkr = 0.5 - c[nc - kk];
+          wki = c[kk];
+          xr = a[j] - a[k];
+          xi = a[j + 1] + a[k + 1];
+          yr = wkr * xr - wki * xi;
+          yi = wkr * xi + wki * xr;
+          a[j] -= yr;
+          a[j + 1] -= yi;
+          a[k] += yr;
+          a[k + 1] -= yi;
+        }
+      }
+
+
+      void rftbsub(int n, double *a, int nc, double *c)
+      {
+        int j, k, kk, ks, m;
+        double wkr, wki, xr, xi, yr, yi;
+
+        a[1] = -a[1];
+        m = n >> 1;
+        ks = 2 * nc / m;
+        kk = 0;
+        for (j = 2; j < m; j += 2) {
+          k = n - j;
+          kk += ks;
+          wkr = 0.5 - c[nc - kk];
+          wki = c[kk];
+          xr = a[j] - a[k];
+          xi = a[j + 1] + a[k + 1];
+          yr = wkr * xr + wki * xi;
+          yi = wkr * xi - wki * xr;
+          a[j] -= yr;
+          a[j + 1] = yi - a[j + 1];
+          a[k] += yr;
+          a[k + 1] = yi - a[k + 1];
+        }
+        a[m + 1] = -a[m + 1];
+      }
+
+      OouraFFT(const OouraFFT&) = delete;
+      OouraFFT& operator=(const OouraFFT&) = delete;
+    };
+
+    std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
+    {
+      return std::unique_ptr<OouraFFT>(new OouraFFT());
+    }
+
+
+#endif // AUDIOFFT_OOURA_USED
+
+
+    // ================================================================
+
+
+#ifdef AUDIOFFT_APPLE_ACCELERATE_USED
+
+
+    /**
+     * @internal
+     * @class AppleAccelerateFFT
+     * @brief FFT implementation using the Apple Accelerate framework internally
+     */
+    class AppleAccelerateFFT : public AudioFFTImpl
+    {
+    public:
+      AppleAccelerateFFT() :
+        AudioFFTImpl(),
+        _size(0),
+        _powerOf2(0),
+        _fftSetup(0),
+        _re(),
+        _im()
+      {
+      }
+
+      virtual ~AppleAccelerateFFT()
+      {
+        init(0);
+      }
+
+      virtual void init(size_t size) override
+      {
+        if (_fftSetup)
+        {
+          vDSP_destroy_fftsetup(_fftSetup);
+          _size = 0;
+          _powerOf2 = 0;
+          _fftSetup = 0;
+          _re.clear();
+          _im.clear();
+        }
+
+        if (size > 0)
+        {
+          _size = size;
+          _powerOf2 = 0;
+          while ((1 << _powerOf2) < _size)
+          {
+            ++_powerOf2;
+          }
+          _fftSetup = vDSP_create_fftsetup(_powerOf2, FFT_RADIX2);
+          _re.resize(_size / 2);
+          _im.resize(_size / 2);
+        }
+      }
+
+      virtual void fft(const float* data, float* re, float* im) override
+      {
+        const size_t size2 = _size / 2;
+        DSPSplitComplex splitComplex;
+        splitComplex.realp = re;
+        splitComplex.imagp = im;
+        vDSP_ctoz(reinterpret_cast<const COMPLEX*>(data), 2, &splitComplex, 1, size2);
+        vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_FORWARD);
+        const float factor = 0.5f;
+        vDSP_vsmul(re, 1, &factor, re, 1, size2);
+        vDSP_vsmul(im, 1, &factor, im, 1, size2);
+        re[size2] = im[0];
+        im[0] = 0.0f;
+        im[size2] = 0.0f;
+      }
+
+      virtual void ifft(float* data, const float* re, const float* im) override
+      {
+        const size_t size2 = _size / 2;
+        ::memcpy(_re.data(), re, size2 * sizeof(float));
+        ::memcpy(_im.data(), im, size2 * sizeof(float));
+        _im[0] = re[size2];
+        DSPSplitComplex splitComplex;
+        splitComplex.realp = _re.data();
+        splitComplex.imagp = _im.data();
+        vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_INVERSE);
+        vDSP_ztoc(&splitComplex, 1, reinterpret_cast<COMPLEX*>(data), 2, size2);
+        const float factor = 1.0f / static_cast<float>(_size);
+        vDSP_vsmul(data, 1, &factor, data, 1, _size);
+      }
+
+    private:
+      size_t _size;
+      size_t _powerOf2;
+      FFTSetup _fftSetup;
+      std::vector<float> _re;
+      std::vector<float> _im;
+
+      AppleAccelerateFFT(const AppleAccelerateFFT&) = delete;
+      AppleAccelerateFFT& operator=(const AppleAccelerateFFT&) = delete;
+    };
+
+
+    std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
+    {
+      return std::unique_ptr<AppleAccelerateFFT>(new AppleAccelerateFFT());
+    }
+
+
+#endif // AUDIOFFT_APPLE_ACCELERATE_USED
+
+
+    // ================================================================
+
+
+#ifdef AUDIOFFT_FFTW3_USED
+
+
+    /**
+     * @internal
+     * @class FFTW3FFT
+     * @brief FFT implementation using FFTW3 internally (see fftw.org)
+     */
+    class FFTW3FFT : public AudioFFTImpl
+    {
+    public:
+      FFTW3FFT() :
+        AudioFFTImpl(),
+       _size(0),
+       _complexSize(0),
+       _planForward(0),
+       _planBackward(0),
+       _data(0),
+       _re(0),
+       _im(0)
+      {
+      }
+
+      virtual ~FFTW3FFT()
+      {
+        init(0);
+      }
+
+      virtual void init(size_t size) override
+      {
+        if (_size != size)
+        {
+          if (_size > 0)
+          {
+            fftwf_destroy_plan(_planForward);
+            fftwf_destroy_plan(_planBackward);
+            _planForward = 0;
+            _planBackward = 0;
+            _size = 0;
+            _complexSize = 0;
+
+            if (_data)
+            {
+              fftwf_free(_data);
+              _data = 0;
+            }
+
+            if (_re)
+            {
+              fftwf_free(_re);
+              _re = 0;
+            }
+
+            if (_im)
+            {
+              fftwf_free(_im);
+              _im = 0;
+            }
+          }
+
+          if (size > 0)
+          {
+            _size = size;
+            _complexSize = AudioFFT::ComplexSize(_size);
+            const size_t complexSize = AudioFFT::ComplexSize(_size);
+            _data = reinterpret_cast<float*>(fftwf_malloc(_size * sizeof(float)));
+            _re = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float)));
+            _im = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float)));
+
+            fftw_iodim dim;
+            dim.n = static_cast<int>(size);
+            dim.is = 1;
+            dim.os = 1;
+            _planForward = fftwf_plan_guru_split_dft_r2c(1, &dim, 0, 0, _data, _re, _im, FFTW_MEASURE);
+            _planBackward = fftwf_plan_guru_split_dft_c2r(1, &dim, 0, 0, _re, _im, _data, FFTW_MEASURE);
+          }
+        }
+      }
+
+      virtual void fft(const float* data, float* re, float* im) override
+      {
+        ::memcpy(_data, data, _size * sizeof(float));
+        fftwf_execute_split_dft_r2c(_planForward, _data, _re, _im);
+        ::memcpy(re, _re, _complexSize * sizeof(float));
+        ::memcpy(im, _im, _complexSize * sizeof(float));
+      }
+
+      void ifft(float* data, const float* re, const float* im)
+      {
+        ::memcpy(_re, re, _complexSize * sizeof(float));
+        ::memcpy(_im, im, _complexSize * sizeof(float));
+        fftwf_execute_split_dft_c2r(_planBackward, _re, _im, _data);
+        ScaleBuffer(data, _data, 1.0f / static_cast<float>(_size), _size);
+      }
+
+    private:
+      size_t _size;
+      size_t _complexSize;
+      fftwf_plan _planForward;
+      fftwf_plan _planBackward;
+      float* _data;
+      float* _re;
+      float* _im;
+
+      FFTW3FFT(const FFTW3FFT&) = delete;
+      FFTW3FFT& operator=(const FFTW3FFT&) = delete;
+    };
+
+
+    std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl()
+    {
+      return std::unique_ptr<FFTW3FFT>(new FFTW3FFT());
+    }
+
+
+#endif // AUDIOFFT_FFTW3_USED
+
+  } // End of namespace details
+
+
+  // =============================================================
+
+
+  AudioFFT::AudioFFT() :
+    _impl(details::MakeAudioFFTImpl())
+  {
+  }
+
+
+  void AudioFFT::init(size_t size)
+  {
+    assert(details::IsPowerOf2(size));
+    _impl->init(size);
+  }
+
+
+  void AudioFFT::fft(const float* data, float* re, float* im)
+  {
+    _impl->fft(data, re, im);
+  }
+
+
+  void AudioFFT::ifft(float* data, const float* re, const float* im)
+  {
+    _impl->ifft(data, re, im);
+  }
+
+
+  size_t AudioFFT::ComplexSize(size_t size)
+  {
+    return (size / 2) + 1;
+  }
+
+} // End of namespace
diff --git a/FFTConvolver/AudioFFT.h b/FFTConvolver/AudioFFT.h
new file mode 100755
index 00000000..80cd2edc
--- /dev/null
+++ b/FFTConvolver/AudioFFT.h
@@ -0,0 +1,176 @@
+// ==================================================================================
+// Copyright (c) 2016 HiFi-LoFi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is furnished
+// to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+// ==================================================================================
+
+#ifndef _AUDIOFFT_H
+#define _AUDIOFFT_H
+
+
+/**
+* AudioFFT provides real-to-complex/complex-to-real FFT routines.
+*
+* Features:
+*
+* - Real-complex FFT and complex-real inverse FFT for power-of-2-sized real data.
+*
+* - Uniform interface to different FFT implementations (currently Ooura, FFTW3 and Apple Accelerate).
+*
+* - Complex data is handled in "split-complex" format, i.e. there are separate
+*   arrays for the real and imaginary parts which can be useful for SIMD optimizations
+*   (split-complex arrays have to be of length (size/2+1) representing bins from DC
+*   to Nyquist frequency).
+*
+* - Output is "ready to use" (all scaling etc. is already handled internally).
+*
+* - No allocations/deallocations after the initialization which makes it usable
+*   for real-time audio applications (that's what I wrote it for and using it).
+*
+*
+* How to use it in your project:
+*
+* - Add the .h and .cpp file to your project - that's all.
+*
+* - To get extra speed, you can link FFTW3 to your project and define
+*   AUDIOFFT_FFTW3 (however, please check whether your project suits the
+*   according license).
+*
+* - To get the best speed on Apple platforms, you can link the Apple
+*   Accelerate framework to your project and define
+*   AUDIOFFT_APPLE_ACCELERATE  (however, please check whether your
+*   project suits the according license).
+*
+*
+* Remarks:
+*
+* - AudioFFT is not intended to be the fastest FFT, but to be a fast-enough
+*   FFT suitable for most audio applications.
+*
+* - AudioFFT uses the quite liberal MIT license.
+*
+*
+* Example usage:
+* @code
+* #include "AudioFFT.h"
+*
+* void Example()
+* {
+*   const size_t fftSize = 1024; // Needs to be power of 2!
+*
+*   std::vector<float> input(fftSize, 0.0f);
+*   std::vector<float> re(audiofft::AudioFFT::ComplexSize(fftSize));
+*   std::vector<float> im(audiofft::AudioFFT::ComplexSize(fftSize));
+*   std::vector<float> output(fftSize);
+*
+*   audiofft::AudioFFT fft;
+*   fft.init(1024);
+*   fft.fft(input.data(), re.data(), im.data());
+*   fft.ifft(output.data(), re.data(), im.data());
+* }
+* @endcode
+*/
+
+
+#include <cstddef>
+#include <memory>
+
+
+namespace audiofft
+{
+
+  namespace details
+  {
+
+    class AudioFFTImpl
+    {
+    public:
+      AudioFFTImpl() = default;
+      virtual ~AudioFFTImpl() = default;
+      virtual void init(size_t size) = 0;
+      virtual void fft(const float* data, float* re, float* im) = 0;
+      virtual void ifft(float* data, const float* re, const float* im) = 0;
+
+    private:
+      AudioFFTImpl(const AudioFFTImpl&) = delete;
+      AudioFFTImpl& operator=(const AudioFFTImpl&) = delete;
+    };
+  }
+
+
+  // ======================================================
+
+
+  /**
+   * @class AudioFFT
+   * @brief Performs 1D FFTs
+   */
+  class AudioFFT
+  {
+  public:
+    /**
+     * @brief Constructor
+     */
+    AudioFFT();
+
+    /**
+     * @brief Initializes the FFT object
+     * @param size Size of the real input (must be power 2)
+     */
+    void init(size_t size);
+
+    /**
+     * @brief Performs the forward FFT
+     * @param data The real input data (has to be of the length as specified in init())
+     * @param re The real part of the complex output (has to be of length as returned by ComplexSize())
+     * @param im The imaginary part of the complex output (has to be of length as returned by ComplexSize())
+     */
+    void fft(const float* data, float* re, float* im);
+
+    /**
+     * @brief Performs the inverse FFT
+     * @param data The real output data (has to be of the length as specified in init())
+     * @param re The real part of the complex input (has to be of length as returned by ComplexSize())
+     * @param im The imaginary part of the complex input (has to be of length as returned by ComplexSize())
+     */
+    void ifft(float* data, const float* re, const float* im);
+
+    /**
+     * @brief Calculates the necessary size of the real/imaginary complex arrays
+     * @param size The size of the real data
+     * @return The size of the real/imaginary complex arrays
+     */
+    static size_t ComplexSize(size_t size);
+
+  private:
+    std::unique_ptr<details::AudioFFTImpl> _impl;
+
+    AudioFFT(const AudioFFT&) = delete;
+    AudioFFT& operator=(const AudioFFT&) = delete;
+  };
+
+
+  /**
+   * @deprecated
+   * @brief Let's keep an AudioFFTBase type around for now because it has been here already in the 1st version in order to avoid breaking existing code.
+   */
+  typedef AudioFFT AudioFFTBase;
+
+} // End of namespace
+
+#endif // Header guard
diff --git a/FFTConvolver/FFTConvolver.cpp b/FFTConvolver/FFTConvolver.cpp
new file mode 100755
index 00000000..049dbed4
--- /dev/null
+++ b/FFTConvolver/FFTConvolver.cpp
@@ -0,0 +1,202 @@
+// ==================================================================================
+// Copyright (c) 2012 HiFi-LoFi
+//
+// This is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+// ==================================================================================
+
+#include "FFTConvolver.h"
+
+#include <cassert>
+#include <cmath>
+
+#if defined (FFTCONVOLVER_USE_SSE)
+  #include <xmmintrin.h>
+#endif
+
+
+namespace fftconvolver
+{  
+
+FFTConvolver::FFTConvolver() :
+  _blockSize(0),
+  _segSize(0),
+  _segCount(0),
+  _fftComplexSize(0),
+  _segments(),
+  _segmentsIR(),
+  _fftBuffer(),
+  _fft(),
+  _preMultiplied(),
+  _conv(),
+  _overlap(),
+  _current(0),
+  _inputBuffer(),
+  _inputBufferFill(0)
+{
+}
+
+  
+FFTConvolver::~FFTConvolver()
+{
+  reset();
+}
+
+  
+void FFTConvolver::reset()
+{  
+  for (size_t i=0; i<_segCount; ++i)
+  {
+    delete _segments[i];
+    delete _segmentsIR[i];
+  }
+  
+  _blockSize = 0;
+  _segSize = 0;
+  _segCount = 0;
+  _fftComplexSize = 0;
+  _segments.clear();
+  _segmentsIR.clear();
+  _fftBuffer.clear();
+  _fft.init(0);
+  _preMultiplied.clear();
+  _conv.clear();
+  _overlap.clear();
+  _current = 0;
+  _inputBuffer.clear();
+  _inputBufferFill = 0;
+}
+
+  
+bool FFTConvolver::init(size_t blockSize, const Sample* ir, size_t irLen)
+{
+  reset();
+
+  if (blockSize == 0)
+  {
+    return false;
+  }
+  
+  // Ignore zeros at the end of the impulse response because they only waste computation time
+  while (irLen > 0 && ::fabs(ir[irLen-1]) < 0.000001f)
+  {
+    --irLen;
+  }
+
+  if (irLen == 0)
+  {
+    return true;
+  }
+  
+  _blockSize = NextPowerOf2(blockSize);
+  _segSize = 2 * _blockSize;
+  _segCount = static_cast<size_t>(::ceil(static_cast<float>(irLen) / static_cast<float>(_blockSize)));
+  _fftComplexSize = audiofft::AudioFFT::ComplexSize(_segSize);
+  
+  // FFT
+  _fft.init(_segSize);
+  _fftBuffer.resize(_segSize);
+  
+  // Prepare segments
+  for (size_t i=0; i<_segCount; ++i)
+  {
+    _segments.push_back(new SplitComplex(_fftComplexSize));    
+  }
+  
+  // Prepare IR
+  for (size_t i=0; i<_segCount; ++i)
+  {
+    SplitComplex* segment = new SplitComplex(_fftComplexSize);
+    const size_t remaining = irLen - (i * _blockSize);
+    const size_t sizeCopy = (remaining >= _blockSize) ? _blockSize : remaining;
+    CopyAndPad(_fftBuffer, &ir[i*_blockSize], sizeCopy);
+    _fft.fft(_fftBuffer.data(), segment->re(), segment->im());
+    _segmentsIR.push_back(segment);
+  }
+  
+  // Prepare convolution buffers  
+  _preMultiplied.resize(_fftComplexSize);
+  _conv.resize(_fftComplexSize);
+  _overlap.resize(_blockSize);
+  
+  // Prepare input buffer
+  _inputBuffer.resize(_blockSize);
+  _inputBufferFill = 0;
+
+  // Reset current position
+  _current = 0;
+  
+  return true;
+}
+
+
+void FFTConvolver::process(const Sample* input, Sample* output, size_t len)
+{
+  if (_segCount == 0)
+  {
+    ::memset(output, 0, len * sizeof(Sample));
+    return;
+  }
+
+  size_t processed = 0;
+  while (processed < len)
+  {
+    const bool inputBufferWasEmpty = (_inputBufferFill == 0);
+    const size_t processing = std::min(len-processed, _blockSize-_inputBufferFill);
+    const size_t inputBufferPos = _inputBufferFill;
+    ::memcpy(_inputBuffer.data()+inputBufferPos, input+processed, processing * sizeof(Sample));
+
+    // Forward FFT
+    CopyAndPad(_fftBuffer, &_inputBuffer[0], _blockSize); 
+    _fft.fft(_fftBuffer.data(), _segments[_current]->re(), _segments[_current]->im());
+
+    // Complex multiplication
+    if (inputBufferWasEmpty)
+    {
+      _preMultiplied.setZero();
+      for (size_t i=1; i<_segCount; ++i)
+      {
+        const size_t indexIr = i;
+        const size_t indexAudio = (_current + i) % _segCount;
+        ComplexMultiplyAccumulate(_preMultiplied, *_segmentsIR[indexIr], *_segments[indexAudio]);
+      }
+    }
+    _conv.copyFrom(_preMultiplied);
+    ComplexMultiplyAccumulate(_conv, *_segments[_current], *_segmentsIR[0]);
+
+    // Backward FFT
+    _fft.ifft(_fftBuffer.data(), _conv.re(), _conv.im());
+
+    // Add overlap
+    Sum(output+processed, _fftBuffer.data()+inputBufferPos, _overlap.data()+inputBufferPos, processing);
+
+    // Input buffer full => Next block
+    _inputBufferFill += processing;
+    if (_inputBufferFill == _blockSize)
+    {
+      // Input buffer is empty again now
+      _inputBuffer.setZero();
+      _inputBufferFill = 0;
+
+      // Save the overlap
+      ::memcpy(_overlap.data(), _fftBuffer.data()+_blockSize, _blockSize * sizeof(Sample));
+
+      // Update current segment
+      _current = (_current > 0) ? (_current - 1) : (_segCount - 1);
+    }
+
+    processed += processing;
+  }
+}
+  
+} // End of namespace fftconvolver
diff --git a/FFTConvolver/FFTConvolver.h b/FFTConvolver/FFTConvolver.h
new file mode 100755
index 00000000..0c446088
--- /dev/null
+++ b/FFTConvolver/FFTConvolver.h
@@ -0,0 +1,100 @@
+// ==================================================================================
+// Copyright (c) 2012 HiFi-LoFi
+//
+// This is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+// ==================================================================================
+
+#ifndef _FFTCONVOLVER_FFTCONVOLVER_H
+#define _FFTCONVOLVER_FFTCONVOLVER_H
+
+#include "AudioFFT.h"
+#include "Utilities.h"
+
+#include <vector>
+
+
+namespace fftconvolver
+{ 
+
+/**
+* @class FFTConvolver
+* @brief Implementation of a partitioned FFT convolution algorithm with uniform block size
+*
+* Some notes on how to use it:
+*
+* - After initialization with an impulse response, subsequent data portions of
+*   arbitrary length can be convolved. The convolver internally can handle
+*   this by using appropriate buffering.
+*
+* - The convolver works without "latency" (except for the required
+*   processing time, of course), i.e. the output always is the convolved
+*   input for each processing call.
+*
+* - The convolver is suitable for real-time processing which means that no
+*   "unpredictable" operations like allocations, locking, API calls, etc. are
+*   performed during processing (all necessary allocations and preparations take
+*   place during initialization).
+*/
+class FFTConvolver
+{  
+public:
+  FFTConvolver();  
+  virtual ~FFTConvolver();
+  
+  /**
+  * @brief Initializes the convolver
+  * @param blockSize Block size internally used by the convolver (partition size)
+  * @param ir The impulse response
+  * @param irLen Length of the impulse response
+  * @return true: Success - false: Failed
+  */
+  bool init(size_t blockSize, const Sample* ir, size_t irLen);
+
+  /**
+  * @brief Convolves the the given input samples and immediately outputs the result
+  * @param input The input samples
+  * @param output The convolution result
+  * @param len Number of input/output samples
+  */
+  void process(const Sample* input, Sample* output, size_t len);
+
+  /**
+  * @brief Resets the convolver and discards the set impulse response
+  */
+  void reset();
+  
+private:
+  size_t _blockSize;
+  size_t _segSize;
+  size_t _segCount;
+  size_t _fftComplexSize;
+  std::vector<SplitComplex*> _segments;
+  std::vector<SplitComplex*> _segmentsIR;
+  SampleBuffer _fftBuffer;
+  audiofft::AudioFFT _fft;
+  SplitComplex _preMultiplied;
+  SplitComplex _conv;
+  SampleBuffer _overlap;
+  size_t _current;
+  SampleBuffer _inputBuffer;
+  size_t _inputBufferFill;
+
+  // Prevent uncontrolled usage
+  FFTConvolver(const FFTConvolver&);
+  FFTConvolver& operator=(const FFTConvolver&);
+};
+  
+} // End of namespace fftconvolver
+
+#endif // Header guard
diff --git a/FFTConvolver/Utilities.cpp b/FFTConvolver/Utilities.cpp
new file mode 100644
index 00000000..a30ff148
--- /dev/null
+++ b/FFTConvolver/Utilities.cpp
@@ -0,0 +1,113 @@
+// ==================================================================================
+// Copyright (c) 2012 HiFi-LoFi
+//
+// This is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+// ==================================================================================
+
+#include "Utilities.h"
+
+
+namespace fftconvolver
+{
+
+bool SSEEnabled()
+{
+#if defined(FFTCONVOLVER_USE_SSE)
+  return true;
+#else
+  return false;
+#endif
+}
+
+
+void Sum(Sample* FFTCONVOLVER_RESTRICT result,
+         const Sample* FFTCONVOLVER_RESTRICT a,
+         const Sample* FFTCONVOLVER_RESTRICT b,
+         size_t len)
+{
+  const size_t end4 = 4 * (len / 4);
+  for (size_t i=0; i<end4; i+=4)
+  {
+    result[i+0] = a[i+0] + b[i+0];
+    result[i+1] = a[i+1] + b[i+1];
+    result[i+2] = a[i+2] + b[i+2];
+    result[i+3] = a[i+3] + b[i+3];
+  }
+  for (size_t i=end4; i<len; ++i)
+  {
+    result[i] = a[i] + b[i];
+  }
+}
+
+
+void ComplexMultiplyAccumulate(SplitComplex& result, const SplitComplex& a, const SplitComplex& b)
+{
+  assert(result.size() == a.size());
+  assert(result.size() == b.size());
+  ComplexMultiplyAccumulate(result.re(), result.im(), a.re(), a.im(), b.re(), b.im(), result.size());
+}
+
+
+void ComplexMultiplyAccumulate(Sample* FFTCONVOLVER_RESTRICT re, 
+                               Sample* FFTCONVOLVER_RESTRICT im,
+                               const Sample* FFTCONVOLVER_RESTRICT reA,
+                               const Sample* FFTCONVOLVER_RESTRICT imA,
+                               const Sample* FFTCONVOLVER_RESTRICT reB,
+                               const Sample* FFTCONVOLVER_RESTRICT imB,
+                               const size_t len)
+{
+#if defined(FFTCONVOLVER_USE_SSE)
+  const size_t end4 = 4 * (len / 4);
+  for (size_t i=0; i<end4; i+=4)
+  {
+    const __m128 ra = _mm_load_ps(&reA[i]);
+    const __m128 rb = _mm_load_ps(&reB[i]);
+    const __m128 ia = _mm_load_ps(&imA[i]);
+    const __m128 ib = _mm_load_ps(&imB[i]);
+    __m128 real = _mm_load_ps(&re[i]);
+    __m128 imag = _mm_load_ps(&im[i]);
+    real = _mm_add_ps(real, _mm_mul_ps(ra, rb));
+    real = _mm_sub_ps(real, _mm_mul_ps(ia, ib));
+    _mm_store_ps(&re[i], real);
+    imag = _mm_add_ps(imag, _mm_mul_ps(ra, ib));
+    imag = _mm_add_ps(imag, _mm_mul_ps(ia, rb));
+    _mm_store_ps(&im[i], imag);
+  }
+  for (size_t i=end4; i<len; ++i)
+  {
+    re[i] += reA[i] * reB[i] - imA[i] * imB[i];
+    im[i] += reA[i] * imB[i] + imA[i] * reB[i];
+  }
+#else
+  const size_t end4 = 4 * (len / 4);
+  for (size_t i=0; i<end4; i+=4)
+  {
+    re[i+0] += reA[i+0] * reB[i+0] - imA[i+0] * imB[i+0];
+    re[i+1] += reA[i+1] * reB[i+1] - imA[i+1] * imB[i+1];
+    re[i+2] += reA[i+2] * reB[i+2] - imA[i+2] * imB[i+2];
+    re[i+3] += reA[i+3] * reB[i+3] - imA[i+3] * imB[i+3];
+    im[i+0] += reA[i+0] * imB[i+0] + imA[i+0] * reB[i+0];
+    im[i+1] += reA[i+1] * imB[i+1] + imA[i+1] * reB[i+1];
+    im[i+2] += reA[i+2] * imB[i+2] + imA[i+2] * reB[i+2];
+    im[i+3] += reA[i+3] * imB[i+3] + imA[i+3] * reB[i+3];
+  }
+  for (size_t i=end4; i<len; ++i)
+  {
+    re[i] += reA[i] * reB[i] - imA[i] * imB[i];
+    im[i] += reA[i] * imB[i] + imA[i] * reB[i];
+  }
+#endif
+}
+
+} // End of namespace fftconvolver
diff --git a/FFTConvolver/Utilities.h b/FFTConvolver/Utilities.h
new file mode 100644
index 00000000..61e9fb09
--- /dev/null
+++ b/FFTConvolver/Utilities.h
@@ -0,0 +1,351 @@
+// ==================================================================================
+// Copyright (c) 2012 HiFi-LoFi
+//
+// This is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+// ==================================================================================
+
+#ifndef _FFTCONVOLVER_UTILITIES_H
+#define _FFTCONVOLVER_UTILITIES_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <new>
+
+
+namespace fftconvolver
+{
+
+#if defined(__SSE__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+  #if !defined(FFTCONVOLVER_USE_SSE) && !defined(FFTCONVOLVER_DONT_USE_SSE)
+    #define FFTCONVOLVER_USE_SSE
+  #endif
+#endif
+
+
+#if defined (FFTCONVOLVER_USE_SSE)
+  #include <xmmintrin.h>
+#endif
+
+
+#if defined(__GNUC__)
+  #define FFTCONVOLVER_RESTRICT __restrict__
+#else
+  #define FFTCONVOLVER_RESTRICT
+#endif
+
+
+/**
+* @brief Returns whether SSE optimization for the convolver is enabled
+* @return true: Enabled - false: Disabled
+*/
+bool SSEEnabled();
+
+
+/**
+* @class Buffer
+* @brief Simple buffer implementation (uses 16-byte alignment if SSE optimization is enabled)
+*/
+template<typename T>
+class Buffer
+{
+public:  
+  explicit Buffer(size_t initialSize = 0) :
+    _data(0),
+    _size(0)
+  {
+    resize(initialSize);
+  }
+
+  virtual ~Buffer()
+  {
+    clear();
+  }
+
+  void clear()
+  {
+    deallocate(_data);
+    _data = 0;
+    _size = 0;
+  }
+
+  void resize(size_t size)
+  {
+    if (_size != size)
+    {
+      clear();
+
+      if (size > 0)
+      {
+        assert(!_data && _size == 0);
+        _data = allocate(size);
+        _size = size;
+      }
+    }
+    setZero();
+  }
+
+  size_t size() const
+  {
+    return _size;
+  }
+
+  void setZero()
+  {
+    ::memset(_data, 0, _size * sizeof(T));
+  }
+
+  void copyFrom(const Buffer<T>& other)
+  {
+    assert(_size == other._size);
+    if (this != &other)
+    {
+      ::memcpy(_data, other._data, _size * sizeof(T));
+    }
+  }
+
+  T& operator[](size_t index)
+  {
+    assert(_data && index < _size);
+    return _data[index];
+  }
+
+  const T& operator[](size_t index) const
+  {
+    assert(_data && index < _size);
+    return _data[index];
+  }
+
+  operator bool() const
+  {
+    return (_data != 0 && _size > 0);
+  }
+
+  T* data()
+  {
+    return _data;
+  }
+
+  const T* data() const
+  {
+    return _data;
+  }
+
+  static void Swap(Buffer<T>& a, Buffer<T>& b)
+  {
+    std::swap(a._data, b._data);
+    std::swap(a._size, b._size);
+  }
+
+private:
+  T* allocate(size_t size)
+  {
+#if defined(FFTCONVOLVER_USE_SSE)
+    return static_cast<T*>(_mm_malloc(size * sizeof(T), 16));
+#else
+    return new T[size];
+#endif
+  }
+  
+  void deallocate(T* ptr)
+  {
+#if defined(FFTCONVOLVER_USE_SSE)
+    _mm_free(ptr);
+#else
+    delete [] ptr;
+#endif
+  }
+
+  T* _data;
+  size_t _size;
+
+  // Prevent uncontrolled usage
+  Buffer(const Buffer&);
+  Buffer& operator=(const Buffer&);
+};
+
+
+/**
+* @brief Type of one sample
+*/
+typedef float Sample;
+
+
+/**
+* @brief Buffer for samples
+*/
+typedef Buffer<Sample> SampleBuffer;
+
+
+/**
+* @class SplitComplex
+* @brief Buffer for split-complex representation of FFT results
+*
+* The split-complex representation stores the real and imaginary parts
+* of FFT results in two different memory buffers which is useful e.g. for
+* SIMD optimizations.
+*/
+class SplitComplex
+{
+public:
+  explicit SplitComplex(size_t initialSize = 0) : 
+    _size(0),
+    _re(),
+    _im()
+  {
+    resize(initialSize);
+  }
+
+  ~SplitComplex()
+  {
+    clear();
+  }
+
+  void clear()
+  {
+    _re.clear();
+    _im.clear();
+    _size = 0;
+  }
+
+  void resize(size_t newSize)
+  {
+    _re.resize(newSize);
+    _im.resize(newSize);
+    _size = newSize;
+  }
+
+  void setZero()
+  {
+    _re.setZero();
+    _im.setZero();
+  }
+
+  void copyFrom(const SplitComplex& other)
+  {
+    _re.copyFrom(other._re);
+    _im.copyFrom(other._im);
+  }
+
+  Sample* re()
+  {
+    return _re.data();
+  }
+
+  const Sample* re() const
+  {
+    return _re.data();
+  }
+
+  Sample* im()
+  {
+    return _im.data();
+  }
+
+  const Sample* im() const
+  {
+    return _im.data();
+  }
+
+  size_t size() const
+  {
+    return _size;
+  }
+
+private:
+  size_t _size;
+  SampleBuffer _re;
+  SampleBuffer _im;
+
+  // Prevent uncontrolled usage
+  SplitComplex(const SplitComplex&);
+  SplitComplex& operator=(const SplitComplex&);
+};
+
+
+/**
+* @brief Returns the next power of 2 of a given number
+* @param val The number
+* @return The next power of 2
+*/
+template<typename T>
+T NextPowerOf2(const T& val)
+{
+  T nextPowerOf2 = 1;
+  while (nextPowerOf2 < val)
+  {
+    nextPowerOf2 *= 2;
+  }
+  return nextPowerOf2;
+}
+  
+
+/**
+* @brief Sums two given sample arrays
+* @param result The result array
+* @param a The 1st array
+* @param b The 2nd array
+* @param len The length of the arrays
+*/
+void Sum(Sample* FFTCONVOLVER_RESTRICT result,
+         const Sample* FFTCONVOLVER_RESTRICT a,
+         const Sample* FFTCONVOLVER_RESTRICT b,
+         size_t len);
+
+
+/**
+* @brief Copies a source array into a destination buffer and pads the destination buffer with zeros
+* @param dest The destination buffer
+* @param src The source array
+* @param srcSize The size of the source array
+*/
+template<typename T>
+void CopyAndPad(Buffer<T>& dest, const T* src, size_t srcSize)
+{
+  assert(dest.size() >= srcSize);
+  ::memcpy(dest.data(), src, srcSize * sizeof(T));
+  ::memset(dest.data() + srcSize, 0, (dest.size()-srcSize) * sizeof(T)); 
+}
+
+
+/**
+* @brief Adds the complex product of two split-complex buffers to a result buffer
+* @param result The result buffer
+* @param a The 1st factor of the complex product
+* @param b The 2nd factor of the complex product
+*/
+void ComplexMultiplyAccumulate(SplitComplex& result, const SplitComplex& a, const SplitComplex& b);
+
+
+/**
+* @brief Adds the complex product of two split-complex arrays to a result array
+* @param re The real part of the result buffer
+* @param im The imaginary part of the result buffer
+* @param reA The real part of the 1st factor of the complex product
+* @param imA The imaginary part of the 1st factor of the complex product
+* @param reB The real part of the 2nd factor of the complex product
+* @param imB The imaginary part of the 2nd factor of the complex product
+*/
+void ComplexMultiplyAccumulate(Sample* FFTCONVOLVER_RESTRICT re, 
+                               Sample* FFTCONVOLVER_RESTRICT im,
+                               const Sample* FFTCONVOLVER_RESTRICT reA,
+                               const Sample* FFTCONVOLVER_RESTRICT imA,
+                               const Sample* FFTCONVOLVER_RESTRICT reB,
+                               const Sample* FFTCONVOLVER_RESTRICT imB,
+                               const size_t len);
+  
+} // End of namespace fftconvolver
+
+#endif // Header guard
diff --git a/FFTConvolver/convolver.cpp b/FFTConvolver/convolver.cpp
new file mode 100644
index 00000000..4589c8ed
--- /dev/null
+++ b/FFTConvolver/convolver.cpp
@@ -0,0 +1,67 @@
+
+
+#include "convolver.h"
+#include <sndfile.h>
+#include "FFTConvolver.h"
+#include "Utilities.h"
+extern "C" {
+#include "../common.h"
+}
+
+
+static fftconvolver::FFTConvolver convolver_l;
+static fftconvolver::FFTConvolver convolver_r;
+
+
+void convolver_init(const char* filename, int max_length)
+{
+  SF_INFO info;
+  assert(filename);
+  SNDFILE* file = sf_open(filename, SFM_READ, &info);
+  assert(file);
+  
+  if (info.samplerate != 44100)
+    die("Impulse file \"%s\" sample rate is %d Hz. Only 44100 Hz is supported", filename, info.samplerate);
+  
+  if (info.channels != 1 && info.channels != 2)
+    die("Impulse file \"%s\" contains %d channels. Only 1 or 2 is supported.", filename, info.channels);
+  
+  const size_t size = info.frames > max_length ? max_length : info.frames;
+  float buffer[size*info.channels];
+  
+  size_t l = sf_readf_float(file, buffer, size);
+  assert(l == size);
+  
+  if (info.channels == 1) {
+    convolver_l.init(352, buffer, size);
+    convolver_r.init(352, buffer, size);
+  } else {
+    // deinterleave
+    float buffer_l[size];
+    float buffer_r[size];
+    
+    int i;
+    for (i=0; i<size; ++i)
+    {
+      buffer_l[i] = buffer[2*i+0];
+      buffer_r[i] = buffer[2*i+1];
+    }
+    
+    convolver_l.init(352, buffer_l, size);
+    convolver_r.init(352, buffer_r, size);
+  }
+  
+  debug(1, "IR initialized from \"%s\" with %d channels and %d samples", filename, info.channels, size);
+  
+  sf_close(file);
+}
+
+void convolver_process_l(float* data, int length)
+{
+  convolver_l.process(data, data, length);
+}
+
+void convolver_process_r(float* data, int length)
+{
+  convolver_r.process(data, data, length);
+}
diff --git a/FFTConvolver/convolver.h b/FFTConvolver/convolver.h
new file mode 100644
index 00000000..1b5a98ae
--- /dev/null
+++ b/FFTConvolver/convolver.h
@@ -0,0 +1,14 @@
+// C wrapper to C++ FFTConvolver
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  
+void convolver_init(const char* file, int max_length);
+void convolver_process_l(float* data, int length);
+void convolver_process_r(float* data, int length);
+  
+#ifdef __cplusplus
+}
+#endif
diff --git a/Makefile.am b/Makefile.am
index c6e4d5e7..a2f973d0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,7 @@
 SUBDIRS = man
 
 bin_PROGRAMS = shairport-sync
-shairport_sync_SOURCES = shairport.c rtsp.c mdns.c mdns_external.c common.c rtp.c player.c alac.c audio.c
+shairport_sync_SOURCES = shairport.c rtsp.c mdns.c mdns_external.c common.c rtp.c player.c alac.c audio.c loudness.c
 
 AM_CFLAGS = -Wno-multichar -DSYSCONFDIR=\"$(sysconfdir)\"
 AM_CPPFLAGS = -Wno-multichar -DSYSCONFDIR=\"$(sysconfdir)\"
@@ -55,6 +55,10 @@ if USE_PULSE
 shairport_sync_SOURCES += audio_pulse.c
 endif
 
+if USE_CONVOLUTION
+shairport_sync_SOURCES += FFTConvolver/AudioFFT.cpp FFTConvolver/FFTConvolver.cpp FFTConvolver/Utilities.cpp FFTConvolver/convolver.cpp
+endif
+
 if USE_DNS_SD
 shairport_sync_SOURCES += mdns_dns_sd.c
 endif
diff --git a/common.h b/common.h
index 3a1cab58..d3beeb0d 100644
--- a/common.h
+++ b/common.h
@@ -135,6 +135,17 @@ typedef struct {
                             // native range.
   enum sps_format_t output_format;
   int output_rate;
+  
+#ifdef CONFIG_CONVOLUTION
+  int convolution;
+  const char* convolution_ir_file;
+  float convolution_gain;
+  int convolution_max_length;
+#endif
+  
+  int loudness;
+  float loudness_reference_volume_db;
+  
 } shairport_cfg;
 
 // true if Shairport Sync is supposed to be sending output to the output device, false otherwise
@@ -160,7 +171,7 @@ void r64arrayinit();
 uint64_t ranarray64u();
 int64_t ranarray64i();
 
-int debuglev;
+extern int debuglev;
 void die(char *format, ...);
 void warn(char *format, ...);
 void inform(char *format, ...);
diff --git a/configure.ac b/configure.ac
index 8ec31411..18f5322f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -238,6 +238,15 @@ AC_ARG_WITH(pulseaudio, [  --with-pulseaudio = choose PulseAudio API support. N.
   AC_CHECK_LIB([pulse], [pa_stream_peek], , AC_MSG_ERROR(PulseAudio support requires the libpulse-dev library.))], )
 AM_CONDITIONAL([USE_PULSE], [test "x$HAS_PULSE" = "x1"])
 
+# Look for Convolution flag
+AC_ARG_WITH(convolution, [  --with-convolution = choose audio DSP convolution support], [
+  AC_MSG_RESULT(>>Including convolution support)
+  HAS_CONVOLUTION=1
+  AM_INIT_AUTOMAKE([subdir-objects])
+  AC_DEFINE([CONFIG_CONVOLUTION], 1, [Needed by the compiler.])
+  AC_CHECK_LIB([sndfile], [sf_open], , AC_MSG_ERROR(Convolution support requires the sndfile library!))], )
+AM_CONDITIONAL([USE_CONVOLUTION], [test "x$HAS_CONVOLUTION" = "x1"])
+
 # Look for dns_sd flag
 AC_ARG_WITH(dns_sd, [  --with-dns_sd = choose dns_sd mDNS support], [
   AC_MSG_RESULT(>>Including dns_sd for mDNS support)
@@ -295,8 +304,6 @@ AC_TYPE_UINT8_T
 AC_FUNC_ALLOCA
 AC_FUNC_ERROR_AT_LINE
 AC_FUNC_FORK
-AC_FUNC_MALLOC
-AC_FUNC_REALLOC
 AC_CHECK_FUNCS([atexit clock_gettime gethostname inet_ntoa memchr memmove memset mkfifo pow select socket stpcpy strcasecmp strchr strdup strerror strstr strtol strtoul])
 
 AC_CONFIG_FILES([Makefile man/Makefile scripts/shairport-sync.service])
diff --git a/loudness.c b/loudness.c
new file mode 100644
index 00000000..5db17a8c
--- /dev/null
+++ b/loudness.c
@@ -0,0 +1,55 @@
+#include "loudness.h"
+#include <math.h>
+#include "common.h"
+
+loudness_processor loudness_r;
+loudness_processor loudness_l;
+
+
+void _loudness_set_volume(loudness_processor *p, float volume)
+{
+  float gain = -(volume-config.loudness_reference_volume_db)*0.5;
+  if (gain < 0)
+    gain = 0;
+  
+  float Fc = 10.0;
+  float Q = 0.5;
+  
+  // Formula from http://www.earlevel.com/main/2011/01/02/biquad-formulas/
+  float Fs = 44100.0;
+  
+  float K = tan(M_PI * Fc / Fs);
+  float V = pow(10.0, gain / 20.0);
+  
+  float norm = 1 / (1 + 1/Q * K + K * K);
+  p->a0 = (1 + V/Q * K + K * K) * norm;
+  p->a1 = 2 * (K * K - 1) * norm;
+  p->a2 = (1 - V/Q * K + K * K) * norm;
+  p->b1 = p->a1;
+  p->b2 = (1 - 1/Q * K + K * K) * norm;
+}
+
+float loudness_process(loudness_processor *p, float i0)
+{
+  float o0 = p->a0*i0 + p->a1*p->i1 + p->a2*p->i2 - p->b1*p->o1 - p->b2*p->o2;
+  
+  p->o2 = p->o1;
+  p->o1 = o0;
+  
+  p->i2 = p->i1;
+  p->i1 = i0;
+  
+  return o0;
+}
+
+
+void loudness_set_volume(float volume)
+{
+  float gain = -(volume-config.loudness_reference_volume_db)*0.5;
+  if (gain < 0)
+    gain = 0;
+  
+  inform("Volume: %.1f dB - Loudness gain @10Hz: %.1f dB", volume, gain);
+  _loudness_set_volume(&loudness_l, volume);
+  _loudness_set_volume(&loudness_r, volume);
+}
diff --git a/loudness.h b/loudness.h
new file mode 100644
index 00000000..c1a219a2
--- /dev/null
+++ b/loudness.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <stdio.h>
+
+typedef struct {
+  float a0, a1, a2, b1, b2;
+  float i1, i2, o1, o2;
+} loudness_processor;
+
+
+extern loudness_processor loudness_r;
+extern loudness_processor loudness_l;
+
+void loudness_set_volume(float volume);
+float loudness_process(loudness_processor *p, float sample);
+
diff --git a/player.c b/player.c
index f035989d..ebc98843 100644
--- a/player.c
+++ b/player.c
@@ -65,6 +65,10 @@
 #include <soxr.h>
 #endif
 
+#ifdef CONFIG_CONVOLUTION
+#include <FFTConvolver/convolver.h>
+#endif
+
 #include "common.h"
 #include "player.h"
 #include "rtp.h"
@@ -76,6 +80,8 @@
 #include "apple_alac.h"
 #endif
 
+#include "loudness.h"
+
 // parameters from the source
 static unsigned char *aesiv;
 #ifdef HAVE_LIBSSL
@@ -639,11 +645,16 @@ static inline void process_sample(int32_t sample, char **outp, enum sps_format_t
                                   int dither) {
   int64_t hyper_sample = sample;
   int result;
-  int64_t hyper_volume = (int64_t)volume << 16;
-  hyper_sample = hyper_sample * hyper_volume; // this is 64 bit bit multiplication -- we may need to
-                                              // dither it down to its
-                                              // target resolution
-
+  
+  if (config.loudness) {
+    hyper_sample <<= 32; // Do not apply volume as it has already been done with the Loudness DSP filter
+  } else {
+    int64_t hyper_volume = (int64_t)volume << 16;
+    hyper_sample = hyper_sample * hyper_volume; // this is 64 bit bit multiplication -- we may need to
+                                                // dither it down to its
+                                                // target resolution
+  }
+  
   // next, do dither, if necessary
   if (dither) {
 
@@ -1815,6 +1826,67 @@ static void *player_thread_func(void *arg) {
             if (config.no_sync != 0)
               amount_to_stuff = 0; // no stuffing if it's been disabled
 
+            
+            // Apply DSP here
+            if (config.loudness
+#ifdef CONFIG_CONVOLUTION
+                || config.convolution
+#endif
+                )
+            {
+              int32_t *tbuf32 = (int32_t*)tbuf;
+              float fbuf_l[inbuflength];
+              float fbuf_r[inbuflength];
+              
+              // Deinterleave, and convert to float
+              int i;
+              for (i=0; i<inbuflength; ++i)
+              {
+                fbuf_l[i] = tbuf32[2*i];
+                fbuf_r[i] = tbuf32[2*i+1];
+              }
+              
+#ifdef CONFIG_CONVOLUTION
+              // Apply convolution
+              if (config.convolution)
+              {
+                convolver_process_l(fbuf_l, inbuflength);
+                convolver_process_r(fbuf_r, inbuflength);
+                
+                float gain = pow(10.0, config.convolution_gain/20.0);
+                for (i=0; i<inbuflength; ++i)
+                {
+                  fbuf_l[i] *= gain;
+                  fbuf_r[i] *= gain;
+                }
+                
+              }
+#endif
+              
+              if (config.loudness)
+              {
+                // Apply volume and loudness
+                // Volume must be applied here because the loudness filter will increase the signal level and it would saturate the int32_t otherwise                
+                float gain = fix_volume / 65536.0f;
+                float gain_db = 20*log10(gain);
+                //debug(1, "Applying soft volume dB: %f k: %f", gain_db, gain);
+                
+                for (i=0; i<inbuflength; ++i)
+                {
+                  fbuf_l[i] = loudness_process(&loudness_l, fbuf_l[i] * gain);
+                  fbuf_r[i] = loudness_process(&loudness_r, fbuf_r[i] * gain);
+                }
+              }
+              
+              // Interleave and convert back to int32_t
+              for (i=0; i<inbuflength; ++i)
+              {
+                tbuf32[2*i]   = fbuf_l[i];
+                tbuf32[2*i+1] = fbuf_r[i];
+              }
+              
+            }
+            
 #ifdef HAVE_LIBSOXR
             switch (config.packet_stuffing) {
             case ST_basic:
@@ -2257,6 +2329,9 @@ void player_volume(double airplay_volume) {
   pthread_mutex_lock(&vol_mutex);
   fix_volume = temp_fix_volume;
   pthread_mutex_unlock(&vol_mutex);
+  
+  if (config.loudness)
+    loudness_set_volume(software_attenuation/100);
 
 #ifdef CONFIG_METADATA
   char *dv = malloc(128); // will be freed in the metadata thread
diff --git a/scripts/shairport-sync.conf b/scripts/shairport-sync.conf
index d19e85f3..0ebe7021 100644
--- a/scripts/shairport-sync.conf
+++ b/scripts/shairport-sync.conf
@@ -35,6 +35,36 @@ general =
 //	interface = "name"; // Use this advanced setting to specify the interface on which Shairport Sync should provide its service. Leave it commented out to get the default, which is to select the interface(s) automatically.
 };
 
+
+dsp =
+{
+
+//////////////////////////////////////////
+//  This convolution filter can be used to apply almost any correction to the audio signal, like frequency and phase correction.
+//  For example you could measure (with a good microphone and a sweep-sine) the frequency response of your speakers + room,
+//  and apply a correction to get a flat response curve.
+//////////////////////////////////////////
+//
+//  convolution = "yes";                  // Activate the convolution filter.
+//  convolution_ir_file = "impulse.wav";  // Impulse Response file to be convolved to the audio stream
+//  convolution_gain = -4.0;              // Static gain applied to prevent clipping during the convolution process
+//  convolution_max_length = 44100;       // Truncate the input file to this length in order to save CPU.
+
+
+//////////////////////////////////////////
+//  This loudness filter is used to compensate for human ear non linearity.
+//  When the volume decreases, our ears loose more sentisitivity in the low range frequencies than in the mid range ones.
+//  This filter aims at compensating for this loss, applying a variable gain to low frequencies depending on the volume.
+//  More info can be found here: https://en.wikipedia.org/wiki/Equal-loudness_contour
+//  For this filter to work properly, you should disable (or set to a fix value) all other volume control and only let shairport-sync control your volume.
+//  The setting "loudness_reference_volume_db" should be set at the volume reported by shairport-sync when listening to music at a normal listening volume.
+//////////////////////////////////////////
+//
+//  loudness = "yes";                     // Activate the filter
+//  loudness_reference_volume_db = -20.0; // Above this level the filter will have no effect anymore. Below this level it will gradually boost the low frequencies.
+
+};
+
 // How to deal with metadata, including artwork
 metadata =
 {
diff --git a/shairport.c b/shairport.c
index c34a54a7..34074b74 100644
--- a/shairport.c
+++ b/shairport.c
@@ -66,6 +66,10 @@
 #include <libdaemon/dpid.h>
 #include <libdaemon/dsignal.h>
 
+#ifdef CONFIG_CONVOLUTION
+#include <FFTConvolver/convolver.h>
+#endif
+
 static int shutting_down = 0;
 static char *appName = NULL;
 char configuration_file_path[4096 + 1];
@@ -159,6 +163,9 @@ char *get_version_string() {
 #ifdef HAVE_LIBSOXR
     strcat(version_string, "-soxr");
 #endif
+#ifdef CONFIG_CONVOLUTION
+    strcat(version_string, "-convolution");
+#endif
 #ifdef CONFIG_METADATA
     strcat(version_string, "-metadata");
 #endif
@@ -661,7 +668,59 @@ int parse_options(int argc, char **argv) {
         config.timeout = value;
         config.dont_check_timeout = 0; // this is for legacy -- only set by -t 0
       }
-
+      
+#ifdef CONFIG_CONVOLUTION
+      
+      if (config_lookup_string(config.cfg, "dsp.convolution", &str)) {
+        if (strcasecmp(str, "no") == 0)
+          config.convolution = 0;
+        else if (strcasecmp(str, "yes") == 0)
+          config.convolution = 1;
+        else
+          die("Invalid dsp.convolution. It should be \"yes\" or \"no\"");
+        
+      }
+      
+      if (config_lookup_float(config.cfg, "dsp.convolution_gain", &dvalue)) {
+        config.convolution_gain = dvalue;
+        if (dvalue > 10 || dvalue < -50)
+          die("Invalid value \"%f\" for dsp.convolution_gain. It should be between -50 and +10 dB", dvalue);
+      }
+      
+      config.convolution_max_length = 8192;
+      if (config_lookup_int(config.cfg, "dsp.convolution_max_length", &value)) {
+        config.convolution_max_length = value;
+        
+        if (value < 1 || value > 200000)
+          die("dsp.convolution_max_length must be within 1 and 200000");
+      }
+      
+      if (config_lookup_string(config.cfg, "dsp.convolution_ir_file", &str)) {
+        config.convolution_ir_file = str;
+        convolver_init(config.convolution_ir_file, config.convolution_max_length);
+      }
+      
+      if (config.convolution && config.convolution_ir_file == NULL) {
+        die("Convolution enabled but no convolution_ir_file provided");
+      }
+#endif
+      
+      if (config_lookup_string(config.cfg, "dsp.loudness", &str)) {
+        if (strcasecmp(str, "no") == 0)
+          config.loudness = 0;
+        else if (strcasecmp(str, "yes") == 0)
+          config.loudness = 1;
+        else
+          die("Invalid dsp.convolution. It should be \"yes\" or \"no\"");
+      }
+      
+      config.loudness_reference_volume_db = -20;
+      if (config_lookup_float(config.cfg, "dsp.loudness_reference_volume_db", &dvalue)) {
+        config.loudness_reference_volume_db = dvalue;
+        if (dvalue > 0 || dvalue < -100)
+          die("Invalid value \"%f\" for dsp.loudness_reference_volume_db. It should be between -100 and 0", dvalue);
+      }
+      
     } else {
       if (config_error_type(&config_file_stuff) == CONFIG_ERR_FILE_IO)
         debug(1, "Error reading configuration file \"%s\": \"%s\".",
@@ -1223,6 +1282,15 @@ int main(int argc, char **argv) {
   debug(1, "get-coverart is %d.", config.get_coverart);
 #endif
 
+#ifdef CONFIG_CONVOLUTION
+  debug(1, "convolution is %d.", config.convolution);
+  debug(1, "convolution IR file is \"%s\"", config.convolution_ir_file);
+  debug(1, "convolution max length %d", config.convolution_max_length);
+  debug(1, "convolution gain is %f", config.convolution_gain);
+#endif
+  debug(1, "loudness is %d.", config.loudness);
+  debug(1, "loudness reference level is %f", config.loudness_reference_volume_db);
+  
   uint8_t ap_md5[16];
 
 #ifdef HAVE_LIBSSL
-- 
2.39.2