Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux...

[linux-2.6-omap-h63xx.git] / drivers / staging / echo / echo.c
diff --git a/drivers/staging/echo/echo.c b/drivers/staging/echo/echo.c

index 13792ef286f39aab8d174399b9d989f3376f60a9..b8f2c5e9dee5e575e6e3a5053f9f186c92a4cb19 100644 (file)
--- a/drivers/staging/echo/echo.c
+++ b/drivers/staging/echo/echo.c
@@ -74,7 +74,6 @@
  
     Steve also has some nice notes on echo cancellers in echo.h
  
-
     References:
  
     [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
@@ -105,20 +104,18 @@
     Mark, Pawel, and Pavel.
  */
  
-#include <linux/kernel.h>       /* We're doing kernel work */
+#include <linux/kernel.h>      /* We're doing kernel work */
  #include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/slab.h>
-#define malloc(a) kmalloc((a), GFP_KERNEL)
-#define free(a) kfree(a)
  
  #include "bit_operations.h"
  #include "echo.h"
  
  #define MIN_TX_POWER_FOR_ADAPTION   64
  #define MIN_RX_POWER_FOR_ADAPTION   64
-#define DTD_HANGOVER               600     /* 600 samples, or 75ms     */
-#define DC_LOG2BETA                  3     /* log2() of DC filter Beta */
+#define DTD_HANGOVER               600 /* 600 samples, or 75ms     */
+#define DC_LOG2BETA                  3 /* log2() of DC filter Beta */
  
  /*-----------------------------------------------------------------------*\
                                 FUNCTIONS
@@ -126,59 +123,58 @@
  
  /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
  
-
-#ifdef __BLACKFIN_ASM__
-static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+#ifdef __bfin__
+static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean,
+                                   int shift)
  {
-    int i, j;
-    int offset1;
-    int offset2;
-    int factor;
-    int exp;
-    int16_t *phist;
-    int n;
-
-    if (shift > 0)
-       factor = clean << shift;
-    else
-       factor = clean >> -shift;
-
-    /* Update the FIR taps */
-
-    offset2 = ec->curr_pos;
-    offset1 = ec->taps - offset2;
-    phist = &ec->fir_state_bg.history[offset2];
-
-    /* st: and en: help us locate the assembler in echo.s */
-
-    //asm("st:");
-    n = ec->taps;
-    for (i = 0, j = offset2;  i < n;  i++, j++)
-    {
-       exp = *phist++ * factor;
-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
-    }
-    //asm("en:");
-
-    /* Note the asm for the inner loop above generated by Blackfin gcc
-       4.1.1 is pretty good (note even parallel instructions used):
-
-       R0 = W [P0++] (X);
-       R0 *= R2;
-       R0 = R0 + R3 (NS) ||
-       R1 = W [P1] (X) ||
-       nop;
-       R0 >>>= 15;
-       R0 = R0 + R1;
-       W [P1++] = R0;
-
-       A block based update algorithm would be much faster but the
-       above can't be improved on much.  Every instruction saved in
-       the loop above is 2 MIPs/ch!  The for loop above is where the
-       Blackfin spends most of it's time - about 17 MIPs/ch measured
-       with speedtest.c with 256 taps (32ms).  Write-back and
-       Write-through cache gave about the same performance.
-    */
+       int i, j;
+       int offset1;
+       int offset2;
+       int factor;
+       int exp;
+       int16_t *phist;
+       int n;
+
+       if (shift > 0)
+               factor = clean << shift;
+       else
+               factor = clean >> -shift;
+
+       /* Update the FIR taps */
+
+       offset2 = ec->curr_pos;
+       offset1 = ec->taps - offset2;
+       phist = &ec->fir_state_bg.history[offset2];
+
+       /* st: and en: help us locate the assembler in echo.s */
+
+       //asm("st:");
+       n = ec->taps;
+       for (i = 0, j = offset2; i < n; i++, j++) {
+               exp = *phist++ * factor;
+               ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+       }
+       //asm("en:");
+
+       /* Note the asm for the inner loop above generated by Blackfin gcc
+          4.1.1 is pretty good (note even parallel instructions used):
+
+          R0 = W [P0++] (X);
+          R0 *= R2;
+          R0 = R0 + R3 (NS) ||
+          R1 = W [P1] (X) ||
+          nop;
+          R0 >>>= 15;
+          R0 = R0 + R1;
+          W [P1++] = R0;
+
+          A block based update algorithm would be much faster but the
+          above can't be improved on much.  Every instruction saved in
+          the loop above is 2 MIPs/ch!  The for loop above is where the
+          Blackfin spends most of its time - about 17 MIPs/ch measured
+          with speedtest.c with 256 taps (32ms).  Write-back and
+          Write-through cache gave about the same performance.
+        */
  }
  
  /*
@@ -200,97 +196,91 @@ static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, int shift
  */
  
  #else
-static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, int shift)
+static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean,
+                                   int shift)
  {
-    int i;
-
-    int offset1;
-    int offset2;
-    int factor;
-    int exp;
-
-    if (shift > 0)
-       factor = clean << shift;
-    else
-       factor = clean >> -shift;
-
-    /* Update the FIR taps */
-
-    offset2 = ec->curr_pos;
-    offset1 = ec->taps - offset2;
-
-    for (i = ec->taps - 1;  i >= offset1;  i--)
-    {
-       exp = (ec->fir_state_bg.history[i - offset1]*factor);
-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
-    }
-    for (  ;  i >= 0;  i--)
-    {
-       exp = (ec->fir_state_bg.history[i + offset2]*factor);
-       ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
-    }
+       int i;
+
+       int offset1;
+       int offset2;
+       int factor;
+       int exp;
+
+       if (shift > 0)
+               factor = clean << shift;
+       else
+               factor = clean >> -shift;
+
+       /* Update the FIR taps */
+
+       offset2 = ec->curr_pos;
+       offset1 = ec->taps - offset2;
+
+       for (i = ec->taps - 1; i >= offset1; i--) {
+               exp = (ec->fir_state_bg.history[i - offset1] * factor);
+               ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+       }
+       for (; i >= 0; i--) {
+               exp = (ec->fir_state_bg.history[i + offset2] * factor);
+               ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15);
+       }
  }
  #endif
  
-/*- End of function --------------------------------------------------------*/
-
  struct oslec_state *oslec_create(int len, int adaption_mode)
  {
-    struct oslec_state *ec;
-    int i;
-    int j;
-
-    ec = kmalloc(sizeof(*ec), GFP_KERNEL);
-    if (ec == NULL)
-        return  NULL;
-    memset(ec, 0, sizeof(*ec));
-
-    ec->taps = len;
-    ec->log2taps = top_bit(len);
-    ec->curr_pos = ec->taps - 1;
-
-    for (i = 0;  i < 2;  i++)
-    {
-        if ((ec->fir_taps16[i] = (int16_t *) malloc((ec->taps)*sizeof(int16_t))) == NULL)
-        {
-            for (j = 0;  j < i;  j++)
-                kfree(ec->fir_taps16[j]);
-            kfree(ec);
-            return  NULL;
-        }
-        memset(ec->fir_taps16[i], 0, (ec->taps)*sizeof(int16_t));
-    }
-
-    fir16_create(&ec->fir_state,
-                 ec->fir_taps16[0],
-                 ec->taps);
-    fir16_create(&ec->fir_state_bg,
-                 ec->fir_taps16[1],
-                 ec->taps);
-
-    for(i=0; i<5; i++) {
-      ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
-    }
-
-    ec->cng_level = 1000;
-    oslec_adaption_mode(ec, adaption_mode);
-
-    ec->snapshot = (int16_t*)malloc(ec->taps*sizeof(int16_t));
-    memset(ec->snapshot, 0, sizeof(int16_t)*ec->taps);
-
-    ec->cond_met = 0;
-    ec->Pstates = 0;
-    ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
-    ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
-    ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
-    ec->Lbgn = ec->Lbgn_acc = 0;
-    ec->Lbgn_upper = 200;
-    ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
-
-    return  ec;
+       struct oslec_state *ec;
+       int i;
+
+       ec = kzalloc(sizeof(*ec), GFP_KERNEL);
+       if (!ec)
+               return NULL;
+
+       ec->taps = len;
+       ec->log2taps = top_bit(len);
+       ec->curr_pos = ec->taps - 1;
+
+       for (i = 0; i < 2; i++) {
+               ec->fir_taps16[i] =
+                   kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+               if (!ec->fir_taps16[i])
+                       goto error_oom;
+       }
+
+       fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
+       fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps);
+
+       for (i = 0; i < 5; i++) {
+               ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
+       }
+
+       ec->cng_level = 1000;
+       oslec_adaption_mode(ec, adaption_mode);
+
+       ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL);
+       if (!ec->snapshot)
+               goto error_oom;
+
+       ec->cond_met = 0;
+       ec->Pstates = 0;
+       ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
+       ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
+       ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+       ec->Lbgn = ec->Lbgn_acc = 0;
+       ec->Lbgn_upper = 200;
+       ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
+
+       return ec;
+
+      error_oom:
+       for (i = 0; i < 2; i++)
+               kfree(ec->fir_taps16[i]);
+
+       kfree(ec);
+       return NULL;
  }
+
  EXPORT_SYMBOL_GPL(oslec_create);
-/*- End of function --------------------------------------------------------*/
  
  void oslec_free(struct oslec_state *ec)
  {
@@ -298,299 +288,301 @@ void oslec_free(struct oslec_state *ec)
  
         fir16_free(&ec->fir_state);
         fir16_free(&ec->fir_state_bg);
-       for (i = 0;  i < 2;  i++)
+       for (i = 0; i < 2; i++)
                 kfree(ec->fir_taps16[i]);
         kfree(ec->snapshot);
         kfree(ec);
  }
+
  EXPORT_SYMBOL_GPL(oslec_free);
-/*- End of function --------------------------------------------------------*/
  
  void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode)
  {
-    ec->adaption_mode = adaption_mode;
+       ec->adaption_mode = adaption_mode;
  }
+
  EXPORT_SYMBOL_GPL(oslec_adaption_mode);
-/*- End of function --------------------------------------------------------*/
  
  void oslec_flush(struct oslec_state *ec)
  {
-    int i;
+       int i;
  
-    ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
-    ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
-    ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
+       ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
+       ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
+       ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
  
-    ec->Lbgn = ec->Lbgn_acc = 0;
-    ec->Lbgn_upper = 200;
-    ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
+       ec->Lbgn = ec->Lbgn_acc = 0;
+       ec->Lbgn_upper = 200;
+       ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
  
-    ec->nonupdate_dwell = 0;
+       ec->nonupdate_dwell = 0;
  
-    fir16_flush(&ec->fir_state);
-    fir16_flush(&ec->fir_state_bg);
-    ec->fir_state.curr_pos = ec->taps - 1;
-    ec->fir_state_bg.curr_pos = ec->taps - 1;
-    for (i = 0;  i < 2;  i++)
-        memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
+       fir16_flush(&ec->fir_state);
+       fir16_flush(&ec->fir_state_bg);
+       ec->fir_state.curr_pos = ec->taps - 1;
+       ec->fir_state_bg.curr_pos = ec->taps - 1;
+       for (i = 0; i < 2; i++)
+               memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
  
-    ec->curr_pos = ec->taps - 1;
-    ec->Pstates = 0;
+       ec->curr_pos = ec->taps - 1;
+       ec->Pstates = 0;
  }
+
  EXPORT_SYMBOL_GPL(oslec_flush);
-/*- End of function --------------------------------------------------------*/
  
-void oslec_snapshot(struct oslec_state *ec) {
-    memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
+void oslec_snapshot(struct oslec_state *ec)
+{
+       memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t));
  }
+
  EXPORT_SYMBOL_GPL(oslec_snapshot);
-/*- End of function --------------------------------------------------------*/
  
  /* Dual Path Echo Canceller ------------------------------------------------*/
  
  int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
  {
-    int32_t echo_value;
-    int clean_bg;
-    int tmp, tmp1;
-
-    /* Input scaling was found be required to prevent problems when tx
-       starts clipping.  Another possible way to handle this would be the
-       filter coefficent scaling. */
-
-    ec->tx = tx; ec->rx = rx;
-    tx >>=1;
-    rx >>=1;
-
-    /*
-       Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
-       otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
-       only real axis.  Some chip sets (like Si labs) don't need
-       this, but something like a $10 X100P card does.  Any DC really slows
-       down convergence.
-
-       Note: removes some low frequency from the signal, this reduces
-       the speech quality when listening to samples through headphones
-       but may not be obvious through a telephone handset.
-
-       Note that the 3dB frequency in radians is approx Beta, e.g. for
-       Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
-    */
-
-    if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
-      tmp = rx << 15;
+       int32_t echo_value;
+       int clean_bg;
+       int tmp, tmp1;
+
+       /* Input scaling was found to be required to prevent problems when tx
+          starts clipping.  Another possible way to handle this would be the
+          filter coefficient scaling. */
+
+       ec->tx = tx;
+       ec->rx = rx;
+       tx >>= 1;
+       rx >>= 1;
+
+       /*
+          Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
+          otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
+          only real axis.  Some chip sets (like Si labs) don't need
+          this, but something like a $10 X100P card does.  Any DC really slows
+          down convergence.
+
+          Note: removes some low frequency from the signal, this reduces
+          the speech quality when listening to samples through headphones
+          but may not be obvious through a telephone handset.
+
+          Note that the 3dB frequency in radians is approx Beta, e.g. for
+          Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
+        */
+
+       if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
+               tmp = rx << 15;
  #if 1
-        /* Make sure the gain of the HPF is 1.0. This can still saturate a little under
-           impulse conditions, and it might roll to 32768 and need clipping on sustained peak
-           level signals. However, the scale of such clipping is small, and the error due to
-           any saturation should not markedly affect the downstream processing. */
-        tmp -= (tmp >> 4);
+               /* Make sure the gain of the HPF is 1.0. This can still saturate a little under
+                  impulse conditions, and it might roll to 32768 and need clipping on sustained peak
+                  level signals. However, the scale of such clipping is small, and the error due to
+                  any saturation should not markedly affect the downstream processing. */
+               tmp -= (tmp >> 4);
  #endif
-      ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
+               ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;
+
+               /* hard limit filter to prevent clipping.  Note that at this stage
+                  rx should be limited to +/- 16383 due to right shift above */
+               tmp1 = ec->rx_1 >> 15;
+               if (tmp1 > 16383)
+                       tmp1 = 16383;
+               if (tmp1 < -16383)
+                       tmp1 = -16383;
+               rx = tmp1;
+               ec->rx_2 = tmp;
+       }
  
-      /* hard limit filter to prevent clipping.  Note that at this stage
-        rx should be limited to +/- 16383 due to right shift above */
-      tmp1 = ec->rx_1 >> 15;
-      if (tmp1 > 16383) tmp1 = 16383;
-      if (tmp1 < -16383) tmp1 = -16383;
-      rx = tmp1;
-      ec->rx_2 = tmp;
-    }
+       /* Block average of power in the filter states.  Used for
+          adaption power calculation. */
  
-    /* Block average of power in the filter states.  Used for
-       adaption power calculation. */
+       {
+               int new, old;
+
+               /* efficient "out with the old and in with the new" algorithm so
+                  we don't have to recalculate over the whole block of
+                  samples. */
+               new = (int)tx *(int)tx;
+               old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
+                   (int)ec->fir_state.history[ec->fir_state.curr_pos];
+               ec->Pstates +=
+                   ((new - old) + (1 << ec->log2taps)) >> ec->log2taps;
+               if (ec->Pstates < 0)
+                       ec->Pstates = 0;
+       }
  
-    {
-       int new, old;
+       /* Calculate short term average levels using simple single pole IIRs */
  
-       /* efficient "out with the old and in with the new" algorithm so
-          we don't have to recalculate over the whole block of
-          samples. */
-       new = (int)tx * (int)tx;
-       old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
-              (int)ec->fir_state.history[ec->fir_state.curr_pos];
-       ec->Pstates += ((new - old) + (1<<ec->log2taps)) >> ec->log2taps;
-       if (ec->Pstates < 0) ec->Pstates = 0;
-    }
-
-    /* Calculate short term average levels using simple single pole IIRs */
-
-    ec->Ltxacc += abs(tx) - ec->Ltx;
-    ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
-    ec->Lrxacc += abs(rx) - ec->Lrx;
-    ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
-
-    /* Foreground filter ---------------------------------------------------*/
-
-    ec->fir_state.coeffs = ec->fir_taps16[0];
-    echo_value = fir16(&ec->fir_state, tx);
-    ec->clean = rx - echo_value;
-    ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
-    ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
-
-    /* Background filter ---------------------------------------------------*/
-
-    echo_value = fir16(&ec->fir_state_bg, tx);
-    clean_bg = rx - echo_value;
-    ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
-    ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
-
-    /* Background Filter adaption -----------------------------------------*/
-
-    /* Almost always adap bg filter, just simple DT and energy
-       detection to minimise adaption in cases of strong double talk.
-       However this is not critical for the dual path algorithm.
-    */
-    ec->factor = 0;
-    ec->shift = 0;
-    if ((ec->nonupdate_dwell == 0)) {
-       int   P, logP, shift;
-
-       /* Determine:
-
-          f = Beta * clean_bg_rx/P ------ (1)
-
-          where P is the total power in the filter states.
-
-          The Boffins have shown that if we obey (1) we converge
-          quickly and avoid instability.
-
-          The correct factor f must be in Q30, as this is the fixed
-          point format required by the lms_adapt_bg() function,
-          therefore the scaled version of (1) is:
-
-          (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
-              factor  = (2^30) * Beta * clean_bg_rx/P         ----- (2)
-
-          We have chosen Beta = 0.25 by experiment, so:
-
-              factor  = (2^30) * (2^-2) * clean_bg_rx/P
-
-                                       (30 - 2 - log2(P))
-              factor  = clean_bg_rx 2                         ----- (3)
-
-          To avoid a divide we approximate log2(P) as top_bit(P),
-          which returns the position of the highest non-zero bit in
-          P.  This approximation introduces an error as large as a
-          factor of 2, but the algorithm seems to handle it OK.
-
-          Come to think of it a divide may not be a big deal on a
-          modern DSP, so its probably worth checking out the cycles
-          for a divide versus a top_bit() implementation.
-       */
-
-       P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
-       logP = top_bit(P) + ec->log2taps;
-       shift = 30 - 2 - logP;
-       ec->shift = shift;
-
-       lms_adapt_bg(ec, clean_bg, shift);
-    }
-
-    /* very simple DTD to make sure we dont try and adapt with strong
-       near end speech */
-
-    ec->adapt = 0;
-    if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
-       ec->nonupdate_dwell = DTD_HANGOVER;
-    if (ec->nonupdate_dwell)
-       ec->nonupdate_dwell--;
+       ec->Ltxacc += abs(tx) - ec->Ltx;
+       ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
+       ec->Lrxacc += abs(rx) - ec->Lrx;
+       ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;
  
-    /* Transfer logic ------------------------------------------------------*/
+       /* Foreground filter --------------------------------------------------- */
  
-    /* These conditions are from the dual path paper [1], I messed with
-       them a bit to improve performance. */
+       ec->fir_state.coeffs = ec->fir_taps16[0];
+       echo_value = fir16(&ec->fir_state, tx);
+       ec->clean = rx - echo_value;
+       ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
+       ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;
  
-    if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
-       (ec->nonupdate_dwell == 0) &&
-       (8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
-       (8*ec->Lclean_bg < ec->Ltx)      /* (ec->Lclean_bg < 0.125*ec->Ltx)    */ )
-    {
-       if (ec->cond_met == 6) {
-           /* BG filter has had better results for 6 consecutive samples */
-           ec->adapt = 1;
-           memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
-       }
-       else
-           ec->cond_met++;
-    }
-    else
-       ec->cond_met = 0;
+       /* Background filter --------------------------------------------------- */
  
-    /* Non-Linear Processing ---------------------------------------------------*/
+       echo_value = fir16(&ec->fir_state_bg, tx);
+       clean_bg = rx - echo_value;
+       ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
+       ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;
  
-    ec->clean_nlp = ec->clean;
-    if (ec->adaption_mode & ECHO_CAN_USE_NLP)
-    {
-        /* Non-linear processor - a fancy way to say "zap small signals, to avoid
-           residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
+       /* Background Filter adaption ----------------------------------------- */
  
-      if ((16*ec->Lclean < ec->Ltx))
-      {
-       /* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
-          so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
-        if (ec->adaption_mode & ECHO_CAN_USE_CNG)
-       {
-           ec->cng_level = ec->Lbgn;
-
-           /* Very elementary comfort noise generation.  Just random
-              numbers rolled off very vaguely Hoth-like.  DR: This
-              noise doesn't sound quite right to me - I suspect there
-              are some overlfow issues in the filtering as it's too
-              "crackly".  TODO: debug this, maybe just play noise at
-              high level or look at spectrum.
-           */
-
-           ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
-           ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
-           ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
-
-        }
-        else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
-       {
-           /* This sounds much better than CNG */
-           if (ec->clean_nlp > ec->Lbgn)
-             ec->clean_nlp = ec->Lbgn;
-           if (ec->clean_nlp < -ec->Lbgn)
-             ec->clean_nlp = -ec->Lbgn;
+       /* Almost always adapt bg filter, just simple DT and energy
+          detection to minimise adaption in cases of strong double talk.
+          However this is not critical for the dual path algorithm.
+        */
+       ec->factor = 0;
+       ec->shift = 0;
+       if ((ec->nonupdate_dwell == 0)) {
+               int P, logP, shift;
+
+               /* Determine:
+
+                  f = Beta * clean_bg_rx/P ------ (1)
+
+                  where P is the total power in the filter states.
+
+                  The Boffins have shown that if we obey (1) we converge
+                  quickly and avoid instability.
+
+                  The correct factor f must be in Q30, as this is the fixed
+                  point format required by the lms_adapt_bg() function,
+                  therefore the scaled version of (1) is:
+
+                  (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
+                  factor  = (2^30) * Beta * clean_bg_rx/P         ----- (2)
+
+                  We have chosen Beta = 0.25 by experiment, so:
+
+                  factor  = (2^30) * (2^-2) * clean_bg_rx/P
+
+                                         (30 - 2 - log2(P))
+                  factor  = clean_bg_rx 2                         ----- (3)
+
+                  To avoid a divide we approximate log2(P) as top_bit(P),
+                  which returns the position of the highest non-zero bit in
+                  P.  This approximation introduces an error as large as a
+                  factor of 2, but the algorithm seems to handle it OK.
+
+                  Come to think of it a divide may not be a big deal on a
+                  modern DSP, so it's probably worth checking out the cycles
+                  for a divide versus a top_bit() implementation.
+                */
+
+               P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
+               logP = top_bit(P) + ec->log2taps;
+               shift = 30 - 2 - logP;
+               ec->shift = shift;
+
+               lms_adapt_bg(ec, clean_bg, shift);
         }
-       else
-        {
-         /* just mute the residual, doesn't sound very good, used mainly
-            in G168 tests */
-          ec->clean_nlp = 0;
-        }
-      }
-      else {
-         /* Background noise estimator.  I tried a few algorithms
-            here without much luck.  This very simple one seems to
-            work best, we just average the level using a slow (1 sec
-            time const) filter if the current level is less than a
-            (experimentally derived) constant.  This means we dont
-            include high level signals like near end speech.  When
-            combined with CNG or especially CLIP seems to work OK.
-         */
-         if (ec->Lclean < 40) {
-             ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
-             ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
-         }
-       }
-    }
-
-    /* Roll around the taps buffer */
-    if (ec->curr_pos <= 0)
-        ec->curr_pos = ec->taps;
-    ec->curr_pos--;
-
-    if (ec->adaption_mode & ECHO_CAN_DISABLE)
-      ec->clean_nlp = rx;
-
-    /* Output scaled back up again to match input scaling */
-
-    return (int16_t) ec->clean_nlp << 1;
+
+       /* very simple DTD to make sure we don't try and adapt with strong
+          near end speech */
+
+       ec->adapt = 0;
+       if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
+               ec->nonupdate_dwell = DTD_HANGOVER;
+       if (ec->nonupdate_dwell)
+               ec->nonupdate_dwell--;
+
+       /* Transfer logic ------------------------------------------------------ */
+
+       /* These conditions are from the dual path paper [1], I messed with
+          them a bit to improve performance. */
+
+       if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
+           (ec->nonupdate_dwell == 0) &&
+           (8 * ec->Lclean_bg <
+            7 * ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
+           (8 * ec->Lclean_bg <
+            ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx)    */ ) {
+               if (ec->cond_met == 6) {
+                       /* BG filter has had better results for 6 consecutive samples */
+                       ec->adapt = 1;
+                       memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
+                              ec->taps * sizeof(int16_t));
+               } else
+                       ec->cond_met++;
+       } else
+               ec->cond_met = 0;
+
+       /* Non-Linear Processing --------------------------------------------------- */
+
+       ec->clean_nlp = ec->clean;
+       if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
+               /* Non-linear processor - a fancy way to say "zap small signals, to avoid
+                  residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
+
+               if ((16 * ec->Lclean < ec->Ltx)) {
+                       /* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
+                          so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
+                       if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
+                               ec->cng_level = ec->Lbgn;
+
+                               /* Very elementary comfort noise generation.  Just random
+                                  numbers rolled off very vaguely Hoth-like.  DR: This
+                                  noise doesn't sound quite right to me - I suspect there
+                                  are some overflow issues in the filtering as it's too
+                                  "crackly".  TODO: debug this, maybe just play noise at
+                                  high level or look at spectrum.
+                                */
+
+                               ec->cng_rndnum =
+                                   1664525U * ec->cng_rndnum + 1013904223U;
+                               ec->cng_filter =
+                                   ((ec->cng_rndnum & 0xFFFF) - 32768 +
+                                    5 * ec->cng_filter) >> 3;
+                               ec->clean_nlp =
+                                   (ec->cng_filter * ec->cng_level * 8) >> 14;
+
+                       } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
+                               /* This sounds much better than CNG */
+                               if (ec->clean_nlp > ec->Lbgn)
+                                       ec->clean_nlp = ec->Lbgn;
+                               if (ec->clean_nlp < -ec->Lbgn)
+                                       ec->clean_nlp = -ec->Lbgn;
+                       } else {
+                               /* just mute the residual, doesn't sound very good, used mainly
+                                  in G168 tests */
+                               ec->clean_nlp = 0;
+                       }
+               } else {
+                       /* Background noise estimator.  I tried a few algorithms
+                          here without much luck.  This very simple one seems to
+                          work best, we just average the level using a slow (1 sec
+                          time const) filter if the current level is less than a
+                          (experimentally derived) constant.  This means we don't
+                          include high level signals like near end speech.  When
+                          combined with CNG or especially CLIP seems to work OK.
+                        */
+                       if (ec->Lclean < 40) {
+                               ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
+                               ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
+                       }
+               }
+       }
+
+       /* Roll around the taps buffer */
+       if (ec->curr_pos <= 0)
+               ec->curr_pos = ec->taps;
+       ec->curr_pos--;
+
+       if (ec->adaption_mode & ECHO_CAN_DISABLE)
+               ec->clean_nlp = rx;
+
+       /* Output scaled back up again to match input scaling */
+
+       return (int16_t) ec->clean_nlp << 1;
  }
+
  EXPORT_SYMBOL_GPL(oslec_update);
-/*- End of function --------------------------------------------------------*/
  
  /* This function is seperated from the echo canceller is it is usually called
     as part of the tx process.  See rx HP (DC blocking) filter above, it's
@@ -613,28 +605,32 @@ EXPORT_SYMBOL_GPL(oslec_update);
     precision, which noise shapes things, giving very clean DC removal.
  */
  
-int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) {
-    int tmp, tmp1;
+int16_t oslec_hpf_tx(struct oslec_state * ec, int16_t tx)
+{
+       int tmp, tmp1;
  
-    if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
-        tmp = tx << 15;
+       if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
+               tmp = tx << 15;
  #if 1
-        /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
-           impulse conditions, and it might roll to 32768 and need clipping on sustained peak
-           level signals. However, the scale of such clipping is small, and the error due to
-           any saturation should not markedly affect the downstream processing. */
-        tmp -= (tmp >> 4);
+               /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
+                  impulse conditions, and it might roll to 32768 and need clipping on sustained peak
+                  level signals. However, the scale of such clipping is small, and the error due to
+                  any saturation should not markedly affect the downstream processing. */
+               tmp -= (tmp >> 4);
  #endif
-        ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
-        tmp1 = ec->tx_1 >> 15;
-       if (tmp1 > 32767) tmp1 = 32767;
-       if (tmp1 < -32767) tmp1 = -32767;
-       tx = tmp1;
-        ec->tx_2 = tmp;
-    }
-
-    return tx;
+               ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2;
+               tmp1 = ec->tx_1 >> 15;
+               if (tmp1 > 32767)
+                       tmp1 = 32767;
+               if (tmp1 < -32767)
+                       tmp1 = -32767;
+               tx = tmp1;
+               ec->tx_2 = tmp;
+       }
+
+       return tx;
  }
+
  EXPORT_SYMBOL_GPL(oslec_hpf_tx);
  
  MODULE_LICENSE("GPL");