202 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
		
		
			
		
	
	
			202 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
								 | 
							
								/* Copyright (c) 2002-2008 Jean-Marc Valin
							 | 
						||
| 
								 | 
							
								   Copyright (c) 2007-2008 CSIRO
							 | 
						||
| 
								 | 
							
								   Copyright (c) 2007-2009 Xiph.Org Foundation
							 | 
						||
| 
								 | 
							
								   Written by Jean-Marc Valin */
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								   @file mathops.h
							 | 
						||
| 
								 | 
							
								   @brief Various math functions
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								   Redistribution and use in source and binary forms, with or without
							 | 
						||
| 
								 | 
							
								   modification, are permitted provided that the following conditions
							 | 
						||
| 
								 | 
							
								   are met:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   - Redistributions of source code must retain the above copyright
							 | 
						||
| 
								 | 
							
								   notice, this list of conditions and the following disclaimer.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   - Redistributions in binary form must reproduce the above copyright
							 | 
						||
| 
								 | 
							
								   notice, this list of conditions and the following disclaimer in the
							 | 
						||
| 
								 | 
							
								   documentation and/or other materials provided with the distribution.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
							 | 
						||
| 
								 | 
							
								   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
							 | 
						||
| 
								 | 
							
								   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
							 | 
						||
| 
								 | 
							
								   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
							 | 
						||
| 
								 | 
							
								   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
							 | 
						||
| 
								 | 
							
								   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
							 | 
						||
| 
								 | 
							
								   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
							 | 
						||
| 
								 | 
							
								   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
							 | 
						||
| 
								 | 
							
								   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
							 | 
						||
| 
								 | 
							
								   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
							 | 
						||
| 
								 | 
							
								   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef HAVE_CONFIG_H
							 | 
						||
| 
								 | 
							
								#include "config.h"
							 | 
						||
| 
								 | 
							
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "mathops.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*Compute floor(sqrt(_val)) with exact arithmetic.
							 | 
						||
| 
								 | 
							
								  This has been tested on all possible 32-bit inputs.*/
							 | 
						||
| 
								 | 
							
								unsigned isqrt32(celt_uint32 _val){
							 | 
						||
| 
								 | 
							
								  unsigned b;
							 | 
						||
| 
								 | 
							
								  unsigned g;
							 | 
						||
| 
								 | 
							
								  int      bshift;
							 | 
						||
| 
								 | 
							
								  /*Uses the second method from
							 | 
						||
| 
								 | 
							
								     http://www.azillionmonkeys.com/qed/sqroot.html
							 | 
						||
| 
								 | 
							
								    The main idea is to search for the largest binary digit b such that
							 | 
						||
| 
								 | 
							
								     (g+b)*(g+b) <= _val, and add it to the solution g.*/
							 | 
						||
| 
								 | 
							
								  g=0;
							 | 
						||
| 
								 | 
							
								  bshift=EC_ILOG(_val)-1>>1;
							 | 
						||
| 
								 | 
							
								  b=1U<<bshift;
							 | 
						||
| 
								 | 
							
								  do{
							 | 
						||
| 
								 | 
							
								    celt_uint32 t;
							 | 
						||
| 
								 | 
							
								    t=((celt_uint32)g<<1)+b<<bshift;
							 | 
						||
| 
								 | 
							
								    if(t<=_val){
							 | 
						||
| 
								 | 
							
								      g+=b;
							 | 
						||
| 
								 | 
							
								      _val-=t;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    b>>=1;
							 | 
						||
| 
								 | 
							
								    bshift--;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  while(bshift>=0);
							 | 
						||
| 
								 | 
							
								  return g;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef FIXED_POINT
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								celt_word32 frac_div32(celt_word32 a, celt_word32 b)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   celt_word16 rcp;
							 | 
						||
| 
								 | 
							
								   celt_word32 result, rem;
							 | 
						||
| 
								 | 
							
								   int shift = celt_ilog2(b)-29;
							 | 
						||
| 
								 | 
							
								   a = VSHR32(a,shift);
							 | 
						||
| 
								 | 
							
								   b = VSHR32(b,shift);
							 | 
						||
| 
								 | 
							
								   /* 16-bit reciprocal */
							 | 
						||
| 
								 | 
							
								   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
							 | 
						||
| 
								 | 
							
								   result = SHL32(MULT16_32_Q15(rcp, a),2);
							 | 
						||
| 
								 | 
							
								   rem = a-MULT32_32_Q31(result, b);
							 | 
						||
| 
								 | 
							
								   result += SHL32(MULT16_32_Q15(rcp, rem),2);
							 | 
						||
| 
								 | 
							
								   return result;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
							 | 
						||
| 
								 | 
							
								celt_word16 celt_rsqrt_norm(celt_word32 x)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   celt_word16 n;
							 | 
						||
| 
								 | 
							
								   celt_word16 r;
							 | 
						||
| 
								 | 
							
								   celt_word16 r2;
							 | 
						||
| 
								 | 
							
								   celt_word16 y;
							 | 
						||
| 
								 | 
							
								   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
							 | 
						||
| 
								 | 
							
								   n = x-32768;
							 | 
						||
| 
								 | 
							
								   /* Get a rough initial guess for the root.
							 | 
						||
| 
								 | 
							
								      The optimal minimax quadratic approximation (using relative error) is
							 | 
						||
| 
								 | 
							
								       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
							 | 
						||
| 
								 | 
							
								      Coefficients here, and the final result r, are Q14.*/
							 | 
						||
| 
								 | 
							
								   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
							 | 
						||
| 
								 | 
							
								   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
							 | 
						||
| 
								 | 
							
								      We can compute the result from n and r using Q15 multiplies with some
							 | 
						||
| 
								 | 
							
								       adjustment, carefully done to avoid overflow.
							 | 
						||
| 
								 | 
							
								      Range of y is [-1564,1594]. */
							 | 
						||
| 
								 | 
							
								   r2 = MULT16_16_Q15(r, r);
							 | 
						||
| 
								 | 
							
								   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
							 | 
						||
| 
								 | 
							
								   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
							 | 
						||
| 
								 | 
							
								      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
							 | 
						||
| 
								 | 
							
								       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
							 | 
						||
| 
								 | 
							
								       peak absolute error of 2.26591/16384. */
							 | 
						||
| 
								 | 
							
								   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
							 | 
						||
| 
								 | 
							
								              SUB16(MULT16_16_Q15(y, 12288), 16384))));
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/** Sqrt approximation (QX input, QX/2 output) */
							 | 
						||
| 
								 | 
							
								celt_word32 celt_sqrt(celt_word32 x)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   int k;
							 | 
						||
| 
								 | 
							
								   celt_word16 n;
							 | 
						||
| 
								 | 
							
								   celt_word32 rt;
							 | 
						||
| 
								 | 
							
								   static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
							 | 
						||
| 
								 | 
							
								   if (x==0)
							 | 
						||
| 
								 | 
							
								      return 0;
							 | 
						||
| 
								 | 
							
								   k = (celt_ilog2(x)>>1)-7;
							 | 
						||
| 
								 | 
							
								   x = VSHR32(x, (k<<1));
							 | 
						||
| 
								 | 
							
								   n = x-32768;
							 | 
						||
| 
								 | 
							
								   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
							 | 
						||
| 
								 | 
							
								              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
							 | 
						||
| 
								 | 
							
								   rt = VSHR32(rt,7-k);
							 | 
						||
| 
								 | 
							
								   return rt;
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#define L1 32767
							 | 
						||
| 
								 | 
							
								#define L2 -7651
							 | 
						||
| 
								 | 
							
								#define L3 8277
							 | 
						||
| 
								 | 
							
								#define L4 -626
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								static inline celt_word16 _celt_cos_pi_2(celt_word16 x)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   celt_word16 x2;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								   x2 = MULT16_16_P15(x,x);
							 | 
						||
| 
								 | 
							
								   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
							 | 
						||
| 
								 | 
							
								                                                                                ))))))));
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#undef L1
							 | 
						||
| 
								 | 
							
								#undef L2
							 | 
						||
| 
								 | 
							
								#undef L3
							 | 
						||
| 
								 | 
							
								#undef L4
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								celt_word16 celt_cos_norm(celt_word32 x)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   x = x&0x0001ffff;
							 | 
						||
| 
								 | 
							
								   if (x>SHL32(EXTEND32(1), 16))
							 | 
						||
| 
								 | 
							
								      x = SUB32(SHL32(EXTEND32(1), 17),x);
							 | 
						||
| 
								 | 
							
								   if (x&0x00007fff)
							 | 
						||
| 
								 | 
							
								   {
							 | 
						||
| 
								 | 
							
								      if (x<SHL32(EXTEND32(1), 15))
							 | 
						||
| 
								 | 
							
								      {
							 | 
						||
| 
								 | 
							
								         return _celt_cos_pi_2(EXTRACT16(x));
							 | 
						||
| 
								 | 
							
								      } else {
							 | 
						||
| 
								 | 
							
								         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
							 | 
						||
| 
								 | 
							
								      }
							 | 
						||
| 
								 | 
							
								   } else {
							 | 
						||
| 
								 | 
							
								      if (x&0x0000ffff)
							 | 
						||
| 
								 | 
							
								         return 0;
							 | 
						||
| 
								 | 
							
								      else if (x&0x0001ffff)
							 | 
						||
| 
								 | 
							
								         return -32767;
							 | 
						||
| 
								 | 
							
								      else
							 | 
						||
| 
								 | 
							
								         return 32767;
							 | 
						||
| 
								 | 
							
								   }
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/** Reciprocal approximation (Q15 input, Q16 output) */
							 | 
						||
| 
								 | 
							
								celt_word32 celt_rcp(celt_word32 x)
							 | 
						||
| 
								 | 
							
								{
							 | 
						||
| 
								 | 
							
								   int i;
							 | 
						||
| 
								 | 
							
								   celt_word16 n;
							 | 
						||
| 
								 | 
							
								   celt_word16 r;
							 | 
						||
| 
								 | 
							
								   celt_assert2(x>0, "celt_rcp() only defined for positive values");
							 | 
						||
| 
								 | 
							
								   i = celt_ilog2(x);
							 | 
						||
| 
								 | 
							
								   /* n is Q15 with range [0,1). */
							 | 
						||
| 
								 | 
							
								   n = VSHR32(x,i-15)-32768;
							 | 
						||
| 
								 | 
							
								   /* Start with a linear approximation:
							 | 
						||
| 
								 | 
							
								      r = 1.8823529411764706-0.9411764705882353*n.
							 | 
						||
| 
								 | 
							
								      The coefficients and the result are Q14 in the range [15420,30840].*/
							 | 
						||
| 
								 | 
							
								   r = ADD16(30840, MULT16_16_Q15(-15420, n));
							 | 
						||
| 
								 | 
							
								   /* Perform two Newton iterations:
							 | 
						||
| 
								 | 
							
								      r -= r*((r*n)-1.Q15)
							 | 
						||
| 
								 | 
							
								         = r*((r*n)+(r-1.Q15)). */
							 | 
						||
| 
								 | 
							
								   r = SUB16(r, MULT16_16_Q15(r,
							 | 
						||
| 
								 | 
							
								             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
							 | 
						||
| 
								 | 
							
								   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
							 | 
						||
| 
								 | 
							
								       neatly compensates for truncation error in the rest of the process. */
							 | 
						||
| 
								 | 
							
								   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
							 | 
						||
| 
								 | 
							
								             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
							 | 
						||
| 
								 | 
							
								   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
							 | 
						||
| 
								 | 
							
								       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
							 | 
						||
| 
								 | 
							
								       error of 1.24665/32768. */
							 | 
						||
| 
								 | 
							
								   return VSHR32(EXTEND32(r),i-16);
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#endif
							 |