Keyboard firmwares for Atmel AVR and Cortex-M
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm_fir_q31.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_fir_q31.c
  9. *
  10. * Description: Q31 FIR filter processing function.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @addtogroup FIR
  46. * @{
  47. */
  48. /**
  49. * @param[in] *S points to an instance of the Q31 FIR filter structure.
  50. * @param[in] *pSrc points to the block of input data.
  51. * @param[out] *pDst points to the block of output data.
  52. * @param[in] blockSize number of samples to process per call.
  53. * @return none.
  54. *
  55. * @details
  56. * <b>Scaling and Overflow Behavior:</b>
  57. * \par
  58. * The function is implemented using an internal 64-bit accumulator.
  59. * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  60. * Thus, if the accumulator result overflows it wraps around rather than clip.
  61. * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.
  62. * After all multiply-accumulates are performed, the 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.
  63. *
  64. * \par
  65. * Refer to the function <code>arm_fir_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
  66. */
  67. void arm_fir_q31(
  68. const arm_fir_instance_q31 * S,
  69. q31_t * pSrc,
  70. q31_t * pDst,
  71. uint32_t blockSize)
  72. {
  73. q31_t *pState = S->pState; /* State pointer */
  74. q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  75. q31_t *pStateCurnt; /* Points to the current sample of the state */
  76. #ifndef ARM_MATH_CM0_FAMILY
  77. /* Run the below code for Cortex-M4 and Cortex-M3 */
  78. q31_t x0, x1, x2; /* Temporary variables to hold state */
  79. q31_t c0; /* Temporary variable to hold coefficient value */
  80. q31_t *px; /* Temporary pointer for state */
  81. q31_t *pb; /* Temporary pointer for coefficient buffer */
  82. q63_t acc0, acc1, acc2; /* Accumulators */
  83. uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
  84. uint32_t i, tapCnt, blkCnt, tapCntN3; /* Loop counters */
  85. /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
  86. /* pStateCurnt points to the location where the new input data should be written */
  87. pStateCurnt = &(S->pState[(numTaps - 1u)]);
  88. /* Apply loop unrolling and compute 4 output values simultaneously.
  89. * The variables acc0 ... acc3 hold output values that are being computed:
  90. *
  91. * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
  92. * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
  93. * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
  94. * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
  95. */
  96. blkCnt = blockSize / 3;
  97. blockSize = blockSize - (3 * blkCnt);
  98. tapCnt = numTaps / 3;
  99. tapCntN3 = numTaps - (3 * tapCnt);
  100. /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
  101. ** a second loop below computes the remaining 1 to 3 samples. */
  102. while(blkCnt > 0u)
  103. {
  104. /* Copy three new input samples into the state buffer */
  105. *pStateCurnt++ = *pSrc++;
  106. *pStateCurnt++ = *pSrc++;
  107. *pStateCurnt++ = *pSrc++;
  108. /* Set all accumulators to zero */
  109. acc0 = 0;
  110. acc1 = 0;
  111. acc2 = 0;
  112. /* Initialize state pointer */
  113. px = pState;
  114. /* Initialize coefficient pointer */
  115. pb = pCoeffs;
  116. /* Read the first two samples from the state buffer:
  117. * x[n-numTaps], x[n-numTaps-1] */
  118. x0 = *(px++);
  119. x1 = *(px++);
  120. /* Loop unrolling. Process 3 taps at a time. */
  121. i = tapCnt;
  122. while(i > 0u)
  123. {
  124. /* Read the b[numTaps] coefficient */
  125. c0 = *pb;
  126. /* Read x[n-numTaps-2] sample */
  127. x2 = *(px++);
  128. /* Perform the multiply-accumulates */
  129. acc0 += ((q63_t) x0 * c0);
  130. acc1 += ((q63_t) x1 * c0);
  131. acc2 += ((q63_t) x2 * c0);
  132. /* Read the coefficient and state */
  133. c0 = *(pb + 1u);
  134. x0 = *(px++);
  135. /* Perform the multiply-accumulates */
  136. acc0 += ((q63_t) x1 * c0);
  137. acc1 += ((q63_t) x2 * c0);
  138. acc2 += ((q63_t) x0 * c0);
  139. /* Read the coefficient and state */
  140. c0 = *(pb + 2u);
  141. x1 = *(px++);
  142. /* update coefficient pointer */
  143. pb += 3u;
  144. /* Perform the multiply-accumulates */
  145. acc0 += ((q63_t) x2 * c0);
  146. acc1 += ((q63_t) x0 * c0);
  147. acc2 += ((q63_t) x1 * c0);
  148. /* Decrement the loop counter */
  149. i--;
  150. }
  151. /* If the filter length is not a multiple of 3, compute the remaining filter taps */
  152. i = tapCntN3;
  153. while(i > 0u)
  154. {
  155. /* Read coefficients */
  156. c0 = *(pb++);
  157. /* Fetch 1 state variable */
  158. x2 = *(px++);
  159. /* Perform the multiply-accumulates */
  160. acc0 += ((q63_t) x0 * c0);
  161. acc1 += ((q63_t) x1 * c0);
  162. acc2 += ((q63_t) x2 * c0);
  163. /* Reuse the present sample states for next sample */
  164. x0 = x1;
  165. x1 = x2;
  166. /* Decrement the loop counter */
  167. i--;
  168. }
  169. /* Advance the state pointer by 3 to process the next group of 3 samples */
  170. pState = pState + 3;
  171. /* The results in the 3 accumulators are in 2.30 format. Convert to 1.31
  172. ** Then store the 3 outputs in the destination buffer. */
  173. *pDst++ = (q31_t) (acc0 >> 31u);
  174. *pDst++ = (q31_t) (acc1 >> 31u);
  175. *pDst++ = (q31_t) (acc2 >> 31u);
  176. /* Decrement the samples loop counter */
  177. blkCnt--;
  178. }
  179. /* If the blockSize is not a multiple of 3, compute any remaining output samples here.
  180. ** No loop unrolling is used. */
  181. while(blockSize > 0u)
  182. {
  183. /* Copy one sample at a time into state buffer */
  184. *pStateCurnt++ = *pSrc++;
  185. /* Set the accumulator to zero */
  186. acc0 = 0;
  187. /* Initialize state pointer */
  188. px = pState;
  189. /* Initialize Coefficient pointer */
  190. pb = (pCoeffs);
  191. i = numTaps;
  192. /* Perform the multiply-accumulates */
  193. do
  194. {
  195. acc0 += (q63_t) * (px++) * (*(pb++));
  196. i--;
  197. } while(i > 0u);
  198. /* The result is in 2.62 format. Convert to 1.31
  199. ** Then store the output in the destination buffer. */
  200. *pDst++ = (q31_t) (acc0 >> 31u);
  201. /* Advance state pointer by 1 for the next sample */
  202. pState = pState + 1;
  203. /* Decrement the samples loop counter */
  204. blockSize--;
  205. }
  206. /* Processing is complete.
  207. ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
  208. ** This prepares the state buffer for the next function call. */
  209. /* Points to the start of the state buffer */
  210. pStateCurnt = S->pState;
  211. tapCnt = (numTaps - 1u) >> 2u;
  212. /* copy data */
  213. while(tapCnt > 0u)
  214. {
  215. *pStateCurnt++ = *pState++;
  216. *pStateCurnt++ = *pState++;
  217. *pStateCurnt++ = *pState++;
  218. *pStateCurnt++ = *pState++;
  219. /* Decrement the loop counter */
  220. tapCnt--;
  221. }
  222. /* Calculate remaining number of copies */
  223. tapCnt = (numTaps - 1u) % 0x4u;
  224. /* Copy the remaining q31_t data */
  225. while(tapCnt > 0u)
  226. {
  227. *pStateCurnt++ = *pState++;
  228. /* Decrement the loop counter */
  229. tapCnt--;
  230. }
  231. #else
  232. /* Run the below code for Cortex-M0 */
  233. q31_t *px; /* Temporary pointer for state */
  234. q31_t *pb; /* Temporary pointer for coefficient buffer */
  235. q63_t acc; /* Accumulator */
  236. uint32_t numTaps = S->numTaps; /* Length of the filter */
  237. uint32_t i, tapCnt, blkCnt; /* Loop counters */
  238. /* S->pState buffer contains previous frame (numTaps - 1) samples */
  239. /* pStateCurnt points to the location where the new input data should be written */
  240. pStateCurnt = &(S->pState[(numTaps - 1u)]);
  241. /* Initialize blkCnt with blockSize */
  242. blkCnt = blockSize;
  243. while(blkCnt > 0u)
  244. {
  245. /* Copy one sample at a time into state buffer */
  246. *pStateCurnt++ = *pSrc++;
  247. /* Set the accumulator to zero */
  248. acc = 0;
  249. /* Initialize state pointer */
  250. px = pState;
  251. /* Initialize Coefficient pointer */
  252. pb = pCoeffs;
  253. i = numTaps;
  254. /* Perform the multiply-accumulates */
  255. do
  256. {
  257. /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
  258. acc += (q63_t) * px++ * *pb++;
  259. i--;
  260. } while(i > 0u);
  261. /* The result is in 2.62 format. Convert to 1.31
  262. ** Then store the output in the destination buffer. */
  263. *pDst++ = (q31_t) (acc >> 31u);
  264. /* Advance state pointer by 1 for the next sample */
  265. pState = pState + 1;
  266. /* Decrement the samples loop counter */
  267. blkCnt--;
  268. }
  269. /* Processing is complete.
  270. ** Now copy the last numTaps - 1 samples to the starting of the state buffer.
  271. ** This prepares the state buffer for the next function call. */
  272. /* Points to the start of the state buffer */
  273. pStateCurnt = S->pState;
  274. /* Copy numTaps number of values */
  275. tapCnt = numTaps - 1u;
  276. /* Copy the data */
  277. while(tapCnt > 0u)
  278. {
  279. *pStateCurnt++ = *pState++;
  280. /* Decrement the loop counter */
  281. tapCnt--;
  282. }
  283. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  284. }
  285. /**
  286. * @} end of FIR group
  287. */