Keyboard firmwares for Atmel AVR and Cortex-M
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm_fir_decimate_f32.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_fir_decimate_f32.c
  9. *
  10. * Description: FIR decimation for floating-point sequences.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator
  46. *
  47. * These functions combine an FIR filter together with a decimator.
  48. * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion.
  49. * Conceptually, the functions are equivalent to the block diagram below:
  50. * \image html FIRDecimator.gif "Components included in the FIR Decimator functions"
  51. * When decimating by a factor of <code>M</code>, the signal should be prefiltered by a lowpass filter with a normalized
  52. * cutoff frequency of <code>1/M</code> in order to prevent aliasing distortion.
  53. * The user of the function is responsible for providing the filter coefficients.
  54. *
  55. * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner.
  56. * Instead of calculating all of the FIR filter outputs and discarding <code>M-1</code> out of every <code>M</code>, only the
  57. * samples output by the decimator are computed.
  58. * The functions operate on blocks of input and output data.
  59. * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
  60. * <code>pDst</code> points to an array of <code>blockSize/M</code> output values.
  61. * In order to have an integer number of output samples <code>blockSize</code>
  62. * must always be a multiple of the decimation factor <code>M</code>.
  63. *
  64. * The library provides separate functions for Q15, Q31 and floating-point data types.
  65. *
  66. * \par Algorithm:
  67. * The FIR portion of the algorithm uses the standard form filter:
  68. * <pre>
  69. * y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
  70. * </pre>
  71. * where, <code>b[n]</code> are the filter coefficients.
  72. * \par
  73. * The <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  74. * Coefficients are stored in time reversed order.
  75. * \par
  76. * <pre>
  77. * {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
  78. * </pre>
  79. * \par
  80. * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
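  81. * Samples in the state buffer are stored oldest first; negative indices denote the <code>numTaps-1</code> samples retained from the previous block and <code>x[0]</code> is the first sample of the current block:
  82. * \par
  83. * <pre>
  84. * {x[-numTaps+1], x[-numTaps+2], ..., x[-2], x[-1], x[0], x[1], ..., x[blockSize-1]}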
  85. * </pre>
  86. * The state variables are updated after each block of data is processed; the coefficients are untouched.
  87. *
  88. * \par Instance Structure
  89. * The coefficients and state variables for a filter are stored together in an instance data structure.
  90. * A separate instance structure must be defined for each filter.
  91. * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
  92. * There are separate instance structure declarations for each of the 3 supported data types.
  93. *
  94. * \par Initialization Functions
  95. * There is also an associated initialization function for each data type.
  96. * The initialization function performs the following operations:
  97. * - Sets the values of the internal structure fields.
  98. * - Zeros out the values in the state buffer.
  99. * - Checks to make sure that the size of the input is a multiple of the decimation factor.
  100. * To do this manually without calling the init function, assign the following fields of the instance structure:
  101. * numTaps, pCoeffs, M (decimation factor), pState. Also set all of the values in pState to zero.
  102. *
  103. * \par
  104. * Use of the initialization function is optional.
  105. * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
  106. * To place an instance structure into a const data section, the instance structure must be manually initialized.
  107. * The code below statically initializes each of the 3 different data type filter instance structures
  108. * <pre>
  109. *arm_fir_decimate_instance_f32 S = {M, numTaps, pCoeffs, pState};
  110. *arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};
  111. *arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};
  112. * </pre>
  113. * where <code>M</code> is the decimation factor; <code>numTaps</code> is the number of filter coefficients in the filter;
  114. * <code>pCoeffs</code> is the address of the coefficient buffer;
  115. * <code>pState</code> is the address of the state buffer.
  116. * Be sure to set the values in the state buffer to zeros when doing static initialization.
  117. *
  118. * \par Fixed-Point Behavior
  119. * Care must be taken when using the fixed-point versions of the FIR decimate filter functions.
  120. * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
  121. * Refer to the function specific documentation below for usage guidelines.
  122. */
  123. /**
  124. * @addtogroup FIR_decimate
  125. * @{
  126. */
  127. /**
  128. * @brief Processing function for the floating-point FIR decimator.
  129. * @param[in] *S points to an instance of the floating-point FIR decimator structure.
  130. * @param[in] *pSrc points to the block of input data.
  131. * @param[out] *pDst points to the block of output data.
  132. * @param[in] blockSize number of input samples to process per call.
  133. * @return none.
  134. */
  135. void arm_fir_decimate_f32(
  136. const arm_fir_decimate_instance_f32 * S,
  137. float32_t * pSrc,
  138. float32_t * pDst,
  139. uint32_t blockSize)
  140. {
  141. float32_t *pState = S->pState; /* State pointer */
  142. float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  143. float32_t *pStateCurnt; /* Points to the current sample of the state */
  144. float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
  145. float32_t sum0; /* Accumulator */
  146. float32_t x0, c0; /* Temporary variables to hold state and coefficient values */
  147. uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
  148. uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */
  149. #ifndef ARM_MATH_CM0_FAMILY
  150. uint32_t blkCntN4;
  151. float32_t *px0, *px1, *px2, *px3;
  152. float32_t acc0, acc1, acc2, acc3;
  153. float32_t x1, x2, x3;
  154. /* Run the below code for Cortex-M4 and Cortex-M3 */
  155. /* S->pState buffer contains previous frame (numTaps - 1) samples */
  156. /* pStateCurnt points to the location where the new input data should be written */
  157. pStateCurnt = S->pState + (numTaps - 1u);
  158. /* Total number of output samples to be computed */
  159. blkCnt = outBlockSize / 4;
  160. blkCntN4 = outBlockSize - (4 * blkCnt);
  161. while(blkCnt > 0u)
  162. {
  163. /* Copy 4 * decimation factor number of new input samples into the state buffer */
  164. i = 4 * S->M;
  165. do
  166. {
  167. *pStateCurnt++ = *pSrc++;
  168. } while(--i);
  169. /* Set accumulators to zero */
  170. acc0 = 0.0f;
  171. acc1 = 0.0f;
  172. acc2 = 0.0f;
  173. acc3 = 0.0f;
  174. /* Initialize state pointer for all the samples */
  175. px0 = pState;
  176. px1 = pState + S->M;
  177. px2 = pState + 2 * S->M;
  178. px3 = pState + 3 * S->M;
  179. /* Initialize coeff pointer */
  180. pb = pCoeffs;
  181. /* Loop unrolling. Process 4 taps at a time. */
  182. tapCnt = numTaps >> 2;
  183. /* Loop over the number of taps. Unroll by a factor of 4.
  184. ** Repeat until we've computed numTaps-4 coefficients. */
  185. while(tapCnt > 0u)
  186. {
  187. /* Read the b[numTaps-1] coefficient */
  188. c0 = *(pb++);
  189. /* Read x[n-numTaps-1] sample for acc0 */
  190. x0 = *(px0++);
  191. /* Read x[n-numTaps-1] sample for acc1 */
  192. x1 = *(px1++);
  193. /* Read x[n-numTaps-1] sample for acc2 */
  194. x2 = *(px2++);
  195. /* Read x[n-numTaps-1] sample for acc3 */
  196. x3 = *(px3++);
  197. /* Perform the multiply-accumulate */
  198. acc0 += x0 * c0;
  199. acc1 += x1 * c0;
  200. acc2 += x2 * c0;
  201. acc3 += x3 * c0;
  202. /* Read the b[numTaps-2] coefficient */
  203. c0 = *(pb++);
  204. /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
  205. x0 = *(px0++);
  206. x1 = *(px1++);
  207. x2 = *(px2++);
  208. x3 = *(px3++);
  209. /* Perform the multiply-accumulate */
  210. acc0 += x0 * c0;
  211. acc1 += x1 * c0;
  212. acc2 += x2 * c0;
  213. acc3 += x3 * c0;
  214. /* Read the b[numTaps-3] coefficient */
  215. c0 = *(pb++);
  216. /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
  217. x0 = *(px0++);
  218. x1 = *(px1++);
  219. x2 = *(px2++);
  220. x3 = *(px3++);
  221. /* Perform the multiply-accumulate */
  222. acc0 += x0 * c0;
  223. acc1 += x1 * c0;
  224. acc2 += x2 * c0;
  225. acc3 += x3 * c0;
  226. /* Read the b[numTaps-4] coefficient */
  227. c0 = *(pb++);
  228. /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
  229. x0 = *(px0++);
  230. x1 = *(px1++);
  231. x2 = *(px2++);
  232. x3 = *(px3++);
  233. /* Perform the multiply-accumulate */
  234. acc0 += x0 * c0;
  235. acc1 += x1 * c0;
  236. acc2 += x2 * c0;
  237. acc3 += x3 * c0;
  238. /* Decrement the loop counter */
  239. tapCnt--;
  240. }
  241. /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  242. tapCnt = numTaps % 0x4u;
  243. while(tapCnt > 0u)
  244. {
  245. /* Read coefficients */
  246. c0 = *(pb++);
  247. /* Fetch state variables for acc0, acc1, acc2, acc3 */
  248. x0 = *(px0++);
  249. x1 = *(px1++);
  250. x2 = *(px2++);
  251. x3 = *(px3++);
  252. /* Perform the multiply-accumulate */
  253. acc0 += x0 * c0;
  254. acc1 += x1 * c0;
  255. acc2 += x2 * c0;
  256. acc3 += x3 * c0;
  257. /* Decrement the loop counter */
  258. tapCnt--;
  259. }
  260. /* Advance the state pointer by the decimation factor
  261. * to process the next group of decimation factor number samples */
  262. pState = pState + 4 * S->M;
  263. /* The result is in the accumulator, store in the destination buffer. */
  264. *pDst++ = acc0;
  265. *pDst++ = acc1;
  266. *pDst++ = acc2;
  267. *pDst++ = acc3;
  268. /* Decrement the loop counter */
  269. blkCnt--;
  270. }
  271. while(blkCntN4 > 0u)
  272. {
  273. /* Copy decimation factor number of new input samples into the state buffer */
  274. i = S->M;
  275. do
  276. {
  277. *pStateCurnt++ = *pSrc++;
  278. } while(--i);
  279. /* Set accumulator to zero */
  280. sum0 = 0.0f;
  281. /* Initialize state pointer */
  282. px = pState;
  283. /* Initialize coeff pointer */
  284. pb = pCoeffs;
  285. /* Loop unrolling. Process 4 taps at a time. */
  286. tapCnt = numTaps >> 2;
  287. /* Loop over the number of taps. Unroll by a factor of 4.
  288. ** Repeat until we've computed numTaps-4 coefficients. */
  289. while(tapCnt > 0u)
  290. {
  291. /* Read the b[numTaps-1] coefficient */
  292. c0 = *(pb++);
  293. /* Read x[n-numTaps-1] sample */
  294. x0 = *(px++);
  295. /* Perform the multiply-accumulate */
  296. sum0 += x0 * c0;
  297. /* Read the b[numTaps-2] coefficient */
  298. c0 = *(pb++);
  299. /* Read x[n-numTaps-2] sample */
  300. x0 = *(px++);
  301. /* Perform the multiply-accumulate */
  302. sum0 += x0 * c0;
  303. /* Read the b[numTaps-3] coefficient */
  304. c0 = *(pb++);
  305. /* Read x[n-numTaps-3] sample */
  306. x0 = *(px++);
  307. /* Perform the multiply-accumulate */
  308. sum0 += x0 * c0;
  309. /* Read the b[numTaps-4] coefficient */
  310. c0 = *(pb++);
  311. /* Read x[n-numTaps-4] sample */
  312. x0 = *(px++);
  313. /* Perform the multiply-accumulate */
  314. sum0 += x0 * c0;
  315. /* Decrement the loop counter */
  316. tapCnt--;
  317. }
  318. /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  319. tapCnt = numTaps % 0x4u;
  320. while(tapCnt > 0u)
  321. {
  322. /* Read coefficients */
  323. c0 = *(pb++);
  324. /* Fetch 1 state variable */
  325. x0 = *(px++);
  326. /* Perform the multiply-accumulate */
  327. sum0 += x0 * c0;
  328. /* Decrement the loop counter */
  329. tapCnt--;
  330. }
  331. /* Advance the state pointer by the decimation factor
  332. * to process the next group of decimation factor number samples */
  333. pState = pState + S->M;
  334. /* The result is in the accumulator, store in the destination buffer. */
  335. *pDst++ = sum0;
  336. /* Decrement the loop counter */
  337. blkCntN4--;
  338. }
  339. /* Processing is complete.
  340. ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
  341. ** This prepares the state buffer for the next function call. */
  342. /* Points to the start of the state buffer */
  343. pStateCurnt = S->pState;
  344. i = (numTaps - 1u) >> 2;
  345. /* copy data */
  346. while(i > 0u)
  347. {
  348. *pStateCurnt++ = *pState++;
  349. *pStateCurnt++ = *pState++;
  350. *pStateCurnt++ = *pState++;
  351. *pStateCurnt++ = *pState++;
  352. /* Decrement the loop counter */
  353. i--;
  354. }
  355. i = (numTaps - 1u) % 0x04u;
  356. /* copy data */
  357. while(i > 0u)
  358. {
  359. *pStateCurnt++ = *pState++;
  360. /* Decrement the loop counter */
  361. i--;
  362. }
  363. #else
  364. /* Run the below code for Cortex-M0 */
  365. /* S->pState buffer contains previous frame (numTaps - 1) samples */
  366. /* pStateCurnt points to the location where the new input data should be written */
  367. pStateCurnt = S->pState + (numTaps - 1u);
  368. /* Total number of output samples to be computed */
  369. blkCnt = outBlockSize;
  370. while(blkCnt > 0u)
  371. {
  372. /* Copy decimation factor number of new input samples into the state buffer */
  373. i = S->M;
  374. do
  375. {
  376. *pStateCurnt++ = *pSrc++;
  377. } while(--i);
  378. /* Set accumulator to zero */
  379. sum0 = 0.0f;
  380. /* Initialize state pointer */
  381. px = pState;
  382. /* Initialize coeff pointer */
  383. pb = pCoeffs;
  384. tapCnt = numTaps;
  385. while(tapCnt > 0u)
  386. {
  387. /* Read coefficients */
  388. c0 = *pb++;
  389. /* Fetch 1 state variable */
  390. x0 = *px++;
  391. /* Perform the multiply-accumulate */
  392. sum0 += x0 * c0;
  393. /* Decrement the loop counter */
  394. tapCnt--;
  395. }
  396. /* Advance the state pointer by the decimation factor
  397. * to process the next group of decimation factor number samples */
  398. pState = pState + S->M;
  399. /* The result is in the accumulator, store in the destination buffer. */
  400. *pDst++ = sum0;
  401. /* Decrement the loop counter */
  402. blkCnt--;
  403. }
  404. /* Processing is complete.
  405. ** Now copy the last numTaps - 1 samples to the start of the state buffer.
  406. ** This prepares the state buffer for the next function call. */
  407. /* Points to the start of the state buffer */
  408. pStateCurnt = S->pState;
  409. /* Copy numTaps number of values */
  410. i = (numTaps - 1u);
  411. /* copy data */
  412. while(i > 0u)
  413. {
  414. *pStateCurnt++ = *pState++;
  415. /* Decrement the loop counter */
  416. i--;
  417. }
  418. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  419. }
  420. /**
  421. * @} end of FIR_decimate group
  422. */