Keyboard firmwares for Atmel AVR and Cortex-M
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm_fir_interpolate_f32.c 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_fir_interpolate_f32.c
  9. *
  10. * Description: FIR interpolation for floating-point sequences.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40. #include "arm_math.h"
  41. /**
  42. * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator
  43. *
  44. * These functions combine an upsampler (zero stuffer) and an FIR filter.
  45. * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.
  46. * Conceptually, the functions are equivalent to the block diagram below:
  47. * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"
  48. * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized
  49. * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.
  50. * The user of the function is responsible for providing the filter coefficients.
  51. *
  52. * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.
  53. * The upsampler inserts <code>L-1</code> zeros between each sample.
  54. * Instead of multiplying by these zero values, the FIR filter is designed to skip them.
  55. * This leads to an efficient implementation without any wasted effort.
  56. * The functions operate on blocks of input and output data.
  57. * <code>pSrc</code> points to an array of <code>blockSize</code> input values and
  58. * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.
  59. *
  60. * The library provides separate functions for Q15, Q31, and floating-point data types.
  61. *
  62. * \par Algorithm:
  63. * The functions use a polyphase filter structure:
  64. * <pre>
  65. * y[n] = b[0] * x[n] + b[L] * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]
  66. * y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]
  67. * ...
  68. * y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]
  69. * </pre>
  70. * This approach is more efficient than straightforward upsample-then-filter algorithms.
  71. * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.
  72. * \par
  73. * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  74. * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the
  75. * initialization functions.
  76. * Internally, the function divides the FIR filter's impulse response into shorter filters of length
  77. * <code>phaseLength=numTaps/L</code>.
  78. * Coefficients are stored in time reversed order.
  79. * \par
  80. * <pre>
  81. * {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
  82. * </pre>
  83. * \par
  84. * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.
  85. * Samples in the state buffer are stored in the order:
  86. * \par
  87. * <pre>
  88. * {x[n-phaseLength+1], x[n-phaseLength], x[n-phaseLength-1], x[n-phaseLength-2]....x[0], x[1], ..., x[blockSize-1]}
  89. * </pre>
  90. * The state variables are updated after each block of data is processed, the coefficients are untouched.
  91. *
  92. * \par Instance Structure
  93. * The coefficients and state variables for a filter are stored together in an instance data structure.
  94. * A separate instance structure must be defined for each filter.
  95. * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.
  96. * There are separate instance structure declarations for each of the 3 supported data types.
  97. *
  98. * \par Initialization Functions
  99. * There is also an associated initialization function for each data type.
  100. * The initialization function performs the following operations:
  101. * - Sets the values of the internal structure fields.
  102. * - Zeros out the values in the state buffer.
  103. * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.
  104. * To do this manually without calling the init function, assign the follow subfields of the instance structure:
  105. * L (interpolation factor), pCoeffs, phaseLength (numTaps / L), pState. Also set all of the values in pState to zero.
  106. *
  107. * \par
  108. * Use of the initialization function is optional.
  109. * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
  110. * To place an instance structure into a const data section, the instance structure must be manually initialized.
  111. * The code below statically initializes each of the 3 different data type filter instance structures
  112. * <pre>
  113. * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};
  114. * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};
  115. * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};
  116. * </pre>
  117. * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the
  118. * length of each of the shorter FIR filters used internally,
  119. * <code>pCoeffs</code> is the address of the coefficient buffer;
  120. * <code>pState</code> is the address of the state buffer.
  121. * Be sure to set the values in the state buffer to zeros when doing static initialization.
  122. *
  123. * \par Fixed-Point Behavior
  124. * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.
  125. * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
  126. * Refer to the function specific documentation below for usage guidelines.
  127. */
  128. /**
  129. * @addtogroup FIR_Interpolate
  130. * @{
  131. */
  132. /**
  133. * @brief Processing function for the floating-point FIR interpolator.
  134. * @param[in] *S points to an instance of the floating-point FIR interpolator structure.
  135. * @param[in] *pSrc points to the block of input data.
  136. * @param[out] *pDst points to the block of output data.
  137. * @param[in] blockSize number of input samples to process per call.
  138. * @return none.
  139. */
  140. #ifndef ARM_MATH_CM0_FAMILY
  141. /* Run the below code for Cortex-M4 and Cortex-M3 */
  142. void arm_fir_interpolate_f32(
  143. const arm_fir_interpolate_instance_f32 * S,
  144. float32_t * pSrc,
  145. float32_t * pDst,
  146. uint32_t blockSize)
  147. {
  148. float32_t *pState = S->pState; /* State pointer */
  149. float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  150. float32_t *pStateCurnt; /* Points to the current sample of the state */
  151. float32_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  152. float32_t sum0; /* Accumulators */
  153. float32_t x0, c0; /* Temporary variables to hold state and coefficient values */
  154. uint32_t i, blkCnt, j; /* Loop counters */
  155. uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */
  156. float32_t acc0, acc1, acc2, acc3;
  157. float32_t x1, x2, x3;
  158. uint32_t blkCntN4;
  159. float32_t c1, c2, c3;
  160. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  161. /* pStateCurnt points to the location where the new input data should be written */
  162. pStateCurnt = S->pState + (phaseLen - 1u);
  163. /* Initialise blkCnt */
  164. blkCnt = blockSize / 4;
  165. blkCntN4 = blockSize - (4 * blkCnt);
  166. /* Samples loop unrolled by 4 */
  167. while(blkCnt > 0u)
  168. {
  169. /* Copy new input sample into the state buffer */
  170. *pStateCurnt++ = *pSrc++;
  171. *pStateCurnt++ = *pSrc++;
  172. *pStateCurnt++ = *pSrc++;
  173. *pStateCurnt++ = *pSrc++;
  174. /* Address modifier index of coefficient buffer */
  175. j = 1u;
  176. /* Loop over the Interpolation factor. */
  177. i = (S->L);
  178. while(i > 0u)
  179. {
  180. /* Set accumulator to zero */
  181. acc0 = 0.0f;
  182. acc1 = 0.0f;
  183. acc2 = 0.0f;
  184. acc3 = 0.0f;
  185. /* Initialize state pointer */
  186. ptr1 = pState;
  187. /* Initialize coefficient pointer */
  188. ptr2 = pCoeffs + (S->L - j);
  189. /* Loop over the polyPhase length. Unroll by a factor of 4.
  190. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  191. tapCnt = phaseLen >> 2u;
  192. x0 = *(ptr1++);
  193. x1 = *(ptr1++);
  194. x2 = *(ptr1++);
  195. while(tapCnt > 0u)
  196. {
  197. /* Read the input sample */
  198. x3 = *(ptr1++);
  199. /* Read the coefficient */
  200. c0 = *(ptr2);
  201. /* Perform the multiply-accumulate */
  202. acc0 += x0 * c0;
  203. acc1 += x1 * c0;
  204. acc2 += x2 * c0;
  205. acc3 += x3 * c0;
  206. /* Read the coefficient */
  207. c1 = *(ptr2 + S->L);
  208. /* Read the input sample */
  209. x0 = *(ptr1++);
  210. /* Perform the multiply-accumulate */
  211. acc0 += x1 * c1;
  212. acc1 += x2 * c1;
  213. acc2 += x3 * c1;
  214. acc3 += x0 * c1;
  215. /* Read the coefficient */
  216. c2 = *(ptr2 + S->L * 2);
  217. /* Read the input sample */
  218. x1 = *(ptr1++);
  219. /* Perform the multiply-accumulate */
  220. acc0 += x2 * c2;
  221. acc1 += x3 * c2;
  222. acc2 += x0 * c2;
  223. acc3 += x1 * c2;
  224. /* Read the coefficient */
  225. c3 = *(ptr2 + S->L * 3);
  226. /* Read the input sample */
  227. x2 = *(ptr1++);
  228. /* Perform the multiply-accumulate */
  229. acc0 += x3 * c3;
  230. acc1 += x0 * c3;
  231. acc2 += x1 * c3;
  232. acc3 += x2 * c3;
  233. /* Upsampling is done by stuffing L-1 zeros between each sample.
  234. * So instead of multiplying zeros with coefficients,
  235. * Increment the coefficient pointer by interpolation factor times. */
  236. ptr2 += 4 * S->L;
  237. /* Decrement the loop counter */
  238. tapCnt--;
  239. }
  240. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  241. tapCnt = phaseLen % 0x4u;
  242. while(tapCnt > 0u)
  243. {
  244. /* Read the input sample */
  245. x3 = *(ptr1++);
  246. /* Read the coefficient */
  247. c0 = *(ptr2);
  248. /* Perform the multiply-accumulate */
  249. acc0 += x0 * c0;
  250. acc1 += x1 * c0;
  251. acc2 += x2 * c0;
  252. acc3 += x3 * c0;
  253. /* Increment the coefficient pointer by interpolation factor times. */
  254. ptr2 += S->L;
  255. /* update states for next sample processing */
  256. x0 = x1;
  257. x1 = x2;
  258. x2 = x3;
  259. /* Decrement the loop counter */
  260. tapCnt--;
  261. }
  262. /* The result is in the accumulator, store in the destination buffer. */
  263. *pDst = acc0;
  264. *(pDst + S->L) = acc1;
  265. *(pDst + 2 * S->L) = acc2;
  266. *(pDst + 3 * S->L) = acc3;
  267. pDst++;
  268. /* Increment the address modifier index of coefficient buffer */
  269. j++;
  270. /* Decrement the loop counter */
  271. i--;
  272. }
  273. /* Advance the state pointer by 1
  274. * to process the next group of interpolation factor number samples */
  275. pState = pState + 4;
  276. pDst += S->L * 3;
  277. /* Decrement the loop counter */
  278. blkCnt--;
  279. }
  280. /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  281. ** No loop unrolling is used. */
  282. while(blkCntN4 > 0u)
  283. {
  284. /* Copy new input sample into the state buffer */
  285. *pStateCurnt++ = *pSrc++;
  286. /* Address modifier index of coefficient buffer */
  287. j = 1u;
  288. /* Loop over the Interpolation factor. */
  289. i = S->L;
  290. while(i > 0u)
  291. {
  292. /* Set accumulator to zero */
  293. sum0 = 0.0f;
  294. /* Initialize state pointer */
  295. ptr1 = pState;
  296. /* Initialize coefficient pointer */
  297. ptr2 = pCoeffs + (S->L - j);
  298. /* Loop over the polyPhase length. Unroll by a factor of 4.
  299. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  300. tapCnt = phaseLen >> 2u;
  301. while(tapCnt > 0u)
  302. {
  303. /* Read the coefficient */
  304. c0 = *(ptr2);
  305. /* Upsampling is done by stuffing L-1 zeros between each sample.
  306. * So instead of multiplying zeros with coefficients,
  307. * Increment the coefficient pointer by interpolation factor times. */
  308. ptr2 += S->L;
  309. /* Read the input sample */
  310. x0 = *(ptr1++);
  311. /* Perform the multiply-accumulate */
  312. sum0 += x0 * c0;
  313. /* Read the coefficient */
  314. c0 = *(ptr2);
  315. /* Increment the coefficient pointer by interpolation factor times. */
  316. ptr2 += S->L;
  317. /* Read the input sample */
  318. x0 = *(ptr1++);
  319. /* Perform the multiply-accumulate */
  320. sum0 += x0 * c0;
  321. /* Read the coefficient */
  322. c0 = *(ptr2);
  323. /* Increment the coefficient pointer by interpolation factor times. */
  324. ptr2 += S->L;
  325. /* Read the input sample */
  326. x0 = *(ptr1++);
  327. /* Perform the multiply-accumulate */
  328. sum0 += x0 * c0;
  329. /* Read the coefficient */
  330. c0 = *(ptr2);
  331. /* Increment the coefficient pointer by interpolation factor times. */
  332. ptr2 += S->L;
  333. /* Read the input sample */
  334. x0 = *(ptr1++);
  335. /* Perform the multiply-accumulate */
  336. sum0 += x0 * c0;
  337. /* Decrement the loop counter */
  338. tapCnt--;
  339. }
  340. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  341. tapCnt = phaseLen % 0x4u;
  342. while(tapCnt > 0u)
  343. {
  344. /* Perform the multiply-accumulate */
  345. sum0 += *(ptr1++) * (*ptr2);
  346. /* Increment the coefficient pointer by interpolation factor times. */
  347. ptr2 += S->L;
  348. /* Decrement the loop counter */
  349. tapCnt--;
  350. }
  351. /* The result is in the accumulator, store in the destination buffer. */
  352. *pDst++ = sum0;
  353. /* Increment the address modifier index of coefficient buffer */
  354. j++;
  355. /* Decrement the loop counter */
  356. i--;
  357. }
  358. /* Advance the state pointer by 1
  359. * to process the next group of interpolation factor number samples */
  360. pState = pState + 1;
  361. /* Decrement the loop counter */
  362. blkCntN4--;
  363. }
  364. /* Processing is complete.
  365. ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
  366. ** This prepares the state buffer for the next function call. */
  367. /* Points to the start of the state buffer */
  368. pStateCurnt = S->pState;
  369. tapCnt = (phaseLen - 1u) >> 2u;
  370. /* copy data */
  371. while(tapCnt > 0u)
  372. {
  373. *pStateCurnt++ = *pState++;
  374. *pStateCurnt++ = *pState++;
  375. *pStateCurnt++ = *pState++;
  376. *pStateCurnt++ = *pState++;
  377. /* Decrement the loop counter */
  378. tapCnt--;
  379. }
  380. tapCnt = (phaseLen - 1u) % 0x04u;
  381. /* copy data */
  382. while(tapCnt > 0u)
  383. {
  384. *pStateCurnt++ = *pState++;
  385. /* Decrement the loop counter */
  386. tapCnt--;
  387. }
  388. }
  389. #else
  390. /* Run the below code for Cortex-M0 */
  391. void arm_fir_interpolate_f32(
  392. const arm_fir_interpolate_instance_f32 * S,
  393. float32_t * pSrc,
  394. float32_t * pDst,
  395. uint32_t blockSize)
  396. {
  397. float32_t *pState = S->pState; /* State pointer */
  398. float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  399. float32_t *pStateCurnt; /* Points to the current sample of the state */
  400. float32_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  401. float32_t sum; /* Accumulator */
  402. uint32_t i, blkCnt; /* Loop counters */
  403. uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */
  404. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  405. /* pStateCurnt points to the location where the new input data should be written */
  406. pStateCurnt = S->pState + (phaseLen - 1u);
  407. /* Total number of intput samples */
  408. blkCnt = blockSize;
  409. /* Loop over the blockSize. */
  410. while(blkCnt > 0u)
  411. {
  412. /* Copy new input sample into the state buffer */
  413. *pStateCurnt++ = *pSrc++;
  414. /* Loop over the Interpolation factor. */
  415. i = S->L;
  416. while(i > 0u)
  417. {
  418. /* Set accumulator to zero */
  419. sum = 0.0f;
  420. /* Initialize state pointer */
  421. ptr1 = pState;
  422. /* Initialize coefficient pointer */
  423. ptr2 = pCoeffs + (i - 1u);
  424. /* Loop over the polyPhase length */
  425. tapCnt = phaseLen;
  426. while(tapCnt > 0u)
  427. {
  428. /* Perform the multiply-accumulate */
  429. sum += *ptr1++ * *ptr2;
  430. /* Increment the coefficient pointer by interpolation factor times. */
  431. ptr2 += S->L;
  432. /* Decrement the loop counter */
  433. tapCnt--;
  434. }
  435. /* The result is in the accumulator, store in the destination buffer. */
  436. *pDst++ = sum;
  437. /* Decrement the loop counter */
  438. i--;
  439. }
  440. /* Advance the state pointer by 1
  441. * to process the next group of interpolation factor number samples */
  442. pState = pState + 1;
  443. /* Decrement the loop counter */
  444. blkCnt--;
  445. }
  446. /* Processing is complete.
  447. ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
  448. ** This prepares the state buffer for the next function call. */
  449. /* Points to the start of the state buffer */
  450. pStateCurnt = S->pState;
  451. tapCnt = phaseLen - 1u;
  452. while(tapCnt > 0u)
  453. {
  454. *pStateCurnt++ = *pState++;
  455. /* Decrement the loop counter */
  456. tapCnt--;
  457. }
  458. }
  459. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  460. /**
  461. * @} end of FIR_Interpolate group
  462. */