Keyboard firmwares for Atmel AVR and Cortex-M
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm_fir_interpolate_q31.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. /*-----------------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_fir_interpolate_q31.c
  9. *
  10. * Description: Q31 FIR interpolation.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * ---------------------------------------------------------------------------*/
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @addtogroup FIR_Interpolate
  46. * @{
  47. */
  48. /**
  49. * @brief Processing function for the Q31 FIR interpolator.
  50. * @param[in] *S points to an instance of the Q31 FIR interpolator structure.
  51. * @param[in] *pSrc points to the block of input data.
  52. * @param[out] *pDst points to the block of output data.
  53. * @param[in] blockSize number of input samples to process per call.
  54. * @return none.
  55. *
  56. * <b>Scaling and Overflow Behavior:</b>
  57. * \par
  58. * The function is implemented using an internal 64-bit accumulator.
  59. * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  60. * Thus, if the accumulator result overflows, it wraps around rather than clipping.
  61. * In order to avoid overflows completely, the input signal must be scaled down by <code>1/(numTaps/L)</code>,
  62. * since <code>numTaps/L</code> additions occur per output sample.
  63. * After all multiply-accumulates are performed, the 2.62 accumulator is shifted right by 31 bits and cast to <code>q31_t</code> (1.31 format); this implementation applies no explicit saturation at that step.
  64. */
  65. #ifndef ARM_MATH_CM0_FAMILY
  66. /* Run the below code for Cortex-M4 and Cortex-M3 */
  67. void arm_fir_interpolate_q31(
  68. const arm_fir_interpolate_instance_q31 * S,
  69. q31_t * pSrc,
  70. q31_t * pDst,
  71. uint32_t blockSize)
  72. {
  73. q31_t *pState = S->pState; /* State pointer */
  74. q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  75. q31_t *pStateCurnt; /* Points to the current sample of the state */
  76. q31_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  77. q63_t sum0; /* Accumulators */
  78. q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
  79. uint32_t i, blkCnt, j; /* Loop counters */
  80. uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */
  81. uint32_t blkCntN2;
  82. q63_t acc0, acc1;
  83. q31_t x1;
  84. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  85. /* pStateCurnt points to the location where the new input data should be written */
  86. pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
  87. /* Initialise blkCnt */
  88. blkCnt = blockSize / 2;
  89. blkCntN2 = blockSize - (2 * blkCnt);
  90. /* Samples loop unrolled by 2 */
  91. while(blkCnt > 0u)
  92. {
  93. /* Copy new input sample into the state buffer */
  94. *pStateCurnt++ = *pSrc++;
  95. *pStateCurnt++ = *pSrc++;
  96. /* Address modifier index of coefficient buffer */
  97. j = 1u;
  98. /* Loop over the Interpolation factor. */
  99. i = (S->L);
  100. while(i > 0u)
  101. {
  102. /* Set accumulator to zero */
  103. acc0 = 0;
  104. acc1 = 0;
  105. /* Initialize state pointer */
  106. ptr1 = pState;
  107. /* Initialize coefficient pointer */
  108. ptr2 = pCoeffs + (S->L - j);
  109. /* Loop over the polyPhase length. Unroll by a factor of 4.
  110. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  111. tapCnt = phaseLen >> 2u;
  112. x0 = *(ptr1++);
  113. while(tapCnt > 0u)
  114. {
  115. /* Read the input sample */
  116. x1 = *(ptr1++);
  117. /* Read the coefficient */
  118. c0 = *(ptr2);
  119. /* Perform the multiply-accumulate */
  120. acc0 += (q63_t) x0 *c0;
  121. acc1 += (q63_t) x1 *c0;
  122. /* Read the coefficient */
  123. c0 = *(ptr2 + S->L);
  124. /* Read the input sample */
  125. x0 = *(ptr1++);
  126. /* Perform the multiply-accumulate */
  127. acc0 += (q63_t) x1 *c0;
  128. acc1 += (q63_t) x0 *c0;
  129. /* Read the coefficient */
  130. c0 = *(ptr2 + S->L * 2);
  131. /* Read the input sample */
  132. x1 = *(ptr1++);
  133. /* Perform the multiply-accumulate */
  134. acc0 += (q63_t) x0 *c0;
  135. acc1 += (q63_t) x1 *c0;
  136. /* Read the coefficient */
  137. c0 = *(ptr2 + S->L * 3);
  138. /* Read the input sample */
  139. x0 = *(ptr1++);
  140. /* Perform the multiply-accumulate */
  141. acc0 += (q63_t) x1 *c0;
  142. acc1 += (q63_t) x0 *c0;
  143. /* Upsampling is done by stuffing L-1 zeros between each sample.
  144. * So instead of multiplying zeros with coefficients,
  145. * Increment the coefficient pointer by interpolation factor times. */
  146. ptr2 += 4 * S->L;
  147. /* Decrement the loop counter */
  148. tapCnt--;
  149. }
  150. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  151. tapCnt = phaseLen % 0x4u;
  152. while(tapCnt > 0u)
  153. {
  154. /* Read the input sample */
  155. x1 = *(ptr1++);
  156. /* Read the coefficient */
  157. c0 = *(ptr2);
  158. /* Perform the multiply-accumulate */
  159. acc0 += (q63_t) x0 *c0;
  160. acc1 += (q63_t) x1 *c0;
  161. /* Increment the coefficient pointer by interpolation factor times. */
  162. ptr2 += S->L;
  163. /* update states for next sample processing */
  164. x0 = x1;
  165. /* Decrement the loop counter */
  166. tapCnt--;
  167. }
  168. /* The result is in the accumulator, store in the destination buffer. */
  169. *pDst = (q31_t) (acc0 >> 31);
  170. *(pDst + S->L) = (q31_t) (acc1 >> 31);
  171. pDst++;
  172. /* Increment the address modifier index of coefficient buffer */
  173. j++;
  174. /* Decrement the loop counter */
  175. i--;
  176. }
  177. /* Advance the state pointer by 1
  178. * to process the next group of interpolation factor number samples */
  179. pState = pState + 2;
  180. pDst += S->L;
  181. /* Decrement the loop counter */
  182. blkCnt--;
  183. }
  184. /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
  185. ** No loop unrolling is used. */
  186. blkCnt = blkCntN2;
  187. /* Loop over the blockSize. */
  188. while(blkCnt > 0u)
  189. {
  190. /* Copy new input sample into the state buffer */
  191. *pStateCurnt++ = *pSrc++;
  192. /* Address modifier index of coefficient buffer */
  193. j = 1u;
  194. /* Loop over the Interpolation factor. */
  195. i = S->L;
  196. while(i > 0u)
  197. {
  198. /* Set accumulator to zero */
  199. sum0 = 0;
  200. /* Initialize state pointer */
  201. ptr1 = pState;
  202. /* Initialize coefficient pointer */
  203. ptr2 = pCoeffs + (S->L - j);
  204. /* Loop over the polyPhase length. Unroll by a factor of 4.
  205. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  206. tapCnt = phaseLen >> 2;
  207. while(tapCnt > 0u)
  208. {
  209. /* Read the coefficient */
  210. c0 = *(ptr2);
  211. /* Upsampling is done by stuffing L-1 zeros between each sample.
  212. * So instead of multiplying zeros with coefficients,
  213. * Increment the coefficient pointer by interpolation factor times. */
  214. ptr2 += S->L;
  215. /* Read the input sample */
  216. x0 = *(ptr1++);
  217. /* Perform the multiply-accumulate */
  218. sum0 += (q63_t) x0 *c0;
  219. /* Read the coefficient */
  220. c0 = *(ptr2);
  221. /* Increment the coefficient pointer by interpolation factor times. */
  222. ptr2 += S->L;
  223. /* Read the input sample */
  224. x0 = *(ptr1++);
  225. /* Perform the multiply-accumulate */
  226. sum0 += (q63_t) x0 *c0;
  227. /* Read the coefficient */
  228. c0 = *(ptr2);
  229. /* Increment the coefficient pointer by interpolation factor times. */
  230. ptr2 += S->L;
  231. /* Read the input sample */
  232. x0 = *(ptr1++);
  233. /* Perform the multiply-accumulate */
  234. sum0 += (q63_t) x0 *c0;
  235. /* Read the coefficient */
  236. c0 = *(ptr2);
  237. /* Increment the coefficient pointer by interpolation factor times. */
  238. ptr2 += S->L;
  239. /* Read the input sample */
  240. x0 = *(ptr1++);
  241. /* Perform the multiply-accumulate */
  242. sum0 += (q63_t) x0 *c0;
  243. /* Decrement the loop counter */
  244. tapCnt--;
  245. }
  246. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  247. tapCnt = phaseLen & 0x3u;
  248. while(tapCnt > 0u)
  249. {
  250. /* Read the coefficient */
  251. c0 = *(ptr2);
  252. /* Increment the coefficient pointer by interpolation factor times. */
  253. ptr2 += S->L;
  254. /* Read the input sample */
  255. x0 = *(ptr1++);
  256. /* Perform the multiply-accumulate */
  257. sum0 += (q63_t) x0 *c0;
  258. /* Decrement the loop counter */
  259. tapCnt--;
  260. }
  261. /* The result is in the accumulator, store in the destination buffer. */
  262. *pDst++ = (q31_t) (sum0 >> 31);
  263. /* Increment the address modifier index of coefficient buffer */
  264. j++;
  265. /* Decrement the loop counter */
  266. i--;
  267. }
  268. /* Advance the state pointer by 1
  269. * to process the next group of interpolation factor number samples */
  270. pState = pState + 1;
  271. /* Decrement the loop counter */
  272. blkCnt--;
  273. }
  274. /* Processing is complete.
  275. ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
  276. ** This prepares the state buffer for the next function call. */
  277. /* Points to the start of the state buffer */
  278. pStateCurnt = S->pState;
  279. tapCnt = (phaseLen - 1u) >> 2u;
  280. /* copy data */
  281. while(tapCnt > 0u)
  282. {
  283. *pStateCurnt++ = *pState++;
  284. *pStateCurnt++ = *pState++;
  285. *pStateCurnt++ = *pState++;
  286. *pStateCurnt++ = *pState++;
  287. /* Decrement the loop counter */
  288. tapCnt--;
  289. }
  290. tapCnt = (phaseLen - 1u) % 0x04u;
  291. /* copy data */
  292. while(tapCnt > 0u)
  293. {
  294. *pStateCurnt++ = *pState++;
  295. /* Decrement the loop counter */
  296. tapCnt--;
  297. }
  298. }
  299. #else
  300. void arm_fir_interpolate_q31(
  301. const arm_fir_interpolate_instance_q31 * S,
  302. q31_t * pSrc,
  303. q31_t * pDst,
  304. uint32_t blockSize)
  305. {
  306. q31_t *pState = S->pState; /* State pointer */
  307. q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  308. q31_t *pStateCurnt; /* Points to the current sample of the state */
  309. q31_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  310. /* Run the below code for Cortex-M0 */
  311. q63_t sum; /* Accumulator */
  312. q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
  313. uint32_t i, blkCnt; /* Loop counters */
  314. uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */
  315. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  316. /* pStateCurnt points to the location where the new input data should be written */
  317. pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
  318. /* Total number of intput samples */
  319. blkCnt = blockSize;
  320. /* Loop over the blockSize. */
  321. while(blkCnt > 0u)
  322. {
  323. /* Copy new input sample into the state buffer */
  324. *pStateCurnt++ = *pSrc++;
  325. /* Loop over the Interpolation factor. */
  326. i = S->L;
  327. while(i > 0u)
  328. {
  329. /* Set accumulator to zero */
  330. sum = 0;
  331. /* Initialize state pointer */
  332. ptr1 = pState;
  333. /* Initialize coefficient pointer */
  334. ptr2 = pCoeffs + (i - 1u);
  335. tapCnt = phaseLen;
  336. while(tapCnt > 0u)
  337. {
  338. /* Read the coefficient */
  339. c0 = *(ptr2);
  340. /* Increment the coefficient pointer by interpolation factor times. */
  341. ptr2 += S->L;
  342. /* Read the input sample */
  343. x0 = *ptr1++;
  344. /* Perform the multiply-accumulate */
  345. sum += (q63_t) x0 *c0;
  346. /* Decrement the loop counter */
  347. tapCnt--;
  348. }
  349. /* The result is in the accumulator, store in the destination buffer. */
  350. *pDst++ = (q31_t) (sum >> 31);
  351. /* Decrement the loop counter */
  352. i--;
  353. }
  354. /* Advance the state pointer by 1
  355. * to process the next group of interpolation factor number samples */
  356. pState = pState + 1;
  357. /* Decrement the loop counter */
  358. blkCnt--;
  359. }
  360. /* Processing is complete.
  361. ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
  362. ** This prepares the state buffer for the next function call. */
  363. /* Points to the start of the state buffer */
  364. pStateCurnt = S->pState;
  365. tapCnt = phaseLen - 1u;
  366. /* copy data */
  367. while(tapCnt > 0u)
  368. {
  369. *pStateCurnt++ = *pState++;
  370. /* Decrement the loop counter */
  371. tapCnt--;
  372. }
  373. }
  374. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  375. /**
  376. * @} end of FIR_Interpolate group
  377. */