Keyboard firmwares for Atmel AVR and Cortex-M
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

arm_fir_interpolate_q15.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /*-----------------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_fir_interpolate_q15.c
  9. *
  10. * Description: Q15 FIR interpolation.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * ---------------------------------------------------------------------------*/
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @addtogroup FIR_Interpolate
  46. * @{
  47. */
  48. /**
  49. * @brief Processing function for the Q15 FIR interpolator.
  50. * @param[in] *S points to an instance of the Q15 FIR interpolator structure.
  51. * @param[in] *pSrc points to the block of input data.
  52. * @param[out] *pDst points to the block of output data.
  53. * @param[in] blockSize number of input samples to process per call.
  54. * @return none.
  55. *
  56. * <b>Scaling and Overflow Behavior:</b>
  57. * \par
  58. * The function is implemented using a 64-bit internal accumulator.
  59. * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  60. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  61. * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  62. * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  63. * Lastly, the accumulator is saturated to yield a result in 1.15 format.
  64. */
  65. #ifndef ARM_MATH_CM0_FAMILY
  66. /* Run the below code for Cortex-M4 and Cortex-M3 */
  67. void arm_fir_interpolate_q15(
  68. const arm_fir_interpolate_instance_q15 * S,
  69. q15_t * pSrc,
  70. q15_t * pDst,
  71. uint32_t blockSize)
  72. {
  73. q15_t *pState = S->pState; /* State pointer */
  74. q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  75. q15_t *pStateCurnt; /* Points to the current sample of the state */
  76. q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  77. q63_t sum0; /* Accumulators */
  78. q15_t x0, c0; /* Temporary variables to hold state and coefficient values */
  79. uint32_t i, blkCnt, j, tapCnt; /* Loop counters */
  80. uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
  81. uint32_t blkCntN2;
  82. q63_t acc0, acc1;
  83. q15_t x1;
  84. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  85. /* pStateCurnt points to the location where the new input data should be written */
  86. pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
  87. /* Initialise blkCnt */
  88. blkCnt = blockSize / 2;
  89. blkCntN2 = blockSize - (2 * blkCnt);
  90. /* Samples loop unrolled by 2 */
  91. while(blkCnt > 0u)
  92. {
  93. /* Copy new input sample into the state buffer */
  94. *pStateCurnt++ = *pSrc++;
  95. *pStateCurnt++ = *pSrc++;
  96. /* Address modifier index of coefficient buffer */
  97. j = 1u;
  98. /* Loop over the Interpolation factor. */
  99. i = (S->L);
  100. while(i > 0u)
  101. {
  102. /* Set accumulator to zero */
  103. acc0 = 0;
  104. acc1 = 0;
  105. /* Initialize state pointer */
  106. ptr1 = pState;
  107. /* Initialize coefficient pointer */
  108. ptr2 = pCoeffs + (S->L - j);
  109. /* Loop over the polyPhase length. Unroll by a factor of 4.
  110. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  111. tapCnt = phaseLen >> 2u;
  112. x0 = *(ptr1++);
  113. while(tapCnt > 0u)
  114. {
  115. /* Read the input sample */
  116. x1 = *(ptr1++);
  117. /* Read the coefficient */
  118. c0 = *(ptr2);
  119. /* Perform the multiply-accumulate */
  120. acc0 += (q63_t) x0 *c0;
  121. acc1 += (q63_t) x1 *c0;
  122. /* Read the coefficient */
  123. c0 = *(ptr2 + S->L);
  124. /* Read the input sample */
  125. x0 = *(ptr1++);
  126. /* Perform the multiply-accumulate */
  127. acc0 += (q63_t) x1 *c0;
  128. acc1 += (q63_t) x0 *c0;
  129. /* Read the coefficient */
  130. c0 = *(ptr2 + S->L * 2);
  131. /* Read the input sample */
  132. x1 = *(ptr1++);
  133. /* Perform the multiply-accumulate */
  134. acc0 += (q63_t) x0 *c0;
  135. acc1 += (q63_t) x1 *c0;
  136. /* Read the coefficient */
  137. c0 = *(ptr2 + S->L * 3);
  138. /* Read the input sample */
  139. x0 = *(ptr1++);
  140. /* Perform the multiply-accumulate */
  141. acc0 += (q63_t) x1 *c0;
  142. acc1 += (q63_t) x0 *c0;
  143. /* Upsampling is done by stuffing L-1 zeros between each sample.
  144. * So instead of multiplying zeros with coefficients,
  145. * Increment the coefficient pointer by interpolation factor times. */
  146. ptr2 += 4 * S->L;
  147. /* Decrement the loop counter */
  148. tapCnt--;
  149. }
  150. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  151. tapCnt = phaseLen % 0x4u;
  152. while(tapCnt > 0u)
  153. {
  154. /* Read the input sample */
  155. x1 = *(ptr1++);
  156. /* Read the coefficient */
  157. c0 = *(ptr2);
  158. /* Perform the multiply-accumulate */
  159. acc0 += (q63_t) x0 *c0;
  160. acc1 += (q63_t) x1 *c0;
  161. /* Increment the coefficient pointer by interpolation factor times. */
  162. ptr2 += S->L;
  163. /* update states for next sample processing */
  164. x0 = x1;
  165. /* Decrement the loop counter */
  166. tapCnt--;
  167. }
  168. /* The result is in the accumulator, store in the destination buffer. */
  169. *pDst = (q15_t) (__SSAT((acc0 >> 15), 16));
  170. *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16));
  171. pDst++;
  172. /* Increment the address modifier index of coefficient buffer */
  173. j++;
  174. /* Decrement the loop counter */
  175. i--;
  176. }
  177. /* Advance the state pointer by 1
  178. * to process the next group of interpolation factor number samples */
  179. pState = pState + 2;
  180. pDst += S->L;
  181. /* Decrement the loop counter */
  182. blkCnt--;
  183. }
  184. /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
  185. ** No loop unrolling is used. */
  186. blkCnt = blkCntN2;
  187. /* Loop over the blockSize. */
  188. while(blkCnt > 0u)
  189. {
  190. /* Copy new input sample into the state buffer */
  191. *pStateCurnt++ = *pSrc++;
  192. /* Address modifier index of coefficient buffer */
  193. j = 1u;
  194. /* Loop over the Interpolation factor. */
  195. i = S->L;
  196. while(i > 0u)
  197. {
  198. /* Set accumulator to zero */
  199. sum0 = 0;
  200. /* Initialize state pointer */
  201. ptr1 = pState;
  202. /* Initialize coefficient pointer */
  203. ptr2 = pCoeffs + (S->L - j);
  204. /* Loop over the polyPhase length. Unroll by a factor of 4.
  205. ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
  206. tapCnt = phaseLen >> 2;
  207. while(tapCnt > 0u)
  208. {
  209. /* Read the coefficient */
  210. c0 = *(ptr2);
  211. /* Upsampling is done by stuffing L-1 zeros between each sample.
  212. * So instead of multiplying zeros with coefficients,
  213. * Increment the coefficient pointer by interpolation factor times. */
  214. ptr2 += S->L;
  215. /* Read the input sample */
  216. x0 = *(ptr1++);
  217. /* Perform the multiply-accumulate */
  218. sum0 += (q63_t) x0 *c0;
  219. /* Read the coefficient */
  220. c0 = *(ptr2);
  221. /* Increment the coefficient pointer by interpolation factor times. */
  222. ptr2 += S->L;
  223. /* Read the input sample */
  224. x0 = *(ptr1++);
  225. /* Perform the multiply-accumulate */
  226. sum0 += (q63_t) x0 *c0;
  227. /* Read the coefficient */
  228. c0 = *(ptr2);
  229. /* Increment the coefficient pointer by interpolation factor times. */
  230. ptr2 += S->L;
  231. /* Read the input sample */
  232. x0 = *(ptr1++);
  233. /* Perform the multiply-accumulate */
  234. sum0 += (q63_t) x0 *c0;
  235. /* Read the coefficient */
  236. c0 = *(ptr2);
  237. /* Increment the coefficient pointer by interpolation factor times. */
  238. ptr2 += S->L;
  239. /* Read the input sample */
  240. x0 = *(ptr1++);
  241. /* Perform the multiply-accumulate */
  242. sum0 += (q63_t) x0 *c0;
  243. /* Decrement the loop counter */
  244. tapCnt--;
  245. }
  246. /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
  247. tapCnt = phaseLen & 0x3u;
  248. while(tapCnt > 0u)
  249. {
  250. /* Read the coefficient */
  251. c0 = *(ptr2);
  252. /* Increment the coefficient pointer by interpolation factor times. */
  253. ptr2 += S->L;
  254. /* Read the input sample */
  255. x0 = *(ptr1++);
  256. /* Perform the multiply-accumulate */
  257. sum0 += (q63_t) x0 *c0;
  258. /* Decrement the loop counter */
  259. tapCnt--;
  260. }
  261. /* The result is in the accumulator, store in the destination buffer. */
  262. *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
  263. j++;
  264. /* Decrement the loop counter */
  265. i--;
  266. }
  267. /* Advance the state pointer by 1
  268. * to process the next group of interpolation factor number samples */
  269. pState = pState + 1;
  270. /* Decrement the loop counter */
  271. blkCnt--;
  272. }
  273. /* Processing is complete.
  274. ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
  275. ** This prepares the state buffer for the next function call. */
  276. /* Points to the start of the state buffer */
  277. pStateCurnt = S->pState;
  278. i = ((uint32_t) phaseLen - 1u) >> 2u;
  279. /* copy data */
  280. while(i > 0u)
  281. {
  282. #ifndef UNALIGNED_SUPPORT_DISABLE
  283. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  284. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  285. #else
  286. *pStateCurnt++ = *pState++;
  287. *pStateCurnt++ = *pState++;
  288. *pStateCurnt++ = *pState++;
  289. *pStateCurnt++ = *pState++;
  290. #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
  291. /* Decrement the loop counter */
  292. i--;
  293. }
  294. i = ((uint32_t) phaseLen - 1u) % 0x04u;
  295. while(i > 0u)
  296. {
  297. *pStateCurnt++ = *pState++;
  298. /* Decrement the loop counter */
  299. i--;
  300. }
  301. }
  302. #else
  303. /* Run the below code for Cortex-M0 */
  304. void arm_fir_interpolate_q15(
  305. const arm_fir_interpolate_instance_q15 * S,
  306. q15_t * pSrc,
  307. q15_t * pDst,
  308. uint32_t blockSize)
  309. {
  310. q15_t *pState = S->pState; /* State pointer */
  311. q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  312. q15_t *pStateCurnt; /* Points to the current sample of the state */
  313. q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
  314. q63_t sum; /* Accumulator */
  315. q15_t x0, c0; /* Temporary variables to hold state and coefficient values */
  316. uint32_t i, blkCnt, tapCnt; /* Loop counters */
  317. uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
  318. /* S->pState buffer contains previous frame (phaseLen - 1) samples */
  319. /* pStateCurnt points to the location where the new input data should be written */
  320. pStateCurnt = S->pState + (phaseLen - 1u);
  321. /* Total number of intput samples */
  322. blkCnt = blockSize;
  323. /* Loop over the blockSize. */
  324. while(blkCnt > 0u)
  325. {
  326. /* Copy new input sample into the state buffer */
  327. *pStateCurnt++ = *pSrc++;
  328. /* Loop over the Interpolation factor. */
  329. i = S->L;
  330. while(i > 0u)
  331. {
  332. /* Set accumulator to zero */
  333. sum = 0;
  334. /* Initialize state pointer */
  335. ptr1 = pState;
  336. /* Initialize coefficient pointer */
  337. ptr2 = pCoeffs + (i - 1u);
  338. /* Loop over the polyPhase length */
  339. tapCnt = (uint32_t) phaseLen;
  340. while(tapCnt > 0u)
  341. {
  342. /* Read the coefficient */
  343. c0 = *ptr2;
  344. /* Increment the coefficient pointer by interpolation factor times. */
  345. ptr2 += S->L;
  346. /* Read the input sample */
  347. x0 = *ptr1++;
  348. /* Perform the multiply-accumulate */
  349. sum += ((q31_t) x0 * c0);
  350. /* Decrement the loop counter */
  351. tapCnt--;
  352. }
  353. /* Store the result after converting to 1.15 format in the destination buffer */
  354. *pDst++ = (q15_t) (__SSAT((sum >> 15), 16));
  355. /* Decrement the loop counter */
  356. i--;
  357. }
  358. /* Advance the state pointer by 1
  359. * to process the next group of interpolation factor number samples */
  360. pState = pState + 1;
  361. /* Decrement the loop counter */
  362. blkCnt--;
  363. }
  364. /* Processing is complete.
  365. ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
  366. ** This prepares the state buffer for the next function call. */
  367. /* Points to the start of the state buffer */
  368. pStateCurnt = S->pState;
  369. i = (uint32_t) phaseLen - 1u;
  370. while(i > 0u)
  371. {
  372. *pStateCurnt++ = *pState++;
  373. /* Decrement the loop counter */
  374. i--;
  375. }
  376. }
  377. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  378. /**
  379. * @} end of FIR_Interpolate group
  380. */