Keyboard firmwares for Atmel AVR and Cortex-M
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

arm_iir_lattice_q15.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_iir_lattice_q15.c
  9. *
  10. * Description: Q15 IIR lattice filter processing function.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @addtogroup IIR_Lattice
  46. * @{
  47. */
  48. /**
  49. * @brief Processing function for the Q15 IIR lattice filter.
  50. * @param[in] *S points to an instance of the Q15 IIR lattice structure. Each call reads numStages reflection coefficients (pkCoeffs) and numStages + 1 ladder coefficients (pvCoeffs), and accesses pState up to index numStages + blockSize - 1.
  51. * @param[in] *pSrc points to the block of input data (blockSize samples are read).
  52. * @param[out] *pDst points to the block of output data (blockSize samples are written).
  53. * @param[in] blockSize number of samples to process.
  54. * @return none.
  55. *
  56. * @details
  57. * <b>Scaling and Overflow Behavior:</b>
  58. * \par
  59. * The function is implemented using a 64-bit internal accumulator.
  60. * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  61. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  62. * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  63. * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
  64. * Lastly, the accumulator is saturated to yield a result in 1.15 format. The per-stage f and g values are also passed through __SSAT(..., 16) before they are stored or reused.
  65. */
  66. void arm_iir_lattice_q15(
  67. const arm_iir_lattice_instance_q15 * S,
  68. q15_t * pSrc,
  69. q15_t * pDst,
  70. uint32_t blockSize)
  71. {
  72. #ifndef ARM_MATH_CM0_FAMILY
  73. /* Run the below code for Cortex-M4 and Cortex-M3: first tap alone, then taps in groups of four, then the leftovers */
  74. q31_t fcurr, fnext, gcurr = 0, gnext; /* f and g values of the lattice stage being processed */
  75. q15_t gnext1, gnext2; /* Two saturated g outputs kept for packing into one 32-bit word */
  76. uint32_t stgCnt; /* Loop counter for the final state copy */
  77. q63_t acc; /* Accumulator for the ladder sum of g * v products */
  78. uint32_t blkCnt, tapCnt; /* Loop counters for samples and for taps */
  79. q15_t *px1, *px2, *pk, *pv; /* px1: state read, px2: state write, pk: reflection coeffs, pv: ladder coeffs */
  80. uint32_t numStages = S->numStages; /* number of stages */
  81. q15_t *pState; /* State pointer, moves forward by one element per sample */
  82. q15_t *pStateCurnt; /* Destination pointer for the final state copy */
  83. q15_t out; /* Temporary variable for output */
  84. q31_t v; /* Two ladder coefficients packed into one 32-bit word */
  85. #ifdef UNALIGNED_SUPPORT_DISABLE
  86. q15_t v1, v2; /* Ladder coefficients read one at a time, then packed into v */
  87. #endif
  88. blkCnt = blockSize;
  89. pState = &S->pState[0];
  90. /* Sample processing: one iteration per input sample */
  91. while(blkCnt > 0u)
  92. {
  93. /* Read Sample from input buffer */
  94. /* fN(n) = x(n) */
  95. fcurr = *pSrc++;
  96. /* Initialize state read pointer */
  97. px1 = pState;
  98. /* Initialize state write pointer (same start as the read pointer; each slot is read before it is overwritten) */
  99. px2 = pState;
  100. /* Set accumulator to zero */
  101. acc = 0;
  102. /* Initialize Ladder coeff pointer */
  103. pv = &S->pvCoeffs[0];
  104. /* Initialize Reflection coeff pointer */
  105. pk = &S->pkCoeffs[0];
  106. /* Process sample for first tap (done outside the unrolled loop); read gN-1(n-1) from state */
  107. gcurr = *px1++;
  108. /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
  109. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  110. fnext = __SSAT(fnext, 16);
  111. /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
  112. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  113. gnext = __SSAT(gnext, 16);
  114. /* write gN(n) into state for next sample processing */
  115. *px2++ = (q15_t) gnext;
  116. /* y(n) += gN(n) * vN */
  117. acc += (q31_t) ((gnext * (*pv++)));
  118. /* Update f values for next coefficient processing */
  119. fcurr = fnext;
  120. /* Loop unrolling. Process 4 of the remaining (numStages - 1) taps per iteration. */
  121. tapCnt = (numStages - 1u) >> 2;
  122. while(tapCnt > 0u)
  123. {
  124. /* Process sample for 2nd, 6th ...taps */
  125. /* Read gN-2(n-1) from state buffer */
  126. gcurr = *px1++;
  127. /* Process sample for 2nd, 6th .. taps */
  128. /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
  129. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  130. fnext = __SSAT(fnext, 16);
  131. /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
  132. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  133. gnext1 = (q15_t) __SSAT(gnext, 16);
  134. /* write gN-1(n) into state */
  135. *px2++ = (q15_t) gnext1;
  136. /* Process sample for 3rd, 7th ...taps */
  137. /* Read gN-3(n-1) from state */
  138. gcurr = *px1++;
  139. /* Process sample for 3rd, 7th .. taps */
  140. /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
  141. fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
  142. fcurr = __SSAT(fcurr, 16);
  143. /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
  144. gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
  145. gnext2 = (q15_t) __SSAT(gnext, 16);
  146. /* write gN-2(n) into state */
  147. *px2++ = (q15_t) gnext2;
  148. /* Read vN-1 and vN-2 at a time (one 32-bit load, or two 16-bit loads packed in endian-dependent order) */
  149. #ifndef UNALIGNED_SUPPORT_DISABLE
  150. v = *__SIMD32(pv)++;
  151. #else
  152. v1 = *pv++;
  153. v2 = *pv++;
  154. #ifndef ARM_MATH_BIG_ENDIAN
  155. v = __PKHBT(v1, v2, 16);
  156. #else
  157. v = __PKHBT(v2, v1, 16);
  158. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  159. #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
  160. /* Pack gN-1(n) and gN-2(n) in the same halfword order as the coefficients in v */
  161. #ifndef ARM_MATH_BIG_ENDIAN
  162. gnext = __PKHBT(gnext1, gnext2, 16);
  163. #else
  164. gnext = __PKHBT(gnext2, gnext1, 16);
  165. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  166. /* y(n) += gN-1(n) * vN-1 */
  167. /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
  168. /* y(n) += gN-2(n) * vN-2 */
  169. /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
  170. acc = __SMLALD(gnext, v, acc);
  171. /* Process sample for 4th, 8th ...taps */
  172. /* Read gN-4(n-1) from state */
  173. gcurr = *px1++;
  174. /* Process sample for 4th, 8th .. taps */
  175. /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
  176. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  177. fnext = __SSAT(fnext, 16);
  178. /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
  179. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  180. gnext1 = (q15_t) __SSAT(gnext, 16);
  181. /* write gN-3(n) for the next sample process */
  182. *px2++ = (q15_t) gnext1;
  183. /* Process sample for 5th, 9th ...taps */
  184. /* Read gN-5(n-1) from state */
  185. gcurr = *px1++;
  186. /* Process sample for 5th, 9th .. taps */
  187. /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
  188. fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
  189. fcurr = __SSAT(fcurr, 16);
  190. /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
  191. gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
  192. gnext2 = (q15_t) __SSAT(gnext, 16);
  193. /* write gN-4(n) for the next sample process */
  194. *px2++ = (q15_t) gnext2;
  195. /* Read vN-3 and vN-4 at a time */
  196. #ifndef UNALIGNED_SUPPORT_DISABLE
  197. v = *__SIMD32(pv)++;
  198. #else
  199. v1 = *pv++;
  200. v2 = *pv++;
  201. #ifndef ARM_MATH_BIG_ENDIAN
  202. v = __PKHBT(v1, v2, 16);
  203. #else
  204. v = __PKHBT(v2, v1, 16);
  205. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  206. #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
  207. /* Pack gN-3(n) and gN-4(n) */
  208. #ifndef ARM_MATH_BIG_ENDIAN
  209. gnext = __PKHBT(gnext1, gnext2, 16);
  210. #else
  211. gnext = __PKHBT(gnext2, gnext1, 16);
  212. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  213. /* y(n) += gN-4(n) * vN-4 */
  214. /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
  215. /* y(n) += gN-3(n) * vN-3 */
  216. /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
  217. acc = __SMLALD(gnext, v, acc);
  218. tapCnt--;
  219. }
  220. fnext = fcurr; /* Keep the latest f value in fnext in case the loop below does not run */
  221. /* If (numStages - 1) is not a multiple of 4, compute the remaining filter taps one at a time */
  222. tapCnt = (numStages - 1u) % 0x4u;
  223. while(tapCnt > 0u)
  224. {
  225. gcurr = *px1++;
  226. /* Process sample for last taps */
  227. fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
  228. fnext = __SSAT(fnext, 16);
  229. gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
  230. gnext = __SSAT(gnext, 16);
  231. /* Output samples for last taps */
  232. acc += (q31_t) (((q31_t) gnext * (*pv++)));
  233. *px2++ = (q15_t) gnext;
  234. fcurr = fnext;
  235. tapCnt--;
  236. }
  237. /* y(n) += g0(n) * v0 (the final f value is used as g0(n)) */
  238. acc += (q31_t) (((q31_t) fnext * (*pv++)));
  239. out = (q15_t) __SSAT(acc >> 15, 16);
  240. *px2++ = (q15_t) fnext;
  241. /* write out into pDst */
  242. *pDst++ = out;
  243. /* Advance the state pointer by 1 so the next sample reads the state starting one element further on */
  244. pState = pState + 1u;
  245. blkCnt--;
  246. }
  247. /* Processing is complete. Now copy last S->numStages samples to start of the buffer
  248. for the preparation of next frame process */
  249. /* Points to the start of the state buffer */
  250. pStateCurnt = &S->pState[0];
  251. pState = &S->pState[blockSize]; /* Copy source: the state left after the last processed sample */
  252. stgCnt = (numStages >> 2u);
  253. /* copy data, four q15_t values per iteration */
  254. while(stgCnt > 0u)
  255. {
  256. #ifndef UNALIGNED_SUPPORT_DISABLE
  257. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  258. *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
  259. #else
  260. *pStateCurnt++ = *pState++;
  261. *pStateCurnt++ = *pState++;
  262. *pStateCurnt++ = *pState++;
  263. *pStateCurnt++ = *pState++;
  264. #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
  265. /* Decrement the loop counter */
  266. stgCnt--;
  267. }
  268. /* Calculation of count for remaining q15_t data */
  269. stgCnt = (numStages) % 0x4u;
  270. /* copy data */
  271. while(stgCnt > 0u)
  272. {
  273. *pStateCurnt++ = *pState++;
  274. /* Decrement the loop counter */
  275. stgCnt--;
  276. }
  277. #else
  278. /* Run the below code for Cortex-M0: one tap per loop iteration, no packed 16-bit intrinsics */
  279. q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* f and g values of the lattice stage being processed */
  280. uint32_t stgCnt; /* Loop counter for the final state copy */
  281. q63_t acc; /* Accumulator for the ladder sum of g * v products */
  282. uint32_t blkCnt, tapCnt; /* Loop counters for samples and for taps */
  283. q15_t *px1, *px2, *pk, *pv; /* px1: state read, px2: state write, pk: reflection coeffs, pv: ladder coeffs */
  284. uint32_t numStages = S->numStages; /* number of stages */
  285. q15_t *pState; /* State pointer, moves forward by one element per sample */
  286. q15_t *pStateCurnt; /* Destination pointer for the final state copy */
  287. q15_t out; /* Temporary variable for output */
  288. blkCnt = blockSize;
  289. pState = &S->pState[0];
  290. /* Sample processing: one iteration per input sample */
  291. while(blkCnt > 0u)
  292. {
  293. /* Read Sample from input buffer */
  294. /* fN(n) = x(n) */
  295. fcurr = *pSrc++;
  296. /* Initialize state read pointer */
  297. px1 = pState;
  298. /* Initialize state write pointer (same start as the read pointer; each slot is read before it is overwritten) */
  299. px2 = pState;
  300. /* Set accumulator to zero */
  301. acc = 0;
  302. /* Initialize Ladder coeff pointer */
  303. pv = &S->pvCoeffs[0];
  304. /* Initialize Reflection coeff pointer */
  305. pk = &S->pkCoeffs[0];
  306. tapCnt = numStages;
  307. while(tapCnt > 0u)
  308. {
  309. gcurr = *px1++;
  310. /* Process sample */
  311. /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
  312. fnext = fcurr - ((gcurr * (*pk)) >> 15);
  313. fnext = __SSAT(fnext, 16);
  314. /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
  315. gnext = ((fnext * (*pk++)) >> 15) + gcurr;
  316. gnext = __SSAT(gnext, 16);
  317. /* Output samples */
  318. /* y(n) += gN(n) * vN */
  319. acc += (q31_t) ((gnext * (*pv++)));
  320. /* write gN(n) into state for next sample processing */
  321. *px2++ = (q15_t) gnext;
  322. /* Update f values for next coefficient processing */
  323. fcurr = fnext;
  324. tapCnt--;
  325. }
  326. /* y(n) += g0(n) * v0 (the final f value is used as g0(n)) */
  327. acc += (q31_t) ((fnext * (*pv++)));
  328. out = (q15_t) __SSAT(acc >> 15, 16);
  329. *px2++ = (q15_t) fnext;
  330. /* write out into pDst */
  331. *pDst++ = out;
  332. /* Advance the state pointer by 1 so the next sample reads the state starting one element further on */
  333. pState = pState + 1u;
  334. blkCnt--;
  335. }
  336. /* Processing is complete. Now copy last S->numStages samples to start of the buffer
  337. for the preparation of next frame process */
  338. /* Points to the start of the state buffer */
  339. pStateCurnt = &S->pState[0];
  340. pState = &S->pState[blockSize]; /* Copy source: the state left after the last processed sample */
  341. stgCnt = numStages;
  342. /* copy data */
  343. while(stgCnt > 0u)
  344. {
  345. *pStateCurnt++ = *pState++;
  346. /* Decrement the loop counter */
  347. stgCnt--;
  348. }
  349. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  350. }
  351. /**
  352. * @} end of IIR_Lattice group
  353. */