You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

arm_cfft_radix4_q15.c 55KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_cfft_radix4_q15.c
  9. *
  10. * Description: This file contains the definitions of the Radix-4 FFT & IFFT functions and
  11. * of the in-place bit reversal that uses a bit reversal table
  12. *
  13. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * - Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in
  22. * the documentation and/or other materials provided with the
  23. * distribution.
  24. * - Neither the name of ARM LIMITED nor the names of its contributors
  25. * may be used to endorse or promote products derived from this
  26. * software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.
  40. * -------------------------------------------------------------------- */
  41. #include "arm_math.h"
  42. void arm_radix4_butterfly_q15(
  43. q15_t * pSrc16,
  44. uint32_t fftLen,
  45. q15_t * pCoef16,
  46. uint32_t twidCoefModifier);
  47. void arm_radix4_butterfly_inverse_q15(
  48. q15_t * pSrc16,
  49. uint32_t fftLen,
  50. q15_t * pCoef16,
  51. uint32_t twidCoefModifier);
  52. void arm_bitreversal_q15(
  53. q15_t * pSrc,
  54. uint32_t fftLen,
  55. uint16_t bitRevFactor,
  56. uint16_t * pBitRevTab);
  57. /**
  58. * @ingroup groupTransforms
  59. */
  60. /**
  61. * @addtogroup ComplexFFT
  62. * @{
  63. */
  64. /**
  65. * @details
  66. * @brief Processing function for the Q15 CFFT/CIFFT.
  67. * @param[in] *S points to an instance of the Q15 CFFT/CIFFT structure.
  68. * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place.
  69. * @return none.
  70. *
  71. * \par Input and output formats:
  72. * \par
  73. * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
  74. * Hence the output format is different for different FFT sizes.
  75. * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
  76. * \par
  77. * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
  78. * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
  79. */
  80. void arm_cfft_radix4_q15(
  81. const arm_cfft_radix4_instance_q15 * S,
  82. q15_t * pSrc)
  83. {
  84. if(S->ifftFlag == 1u)
  85. {
  86. /* Complex IFFT radix-4 */
  87. arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle,
  88. S->twidCoefModifier);
  89. }
  90. else
  91. {
  92. /* Complex FFT radix-4 */
  93. arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle,
  94. S->twidCoefModifier);
  95. }
  96. if(S->bitReverseFlag == 1u)
  97. {
  98. /* Bit Reversal */
  99. arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  100. }
  101. }
  102. /**
  103. * @} end of ComplexFFT group
  104. */
  105. /*
  106. * Radix-4 FFT algorithm used is :
  107. *
  108. * Input real and imaginary data:
  109. * x(n) = xa + j * ya
  110. * x(n+N/4 ) = xb + j * yb
  111. * x(n+N/2 ) = xc + j * yc
  112. * x(n+3N/4) = xd + j * yd
  113. *
  114. *
  115. * Output real and imaginary data:
  116. * x(4r) = xa'+ j * ya'
  117. * x(4r+1) = xb'+ j * yb'
  118. * x(4r+2) = xc'+ j * yc'
  119. * x(4r+3) = xd'+ j * yd'
  120. *
  121. *
  122. * Twiddle factors for radix-4 FFT:
  123. * Wn = co1 + j * (- si1)
  124. * W2n = co2 + j * (- si2)
  125. * W3n = co3 + j * (- si3)
  126. * The real and imaginary output values for the radix-4 butterfly are
  127. * xa' = xa + xb + xc + xd
  128. * ya' = ya + yb + yc + yd
  129. * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
  130. * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
  131. * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
  132. * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
  133. * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
  134. * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
  135. *
  136. */
  137. /**
  138. * @brief Core function for the Q15 CFFT butterfly process.
  139. * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
  140. * @param[in] fftLen length of the FFT.
  141. * @param[in] *pCoef16 points to twiddle coefficient buffer.
  142. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  143. * @return none.
  144. */
  145. void arm_radix4_butterfly_q15(
  146. q15_t * pSrc16,
  147. uint32_t fftLen,
  148. q15_t * pCoef16,
  149. uint32_t twidCoefModifier)
  150. {
  151. #ifndef ARM_MATH_CM0_FAMILY
  152. /* Run the below code for Cortex-M4 and Cortex-M3 */
  153. q31_t R, S, T, U;
  154. q31_t C1, C2, C3, out1, out2;
  155. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  156. q15_t in;
  157. q15_t *ptr1;
  158. q31_t xaya, xbyb, xcyc, xdyd;
  159. /* Total process is divided into three stages */
  160. /* process first stage, middle stages, & last stage */
  161. /* Initializations for the first stage */
  162. n2 = fftLen;
  163. n1 = n2;
  164. /* n2 = fftLen/4 */
  165. n2 >>= 2u;
  166. /* Index for twiddle coefficient */
  167. ic = 0u;
  168. /* Index for input read and output write */
  169. i0 = 0u;
  170. j = n2;
  171. /* Input is in 1.15(q15) format */
  172. /* start of first stage process */
  173. do
  174. {
  175. /* Butterfly implementation */
  176. /* index calculation for the input as, */
  177. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  178. i1 = i0 + n2;
  179. i2 = i1 + n2;
  180. i3 = i2 + n2;
  181. /* Reading i0, i0+fftLen/2 inputs */
  182. /* Read ya (real), xa(imag) input */
  183. T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
  184. in = ((int16_t) (T & 0xFFFF)) >> 2;
  185. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  186. /* Read yc (real), xc(imag) input */
  187. S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
  188. in = ((int16_t) (S & 0xFFFF)) >> 2;
  189. S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  190. /* R = packed((ya + yc), (xa + xc) ) */
  191. R = __QADD16(T, S);
  192. /* S = packed((ya - yc), (xa - xc) ) */
  193. S = __QSUB16(T, S);
  194. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  195. /* Read yb (real), xb(imag) input */
  196. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  197. in = ((int16_t) (T & 0xFFFF)) >> 2;
  198. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  199. /* Read yd (real), xd(imag) input */
  200. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  201. in = ((int16_t) (U & 0xFFFF)) >> 2;
  202. U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  203. /* T = packed((yb + yd), (xb + xd) ) */
  204. T = __QADD16(T, U);
  205. /* writing the butterfly processed i0 sample */
  206. /* xa' = xa + xb + xc + xd */
  207. /* ya' = ya + yb + yc + yd */
  208. _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
  209. /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
  210. R = __QSUB16(R, T);
  211. /* co2 & si2 are read from SIMD Coefficient pointer */
  212. C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
  213. #ifndef ARM_MATH_BIG_ENDIAN
  214. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  215. out1 = __SMUAD(C2, R) >> 16u;
  216. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  217. out2 = __SMUSDX(C2, R);
  218. #else
  219. /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  220. out1 = __SMUSDX(R, C2) >> 16u;
  221. /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  222. out2 = __SMUAD(C2, R);
  223. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  224. /* Reading i0+fftLen/4 */
  225. /* T = packed(yb, xb) */
  226. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  227. in = ((int16_t) (T & 0xFFFF)) >> 2;
  228. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  229. /* writing the butterfly processed i0 + fftLen/4 sample */
  230. /* writing output(xc', yc') in little endian format */
  231. _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
  232. (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  233. /* Butterfly calculations */
  234. /* U = packed(yd, xd) */
  235. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  236. in = ((int16_t) (U & 0xFFFF)) >> 2;
  237. U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  238. /* T = packed(yb-yd, xb-xd) */
  239. T = __QSUB16(T, U);
  240. #ifndef ARM_MATH_BIG_ENDIAN
  241. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  242. R = __QASX(S, T);
  243. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  244. S = __QSAX(S, T);
  245. #else
  246. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  247. R = __QSAX(S, T);
  248. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  249. S = __QASX(S, T);
  250. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  251. /* co1 & si1 are read from SIMD Coefficient pointer */
  252. C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
  253. /* Butterfly process for the i0+fftLen/2 sample */
  254. #ifndef ARM_MATH_BIG_ENDIAN
  255. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  256. out1 = __SMUAD(C1, S) >> 16u;
  257. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  258. out2 = __SMUSDX(C1, S);
  259. #else
  260. /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  261. out1 = __SMUSDX(S, C1) >> 16u;
  262. /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  263. out2 = __SMUAD(C1, S);
  264. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  265. /* writing output(xb', yb') in little endian format */
  266. _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
  267. ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
  268. /* co3 & si3 are read from SIMD Coefficient pointer */
  269. C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
  270. /* Butterfly process for the i0+3fftLen/4 sample */
  271. #ifndef ARM_MATH_BIG_ENDIAN
  272. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  273. out1 = __SMUAD(C3, R) >> 16u;
  274. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  275. out2 = __SMUSDX(C3, R);
  276. #else
  277. /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  278. out1 = __SMUSDX(R, C3) >> 16u;
  279. /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  280. out2 = __SMUAD(C3, R);
  281. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  282. /* writing output(xd', yd') in little endian format */
  283. _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
  284. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  285. /* Twiddle coefficients index modifier */
  286. ic = ic + twidCoefModifier;
  287. /* Updating input index */
  288. i0 = i0 + 1u;
  289. } while(--j);
  290. /* data is in 4.11(q11) format */
  291. /* end of first stage process */
  292. /* start of middle stage process */
  293. /* Twiddle coefficients index modifier */
  294. twidCoefModifier <<= 2u;
  295. /* Calculation of Middle stage */
  296. for (k = fftLen / 4u; k > 4u; k >>= 2u)
  297. {
  298. /* Initializations for the middle stage */
  299. n1 = n2;
  300. n2 >>= 2u;
  301. ic = 0u;
  302. for (j = 0u; j <= (n2 - 1u); j++)
  303. {
  304. /* index calculation for the coefficients */
  305. C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
  306. C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
  307. C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
  308. /* Twiddle coefficients index modifier */
  309. ic = ic + twidCoefModifier;
  310. /* Butterfly implementation */
  311. for (i0 = j; i0 < fftLen; i0 += n1)
  312. {
  313. /* index calculation for the input as, */
  314. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  315. i1 = i0 + n2;
  316. i2 = i1 + n2;
  317. i3 = i2 + n2;
  318. /* Reading i0, i0+fftLen/2 inputs */
  319. /* Read ya (real), xa(imag) input */
  320. T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
  321. /* Read yc (real), xc(imag) input */
  322. S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
  323. /* R = packed( (ya + yc), (xa + xc)) */
  324. R = __QADD16(T, S);
  325. /* S = packed((ya - yc), (xa - xc)) */
  326. S = __QSUB16(T, S);
  327. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  328. /* Read yb (real), xb(imag) input */
  329. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  330. /* Read yd (real), xd(imag) input */
  331. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  332. /* T = packed( (yb + yd), (xb + xd)) */
  333. T = __QADD16(T, U);
  334. /* writing the butterfly processed i0 sample */
  335. /* xa' = xa + xb + xc + xd */
  336. /* ya' = ya + yb + yc + yd */
  337. out1 = __SHADD16(R, T);
  338. in = ((int16_t) (out1 & 0xFFFF)) >> 1;
  339. out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
  340. _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
  341. /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
  342. R = __SHSUB16(R, T);
  343. #ifndef ARM_MATH_BIG_ENDIAN
  344. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  345. out1 = __SMUAD(C2, R) >> 16u;
  346. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  347. out2 = __SMUSDX(C2, R);
  348. #else
  349. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  350. out1 = __SMUSDX(R, C2) >> 16u;
  351. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  352. out2 = __SMUAD(C2, R);
  353. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  354. /* Reading i0+3fftLen/4 */
  355. /* Read yb (real), xb(imag) input */
  356. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  357. /* writing the butterfly processed i0 + fftLen/4 sample */
  358. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  359. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  360. _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
  361. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  362. /* Butterfly calculations */
  363. /* Read yd (real), xd(imag) input */
  364. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  365. /* T = packed(yb-yd, xb-xd) */
  366. T = __QSUB16(T, U);
  367. #ifndef ARM_MATH_BIG_ENDIAN
  368. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  369. R = __SHASX(S, T);
  370. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  371. S = __SHSAX(S, T);
  372. /* Butterfly process for the i0+fftLen/2 sample */
  373. out1 = __SMUAD(C1, S) >> 16u;
  374. out2 = __SMUSDX(C1, S);
  375. #else
  376. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  377. R = __SHSAX(S, T);
  378. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  379. S = __SHASX(S, T);
  380. /* Butterfly process for the i0+fftLen/2 sample */
  381. out1 = __SMUSDX(S, C1) >> 16u;
  382. out2 = __SMUAD(C1, S);
  383. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  384. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  385. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  386. _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
  387. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  388. /* Butterfly process for the i0+3fftLen/4 sample */
  389. #ifndef ARM_MATH_BIG_ENDIAN
  390. out1 = __SMUAD(C3, R) >> 16u;
  391. out2 = __SMUSDX(C3, R);
  392. #else
  393. out1 = __SMUSDX(R, C3) >> 16u;
  394. out2 = __SMUAD(C3, R);
  395. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  396. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  397. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  398. _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
  399. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  400. }
  401. }
  402. /* Twiddle coefficients index modifier */
  403. twidCoefModifier <<= 2u;
  404. }
  405. /* end of middle stage process */
  406. /* data is in 10.6(q6) format for the 1024 point */
  407. /* data is in 8.8(q8) format for the 256 point */
  408. /* data is in 6.10(q10) format for the 64 point */
  409. /* data is in 4.12(q12) format for the 16 point */
  410. /* Initializations for the last stage */
  411. j = fftLen >> 2;
  412. ptr1 = &pSrc16[0];
  413. /* start of last stage process */
  414. /* Butterfly implementation */
  415. do
  416. {
  417. /* Read xa (real), ya(imag) input */
  418. xaya = *__SIMD32(ptr1)++;
  419. /* Read xb (real), yb(imag) input */
  420. xbyb = *__SIMD32(ptr1)++;
  421. /* Read xc (real), yc(imag) input */
  422. xcyc = *__SIMD32(ptr1)++;
  423. /* Read xd (real), yd(imag) input */
  424. xdyd = *__SIMD32(ptr1)++;
  425. /* R = packed((ya + yc), (xa + xc)) */
  426. R = __QADD16(xaya, xcyc);
  427. /* T = packed((yb + yd), (xb + xd)) */
  428. T = __QADD16(xbyb, xdyd);
  429. /* pointer updation for writing */
  430. ptr1 = ptr1 - 8u;
  431. /* xa' = xa + xb + xc + xd */
  432. /* ya' = ya + yb + yc + yd */
  433. *__SIMD32(ptr1)++ = __SHADD16(R, T);
  434. /* T = packed((yb + yd), (xb + xd)) */
  435. T = __QADD16(xbyb, xdyd);
  436. /* xc' = (xa-xb+xc-xd) */
  437. /* yc' = (ya-yb+yc-yd) */
  438. *__SIMD32(ptr1)++ = __SHSUB16(R, T);
  439. /* S = packed((ya - yc), (xa - xc)) */
  440. S = __QSUB16(xaya, xcyc);
  441. /* Read yd (real), xd(imag) input */
  442. /* T = packed( (yb - yd), (xb - xd)) */
  443. U = __QSUB16(xbyb, xdyd);
  444. #ifndef ARM_MATH_BIG_ENDIAN
  445. /* xb' = (xa+yb-xc-yd) */
  446. /* yb' = (ya-xb-yc+xd) */
  447. *__SIMD32(ptr1)++ = __SHSAX(S, U);
  448. /* xd' = (xa-yb-xc+yd) */
  449. /* yd' = (ya+xb-yc-xd) */
  450. *__SIMD32(ptr1)++ = __SHASX(S, U);
  451. #else
  452. /* xb' = (xa+yb-xc-yd) */
  453. /* yb' = (ya-xb-yc+xd) */
  454. *__SIMD32(ptr1)++ = __SHASX(S, U);
  455. /* xd' = (xa-yb-xc+yd) */
  456. /* yd' = (ya+xb-yc-xd) */
  457. *__SIMD32(ptr1)++ = __SHSAX(S, U);
  458. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  459. } while(--j);
  460. /* end of last stage process */
  461. /* output is in 11.5(q5) format for the 1024 point */
  462. /* output is in 9.7(q7) format for the 256 point */
  463. /* output is in 7.9(q9) format for the 64 point */
  464. /* output is in 5.11(q11) format for the 16 point */
  465. #else
  466. /* Run the below code for Cortex-M0 */
  467. q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  468. q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  469. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  470. /* Total process is divided into three stages */
  471. /* process first stage, middle stages, & last stage */
  472. /* Initializations for the first stage */
  473. n2 = fftLen;
  474. n1 = n2;
  475. /* n2 = fftLen/4 */
  476. n2 >>= 2u;
  477. /* Index for twiddle coefficient */
  478. ic = 0u;
  479. /* Index for input read and output write */
  480. i0 = 0u;
  481. j = n2;
  482. /* Input is in 1.15(q15) format */
  483. /* start of first stage process */
  484. do
  485. {
  486. /* Butterfly implementation */
  487. /* index calculation for the input as, */
  488. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  489. i1 = i0 + n2;
  490. i2 = i1 + n2;
  491. i3 = i2 + n2;
  492. /* Reading i0, i0+fftLen/2 inputs */
  493. /* input is down scale by 4 to avoid overflow */
  494. /* Read ya (real), xa(imag) input */
  495. T0 = pSrc16[i0 * 2u] >> 2u;
  496. T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
  497. /* input is down scale by 4 to avoid overflow */
  498. /* Read yc (real), xc(imag) input */
  499. S0 = pSrc16[i2 * 2u] >> 2u;
  500. S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
  501. /* R0 = (ya + yc) */
  502. R0 = __SSAT(T0 + S0, 16u);
  503. /* R1 = (xa + xc) */
  504. R1 = __SSAT(T1 + S1, 16u);
  505. /* S0 = (ya - yc) */
  506. S0 = __SSAT(T0 - S0, 16);
  507. /* S1 = (xa - xc) */
  508. S1 = __SSAT(T1 - S1, 16);
  509. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  510. /* input is down scale by 4 to avoid overflow */
  511. /* Read yb (real), xb(imag) input */
  512. T0 = pSrc16[i1 * 2u] >> 2u;
  513. T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
  514. /* input is down scale by 4 to avoid overflow */
  515. /* Read yd (real), xd(imag) input */
  516. U0 = pSrc16[i3 * 2u] >> 2u;
  517. U1 = pSrc16[(i3 * 2u) + 1] >> 2u;
  518. /* T0 = (yb + yd) */
  519. T0 = __SSAT(T0 + U0, 16u);
  520. /* T1 = (xb + xd) */
  521. T1 = __SSAT(T1 + U1, 16u);
  522. /* writing the butterfly processed i0 sample */
  523. /* ya' = ya + yb + yc + yd */
  524. /* xa' = xa + xb + xc + xd */
  525. pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  526. pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  527. /* R0 = (ya + yc) - (yb + yd) */
  528. /* R1 = (xa + xc) - (xb + xd) */
  529. R0 = __SSAT(R0 - T0, 16u);
  530. R1 = __SSAT(R1 - T1, 16u);
  531. /* co2 & si2 are read from Coefficient pointer */
  532. Co2 = pCoef16[2u * ic * 2u];
  533. Si2 = pCoef16[(2u * ic * 2u) + 1];
  534. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  535. out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
  536. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  537. out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
  538. /* Reading i0+fftLen/4 */
  539. /* input is down scale by 4 to avoid overflow */
  540. /* T0 = yb, T1 = xb */
  541. T0 = pSrc16[i1 * 2u] >> 2;
  542. T1 = pSrc16[(i1 * 2u) + 1] >> 2;
  543. /* writing the butterfly processed i0 + fftLen/4 sample */
  544. /* writing output(xc', yc') in little endian format */
  545. pSrc16[i1 * 2u] = out1;
  546. pSrc16[(i1 * 2u) + 1] = out2;
  547. /* Butterfly calculations */
  548. /* input is down scale by 4 to avoid overflow */
  549. /* U0 = yd, U1 = xd */
  550. U0 = pSrc16[i3 * 2u] >> 2;
  551. U1 = pSrc16[(i3 * 2u) + 1] >> 2;
  552. /* T0 = yb-yd */
  553. T0 = __SSAT(T0 - U0, 16);
  554. /* T1 = xb-xd */
  555. T1 = __SSAT(T1 - U1, 16);
  556. /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
  557. R0 = (short) __SSAT((q31_t) (S0 - T1), 16);
  558. R1 = (short) __SSAT((q31_t) (S1 + T0), 16);
  559. /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
  560. S0 = (short) __SSAT(((q31_t) S0 + T1), 16u);
  561. S1 = (short) __SSAT(((q31_t) S1 - T0), 16u);
  562. /* co1 & si1 are read from Coefficient pointer */
  563. Co1 = pCoef16[ic * 2u];
  564. Si1 = pCoef16[(ic * 2u) + 1];
  565. /* Butterfly process for the i0+fftLen/2 sample */
  566. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  567. out1 = (short) ((Si1 * S1 + Co1 * S0) >> 16);
  568. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  569. out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16);
  570. /* writing output(xb', yb') in little endian format */
  571. pSrc16[i2 * 2u] = out1;
  572. pSrc16[(i2 * 2u) + 1] = out2;
  573. /* Co3 & si3 are read from Coefficient pointer */
  574. Co3 = pCoef16[3u * (ic * 2u)];
  575. Si3 = pCoef16[(3u * (ic * 2u)) + 1];
  576. /* Butterfly process for the i0+3fftLen/4 sample */
  577. /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  578. out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
  579. /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  580. out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
  581. /* writing output(xd', yd') in little endian format */
  582. pSrc16[i3 * 2u] = out1;
  583. pSrc16[(i3 * 2u) + 1] = out2;
  584. /* Twiddle coefficients index modifier */
  585. ic = ic + twidCoefModifier;
  586. /* Updating input index */
  587. i0 = i0 + 1u;
  588. } while(--j);
  589. /* data is in 4.11(q11) format */
  590. /* end of first stage process */
  591. /* start of middle stage process */
  592. /* Twiddle coefficients index modifier */
  593. twidCoefModifier <<= 2u;
  594. /* Calculation of Middle stage */
  595. for (k = fftLen / 4u; k > 4u; k >>= 2u)
  596. {
  597. /* Initializations for the middle stage */
  598. n1 = n2;
  599. n2 >>= 2u;
  600. ic = 0u;
  601. for (j = 0u; j <= (n2 - 1u); j++)
  602. {
  603. /* index calculation for the coefficients */
  604. Co1 = pCoef16[ic * 2u];
  605. Si1 = pCoef16[(ic * 2u) + 1u];
  606. Co2 = pCoef16[2u * (ic * 2u)];
  607. Si2 = pCoef16[(2u * (ic * 2u)) + 1u];
  608. Co3 = pCoef16[3u * (ic * 2u)];
  609. Si3 = pCoef16[(3u * (ic * 2u)) + 1u];
  610. /* Twiddle coefficients index modifier */
  611. ic = ic + twidCoefModifier;
  612. /* Butterfly implementation */
  613. for (i0 = j; i0 < fftLen; i0 += n1)
  614. {
  615. /* index calculation for the input as, */
  616. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  617. i1 = i0 + n2;
  618. i2 = i1 + n2;
  619. i3 = i2 + n2;
  620. /* Reading i0, i0+fftLen/2 inputs */
  621. /* Read ya (real), xa(imag) input */
  622. T0 = pSrc16[i0 * 2u];
  623. T1 = pSrc16[(i0 * 2u) + 1u];
  624. /* Read yc (real), xc(imag) input */
  625. S0 = pSrc16[i2 * 2u];
  626. S1 = pSrc16[(i2 * 2u) + 1u];
  627. /* R0 = (ya + yc), R1 = (xa + xc) */
  628. R0 = __SSAT(T0 + S0, 16);
  629. R1 = __SSAT(T1 + S1, 16);
  630. /* S0 = (ya - yc), S1 =(xa - xc) */
  631. S0 = __SSAT(T0 - S0, 16);
  632. S1 = __SSAT(T1 - S1, 16);
  633. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  634. /* Read yb (real), xb(imag) input */
  635. T0 = pSrc16[i1 * 2u];
  636. T1 = pSrc16[(i1 * 2u) + 1u];
  637. /* Read yd (real), xd(imag) input */
  638. U0 = pSrc16[i3 * 2u];
  639. U1 = pSrc16[(i3 * 2u) + 1u];
  640. /* T0 = (yb + yd), T1 = (xb + xd) */
  641. T0 = __SSAT(T0 + U0, 16);
  642. T1 = __SSAT(T1 + U1, 16);
  643. /* writing the butterfly processed i0 sample */
  644. /* xa' = xa + xb + xc + xd */
  645. /* ya' = ya + yb + yc + yd */
  646. out1 = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
  647. out2 = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
  648. pSrc16[i0 * 2u] = out1;
  649. pSrc16[(2u * i0) + 1u] = out2;
  650. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  651. R0 = (R0 >> 1u) - (T0 >> 1u);
  652. R1 = (R1 >> 1u) - (T1 >> 1u);
  653. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  654. out1 = (short) ((Co2 * R0 + Si2 * R1) >> 16u);
  655. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  656. out2 = (short) ((-Si2 * R0 + Co2 * R1) >> 16u);
  657. /* Reading i0+3fftLen/4 */
  658. /* Read yb (real), xb(imag) input */
  659. T0 = pSrc16[i1 * 2u];
  660. T1 = pSrc16[(i1 * 2u) + 1u];
  661. /* writing the butterfly processed i0 + fftLen/4 sample */
  662. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  663. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  664. pSrc16[i1 * 2u] = out1;
  665. pSrc16[(i1 * 2u) + 1u] = out2;
  666. /* Butterfly calculations */
  667. /* Read yd (real), xd(imag) input */
  668. U0 = pSrc16[i3 * 2u];
  669. U1 = pSrc16[(i3 * 2u) + 1u];
  670. /* T0 = yb-yd, T1 = xb-xd */
  671. T0 = __SSAT(T0 - U0, 16);
  672. T1 = __SSAT(T1 - U1, 16);
  673. /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
  674. R0 = (S0 >> 1u) - (T1 >> 1u);
  675. R1 = (S1 >> 1u) + (T0 >> 1u);
  676. /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
  677. S0 = (S0 >> 1u) + (T1 >> 1u);
  678. S1 = (S1 >> 1u) - (T0 >> 1u);
  679. /* Butterfly process for the i0+fftLen/2 sample */
  680. out1 = (short) ((Co1 * S0 + Si1 * S1) >> 16u);
  681. out2 = (short) ((-Si1 * S0 + Co1 * S1) >> 16u);
  682. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  683. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  684. pSrc16[i2 * 2u] = out1;
  685. pSrc16[(i2 * 2u) + 1u] = out2;
  686. /* Butterfly process for the i0+3fftLen/4 sample */
  687. out1 = (short) ((Si3 * R1 + Co3 * R0) >> 16u);
  688. out2 = (short) ((-Si3 * R0 + Co3 * R1) >> 16u);
  689. /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
  690. /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
  691. pSrc16[i3 * 2u] = out1;
  692. pSrc16[(i3 * 2u) + 1u] = out2;
  693. }
  694. }
  695. /* Twiddle coefficients index modifier */
  696. twidCoefModifier <<= 2u;
  697. }
  698. /* end of middle stage process */
  699. /* data is in 10.6(q6) format for the 1024 point */
  700. /* data is in 8.8(q8) format for the 256 point */
  701. /* data is in 6.10(q10) format for the 64 point */
  702. /* data is in 4.12(q12) format for the 16 point */
  703. /* Initializations for the last stage */
  704. n1 = n2;
  705. n2 >>= 2u;
  706. /* start of last stage process */
  707. /* Butterfly implementation */
  708. for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
  709. {
  710. /* index calculation for the input as, */
  711. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  712. i1 = i0 + n2;
  713. i2 = i1 + n2;
  714. i3 = i2 + n2;
  715. /* Reading i0, i0+fftLen/2 inputs */
  716. /* Read ya (real), xa(imag) input */
  717. T0 = pSrc16[i0 * 2u];
  718. T1 = pSrc16[(i0 * 2u) + 1u];
  719. /* Read yc (real), xc(imag) input */
  720. S0 = pSrc16[i2 * 2u];
  721. S1 = pSrc16[(i2 * 2u) + 1u];
  722. /* R0 = (ya + yc), R1 = (xa + xc) */
  723. R0 = __SSAT(T0 + S0, 16u);
  724. R1 = __SSAT(T1 + S1, 16u);
  725. /* S0 = (ya - yc), S1 = (xa - xc) */
  726. S0 = __SSAT(T0 - S0, 16u);
  727. S1 = __SSAT(T1 - S1, 16u);
  728. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  729. /* Read yb (real), xb(imag) input */
  730. T0 = pSrc16[i1 * 2u];
  731. T1 = pSrc16[(i1 * 2u) + 1u];
  732. /* Read yd (real), xd(imag) input */
  733. U0 = pSrc16[i3 * 2u];
  734. U1 = pSrc16[(i3 * 2u) + 1u];
  735. /* T0 = (yb + yd), T1 = (xb + xd)) */
  736. T0 = __SSAT(T0 + U0, 16u);
  737. T1 = __SSAT(T1 + U1, 16u);
  738. /* writing the butterfly processed i0 sample */
  739. /* xa' = xa + xb + xc + xd */
  740. /* ya' = ya + yb + yc + yd */
  741. pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  742. pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  743. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  744. R0 = (R0 >> 1u) - (T0 >> 1u);
  745. R1 = (R1 >> 1u) - (T1 >> 1u);
  746. /* Read yb (real), xb(imag) input */
  747. T0 = pSrc16[i1 * 2u];
  748. T1 = pSrc16[(i1 * 2u) + 1u];
  749. /* writing the butterfly processed i0 + fftLen/4 sample */
  750. /* xc' = (xa-xb+xc-xd) */
  751. /* yc' = (ya-yb+yc-yd) */
  752. pSrc16[i1 * 2u] = R0;
  753. pSrc16[(i1 * 2u) + 1u] = R1;
  754. /* Read yd (real), xd(imag) input */
  755. U0 = pSrc16[i3 * 2u];
  756. U1 = pSrc16[(i3 * 2u) + 1u];
  757. /* T0 = (yb - yd), T1 = (xb - xd) */
  758. T0 = __SSAT(T0 - U0, 16u);
  759. T1 = __SSAT(T1 - U1, 16u);
  760. /* writing the butterfly processed i0 + fftLen/2 sample */
  761. /* xb' = (xa+yb-xc-yd) */
  762. /* yb' = (ya-xb-yc+xd) */
  763. pSrc16[i2 * 2u] = (S0 >> 1u) + (T1 >> 1u);
  764. pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
  765. /* writing the butterfly processed i0 + 3fftLen/4 sample */
  766. /* xd' = (xa-yb-xc+yd) */
  767. /* yd' = (ya+xb-yc-xd) */
  768. pSrc16[i3 * 2u] = (S0 >> 1u) - (T1 >> 1u);
  769. pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
  770. }
  771. /* end of last stage process */
  772. /* output is in 11.5(q5) format for the 1024 point */
  773. /* output is in 9.7(q7) format for the 256 point */
  774. /* output is in 7.9(q9) format for the 64 point */
  775. /* output is in 5.11(q11) format for the 16 point */
  776. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  777. }
  778. /**
  779. * @brief Core function for the Q15 CIFFT butterfly process.
  780. * @param[in, out] *pSrc16 points to the in-place buffer of Q15 data type.
  781. * @param[in] fftLen length of the FFT.
  782. * @param[in] *pCoef16 points to twiddle coefficient buffer.
  783. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  784. * @return none.
  785. */
  786. /*
  787. * Radix-4 IFFT algorithm used is :
  788. *
  789. * CIFFT uses same twiddle coefficients as CFFT function
  790. * x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
  791. *
  792. *
  793. * IFFT is implemented with following changes in equations from FFT
  794. *
  795. * Input real and imaginary data:
  796. * x(n) = xa + j * ya
  797. * x(n+N/4 ) = xb + j * yb
  798. * x(n+N/2 ) = xc + j * yc
  799. * x(n+3N/4) = xd + j * yd
  800. *
  801. *
  802. * Output real and imaginary data:
  803. * x(4r) = xa'+ j * ya'
  804. * x(4r+1) = xb'+ j * yb'
  805. * x(4r+2) = xc'+ j * yc'
  806. * x(4r+3) = xd'+ j * yd'
  807. *
  808. *
  809. * Twiddle factors for radix-4 IFFT:
  810. * Wn = co1 + j * (si1)
  811. * W2n = co2 + j * (si2)
  812. * W3n = co3 + j * (si3)
  813. * The real and imaginary output values for the radix-4 butterfly are
  814. * xa' = xa + xb + xc + xd
  815. * ya' = ya + yb + yc + yd
  816. * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
  817. * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
  818. * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
  819. * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
  820. * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
  821. * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
  822. *
  823. */
  824. void arm_radix4_butterfly_inverse_q15(
  825. q15_t * pSrc16,
  826. uint32_t fftLen,
  827. q15_t * pCoef16,
  828. uint32_t twidCoefModifier)
  829. {
  830. #ifndef ARM_MATH_CM0_FAMILY
  831. /* Run the below code for Cortex-M4 and Cortex-M3 */
  832. q31_t R, S, T, U;
  833. q31_t C1, C2, C3, out1, out2;
  834. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  835. q15_t in;
  836. q15_t *ptr1;
  837. q31_t xaya, xbyb, xcyc, xdyd;
  838. /* Total process is divided into three stages */
  839. /* process first stage, middle stages, & last stage */
  840. /* Initializations for the first stage */
  841. n2 = fftLen;
  842. n1 = n2;
  843. /* n2 = fftLen/4 */
  844. n2 >>= 2u;
  845. /* Index for twiddle coefficient */
  846. ic = 0u;
  847. /* Index for input read and output write */
  848. i0 = 0u;
  849. j = n2;
  850. /* Input is in 1.15(q15) format */
  851. /* start of first stage process */
  852. do
  853. {
  854. /* Butterfly implementation */
  855. /* index calculation for the input as, */
  856. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  857. i1 = i0 + n2;
  858. i2 = i1 + n2;
  859. i3 = i2 + n2;
  860. /* Reading i0, i0+fftLen/2 inputs */
  861. /* Read ya (real), xa(imag) input */
  862. T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
  863. in = ((int16_t) (T & 0xFFFF)) >> 2;
  864. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  865. /* Read yc (real), xc(imag) input */
  866. S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
  867. in = ((int16_t) (S & 0xFFFF)) >> 2;
  868. S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  869. /* R = packed((ya + yc), (xa + xc) ) */
  870. R = __QADD16(T, S);
  871. /* S = packed((ya - yc), (xa - xc) ) */
  872. S = __QSUB16(T, S);
  873. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  874. /* Read yb (real), xb(imag) input */
  875. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  876. in = ((int16_t) (T & 0xFFFF)) >> 2;
  877. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  878. /* Read yd (real), xd(imag) input */
  879. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  880. in = ((int16_t) (U & 0xFFFF)) >> 2;
  881. U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  882. /* T = packed((yb + yd), (xb + xd) ) */
  883. T = __QADD16(T, U);
  884. /* writing the butterfly processed i0 sample */
  885. /* xa' = xa + xb + xc + xd */
  886. /* ya' = ya + yb + yc + yd */
  887. _SIMD32_OFFSET(pSrc16 + (2u * i0)) = __SHADD16(R, T);
  888. /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
  889. R = __QSUB16(R, T);
  890. /* co2 & si2 are read from SIMD Coefficient pointer */
  891. C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
  892. #ifndef ARM_MATH_BIG_ENDIAN
  893. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  894. out1 = __SMUSD(C2, R) >> 16u;
  895. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  896. out2 = __SMUADX(C2, R);
  897. #else
  898. /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  899. out1 = __SMUADX(C2, R) >> 16u;
  900. /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  901. out2 = __SMUSD(__QSUB16(0, C2), R);
  902. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  903. /* Reading i0+fftLen/4 */
  904. /* T = packed(yb, xb) */
  905. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  906. in = ((int16_t) (T & 0xFFFF)) >> 2;
  907. T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  908. /* writing the butterfly processed i0 + fftLen/4 sample */
  909. /* writing output(xc', yc') in little endian format */
  910. _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
  911. (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  912. /* Butterfly calculations */
  913. /* U = packed(yd, xd) */
  914. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  915. in = ((int16_t) (U & 0xFFFF)) >> 2;
  916. U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF);
  917. /* T = packed(yb-yd, xb-xd) */
  918. T = __QSUB16(T, U);
  919. #ifndef ARM_MATH_BIG_ENDIAN
  920. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  921. R = __QSAX(S, T);
  922. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  923. S = __QASX(S, T);
  924. #else
  925. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  926. R = __QASX(S, T);
  927. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  928. S = __QSAX(S, T);
  929. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  930. /* co1 & si1 are read from SIMD Coefficient pointer */
  931. C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
  932. /* Butterfly process for the i0+fftLen/2 sample */
  933. #ifndef ARM_MATH_BIG_ENDIAN
  934. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  935. out1 = __SMUSD(C1, S) >> 16u;
  936. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  937. out2 = __SMUADX(C1, S);
  938. #else
  939. /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  940. out1 = __SMUADX(C1, S) >> 16u;
  941. /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  942. out2 = __SMUSD(__QSUB16(0, C1), S);
  943. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  944. /* writing output(xb', yb') in little endian format */
  945. _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
  946. ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
  947. /* co3 & si3 are read from SIMD Coefficient pointer */
  948. C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
  949. /* Butterfly process for the i0+3fftLen/4 sample */
  950. #ifndef ARM_MATH_BIG_ENDIAN
  951. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  952. out1 = __SMUSD(C3, R) >> 16u;
  953. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  954. out2 = __SMUADX(C3, R);
  955. #else
  956. /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  957. out1 = __SMUADX(C3, R) >> 16u;
  958. /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  959. out2 = __SMUSD(__QSUB16(0, C3), R);
  960. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  961. /* writing output(xd', yd') in little endian format */
  962. _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
  963. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  964. /* Twiddle coefficients index modifier */
  965. ic = ic + twidCoefModifier;
  966. /* Updating input index */
  967. i0 = i0 + 1u;
  968. } while(--j);
  969. /* data is in 4.11(q11) format */
  970. /* end of first stage process */
  971. /* start of middle stage process */
  972. /* Twiddle coefficients index modifier */
  973. twidCoefModifier <<= 2u;
  974. /* Calculation of Middle stage */
  975. for (k = fftLen / 4u; k > 4u; k >>= 2u)
  976. {
  977. /* Initializations for the middle stage */
  978. n1 = n2;
  979. n2 >>= 2u;
  980. ic = 0u;
  981. for (j = 0u; j <= (n2 - 1u); j++)
  982. {
  983. /* index calculation for the coefficients */
  984. C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
  985. C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
  986. C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
  987. /* Twiddle coefficients index modifier */
  988. ic = ic + twidCoefModifier;
  989. /* Butterfly implementation */
  990. for (i0 = j; i0 < fftLen; i0 += n1)
  991. {
  992. /* index calculation for the input as, */
  993. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  994. i1 = i0 + n2;
  995. i2 = i1 + n2;
  996. i3 = i2 + n2;
  997. /* Reading i0, i0+fftLen/2 inputs */
  998. /* Read ya (real), xa(imag) input */
  999. T = _SIMD32_OFFSET(pSrc16 + (2u * i0));
  1000. /* Read yc (real), xc(imag) input */
  1001. S = _SIMD32_OFFSET(pSrc16 + (2u * i2));
  1002. /* R = packed( (ya + yc), (xa + xc)) */
  1003. R = __QADD16(T, S);
  1004. /* S = packed((ya - yc), (xa - xc)) */
  1005. S = __QSUB16(T, S);
  1006. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  1007. /* Read yb (real), xb(imag) input */
  1008. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  1009. /* Read yd (real), xd(imag) input */
  1010. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  1011. /* T = packed( (yb + yd), (xb + xd)) */
  1012. T = __QADD16(T, U);
  1013. /* writing the butterfly processed i0 sample */
  1014. /* xa' = xa + xb + xc + xd */
  1015. /* ya' = ya + yb + yc + yd */
  1016. out1 = __SHADD16(R, T);
  1017. in = ((int16_t) (out1 & 0xFFFF)) >> 1;
  1018. out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF);
  1019. _SIMD32_OFFSET(pSrc16 + (2u * i0)) = out1;
  1020. /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
  1021. R = __SHSUB16(R, T);
  1022. #ifndef ARM_MATH_BIG_ENDIAN
  1023. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  1024. out1 = __SMUSD(C2, R) >> 16u;
  1025. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  1026. out2 = __SMUADX(C2, R);
  1027. #else
  1028. /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  1029. out1 = __SMUADX(R, C2) >> 16u;
  1030. /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
  1031. out2 = __SMUSD(__QSUB16(0, C2), R);
  1032. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1033. /* Reading i0+3fftLen/4 */
  1034. /* Read yb (real), xb(imag) input */
  1035. T = _SIMD32_OFFSET(pSrc16 + (2u * i1));
  1036. /* writing the butterfly processed i0 + fftLen/4 sample */
  1037. /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
  1038. /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
  1039. _SIMD32_OFFSET(pSrc16 + (2u * i1)) =
  1040. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  1041. /* Butterfly calculations */
  1042. /* Read yd (real), xd(imag) input */
  1043. U = _SIMD32_OFFSET(pSrc16 + (2u * i3));
  1044. /* T = packed(yb-yd, xb-xd) */
  1045. T = __QSUB16(T, U);
  1046. #ifndef ARM_MATH_BIG_ENDIAN
  1047. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  1048. R = __SHSAX(S, T);
  1049. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  1050. S = __SHASX(S, T);
  1051. /* Butterfly process for the i0+fftLen/2 sample */
  1052. out1 = __SMUSD(C1, S) >> 16u;
  1053. out2 = __SMUADX(C1, S);
  1054. #else
  1055. /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
  1056. R = __SHASX(S, T);
  1057. /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
  1058. S = __SHSAX(S, T);
  1059. /* Butterfly process for the i0+fftLen/2 sample */
  1060. out1 = __SMUADX(S, C1) >> 16u;
  1061. out2 = __SMUSD(__QSUB16(0, C1), S);
  1062. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1063. /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
  1064. /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
  1065. _SIMD32_OFFSET(pSrc16 + (2u * i2)) =
  1066. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  1067. /* Butterfly process for the i0+3fftLen/4 sample */
  1068. #ifndef ARM_MATH_BIG_ENDIAN
  1069. out1 = __SMUSD(C3, R) >> 16u;
  1070. out2 = __SMUADX(C3, R);
  1071. #else
  1072. out1 = __SMUADX(C3, R) >> 16u;
  1073. out2 = __SMUSD(__QSUB16(0, C3), R);
  1074. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1075. /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
  1076. /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
  1077. _SIMD32_OFFSET(pSrc16 + (2u * i3)) =
  1078. ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
  1079. }
  1080. }
  1081. /* Twiddle coefficients index modifier */
  1082. twidCoefModifier <<= 2u;
  1083. }
  1084. /* end of middle stage process */
  1085. /* data is in 10.6(q6) format for the 1024 point */
  1086. /* data is in 8.8(q8) format for the 256 point */
  1087. /* data is in 6.10(q10) format for the 64 point */
  1088. /* data is in 4.12(q12) format for the 16 point */
  1089. /* Initializations for the last stage */
  1090. j = fftLen >> 2;
  1091. ptr1 = &pSrc16[0];
  1092. /* start of last stage process */
  1093. /* Butterfly implementation */
  1094. do
  1095. {
  1096. /* Read xa (real), ya(imag) input */
  1097. xaya = *__SIMD32(ptr1)++;
  1098. /* Read xb (real), yb(imag) input */
  1099. xbyb = *__SIMD32(ptr1)++;
  1100. /* Read xc (real), yc(imag) input */
  1101. xcyc = *__SIMD32(ptr1)++;
  1102. /* Read xd (real), yd(imag) input */
  1103. xdyd = *__SIMD32(ptr1)++;
  1104. /* R = packed((ya + yc), (xa + xc)) */
  1105. R = __QADD16(xaya, xcyc);
  1106. /* T = packed((yb + yd), (xb + xd)) */
  1107. T = __QADD16(xbyb, xdyd);
  1108. /* move the pointer back by 8 elements so the outputs overwrite the inputs just read */
  1109. ptr1 = ptr1 - 8u;
  1110. /* xa' = xa + xb + xc + xd */
  1111. /* ya' = ya + yb + yc + yd */
  1112. *__SIMD32(ptr1)++ = __SHADD16(R, T);
  1113. /* T = packed((yb + yd), (xb + xd)) */
  1114. T = __QADD16(xbyb, xdyd);
  1115. /* xc' = (xa-xb+xc-xd) */
  1116. /* yc' = (ya-yb+yc-yd) */
  1117. *__SIMD32(ptr1)++ = __SHSUB16(R, T);
  1118. /* S = packed((ya - yc), (xa - xc)) */
  1119. S = __QSUB16(xaya, xcyc);
  1120. /* Read yd (real), xd(imag) input */
  1121. /* U = packed( (yb - yd), (xb - xd)) */
  1122. U = __QSUB16(xbyb, xdyd);
  1123. #ifndef ARM_MATH_BIG_ENDIAN
  1124. /* xb' = (xa+yb-xc-yd) */
  1125. /* yb' = (ya-xb-yc+xd) */
  1126. *__SIMD32(ptr1)++ = __SHASX(S, U);
  1127. /* xd' = (xa-yb-xc+yd) */
  1128. /* yd' = (ya+xb-yc-xd) */
  1129. *__SIMD32(ptr1)++ = __SHSAX(S, U);
  1130. #else
  1131. /* xb' = (xa+yb-xc-yd) */
  1132. /* yb' = (ya-xb-yc+xd) */
  1133. *__SIMD32(ptr1)++ = __SHSAX(S, U);
  1134. /* xd' = (xa-yb-xc+yd) */
  1135. /* yd' = (ya+xb-yc-xd) */
  1136. *__SIMD32(ptr1)++ = __SHASX(S, U);
  1137. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  1138. } while(--j);
  1139. /* end of last stage process */
  1140. /* output is in 11.5(q5) format for the 1024 point */
  1141. /* output is in 9.7(q7) format for the 256 point */
  1142. /* output is in 7.9(q9) format for the 64 point */
  1143. /* output is in 5.11(q11) format for the 16 point */
  1144. #else
  1145. /* Run the below code for Cortex-M0 */
  1146. q15_t R0, R1, S0, S1, T0, T1, U0, U1;
  1147. q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
  1148. uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
  1149. /* Total process is divided into three stages */
  1150. /* process first stage, middle stages, & last stage */
  1151. /* Initializations for the first stage */
  1152. n2 = fftLen;
  1153. n1 = n2;
  1154. /* n2 = fftLen/4 */
  1155. n2 >>= 2u;
  1156. /* Index for twiddle coefficient */
  1157. ic = 0u;
  1158. /* Index for input read and output write */
  1159. i0 = 0u;
  1160. j = n2;
  1161. /* Input is in 1.15(q15) format */
  1162. /* Start of first stage process */
  1163. do
  1164. {
  1165. /* Butterfly implementation */
  1166. /* index calculation for the input as, */
  1167. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  1168. i1 = i0 + n2;
  1169. i2 = i1 + n2;
  1170. i3 = i2 + n2;
  1171. /* Reading i0, i0+fftLen/2 inputs */
  1172. /* input is down scale by 4 to avoid overflow */
  1173. /* Read ya (real), xa(imag) input */
  1174. T0 = pSrc16[i0 * 2u] >> 2u;
  1175. T1 = pSrc16[(i0 * 2u) + 1u] >> 2u;
  1176. /* input is down scale by 4 to avoid overflow */
  1177. /* Read yc (real), xc(imag) input */
  1178. S0 = pSrc16[i2 * 2u] >> 2u;
  1179. S1 = pSrc16[(i2 * 2u) + 1u] >> 2u;
  1180. /* R0 = (ya + yc), R1 = (xa + xc) */
  1181. R0 = __SSAT(T0 + S0, 16u);
  1182. R1 = __SSAT(T1 + S1, 16u);
  1183. /* S0 = (ya - yc), S1 = (xa - xc) */
  1184. S0 = __SSAT(T0 - S0, 16u);
  1185. S1 = __SSAT(T1 - S1, 16u);
  1186. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  1187. /* input is down scale by 4 to avoid overflow */
  1188. /* Read yb (real), xb(imag) input */
  1189. T0 = pSrc16[i1 * 2u] >> 2u;
  1190. T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
  1191. /* Read yd (real), xd(imag) input */
  1192. /* input is down scale by 4 to avoid overflow */
  1193. U0 = pSrc16[i3 * 2u] >> 2u;
  1194. U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
  1195. /* T0 = (yb + yd), T1 = (xb + xd) */
  1196. T0 = __SSAT(T0 + U0, 16u);
  1197. T1 = __SSAT(T1 + U1, 16u);
  1198. /* writing the butterfly processed i0 sample */
  1199. /* xa' = xa + xb + xc + xd */
  1200. /* ya' = ya + yb + yc + yd */
  1201. pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  1202. pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  1203. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
  1204. R0 = __SSAT(R0 - T0, 16u);
  1205. R1 = __SSAT(R1 - T1, 16u);
  1206. /* co2 & si2 are read from Coefficient pointer */
  1207. Co2 = pCoef16[2u * ic * 2u];
  1208. Si2 = pCoef16[(2u * ic * 2u) + 1u];
  1209. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1210. out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16u);
  1211. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1212. out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16u);
  1213. /* Reading i0+fftLen/4 */
  1214. /* input is down scale by 4 to avoid overflow */
  1215. /* T0 = yb, T1 = xb */
  1216. T0 = pSrc16[i1 * 2u] >> 2u;
  1217. T1 = pSrc16[(i1 * 2u) + 1u] >> 2u;
  1218. /* writing the butterfly processed i0 + fftLen/4 sample */
  1219. /* writing output(xc', yc') in little endian format */
  1220. pSrc16[i1 * 2u] = out1;
  1221. pSrc16[(i1 * 2u) + 1u] = out2;
  1222. /* Butterfly calculations */
  1223. /* input is down scale by 4 to avoid overflow */
  1224. /* U0 = yd, U1 = xd) */
  1225. U0 = pSrc16[i3 * 2u] >> 2u;
  1226. U1 = pSrc16[(i3 * 2u) + 1u] >> 2u;
  1227. /* T0 = yb-yd, T1 = xb-xd) */
  1228. T0 = __SSAT(T0 - U0, 16u);
  1229. T1 = __SSAT(T1 - U1, 16u);
  1230. /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
  1231. R0 = (short) __SSAT((q31_t) (S0 + T1), 16);
  1232. R1 = (short) __SSAT((q31_t) (S1 - T0), 16);
  1233. /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
  1234. S0 = (short) __SSAT((q31_t) (S0 - T1), 16);
  1235. S1 = (short) __SSAT((q31_t) (S1 + T0), 16);
  1236. /* co1 & si1 are read from Coefficient pointer */
  1237. Co1 = pCoef16[ic * 2u];
  1238. Si1 = pCoef16[(ic * 2u) + 1u];
  1239. /* Butterfly process for the i0+fftLen/2 sample */
  1240. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  1241. out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
  1242. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  1243. out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
  1244. /* writing output(xb', yb') in little endian format */
  1245. pSrc16[i2 * 2u] = out1;
  1246. pSrc16[(i2 * 2u) + 1u] = out2;
  1247. /* Co3 & si3 are read from Coefficient pointer */
  1248. Co3 = pCoef16[3u * ic * 2u];
  1249. Si3 = pCoef16[(3u * ic * 2u) + 1u];
  1250. /* Butterfly process for the i0+3fftLen/4 sample */
  1251. /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
  1252. out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
  1253. /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
  1254. out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
  1255. /* writing output(xd', yd') in little endian format */
  1256. pSrc16[i3 * 2u] = out1;
  1257. pSrc16[(i3 * 2u) + 1u] = out2;
  1258. /* Twiddle coefficients index modifier */
  1259. ic = ic + twidCoefModifier;
  1260. /* Updating input index */
  1261. i0 = i0 + 1u;
  1262. } while(--j);
  1263. /* End of first stage process */
  1264. /* data is in 4.11(q11) format */
  1265. /* Start of Middle stage process */
  1266. /* Twiddle coefficients index modifier */
  1267. twidCoefModifier <<= 2u;
  1268. /* Calculation of Middle stage */
  1269. for (k = fftLen / 4u; k > 4u; k >>= 2u)
  1270. {
  1271. /* Initializations for the middle stage */
  1272. n1 = n2;
  1273. n2 >>= 2u;
  1274. ic = 0u;
  1275. for (j = 0u; j <= (n2 - 1u); j++)
  1276. {
  1277. /* index calculation for the coefficients */
  1278. Co1 = pCoef16[ic * 2u];
  1279. Si1 = pCoef16[(ic * 2u) + 1u];
  1280. Co2 = pCoef16[2u * ic * 2u];
  1281. Si2 = pCoef16[2u * ic * 2u + 1u];
  1282. Co3 = pCoef16[3u * ic * 2u];
  1283. Si3 = pCoef16[(3u * ic * 2u) + 1u];
  1284. /* Twiddle coefficients index modifier */
  1285. ic = ic + twidCoefModifier;
  1286. /* Butterfly implementation */
  1287. for (i0 = j; i0 < fftLen; i0 += n1)
  1288. {
  1289. /* index calculation for the input as, */
  1290. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  1291. i1 = i0 + n2;
  1292. i2 = i1 + n2;
  1293. i3 = i2 + n2;
  1294. /* Reading i0, i0+fftLen/2 inputs */
  1295. /* Read ya (real), xa(imag) input */
  1296. T0 = pSrc16[i0 * 2u];
  1297. T1 = pSrc16[(i0 * 2u) + 1u];
  1298. /* Read yc (real), xc(imag) input */
  1299. S0 = pSrc16[i2 * 2u];
  1300. S1 = pSrc16[(i2 * 2u) + 1u];
  1301. /* R0 = (ya + yc), R1 = (xa + xc) */
  1302. R0 = __SSAT(T0 + S0, 16u);
  1303. R1 = __SSAT(T1 + S1, 16u);
  1304. /* S0 = (ya - yc), S1 = (xa - xc) */
  1305. S0 = __SSAT(T0 - S0, 16u);
  1306. S1 = __SSAT(T1 - S1, 16u);
  1307. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  1308. /* Read yb (real), xb(imag) input */
  1309. T0 = pSrc16[i1 * 2u];
  1310. T1 = pSrc16[(i1 * 2u) + 1u];
  1311. /* Read yd (real), xd(imag) input */
  1312. U0 = pSrc16[i3 * 2u];
  1313. U1 = pSrc16[(i3 * 2u) + 1u];
  1314. /* T0 = (yb + yd), T1 = (xb + xd) */
  1315. T0 = __SSAT(T0 + U0, 16u);
  1316. T1 = __SSAT(T1 + U1, 16u);
  1317. /* writing the butterfly processed i0 sample */
  1318. /* xa' = xa + xb + xc + xd */
  1319. /* ya' = ya + yb + yc + yd */
  1320. pSrc16[i0 * 2u] = ((R0 >> 1u) + (T0 >> 1u)) >> 1u;
  1321. pSrc16[(i0 * 2u) + 1u] = ((R1 >> 1u) + (T1 >> 1u)) >> 1u;
  1322. /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
  1323. R0 = (R0 >> 1u) - (T0 >> 1u);
  1324. R1 = (R1 >> 1u) - (T1 >> 1u);
  1325. /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
  1326. out1 = (short) ((Co2 * R0 - Si2 * R1) >> 16);
  1327. /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1328. out2 = (short) ((Si2 * R0 + Co2 * R1) >> 16);
  1329. /* Reading i0+3fftLen/4 */
  1330. /* Read yb (real), xb(imag) input */
  1331. T0 = pSrc16[i1 * 2u];
  1332. T1 = pSrc16[(i1 * 2u) + 1u];
  1333. /* writing the butterfly processed i0 + fftLen/4 sample */
  1334. /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
  1335. /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
  1336. pSrc16[i1 * 2u] = out1;
  1337. pSrc16[(i1 * 2u) + 1u] = out2;
  1338. /* Butterfly calculations */
  1339. /* Read yd (real), xd(imag) input */
  1340. U0 = pSrc16[i3 * 2u];
  1341. U1 = pSrc16[(i3 * 2u) + 1u];
  1342. /* T0 = yb-yd, T1 = xb-xd) */
  1343. T0 = __SSAT(T0 - U0, 16u);
  1344. T1 = __SSAT(T1 - U1, 16u);
  1345. /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
  1346. R0 = (S0 >> 1u) + (T1 >> 1u);
  1347. R1 = (S1 >> 1u) - (T0 >> 1u);
  1348. /* S0 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
  1349. S0 = (S0 >> 1u) - (T1 >> 1u);
  1350. S1 = (S1 >> 1u) + (T0 >> 1u);
  1351. /* Butterfly process for the i0+fftLen/2 sample */
  1352. out1 = (short) ((Co1 * S0 - Si1 * S1) >> 16u);
  1353. out2 = (short) ((Si1 * S0 + Co1 * S1) >> 16u);
  1354. /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
  1355. /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
  1356. pSrc16[i2 * 2u] = out1;
  1357. pSrc16[(i2 * 2u) + 1u] = out2;
  1358. /* Butterfly process for the i0+3fftLen/4 sample */
  1359. out1 = (short) ((Co3 * R0 - Si3 * R1) >> 16u);
  1360. out2 = (short) ((Si3 * R0 + Co3 * R1) >> 16u);
  1361. /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
  1362. /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
  1363. pSrc16[i3 * 2u] = out1;
  1364. pSrc16[(i3 * 2u) + 1u] = out2;
  1365. }
  1366. }
  1367. /* Twiddle coefficients index modifier */
  1368. twidCoefModifier <<= 2u;
  1369. }
  1370. /* End of Middle stages process */
  1371. /* data is in 10.6(q6) format for the 1024 point */
  1372. /* data is in 8.8(q8) format for the 256 point */
  1373. /* data is in 6.10(q10) format for the 64 point */
  1374. /* data is in 4.12(q12) format for the 16 point */
  1375. /* start of last stage process */
  1376. /* Initializations for the last stage */
  1377. n1 = n2;
  1378. n2 >>= 2u;
  1379. /* Butterfly implementation */
  1380. for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1)
  1381. {
  1382. /* index calculation for the input as, */
  1383. /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
  1384. i1 = i0 + n2;
  1385. i2 = i1 + n2;
  1386. i3 = i2 + n2;
  1387. /* Reading i0, i0+fftLen/2 inputs */
  1388. /* Read xa (real), ya (imag) input */
  1389. T0 = pSrc16[i0 * 2u];
  1390. T1 = pSrc16[(i0 * 2u) + 1u];
  1391. /* Read xc (real), yc (imag) input */
  1392. S0 = pSrc16[i2 * 2u];
  1393. S1 = pSrc16[(i2 * 2u) + 1u];
  1394. /* R0 = (xa + xc), R1 = (ya + yc) */
  1395. R0 = __SSAT(T0 + S0, 16u);
  1396. R1 = __SSAT(T1 + S1, 16u);
  1397. /* S0 = (xa - xc), S1 = (ya - yc) */
  1398. S0 = __SSAT(T0 - S0, 16u);
  1399. S1 = __SSAT(T1 - S1, 16u);
  1400. /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
  1401. /* Read xb (real), yb (imag) input */
  1402. T0 = pSrc16[i1 * 2u];
  1403. T1 = pSrc16[(i1 * 2u) + 1u];
  1404. /* Read xd (real), yd (imag) input */
  1405. U0 = pSrc16[i3 * 2u];
  1406. U1 = pSrc16[(i3 * 2u) + 1u];
  1407. /* T0 = (xb + xd), T1 = (yb + yd) */
  1408. T0 = __SSAT(T0 + U0, 16u);
  1409. T1 = __SSAT(T1 + U1, 16u);
  1410. /* writing the butterfly processed i0 sample */
  1411. /* xa' = xa + xb + xc + xd */
  1412. /* ya' = ya + yb + yc + yd */
  1413. pSrc16[i0 * 2u] = (R0 >> 1u) + (T0 >> 1u);
  1414. pSrc16[(i0 * 2u) + 1u] = (R1 >> 1u) + (T1 >> 1u);
  1415. /* R0 = (xa + xc) - (xb + xd), R1 = (ya + yc) - (yb + yd) */
  1416. R0 = (R0 >> 1u) - (T0 >> 1u);
  1417. R1 = (R1 >> 1u) - (T1 >> 1u);
  1418. /* Read xb (real), yb (imag) input */
  1419. T0 = pSrc16[i1 * 2u];
  1420. T1 = pSrc16[(i1 * 2u) + 1u];
  1421. /* writing the butterfly processed i0 + fftLen/4 sample */
  1422. /* xc' = (xa-xb+xc-xd) */
  1423. /* yc' = (ya-yb+yc-yd) */
  1424. pSrc16[i1 * 2u] = R0;
  1425. pSrc16[(i1 * 2u) + 1u] = R1;
  1426. /* Read xd (real), yd (imag) input */
  1427. U0 = pSrc16[i3 * 2u];
  1428. U1 = pSrc16[(i3 * 2u) + 1u];
  1429. /* T0 = (xb - xd), T1 = (yb - yd) */
  1430. T0 = __SSAT(T0 - U0, 16u);
  1431. T1 = __SSAT(T1 - U1, 16u);
  1432. /* writing the butterfly processed i0 + fftLen/2 sample */
  1433. /* xb' = (xa-yb-xc+yd) */
  1434. /* yb' = (ya+xb-yc-xd) */
  1435. pSrc16[i2 * 2u] = (S0 >> 1u) - (T1 >> 1u);
  1436. pSrc16[(i2 * 2u) + 1u] = (S1 >> 1u) + (T0 >> 1u);
  1437. /* writing the butterfly processed i0 + 3fftLen/4 sample */
  1438. /* xd' = (xa+yb-xc-yd) */
  1439. /* yd' = (ya-xb-yc+xd) */
  1440. pSrc16[i3 * 2u] = (S0 >> 1u) + (T1 >> 1u);
  1441. pSrc16[(i3 * 2u) + 1u] = (S1 >> 1u) - (T0 >> 1u);
  1442. }
  1443. /* end of last stage process */
  1444. /* output is in 11.5(q5) format for the 1024 point */
  1445. /* output is in 9.7(q7) format for the 256 point */
  1446. /* output is in 7.9(q9) format for the 64 point */
  1447. /* output is in 5.11(q11) format for the 16 point */
  1448. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  1449. }