Keyboard firmwares for Atmel AVR and Cortex-M
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

arm_lms_norm_f32.c 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 17. January 2013
  5. * $Revision: V1.4.1
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_lms_norm_f32.c
  9. *
  10. * Description: Processing function for the floating-point Normalised LMS.
  11. *
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. * - Redistributions of source code must retain the above copyright
  18. * notice, this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright
  20. * notice, this list of conditions and the following disclaimer in
  21. * the documentation and/or other materials provided with the
  22. * distribution.
  23. * - Neither the name of ARM LIMITED nor the names of its contributors
  24. * may be used to endorse or promote products derived from this
  25. * software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.
  39. * -------------------------------------------------------------------- */
  40. #include "arm_math.h"
  41. /**
  42. * @ingroup groupFilters
  43. */
  44. /**
  45. * @defgroup LMS_NORM Normalized LMS Filters
  46. *
  47. * This set of functions implements a commonly used adaptive filter.
  48. * It is related to the Least Mean Square (LMS) adaptive filter and includes an additional normalization
  49. * factor which increases the adaptation rate of the filter.
  50. * The CMSIS DSP Library contains normalized LMS filter functions that operate on Q15, Q31, and floating-point data types.
  51. *
  52. * A normalized least mean square (NLMS) filter consists of two components as shown below.
  53. * The first component is a standard transversal or FIR filter.
  54. * The second component is a coefficient update mechanism.
  55. * The NLMS filter has two input signals.
  56. * The "input" feeds the FIR filter while the "reference input" corresponds to the desired output of the FIR filter.
  57. * That is, the FIR filter coefficients are updated so that the output of the FIR filter matches the reference input.
  58. * The filter coefficient update mechanism is based on the difference between the FIR filter output and the reference input.
  59. * This "error signal" tends towards zero as the filter adapts.
  60. * The NLMS processing functions accept the input and reference input signals and generate the filter output and error signal.
  61. * \image html LMS.gif "Internal structure of the NLMS adaptive filter"
  62. *
  63. * The functions operate on blocks of data and each call to the function processes
  64. * <code>blockSize</code> samples through the filter.
  65. * <code>pSrc</code> points to input signal, <code>pRef</code> points to reference signal,
  66. * <code>pOut</code> points to output signal and <code>pErr</code> points to error signal.
  67. * All arrays contain <code>blockSize</code> values.
  68. *
  69. * The functions operate on a block-by-block basis.
  70. * Internally, the filter coefficients <code>b[n]</code> are updated on a sample-by-sample basis.
  71. * The convergence of the LMS filter is slower compared to the normalized LMS algorithm.
  72. *
  73. * \par Algorithm:
  74. * The output signal <code>y[n]</code> is computed by a standard FIR filter:
  75. * <pre>
  76. * y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
  77. * </pre>
  78. *
  79. * \par
  80. * The error signal equals the difference between the reference signal <code>d[n]</code> and the filter output:
  81. * <pre>
  82. * e[n] = d[n] - y[n].
  83. * </pre>
  84. *
  85. * \par
  86. * After each sample of the error signal is computed, the instantaneous energy of the filter state variables is calculated:
  87. * <pre>
  88. * E = x[n]^2 + x[n-1]^2 + ... + x[n-numTaps+1]^2.
  89. * </pre>
  90. * The filter coefficients <code>b[k]</code> are then updated on a sample-by-sample basis:
  91. * <pre>
  92. * b[k] = b[k] + e[n] * (mu/E) * x[n-k], for k=0, 1, ..., numTaps-1
  93. * </pre>
  94. * where <code>mu</code> is the step size and controls the rate of coefficient convergence.
  95. *\par
  96. * In the APIs, <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
  97. * Coefficients are stored in time reversed order.
  98. * \par
  99. * <pre>
  100. * {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
  101. * </pre>
  102. * \par
  103. * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
  104. * Samples in the state buffer are stored oldest first, where x[0] is the first sample of the current block:
  105. * \par
  106. * <pre>
  107. * {x[-numTaps+1], x[-numTaps+2], ..., x[-1], x[0], x[1], ..., x[blockSize-1]}
  108. * </pre>
  109. * \par
  110. * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code> samples.
  111. * The increased state buffer length allows circular addressing, which is traditionally used in FIR filters,
  112. * to be avoided and yields a significant speed improvement.
  113. * The state variables are updated after each block of data is processed.
  114. * \par Instance Structure
  115. * The coefficients and state variables for a filter are stored together in an instance data structure.
  116. * A separate instance structure must be defined for each filter and
  117. * coefficient and state arrays cannot be shared among instances.
  118. * There are separate instance structure declarations for each of the 3 supported data types.
  119. *
  120. * \par Initialization Functions
  121. * There is also an associated initialization function for each data type.
  122. * The initialization function performs the following operations:
  123. * - Sets the values of the internal structure fields.
  124. * - Zeros out the values in the state buffer.
  125. * To do this manually without calling the init function, assign the following fields of the instance structure:
  126. * numTaps, pCoeffs, mu, energy, x0, pState. Also set all of the values in pState to zero.
  127. * For Q15 and Q31 the following fields must also be initialized:
  128. * recipTable, postShift
  129. *
  130. * \par
  131. * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
  132. * \par Fixed-Point Behavior:
  133. * Care must be taken when using the Q15 and Q31 versions of the normalised LMS filter.
  134. * The following issues must be considered:
  135. * - Scaling of coefficients
  136. * - Overflow and saturation
  137. *
  138. * \par Scaling of Coefficients:
  139. * Filter coefficients are represented as fractional values and
  140. * coefficients are restricted to lie in the range <code>[-1 +1)</code>.
  141. * The fixed-point functions have an additional scaling parameter <code>postShift</code>.
  142. * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
  143. * This essentially scales the filter coefficients by <code>2^postShift</code> and
  144. * allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
  145. * The value of <code>postShift</code> is set by the user based on the expected gain through the system being modeled.
  146. *
  147. * \par Overflow and Saturation:
  148. * Overflow and saturation behavior of the fixed-point Q15 and Q31 versions are
  149. * described separately as part of the function specific documentation below.
  150. */
  151. /**
  152. * @addtogroup LMS_NORM
  153. * @{
  154. */
  155. /**
  156. * @brief Processing function for floating-point normalized LMS filter.
  157. * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
  158. * @param[in] *pSrc points to the block of input data.
  159. * @param[in] *pRef points to the block of reference data.
  160. * @param[out] *pOut points to the block of output data.
  161. * @param[out] *pErr points to the block of error data.
  162. * @param[in] blockSize number of samples to process.
  163. * @return none.
  164. */
  165. void arm_lms_norm_f32(
  166. arm_lms_norm_instance_f32 * S,
  167. float32_t * pSrc,
  168. float32_t * pRef,
  169. float32_t * pOut,
  170. float32_t * pErr,
  171. uint32_t blockSize)
  172. {
  173. float32_t *pState = S->pState; /* State pointer */
  174. float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  175. float32_t *pStateCurnt; /* Points to the current sample of the state */
  176. float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
  177. float32_t mu = S->mu; /* Adaptive factor */
  178. uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
  179. uint32_t tapCnt, blkCnt; /* Loop counters */
  180. float32_t energy; /* Energy of the input */
  181. float32_t sum, e, d; /* accumulator, error, reference data sample */
  182. float32_t w, x0, in; /* weight factor, temporary variable to hold input sample and state */
  183. /* Initializations of error, difference, Coefficient update */
  184. e = 0.0f;
  185. d = 0.0f;
  186. w = 0.0f;
  187. energy = S->energy;
  188. x0 = S->x0;
  189. /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  190. /* pStateCurnt points to the location where the new input data should be written */
  191. pStateCurnt = &(S->pState[(numTaps - 1u)]);
  192. /* Loop over blockSize number of values */
  193. blkCnt = blockSize;
  194. #ifndef ARM_MATH_CM0_FAMILY
  195. /* Run the below code for Cortex-M4 and Cortex-M3 */
  196. while(blkCnt > 0u)
  197. {
  198. /* Copy the new input sample into the state buffer */
  199. *pStateCurnt++ = *pSrc;
  200. /* Initialize pState pointer */
  201. px = pState;
  202. /* Initialize coeff pointer */
  203. pb = (pCoeffs);
  204. /* Read the sample from input buffer */
  205. in = *pSrc++;
  206. /* Update the energy calculation */
  207. energy -= x0 * x0;
  208. energy += in * in;
  209. /* Set the accumulator to zero */
  210. sum = 0.0f;
  211. /* Loop unrolling. Process 4 taps at a time. */
  212. tapCnt = numTaps >> 2;
  213. while(tapCnt > 0u)
  214. {
  215. /* Perform the multiply-accumulate */
  216. sum += (*px++) * (*pb++);
  217. sum += (*px++) * (*pb++);
  218. sum += (*px++) * (*pb++);
  219. sum += (*px++) * (*pb++);
  220. /* Decrement the loop counter */
  221. tapCnt--;
  222. }
  223. /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  224. tapCnt = numTaps % 0x4u;
  225. while(tapCnt > 0u)
  226. {
  227. /* Perform the multiply-accumulate */
  228. sum += (*px++) * (*pb++);
  229. /* Decrement the loop counter */
  230. tapCnt--;
  231. }
  232. /* The result in the accumulator, store in the destination buffer. */
  233. *pOut++ = sum;
  234. /* Compute and store error */
  235. d = (float32_t) (*pRef++);
  236. e = d - sum;
  237. *pErr++ = e;
  238. /* Calculation of Weighting factor for updating filter coefficients */
  239. /* epsilon value 0.000000119209289f */
  240. w = (e * mu) / (energy + 0.000000119209289f);
  241. /* Initialize pState pointer */
  242. px = pState;
  243. /* Initialize coeff pointer */
  244. pb = (pCoeffs);
  245. /* Loop unrolling. Process 4 taps at a time. */
  246. tapCnt = numTaps >> 2;
  247. /* Update filter coefficients */
  248. while(tapCnt > 0u)
  249. {
  250. /* Perform the multiply-accumulate */
  251. *pb += w * (*px++);
  252. pb++;
  253. *pb += w * (*px++);
  254. pb++;
  255. *pb += w * (*px++);
  256. pb++;
  257. *pb += w * (*px++);
  258. pb++;
  259. /* Decrement the loop counter */
  260. tapCnt--;
  261. }
  262. /* If the filter length is not a multiple of 4, compute the remaining filter taps */
  263. tapCnt = numTaps % 0x4u;
  264. while(tapCnt > 0u)
  265. {
  266. /* Perform the multiply-accumulate */
  267. *pb += w * (*px++);
  268. pb++;
  269. /* Decrement the loop counter */
  270. tapCnt--;
  271. }
  272. x0 = *pState;
  273. /* Advance state pointer by 1 for the next sample */
  274. pState = pState + 1;
  275. /* Decrement the loop counter */
  276. blkCnt--;
  277. }
  278. S->energy = energy;
  279. S->x0 = x0;
  280. /* Processing is complete. Now copy the last numTaps - 1 samples to the
  281. satrt of the state buffer. This prepares the state buffer for the
  282. next function call. */
  283. /* Points to the start of the pState buffer */
  284. pStateCurnt = S->pState;
  285. /* Loop unrolling for (numTaps - 1u)/4 samples copy */
  286. tapCnt = (numTaps - 1u) >> 2u;
  287. /* copy data */
  288. while(tapCnt > 0u)
  289. {
  290. *pStateCurnt++ = *pState++;
  291. *pStateCurnt++ = *pState++;
  292. *pStateCurnt++ = *pState++;
  293. *pStateCurnt++ = *pState++;
  294. /* Decrement the loop counter */
  295. tapCnt--;
  296. }
  297. /* Calculate remaining number of copies */
  298. tapCnt = (numTaps - 1u) % 0x4u;
  299. /* Copy the remaining q31_t data */
  300. while(tapCnt > 0u)
  301. {
  302. *pStateCurnt++ = *pState++;
  303. /* Decrement the loop counter */
  304. tapCnt--;
  305. }
  306. #else
  307. /* Run the below code for Cortex-M0 */
  308. while(blkCnt > 0u)
  309. {
  310. /* Copy the new input sample into the state buffer */
  311. *pStateCurnt++ = *pSrc;
  312. /* Initialize pState pointer */
  313. px = pState;
  314. /* Initialize pCoeffs pointer */
  315. pb = pCoeffs;
  316. /* Read the sample from input buffer */
  317. in = *pSrc++;
  318. /* Update the energy calculation */
  319. energy -= x0 * x0;
  320. energy += in * in;
  321. /* Set the accumulator to zero */
  322. sum = 0.0f;
  323. /* Loop over numTaps number of values */
  324. tapCnt = numTaps;
  325. while(tapCnt > 0u)
  326. {
  327. /* Perform the multiply-accumulate */
  328. sum += (*px++) * (*pb++);
  329. /* Decrement the loop counter */
  330. tapCnt--;
  331. }
  332. /* The result in the accumulator is stored in the destination buffer. */
  333. *pOut++ = sum;
  334. /* Compute and store error */
  335. d = (float32_t) (*pRef++);
  336. e = d - sum;
  337. *pErr++ = e;
  338. /* Calculation of Weighting factor for updating filter coefficients */
  339. /* epsilon value 0.000000119209289f */
  340. w = (e * mu) / (energy + 0.000000119209289f);
  341. /* Initialize pState pointer */
  342. px = pState;
  343. /* Initialize pCcoeffs pointer */
  344. pb = pCoeffs;
  345. /* Loop over numTaps number of values */
  346. tapCnt = numTaps;
  347. while(tapCnt > 0u)
  348. {
  349. /* Perform the multiply-accumulate */
  350. *pb += w * (*px++);
  351. pb++;
  352. /* Decrement the loop counter */
  353. tapCnt--;
  354. }
  355. x0 = *pState;
  356. /* Advance state pointer by 1 for the next sample */
  357. pState = pState + 1;
  358. /* Decrement the loop counter */
  359. blkCnt--;
  360. }
  361. S->energy = energy;
  362. S->x0 = x0;
  363. /* Processing is complete. Now copy the last numTaps - 1 samples to the
  364. satrt of the state buffer. This prepares the state buffer for the
  365. next function call. */
  366. /* Points to the start of the pState buffer */
  367. pStateCurnt = S->pState;
  368. /* Copy (numTaps - 1u) samples */
  369. tapCnt = (numTaps - 1u);
  370. /* Copy the remaining q31_t data */
  371. while(tapCnt > 0u)
  372. {
  373. *pStateCurnt++ = *pState++;
  374. /* Decrement the loop counter */
  375. tapCnt--;
  376. }
  377. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  378. }
  379. /**
  380. * @} end of LMS_NORM group
  381. */