138 lines
4.3 KiB
C
138 lines
4.3 KiB
C
#include <math.h>
|
|
#include <endian.h>
|
|
|
|
#include "dsp/helpers/tables.h"
|
|
|
|
/* Convert (little-endian) SC16 values with a range of -2048..+2047 to unsigned 16-bit magnitudes */
|
|
|
|
void STARCH_IMPL(magnitude_sc16q11, exact_u32) (const sc16_t *in, uint16_t *out, unsigned len)
|
|
{
|
|
const sc16_t * restrict in_align = STARCH_ALIGNED(in);
|
|
uint16_t * restrict out_align = STARCH_ALIGNED(out);
|
|
|
|
while (len--) {
|
|
uint32_t I = abs((int16_t) le16toh(in_align[0].I));
|
|
uint32_t Q = abs((int16_t) le16toh(in_align[0].Q));
|
|
|
|
uint32_t magsq = I * I + Q * Q;
|
|
float mag = sqrtf(magsq) * 32;
|
|
if (mag > 65535.0)
|
|
mag = 65535.0;
|
|
out_align[0] = (uint16_t)mag;
|
|
|
|
out_align += 1;
|
|
in_align += 1;
|
|
}
|
|
}
|
|
|
|
void STARCH_IMPL(magnitude_sc16q11, exact_float) (const sc16_t *in, uint16_t *out, unsigned len)
|
|
{
|
|
const sc16_t * restrict in_align = STARCH_ALIGNED(in);
|
|
uint16_t * restrict out_align = STARCH_ALIGNED(out);
|
|
|
|
while (len--) {
|
|
float I = abs((int16_t) le16toh(in_align[0].I)) * 32;
|
|
float Q = abs((int16_t) le16toh(in_align[0].Q)) * 32;
|
|
|
|
float magsq = I * I + Q * Q;
|
|
float mag = sqrtf(magsq);
|
|
if (mag > 65535.0)
|
|
mag = 65535.0;
|
|
out_align[0] = (uint16_t)mag;
|
|
|
|
out_align += 1;
|
|
in_align += 1;
|
|
}
|
|
}
|
|
|
|
/*
 * Table-driven magnitude using absolute values (11 bits per component).
 *
 * For each of the `len` samples in `in`, takes the absolute value of the
 * little-endian I and Q components, limits each to 2047, and uses them to
 * index the table returned by get_sc16q11_mag_11bit_table(): |Q| occupies
 * bits 11..21 of the index and |I| bits 0..10. The table entry is written
 * to `out` unchanged.
 *
 * The table contents are defined elsewhere; presumably they are the
 * precomputed magnitudes for each (|I|, |Q|) pair -- confirm against
 * dsp/helpers/tables.h.
 *
 *   in   input samples (one I/Q pair per element)
 *   out  receives one 16-bit table entry per input sample
 *   len  number of samples to convert
 */
void STARCH_IMPL(magnitude_sc16q11, 11bit_table) (const sc16_t *in, uint16_t *out, unsigned len)
{
    const uint16_t * restrict lut = get_sc16q11_mag_11bit_table();
    const sc16_t * restrict src = STARCH_ALIGNED(in);
    uint16_t * restrict dst = STARCH_ALIGNED(out);

    for (unsigned n = 0; n < len; ++n) {
        uint16_t abs_i = abs((int16_t) le16toh(src[n].I));
        uint16_t abs_q = abs((int16_t) le16toh(src[n].Q));

        /* |-2048| (and any out-of-range input) does not fit in 11 bits; saturate. */
        if (abs_i > 2047) {
            abs_i = 2047;
        }
        if (abs_q > 2047) {
            abs_q = 2047;
        }

        dst[n] = lut[(abs_q << 11) | abs_i];
    }
}
|
|
|
|
/*
 * Table-driven magnitude using the raw 12-bit two's-complement components.
 *
 * For each of the `len` samples in `in`, keeps the low 12 bits of the I and
 * Q components and uses them to index the table returned by
 * get_sc16q11_mag_12bit_table(): I occupies bits 12..23 of the index and Q
 * bits 0..11. The table entry is written to `out` unchanged.
 *
 * The input is little-endian, so each component is passed through
 * le16toh() before masking, as the other scalar implementations in this
 * file do. On little-endian hosts this is a no-op; on big-endian hosts it
 * is required, because masking the byte-swapped value would keep the wrong
 * 12 bits.
 *
 * Values outside -2048..+2047 are not clamped; only their low 12 bits are
 * used.
 *
 * The table contents are defined elsewhere; presumably they are the
 * precomputed magnitudes for each (I, Q) pair -- confirm against
 * dsp/helpers/tables.h.
 *
 *   in   input samples (one I/Q pair per element)
 *   out  receives one 16-bit table entry per input sample
 *   len  number of samples to convert
 */
void STARCH_IMPL(magnitude_sc16q11, 12bit_table) (const sc16_t *in, uint16_t *out, unsigned len)
{
    const uint16_t * restrict table = get_sc16q11_mag_12bit_table();
    const sc16_t * restrict in_align = STARCH_ALIGNED(in);
    uint16_t * restrict out_align = STARCH_ALIGNED(out);

    for (unsigned n = 0; n < len; ++n) {
        const unsigned i_bits = le16toh(in_align[n].I) & 4095;
        const unsigned q_bits = le16toh(in_align[n].Q) & 4095;

        out_align[n] = table[(i_bits << 12) | q_bits];
    }
}
|
|
|
|
#ifdef STARCH_FEATURE_NEON
|
|
|
|
#include <arm_neon.h>
|
|
|
|
/*
 * NEON magnitude using the reciprocal square root estimate.
 *
 * Processes `len` samples from `in`, four at a time, then handles the
 * remaining 0..3 samples one at a time with the same arithmetic. The input
 * is accessed as a flat array of int16_t with I and Q interleaved.
 *
 * Per sample: I*I + Q*Q is formed as an unsigned Q22 value, converted to
 * float, turned into a square root via x * (1/sqrt(x)), converted back to
 * unsigned fixed point with 16 fractional bits and narrowed to 16 bits with
 * saturation.
 *
 * NOTE(review): unlike the scalar implementations, no le16toh() is applied
 * here; the samples are loaded in host byte order.
 *
 *   in   input samples (one I/Q pair per element)
 *   out  receives one 16-bit magnitude per input sample
 *   len  number of samples to convert
 */
void STARCH_IMPL_REQUIRES(magnitude_sc16q11, neon_vrsqrte, STARCH_FEATURE_NEON) (const sc16_t *in, uint16_t *out, unsigned len)
{
    const int16_t * restrict src = (const int16_t *) STARCH_ALIGNED(in);
    uint16_t * restrict dst = STARCH_ALIGNED(out);

    /* The square root is derived from NEON's floating-point reciprocal square
     * root estimate (vrsqrte). The estimate is good to roughly 9 bits of
     * mantissa, which is sufficient here.
     *
     * NOTE(review): for an all-zero sample the estimate is presumably +inf and
     * the product NaN; this relies on the float->fixed conversion turning that
     * into 0 -- confirm against the Arm documentation.
     */

    /* Main loop: four samples (eight int16 values) per iteration. */
    for (unsigned quads = len >> 2; quads > 0; --quads) {
        const int16x4x2_t pairs = vld2_s16(src);    /* de-interleaving load */
        const int16x4_t i_q11 = pairs.val[0];       /* Q11 */
        const int16x4_t q_q11 = pairs.val[1];       /* Q11 */

        const uint32x4_t i_sq = vreinterpretq_u32_s32(vmull_s16(i_q11, i_q11));    /* Q22, unsigned */
        const uint32x4_t q_sq = vreinterpretq_u32_s32(vmull_s16(q_q11, q_q11));    /* Q22, unsigned */
        const uint32x4_t power = vqaddq_u32(i_sq, q_sq);                           /* Q22, unsigned */

        const float32x4_t power_f = vcvtq_n_f32_u32(power, 22);
        const float32x4_t root_f = vmulq_f32(power_f, vrsqrteq_f32(power_f));      /* sqrt(x) = x * (1/sqrt(x)) */
        const uint16x4_t root_u16 = vqmovn_u32(vcvtq_n_u32_f32(root_f, 16));       /* saturating narrow */

        vst1_u16(dst, root_u16);

        src += 8;
        dst += 4;
    }

    /* Tail: one sample per iteration, broadcast to all lanes; only lane 0 is stored. */
    for (unsigned rest = len & 3; rest > 0; --rest) {
        const int16x4x2_t pairs = vld2_dup_s16(src);
        const int16x4_t i_q11 = pairs.val[0];
        const int16x4_t q_q11 = pairs.val[1];

        const uint32x4_t i_sq = vreinterpretq_u32_s32(vmull_s16(i_q11, i_q11));
        const uint32x4_t q_sq = vreinterpretq_u32_s32(vmull_s16(q_q11, q_q11));
        const uint32x4_t power = vqaddq_u32(i_sq, q_sq);

        const float32x4_t power_f = vcvtq_n_f32_u32(power, 22);
        const float32x4_t root_f = vmulq_f32(power_f, vrsqrteq_f32(power_f));
        const uint16x4_t root_u16 = vqmovn_u32(vcvtq_n_u32_f32(root_f, 16));

        vst1_lane_u16(dst, root_u16, 0);

        src += 2;
        dst += 1;
    }
}
|
|
|
|
#endif /* STARCH_FEATURE_NEON */
|