Optimize decimation performance

This commit is contained in:
Jan Hamal Dvořák 2024-08-03 08:50:39 +02:00
parent 49cf85006d
commit 347582cfc1

View file

@@ -24,7 +24,7 @@
#define VREG_VOLTAGE VREG_VOLTAGE_1_20 #define VREG_VOLTAGE VREG_VOLTAGE_1_20
#define CLK_SYS_HZ (288 * MHZ) #define CLK_SYS_HZ (288 * MHZ)
#define INIT_SAMPLE_RATE 100000 #define INIT_SAMPLE_RATE 200000
#define INIT_FREQ 94600000 #define INIT_FREQ 94600000
#define INIT_GAIN 127 #define INIT_GAIN 127
@@ -62,20 +62,21 @@ static uint32_t lo_phase[LO_NUM_PHASES][LO_PHASE_WORDS]
static uint32_t nco_addr = (uint32_t)lo_phase; static uint32_t nco_addr = (uint32_t)lo_phase;
#define DECIMATE 16 #define DECIMATE 16
#define RX_BITS_DEPTH 10 #define RX_BITS_DEPTH 8
#define RX_WORDS (1 << (RX_BITS_DEPTH - 2)) #define RX_WORDS (1 << (RX_BITS_DEPTH - 2))
#define RX_STRIDE (2 * DECIMATE)
static_assert(RX_WORDS >= 2 * RX_STRIDE, "RX_WORDS >= 2 * RX_STRIDE");
static uint32_t rx_cos[RX_WORDS] __attribute__((__aligned__(1 << RX_BITS_DEPTH))); static uint32_t rx_cos[RX_WORDS] __attribute__((__aligned__(1 << RX_BITS_DEPTH)));
static const uint32_t *rx_start = rx_cos;
static const uint32_t *rx_end = rx_cos + RX_WORDS - 1;
#define NUM_GAINS 29 #define NUM_GAINS 29
static int gains[NUM_GAINS] = { 0, 9, 14, 27, 37, 77, 87, 125, 144, 157, static int gains[NUM_GAINS] = { 0, 9, 14, 27, 37, 77, 87, 125, 144, 157,
166, 197, 207, 229, 254, 280, 297, 328, 338, 364, 166, 197, 207, 229, 254, 280, 297, 328, 338, 364,
372, 386, 402, 421, 434, 439, 445, 480, 496 }; 372, 386, 402, 421, 434, 439, 445, 480, 496 };
static int sample_rate = INIT_SAMPLE_RATE; static int sample_rate = INIT_SAMPLE_RATE;
static int dc_level = CLK_SYS_HZ / INIT_SAMPLE_RATE / 2; static int max_amplitude = CLK_SYS_HZ / INIT_SAMPLE_RATE / 2;
static int max_amplitude_mul = 65536 / (CLK_SYS_HZ / INIT_SAMPLE_RATE / 2);
static int gain = INIT_GAIN; static int gain = INIT_GAIN;
static int frequency = INIT_FREQ; static int frequency = INIT_FREQ;
@@ -454,97 +455,116 @@ struct IQ {
int I, Q; int I, Q;
}; };
inline static int get_next_sample() inline static const uint32_t *next_stride()
{ {
static const uint32_t *tail = rx_cos; static int tail = 0;
const uint32_t *head = (const uint32_t *)dma_hw->ch[dma_ch_in_cos].write_addr; int head, delta;
while (head == tail) { loop:
asm volatile("nop; nop; nop; nop"); head = (dma_hw->ch[dma_ch_in_cos].write_addr >> 2) & (RX_WORDS - 1);
head = (const uint32_t *)dma_hw->ch[dma_ch_in_cos].write_addr; delta = head - tail;
if (delta < 0)
delta += RX_WORDS;
if (delta < RX_STRIDE)
goto loop;
const uint32_t *stride = rx_cos + tail;
tail = (tail + RX_STRIDE) & (RX_WORDS - 1);
return stride;
} }
int value = -(*tail++); inline static int nextQ(const uint32_t **stride)
value *= 2; {
value -= *tail++; int x2 = *(*stride)++;
int x1 = *(*stride)++;
if (tail > rx_end) return x2 + x2 + x1 + max_amplitude;
tail = rx_start;
return gain * value - dc_level;
} }
inline static struct IQ next_sample() inline static struct IQ next_sample()
{ {
int I = 0, Q = 0; int I = 0, Q = 0;
int x15 = get_next_sample(); const uint32_t *stride = next_stride();
int x15 = nextQ(&stride);
I += 93 * x15; I += 93 * x15;
Q += 39 * x15; Q += 39 * x15;
int x14 = get_next_sample(); int x14 = nextQ(&stride);
I += 71 * x14; I += 71 * x14;
Q += 71 * x14; Q += 71 * x14;
int x13 = get_next_sample(); int x13 = nextQ(&stride);
I += 39 * x13; I += 39 * x13;
Q += 93 * x13; Q += 93 * x13;
int x12 = get_next_sample(); int x12 = nextQ(&stride);
I += 0 * x12; I += 0 * x12;
Q += 101 * x12; Q += 101 * x12;
int x11 = get_next_sample(); int x11 = nextQ(&stride);
I += -39 * x11; I += -39 * x11;
Q += 93 * x11; Q += 93 * x11;
int x10 = get_next_sample(); int x10 = nextQ(&stride);
I += -71 * x10; I += -71 * x10;
Q += 71 * x10; Q += 71 * x10;
int x09 = get_next_sample(); int x09 = nextQ(&stride);
I += -93 * x09; I += -93 * x09;
Q += 39 * x09; Q += 39 * x09;
int x08 = get_next_sample(); int x08 = nextQ(&stride);
I += -101 * x08; I += -101 * x08;
Q += 0 * x08; Q += 0 * x08;
int x07 = get_next_sample(); int x07 = nextQ(&stride);
I += -93 * x07; I += -93 * x07;
Q += -39 * x07; Q += -39 * x07;
int x06 = get_next_sample(); int x06 = nextQ(&stride);
I += -71 * x06; I += -71 * x06;
Q += -71 * x06; Q += -71 * x06;
int x05 = get_next_sample(); int x05 = nextQ(&stride);
I += -39 * x05; I += -39 * x05;
Q += -93 * x05; Q += -93 * x05;
int x04 = get_next_sample(); int x04 = nextQ(&stride);
I += 0 * x04; I += 0 * x04;
Q += -101 * x04; Q += -101 * x04;
int x03 = get_next_sample(); int x03 = nextQ(&stride);
I += 39 * x03; I += 39 * x03;
Q += -93 * x03; Q += -93 * x03;
int x02 = get_next_sample(); int x02 = nextQ(&stride);
I += 71 * x02; I += 71 * x02;
Q += -71 * x02; Q += -71 * x02;
int x01 = get_next_sample(); int x01 = nextQ(&stride);
I += 93 * x01; I += 93 * x01;
Q += -39 * x01; Q += -39 * x01;
int x00 = get_next_sample(); int x00 = nextQ(&stride);
I += 101 * x00; I += 101 * x00;
Q += 0 * x00; Q += 0 * x00;
I *= gain;
I /= 1024; I /= 1024;
I *= max_amplitude_mul;
I /= (1 << 16);
Q *= gain;
Q /= 1024; Q /= 1024;
Q *= max_amplitude_mul;
Q /= (1 << 16);
return (struct IQ){ I, Q }; return (struct IQ){ I, Q };
} }
@@ -566,8 +586,6 @@ static void rf_rx(void)
int64_t I = IQ.I; int64_t I = IQ.I;
int64_t Q = IQ.Q; int64_t Q = IQ.Q;
I /= dc_level;
if (I > 127) if (I > 127)
I = 127; I = 127;
else if (I < -128) else if (I < -128)
@@ -575,8 +593,6 @@ static void rf_rx(void)
*blockptr++ = (uint8_t)I + 128; *blockptr++ = (uint8_t)I + 128;
Q /= dc_level;
if (Q > 127) if (Q > 127)
Q = 127; Q = 127;
else if (Q < -128) else if (Q < -128)
@@ -600,7 +616,8 @@ static void run_command(uint8_t cmd, uint32_t arg)
} else if (0x02 == cmd) { } else if (0x02 == cmd) {
/* Set the rate at which IQ sample pairs are sent */ /* Set the rate at which IQ sample pairs are sent */
sample_rate = arg; sample_rate = arg;
dc_level = CLK_SYS_HZ / sample_rate / 2; max_amplitude = CLK_SYS_HZ / sample_rate / 2;
max_amplitude_mul = 65536 / max_amplitude;
dma_timer_set_fraction(dma_t_samp, 1, CLK_SYS_HZ / (sample_rate * DECIMATE)); dma_timer_set_fraction(dma_t_samp, 1, CLK_SYS_HZ / (sample_rate * DECIMATE));
rx_lo_init(frequency + sample_rate); rx_lo_init(frequency + sample_rate);
} else if (0x04 == cmd) { } else if (0x04 == cmd) {