From 15f559d816f83e107380b7d7592e63be519ef32f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Hamal=20Dvo=C5=99=C3=A1k?= Date: Sat, 3 Aug 2024 09:48:00 +0200 Subject: [PATCH] Optimize decimation performance --- src/main.c | 95 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/src/main.c b/src/main.c index b35603b..b25a228 100644 --- a/src/main.c +++ b/src/main.c @@ -24,7 +24,7 @@ #define VREG_VOLTAGE VREG_VOLTAGE_1_20 #define CLK_SYS_HZ (288 * MHZ) -#define INIT_SAMPLE_RATE 100000 +#define INIT_SAMPLE_RATE 200000 #define INIT_FREQ 94600000 #define INIT_GAIN 127 @@ -64,20 +64,19 @@ static uint32_t nco_addr = (uint32_t)lo_phase; #define DECIMATE 16 #define RX_BITS_DEPTH 8 #define RX_WORDS (1 << (RX_BITS_DEPTH - 2)) +#define RX_STRIDE (2 * DECIMATE) -static_assert(RX_WORDS >= 4 * DECIMATE, "RX_WORDS >= 4 * DECIMATE"); +static_assert(RX_WORDS >= 2 * RX_STRIDE, "RX_WORDS >= 2 * RX_STRIDE"); static uint32_t rx_cos[RX_WORDS] __attribute__((__aligned__(1 << RX_BITS_DEPTH))); -static const uint32_t *rx_start = rx_cos; -static const uint32_t *rx_end = rx_cos + RX_WORDS - 1; - #define NUM_GAINS 29 static int gains[NUM_GAINS] = { 0, 9, 14, 27, 37, 77, 87, 125, 144, 157, 166, 197, 207, 229, 254, 280, 297, 328, 338, 364, 372, 386, 402, 421, 434, 439, 445, 480, 496 }; static int sample_rate = INIT_SAMPLE_RATE; -static int dc_level = CLK_SYS_HZ / INIT_SAMPLE_RATE / 2; +static int max_amplitude = CLK_SYS_HZ / INIT_SAMPLE_RATE / 2; +static int max_amplitude_mul = 65536 / (CLK_SYS_HZ / INIT_SAMPLE_RATE / 2); static int gain = INIT_GAIN; static int frequency = INIT_FREQ; @@ -456,97 +455,116 @@ struct IQ { int I, Q; }; -inline static int get_next_sample() +inline static const uint32_t *next_stride() { - static const uint32_t *tail = rx_cos; + static int tail = 0; - const uint32_t *head = (const uint32_t *)dma_hw->ch[dma_ch_in_cos].write_addr; + int head, delta; - while (head == tail) { - asm volatile("nop; nop; nop; nop"); - head = (const uint32_t *)dma_hw->ch[dma_ch_in_cos].write_addr; - } +loop: + head = (dma_hw->ch[dma_ch_in_cos].write_addr >> 2) & (RX_WORDS - 1); + delta = head - tail; - int value = -(*tail++); - value *= 2; - value -= *tail++; + if (delta < 0) + delta += RX_WORDS; - if (tail > rx_end) - tail = rx_start; + if (delta < RX_STRIDE) + goto loop; - return gain * value - dc_level; + const uint32_t *stride = rx_cos + tail; + + tail = (tail + RX_STRIDE) & (RX_WORDS - 1); + + return stride; +} + +inline static int nextQ(const uint32_t **stride) +{ + int x2 = *(*stride)++; + int x1 = *(*stride)++; + + return (x2 << 1) + x1 + max_amplitude; } inline static struct IQ next_sample() { int I = 0, Q = 0; - int x15 = get_next_sample(); + const uint32_t *stride = next_stride(); + + int x15 = nextQ(&stride); I += 93 * x15; Q += 39 * x15; - int x14 = get_next_sample(); + int x14 = nextQ(&stride); I += 71 * x14; Q += 71 * x14; - int x13 = get_next_sample(); + int x13 = nextQ(&stride); I += 39 * x13; Q += 93 * x13; - int x12 = get_next_sample(); + int x12 = nextQ(&stride); I += 0 * x12; Q += 101 * x12; - int x11 = get_next_sample(); + int x11 = nextQ(&stride); I += -39 * x11; Q += 93 * x11; - int x10 = get_next_sample(); + int x10 = nextQ(&stride); I += -71 * x10; Q += 71 * x10; - int x09 = get_next_sample(); + int x09 = nextQ(&stride); I += -93 * x09; Q += 39 * x09; - int x08 = get_next_sample(); + int x08 = nextQ(&stride); I += -101 * x08; Q += 0 * x08; - int x07 = get_next_sample(); + int x07 = nextQ(&stride); I += -93 * x07; Q += -39 * x07; - int x06 = get_next_sample(); + int x06 = nextQ(&stride); I += -71 * x06; Q += -71 * x06; - int x05 = get_next_sample(); + int x05 = nextQ(&stride); I += -39 * x05; Q += -93 * x05; - int x04 = get_next_sample(); + int x04 = nextQ(&stride); I += 0 * x04; Q += -101 * x04; - int x03 = get_next_sample(); + int x03 = nextQ(&stride); I += 39 * x03; Q += -93 * x03; - int x02 = get_next_sample(); + int x02 = nextQ(&stride); I += 71 * x02; Q += -71 * x02; - int x01 = get_next_sample(); + int x01 = nextQ(&stride); I += 93 * x01; Q += -39 * x01; - int x00 = get_next_sample(); + int x00 = nextQ(&stride); I += 101 * x00; Q += 0 * x00; + I *= gain; I /= 1024; + I *= max_amplitude_mul; + I /= (1 << 16); + + Q *= gain; Q /= 1024; + Q *= max_amplitude_mul; + Q /= (1 << 16); return (struct IQ){ I, Q }; } @@ -568,8 +586,6 @@ static void rf_rx(void) int64_t I = IQ.I; int64_t Q = IQ.Q; - I /= dc_level; - if (I > 127) I = 127; else if (I < -128) @@ -577,8 +593,6 @@ static void rf_rx(void) *blockptr++ = (uint8_t)I + 128; - Q /= dc_level; - if (Q > 127) Q = 127; else if (Q < -128) @@ -602,7 +616,8 @@ static void run_command(uint8_t cmd, uint32_t arg) } else if (0x02 == cmd) { /* Set the rate at which IQ sample pairs are sent */ sample_rate = arg; - dc_level = CLK_SYS_HZ / sample_rate / 2; + max_amplitude = CLK_SYS_HZ / sample_rate / 2; + max_amplitude_mul = 65536 / max_amplitude; dma_timer_set_fraction(dma_t_samp, 1, CLK_SYS_HZ / (sample_rate * DECIMATE)); rx_lo_init(frequency + sample_rate); } else if (0x04 == cmd) {