From d99f61dd6b70c60f0425b3e12207605d6ef27e70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Hamal=20Dvo=C5=99=C3=A1k?= <mordae@anilinux.org>
Date: Sun, 4 Aug 2024 17:32:14 +0200
Subject: [PATCH] Convert to QSD

---
 grc/PicoSDR-WBFM.grc |   6 +-
 src/main.c           | 819 +++++++++++++++++++++++++------------------
 util/bridge.py       |   4 +
 3 files changed, 484 insertions(+), 345 deletions(-)

diff --git a/grc/PicoSDR-WBFM.grc b/grc/PicoSDR-WBFM.grc
index 7f2a5c0..4646137 100644
--- a/grc/PicoSDR-WBFM.grc
+++ b/grc/PicoSDR-WBFM.grc
@@ -37,7 +37,7 @@ blocks:
   id: variable
   parameters:
     comment: ''
-    value: '88_200_000'
+    value: '94_615_000'
   states:
     bus_sink: false
     bus_source: false
@@ -49,7 +49,7 @@ blocks:
   id: variable
   parameters:
     comment: ''
-    value: '192_000'
+    value: '200_000'
   states:
     bus_sink: false
     bus_source: false
@@ -374,7 +374,7 @@ blocks:
     freq7: 100e6
     freq8: 100e6
     freq9: 100e6
-    gain0: '30'
+    gain0: '24'
     gain1: '10'
     gain10: '10'
     gain11: '10'
diff --git a/src/main.c b/src/main.c
index a4b3bc3..f4357d2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,90 +20,120 @@
 #include <stdio.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 
 #define VREG_VOLTAGE VREG_VOLTAGE_1_20
 #define CLK_SYS_HZ (300 * MHZ)
 
+/* Pin mapping */
+#define PIN_RX0 6
+#define PIN_RX1 7
+#define PIN_RX2 8
+#define PIN_RX3 9
+
+#define PIN_FB0 10
+#define PIN_FB1 11
+#define PIN_FB2 12
+#define PIN_FB3 13
+
+#define PIN_A 14
+#define PIN_B 15
+
+#define PSU_PIN 23
+
+#define SM_LO 0
+#define SM_FB 1
+#define SM_RXI 2
+#define SM_RXQ 3
+
+#define SM_ACCI0 0
+#define SM_ACCI1 1
+#define SM_ACCQ0 2
+#define SM_ACCQ1 3
+
+/* PIO code origins */
+static int8_t origin_lo = -1;
+static int8_t origin_fb = -1;
+static int8_t origin_rx = 0;
+static int8_t origin_acc = 0;
+
+/*
+ * NCO (Numerically Controlled Oscillator)
+ * Must have 256 phases with 256 bytes each for 1-byte DMA writes to work.
+ */
+#define NCO_NUM_PHASES 256
+#define NCO_PHASE_BITS 8
+#define NCO_PHASE_WORDS (1 << (NCO_PHASE_BITS - 2))
+#define NCO_PHASE_COS (3u << 30)
+#define NCO_PHASE_SIN 0
+
+static uint32_t nco_phase[NCO_NUM_PHASES][NCO_PHASE_WORDS]
+	__attribute__((__aligned__(NCO_NUM_PHASES * 4 * NCO_PHASE_WORDS)));
+
+static uint32_t nco_addr = (uint32_t)nco_phase;
+static uint32_t nco_step_base = 0x80000000;
+static uint32_t nco_step = 0x80000000;
+static uint32_t nco_null = 0;
+
+/* Bit combinations to output for { I+ Q+ I- Q- } */
+static const uint32_t nco_quadrature[] = { 2, 3, 1, 0 };
+
+/* Sampling and gain */
 #define INIT_SAMPLE_RATE 200000
 #define INIT_FREQ 94600000
 #define INIT_GAIN 127
-
-#define LO_PIN 9
-#define RX_PIN 13
-#define FB_PIN 5
-#define PSU_PIN 23
-
-#define PIO pio1
-#define SM_LO 0
-#define SM_FB 1
-#define SM_RX 2
-#define SM_AD 3
-
-#define IQ_SAMPLES 32
-#define IQ_BLOCK_LEN (2 * IQ_SAMPLES)
-#define IQ_QUEUE_LEN 8
-
-/*
- * NOTE: Must have 256 phases with 256 bytes each.
- *       Otherwise the DMA 1-byte write trick wouldn't work.
- */
-
-#define LO_NUM_PHASES 256
-#define LO_PHASE_BITS 8
-#define LO_PHASE_WORDS (1 << (LO_PHASE_BITS - 2))
-#define STEP_BASE ((UINT_MAX + 1.0) / CLK_SYS_HZ)
-
-static uint32_t nco_step = (uint32_t)(STEP_BASE * INIT_FREQ) * 32 * LO_PHASE_WORDS;
-static uint32_t nco_null = 0;
-
-static uint32_t lo_phase[LO_NUM_PHASES][LO_PHASE_WORDS]
-	__attribute__((__aligned__(LO_NUM_PHASES * 4 * LO_PHASE_WORDS)));
-
-static uint32_t nco_addr = (uint32_t)lo_phase;
-
-#define DECIMATE 16
-#define RX_BITS_DEPTH 8
-#define RX_WORDS (1 << (RX_BITS_DEPTH - 2))
-#define RX_STRIDE (2 * DECIMATE)
-
-static_assert(RX_WORDS >= 2 * RX_STRIDE, "RX_WORDS >= 2 * RX_STRIDE");
-
-static uint32_t rx_cos[RX_WORDS] __attribute__((__aligned__(1 << RX_BITS_DEPTH)));
-
 #define NUM_GAINS 29
+#define DECIMATE 4
+
 static int gains[NUM_GAINS] = { 0,   9,	  14,  27,  37,	 77,  87,  125, 144, 157,
 				166, 197, 207, 229, 254, 280, 297, 328, 338, 364,
 				372, 386, 402, 421, 434, 439, 445, 480, 496 };
-static int sample_rate = INIT_SAMPLE_RATE;
-static int max_amplitude = CLK_SYS_HZ / INIT_SAMPLE_RATE / 2;
-static int max_amplitude_mul = 65536 / (CLK_SYS_HZ / INIT_SAMPLE_RATE / 2);
 static int gain = INIT_GAIN;
 static int frequency = INIT_FREQ;
+static int sample_rate = INIT_SAMPLE_RATE;
+static int max_amplitude = CLK_SYS_HZ / INIT_SAMPLE_RATE;
 
-static int dma_ch_rx1 = -1;
-static int dma_ch_rx2 = -1;
+/* Spare parameter for whatever we need; it misuses the PPM command. */
+static int tweak = 0;
 
-static int dma_ch_nco1 = -1;
-static int dma_ch_nco2 = -1;
-static int dma_ch_nco3 = -1;
-static int dma_ch_mix = -1;
-
-static int dma_ch_samp_cos = -1;
-
-static int dma_t_samp = -1;
-
-static int dma_ch_in_cos = -1;
+/* Output queue */
+#define IQ_SAMPLES 32
+#define IQ_BLOCK_LEN (2 * IQ_SAMPLES)
+#define IQ_QUEUE_LEN 8
 
 static queue_t iq_queue;
 static uint8_t iq_queue_buffer[IQ_QUEUE_LEN][IQ_BLOCK_LEN];
 static size_t iq_queue_pos = 0;
 
-static uint32_t rnd = 0;
+#define RX_BIT_DEPTH 11
+#define RX_WORDS (1 << (RX_BIT_DEPTH - 1))
 
-static int origin_lo = -1;
-static int origin_rx = -1;
-static int origin_fb = -1;
-static int origin_ad = 0;
+/* NCO phase accumulation, address preparation, LO triggering */
+static int dma_ch_nco1 = -1;
+static int dma_ch_nco2 = -1;
+static int dma_ch_nco3 = -1;
+
+/* Driving multiplexer A, B pins using NCO data */
+static int dma_ch_lo = -1;
+
+/* Receiving [I+, I-] and [Q+, Q-] data */
+static int dma_ch_rxi = -1;
+static int dma_ch_rxq = -1;
+
+/* Sampling the accumulators */
+static int dma_ch_samp_i0 = -1;
+static int dma_ch_samp_i1 = -1;
+static int dma_ch_samp_q0 = -1;
+static int dma_ch_samp_q1 = -1;
+
+/* Sampling rate limiter */
+static int dma_t_samp = -1;
+
+/* Sampling instruction and DMA script. */
+static const uint32_t samp_insn = 16;
+
+/* Random number generator */
+static uint32_t rnd = 0;
 
 inline static __unused uint32_t rnd_next()
 {
@@ -111,6 +141,10 @@ inline static __unused uint32_t rnd_next()
 	return rnd;
 }
 
+/*
+ * Remove chaining on a given DMA channel.
+ * Handy when one wants to abort a chained DMA channel.
+ */
 static void dma_channel_clear_chain_to(int ch)
 {
 	uint32_t ctrl = dma_hw->ch[ch].al1_ctrl;
@@ -119,16 +153,34 @@ static void dma_channel_clear_chain_to(int ch)
 	dma_hw->ch[ch].al1_ctrl = ctrl;
 }
 
+/*
+ * Busy-wait until the RX FIFO of state machine `sm` is non-empty, then
+ * return one word from it. The `pio` and `sm` arguments are not validated.
+ */
+inline static uint32_t pio_sm_get_blocking_unsafe(pio_hw_t *pio, int sm)
+{
+	while (pio->fstat & (1u << (PIO_FSTAT_RXEMPTY_LSB + sm)))
+		asm volatile("nop");
+
+	return pio->rxf[sm];
+}
+
 static void init_lo()
 {
-	gpio_disable_pulls(LO_PIN);
-	pio_gpio_init(PIO, LO_PIN);
+	gpio_disable_pulls(PIN_A);
+	gpio_disable_pulls(PIN_B);
 
-	gpio_set_drive_strength(LO_PIN, GPIO_DRIVE_STRENGTH_12MA);
-	gpio_set_slew_rate(LO_PIN, GPIO_SLEW_RATE_FAST);
+	pio_gpio_init(pio0, PIN_A);
+	pio_gpio_init(pio0, PIN_B);
+
+	gpio_set_drive_strength(PIN_A, GPIO_DRIVE_STRENGTH_12MA);
+	gpio_set_drive_strength(PIN_B, GPIO_DRIVE_STRENGTH_12MA);
+
+	gpio_set_slew_rate(PIN_A, GPIO_SLEW_RATE_FAST);
+	gpio_set_slew_rate(PIN_B, GPIO_SLEW_RATE_FAST);
 
 	const uint16_t insn[] = {
-		pio_encode_out(pio_pindirs, 1),
+		pio_encode_out(pio_pins, 2),
 	};
 
 	pio_program_t prog = {
@@ -137,41 +189,49 @@ static void init_lo()
 		.origin = origin_lo,
 	};
 
-	pio_sm_restart(PIO, SM_LO);
-	pio_sm_clear_fifos(PIO, SM_LO);
+	pio_sm_restart(pio0, SM_LO);
+	pio_sm_clear_fifos(pio0, SM_LO);
 
-	if (pio_can_add_program(PIO, &prog))
-		origin_lo = pio_add_program(PIO, &prog);
+	if (pio_can_add_program(pio0, &prog))
+		origin_lo = pio_add_program(pio0, &prog);
 
 	pio_sm_config pc = pio_get_default_sm_config();
-	sm_config_set_out_pins(&pc, LO_PIN, 1);
-	sm_config_set_set_pins(&pc, LO_PIN, 1);
+	sm_config_set_out_pins(&pc, PIN_A, 2);
+	sm_config_set_set_pins(&pc, PIN_A, 2);
 	sm_config_set_wrap(&pc, origin_lo, origin_lo + prog.length - 1);
 	sm_config_set_clkdiv_int_frac(&pc, 1, 0);
 	sm_config_set_fifo_join(&pc, PIO_FIFO_JOIN_TX);
 	sm_config_set_out_shift(&pc, false, true, 32);
-	pio_sm_init(PIO, SM_LO, origin_lo, &pc);
+	pio_sm_init(pio0, SM_LO, origin_lo, &pc);
 
-	pio_sm_set_consecutive_pindirs(PIO, SM_LO, LO_PIN, 1, GPIO_IN);
-	pio_sm_exec_wait_blocking(PIO, SM_LO, pio_encode_set(pio_pins, 0));
+	pio_sm_set_consecutive_pindirs(pio0, SM_LO, PIN_A, 2, GPIO_OUT);
+	pio_sm_exec_wait_blocking(pio0, SM_LO, pio_encode_set(pio_pins, 0));
 }
 
 static void init_fb()
 {
-	gpio_disable_pulls(FB_PIN);
-	pio_gpio_init(PIO, FB_PIN);
+	gpio_disable_pulls(PIN_FB0);
+	gpio_disable_pulls(PIN_FB1);
+	gpio_disable_pulls(PIN_FB2);
+	gpio_disable_pulls(PIN_FB3);
 
-	// NOTE: Not sure if this is ideal.
-	hw_set_bits(&PIO->input_sync_bypass, 1u << RX_PIN);
+	pio_gpio_init(pio0, PIN_FB0);
+	pio_gpio_init(pio0, PIN_FB1);
+	pio_gpio_init(pio0, PIN_FB2);
+	pio_gpio_init(pio0, PIN_FB3);
 
-	gpio_set_input_hysteresis_enabled(RX_PIN, false);
-	gpio_set_drive_strength(FB_PIN, GPIO_DRIVE_STRENGTH_2MA);
-	gpio_set_slew_rate(FB_PIN, GPIO_SLEW_RATE_SLOW);
+	gpio_set_drive_strength(PIN_FB0, GPIO_DRIVE_STRENGTH_2MA);
+	gpio_set_drive_strength(PIN_FB1, GPIO_DRIVE_STRENGTH_2MA);
+	gpio_set_drive_strength(PIN_FB2, GPIO_DRIVE_STRENGTH_2MA);
+	gpio_set_drive_strength(PIN_FB3, GPIO_DRIVE_STRENGTH_2MA);
+
+	gpio_set_slew_rate(PIN_FB0, GPIO_SLEW_RATE_SLOW);
+	gpio_set_slew_rate(PIN_FB1, GPIO_SLEW_RATE_SLOW);
+	gpio_set_slew_rate(PIN_FB2, GPIO_SLEW_RATE_SLOW);
+	gpio_set_slew_rate(PIN_FB3, GPIO_SLEW_RATE_SLOW);
 
 	const uint16_t insn[] = {
-		pio_encode_mov_not(pio_pins, pio_pins) | pio_encode_sideset(1, 1) |
-			pio_encode_delay(0),
-		//pio_encode_nop() | pio_encode_sideset(1, 0) | pio_encode_delay(0),
+		pio_encode_mov_not(pio_pins, pio_pins) | pio_encode_sideset(4, 0x0f),
 	};
 
 	pio_program_t prog = {
@@ -180,32 +240,47 @@ static void init_fb()
 		.origin = origin_fb,
 	};
 
-	pio_sm_restart(PIO, SM_FB);
-	pio_sm_clear_fifos(PIO, SM_FB);
+	pio_sm_restart(pio0, SM_FB);
+	pio_sm_clear_fifos(pio0, SM_FB);
 
-	if (pio_can_add_program(PIO, &prog))
-		origin_fb = pio_add_program(PIO, &prog);
+	if (pio_can_add_program(pio0, &prog))
+		origin_fb = pio_add_program(pio0, &prog);
 
 	pio_sm_config pc = pio_get_default_sm_config();
-	sm_config_set_sideset(&pc, 1, false, true);
-	sm_config_set_in_pins(&pc, RX_PIN);
-	sm_config_set_out_pins(&pc, FB_PIN, 1);
-	sm_config_set_set_pins(&pc, FB_PIN, 1);
-	sm_config_set_sideset_pins(&pc, FB_PIN);
+	sm_config_set_sideset(&pc, 4, false, true);
+	sm_config_set_in_pins(&pc, PIN_RX0);
+	sm_config_set_out_pins(&pc, PIN_FB0, 4);
+	sm_config_set_set_pins(&pc, PIN_FB0, 4);
+	sm_config_set_sideset_pins(&pc, PIN_FB0);
 	sm_config_set_wrap(&pc, origin_fb, origin_fb + prog.length - 1);
 	sm_config_set_clkdiv_int_frac(&pc, 1, 0);
-	pio_sm_init(PIO, SM_FB, origin_fb, &pc);
+	pio_sm_init(pio0, SM_FB, origin_fb, &pc);
 
-	pio_sm_set_consecutive_pindirs(PIO, SM_FB, FB_PIN, 1, GPIO_OUT);
+	pio_sm_set_consecutive_pindirs(pio0, SM_FB, PIN_FB0, 4, GPIO_OUT);
 }
 
 static void init_rx()
 {
-	gpio_disable_pulls(RX_PIN);
-	pio_gpio_init(PIO, RX_PIN);
+	gpio_disable_pulls(PIN_RX0);
+	gpio_disable_pulls(PIN_RX1);
+	gpio_disable_pulls(PIN_RX2);
+	gpio_disable_pulls(PIN_RX3);
+
+	pio_gpio_init(pio0, PIN_RX0);
+	pio_gpio_init(pio0, PIN_RX1);
+	pio_gpio_init(pio0, PIN_RX2);
+	pio_gpio_init(pio0, PIN_RX3);
+
+	gpio_set_input_hysteresis_enabled(PIN_RX0, false);
+	gpio_set_input_hysteresis_enabled(PIN_RX1, false);
+	gpio_set_input_hysteresis_enabled(PIN_RX2, false);
+	gpio_set_input_hysteresis_enabled(PIN_RX3, false);
+
+	hw_set_bits(&pio0->input_sync_bypass,
+		    (1u << PIN_RX0) | (1u << PIN_RX1) | (1u << PIN_RX2) | (1u << PIN_RX3));
 
 	const uint16_t insn[] = {
-		pio_encode_in(pio_pins, 1) | pio_encode_delay(0),
+		pio_encode_in(pio_pins, 2) | pio_encode_delay(0),
 	};
 
 	pio_program_t prog = {
@@ -214,44 +289,57 @@ static void init_rx()
 		.origin = origin_rx,
 	};
 
-	pio_sm_restart(PIO, SM_RX);
-	pio_sm_clear_fifos(PIO, SM_RX);
+	pio_sm_restart(pio0, SM_RXI);
+	pio_sm_restart(pio0, SM_RXQ);
 
-	if (pio_can_add_program(PIO, &prog))
-		origin_rx = pio_add_program(PIO, &prog);
+	pio_sm_clear_fifos(pio0, SM_RXI);
+	pio_sm_clear_fifos(pio0, SM_RXQ);
+
+	if (pio_can_add_program(pio0, &prog))
+		origin_rx = pio_add_program(pio0, &prog);
 
 	pio_sm_config pc = pio_get_default_sm_config();
-	sm_config_set_in_pins(&pc, RX_PIN);
+	sm_config_set_in_pins(&pc, PIN_RX0);
 	sm_config_set_wrap(&pc, origin_rx, origin_rx + prog.length - 1);
 	sm_config_set_clkdiv_int_frac(&pc, 1, 0);
 	sm_config_set_fifo_join(&pc, PIO_FIFO_JOIN_RX);
 	sm_config_set_in_shift(&pc, false, true, 32);
-	pio_sm_init(PIO, SM_RX, origin_rx, &pc);
+	pio_sm_init(pio0, SM_RXI, origin_rx, &pc);
 
-	pio_sm_set_consecutive_pindirs(PIO, SM_RX, RX_PIN, 1, GPIO_IN);
+	sm_config_set_in_pins(&pc, PIN_RX2);
+	pio_sm_init(pio0, SM_RXQ, origin_rx, &pc);
+
+	pio_sm_set_consecutive_pindirs(pio0, SM_RXI, PIN_RX0, 2, GPIO_IN);
+	pio_sm_set_consecutive_pindirs(pio0, SM_RXQ, PIN_RX2, 2, GPIO_IN);
 }
 
-static const uint32_t samp_insn = 16;
-
-static void init_ad()
+static void init_iq()
 {
+	/*
+	 * Samples arrive interleaved as [+, -, +, -].
+	 *
+	 * That means we need to swap the sign of the negative samples
+	 * inside the lookup table to arrive at correct unsigned total.
+	 */
+
 	const uint16_t insn[] = {
-		pio_encode_out(pio_pc, 4), // 0000 +0
-		pio_encode_jmp_x_dec(0),   // 0001 +1
-		pio_encode_jmp_x_dec(0),   // 0010 +1
-		pio_encode_jmp_y_dec(0),   // 0011 +2
-		pio_encode_jmp_x_dec(0),   // 0100 +1
-		pio_encode_jmp_y_dec(0),   // 0101 +2
-		pio_encode_jmp_y_dec(0),   // 0110 +2
-		pio_encode_jmp_y_dec(1),   // 0111 +2 +1
-		pio_encode_jmp_x_dec(0),   // 1000 +1
-		pio_encode_jmp_y_dec(0),   // 1001 +2
-		pio_encode_jmp_y_dec(0),   // 1010 +2
-		pio_encode_jmp_y_dec(1),   // 1011 +2 +1
-		pio_encode_jmp_y_dec(0),   // 1100 +2
-		pio_encode_jmp_y_dec(1),   // 1101 +2 +1
-		pio_encode_jmp_y_dec(1),   // 1110 +2 +1
-		pio_encode_jmp_y_dec(3),   // 1111 +2 +2
+		//                         // nom. swap tot.
+		pio_encode_jmp_y_dec(5),   // 0000 0101 +2
+		pio_encode_jmp_x_dec(5),   // 0001 0100 +1
+		pio_encode_jmp_y_dec(1),   // 0010 0111 +2 +1
+		pio_encode_jmp_y_dec(5),   // 0011 0110 +2
+		pio_encode_jmp_x_dec(5),   // 0100 0001 +1
+		pio_encode_out(pio_pc, 4), // 0101 0000 --
+		pio_encode_jmp_y_dec(5),   // 0110 0011 +2
+		pio_encode_jmp_x_dec(5),   // 0111 0010 +1
+		pio_encode_jmp_y_dec(1),   // 1000 1101 +2 +1
+		pio_encode_jmp_y_dec(5),   // 1001 1100 +2
+		pio_encode_jmp_y_dec(0),   // 1010 1111 +2 +2
+		pio_encode_jmp_y_dec(1),   // 1011 1110 +2 +1
+		pio_encode_jmp_y_dec(5),   // 1100 1001 +2
+		pio_encode_jmp_x_dec(5),   // 1101 1000 +1
+		pio_encode_jmp_y_dec(1),   // 1110 1011 +2 +1
+		pio_encode_jmp_y_dec(5),   // 1111 1010 +2
 
 		/*
 		 * Should wrap here.
@@ -269,21 +357,32 @@ static void init_ad()
 	pio_program_t prog = {
 		.instructions = insn,
 		.length = sizeof(insn) / sizeof(*insn),
-		.origin = origin_ad,
+		.origin = origin_acc,
 	};
 
-	pio_sm_restart(PIO, SM_AD);
-	pio_sm_clear_fifos(PIO, SM_AD);
+	pio_sm_restart(pio1, SM_ACCI0);
+	pio_sm_restart(pio1, SM_ACCI1);
+	pio_sm_restart(pio1, SM_ACCQ0);
+	pio_sm_restart(pio1, SM_ACCQ1);
 
-	if (pio_can_add_program(PIO, &prog))
-		pio_add_program(PIO, &prog);
+	pio_sm_clear_fifos(pio1, SM_ACCI0);
+	pio_sm_clear_fifos(pio1, SM_ACCI1);
+	pio_sm_clear_fifos(pio1, SM_ACCQ0);
+	pio_sm_clear_fifos(pio1, SM_ACCQ1);
+
+	if (pio_can_add_program(pio1, &prog))
+		origin_acc = pio_add_program(pio1, &prog);
 
 	pio_sm_config pc = pio_get_default_sm_config();
-	sm_config_set_wrap(&pc, origin_ad, origin_ad + 15);
+	sm_config_set_wrap(&pc, origin_acc, origin_acc + 15);
 	sm_config_set_clkdiv_int_frac(&pc, 1, 0);
 	sm_config_set_in_shift(&pc, false, true, 32);
 	sm_config_set_out_shift(&pc, false, true, 32);
-	pio_sm_init(PIO, SM_AD, origin_ad, &pc);
+
+	pio_sm_init(pio1, SM_ACCI0, origin_acc + 5, &pc);
+	pio_sm_init(pio1, SM_ACCI1, origin_acc + 5, &pc);
+	pio_sm_init(pio1, SM_ACCQ0, origin_acc + 5, &pc);
+	pio_sm_init(pio1, SM_ACCQ1, origin_acc + 5, &pc);
 }
 
 static void lo_generate_phase(uint32_t *buf, size_t len, uint32_t step, uint32_t phase)
@@ -291,9 +390,15 @@ static void lo_generate_phase(uint32_t *buf, size_t len, uint32_t step, uint32_t
 	for (size_t i = 0; i < len; i++) {
 		uint32_t bits = 0;
 
-		for (int j = 0; j < 32; j++) {
-			bits |= phase >> 31;
-			bits <<= 1;
+		for (int j = 0; j < 16; j++) {
+			int noise = (int)rnd_next() / 8 + (int)rnd_next() / 8;
+
+			uint32_t s = (((phase + noise + NCO_PHASE_COS) >> 31) << 1) |
+				     ((phase + noise + NCO_PHASE_SIN) >> 31);
+			/* Make room first, so none of the 16 symbols is shifted out. */
+			bits <<= 2;
+			bits |= nco_quadrature[s];
+
 			phase += step;
 		}
 
@@ -301,50 +406,47 @@ static void lo_generate_phase(uint32_t *buf, size_t len, uint32_t step, uint32_t
 	}
 }
 
-static void rx_lo_init(double freq)
+inline static uint32_t step_from_freq(uint32_t freq)
 {
-	uint32_t step = STEP_BASE * freq;
+	uint64_t tmp = freq;
+	tmp <<= 32;
+	tmp /= CLK_SYS_HZ;
+	return tmp;
+}
 
-	for (uint32_t i = 0; i < LO_NUM_PHASES; i++)
-		lo_generate_phase(lo_phase[i], LO_PHASE_WORDS, step, i << 24);
+static void rx_lo_init(uint32_t freq)
+{
+	uint32_t step = step_from_freq(freq);
 
-	nco_step = step * 32 * LO_PHASE_WORDS;
+	for (uint32_t i = 0; i < NCO_NUM_PHASES; i++)
+		lo_generate_phase(nco_phase[i], NCO_PHASE_WORDS, step, i << 24);
+	/* A buffer advances the phase by 16 steps per word (2 bits each). */
+	nco_step_base = step * 16 * NCO_PHASE_WORDS;
+	nco_step = nco_step_base;
 }
 
 static void rf_rx_start()
 {
-	dma_ch_rx1 = dma_claim_unused_channel(true);
-	dma_ch_rx2 = dma_claim_unused_channel(true);
-
 	dma_ch_nco1 = dma_claim_unused_channel(true);
 	dma_ch_nco2 = dma_claim_unused_channel(true);
 	dma_ch_nco3 = dma_claim_unused_channel(true);
-	dma_ch_mix = dma_claim_unused_channel(true);
-
-	dma_ch_samp_cos = dma_claim_unused_channel(true);
+	dma_ch_lo = dma_claim_unused_channel(true);
+	dma_ch_rxi = dma_claim_unused_channel(true);
+	dma_ch_rxq = dma_claim_unused_channel(true);
+	dma_ch_samp_i0 = dma_claim_unused_channel(true);
+	dma_ch_samp_i1 = dma_claim_unused_channel(true);
+	dma_ch_samp_q0 = dma_claim_unused_channel(true);
+	dma_ch_samp_q1 = dma_claim_unused_channel(true);
 
 	dma_channel_config dma_conf;
 
-	/* Copy PDM bitstream into decimator. */
-	dma_conf = dma_channel_get_default_config(dma_ch_rx1);
-	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
-	channel_config_set_read_increment(&dma_conf, false);
-	channel_config_set_write_increment(&dma_conf, false);
-	channel_config_set_dreq(&dma_conf, pio_get_dreq(PIO, SM_RX, GPIO_IN));
-	channel_config_set_chain_to(&dma_conf, dma_ch_rx2);
-	dma_channel_configure(dma_ch_rx1, &dma_conf, &PIO->txf[SM_AD], &PIO->rxf[SM_RX], UINT_MAX,
-			      false);
-
-	dma_conf = dma_channel_get_default_config(dma_ch_rx2);
-	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
-	channel_config_set_read_increment(&dma_conf, false);
-	channel_config_set_write_increment(&dma_conf, false);
-	channel_config_set_dreq(&dma_conf, pio_get_dreq(PIO, SM_RX, GPIO_IN));
-	channel_config_set_chain_to(&dma_conf, dma_ch_rx1);
-	dma_channel_configure(dma_ch_rx2, &dma_conf, &PIO->txf[SM_AD], &PIO->rxf[SM_RX], UINT_MAX,
-			      false);
-
-	/* Step the NCO. */
+	/*
+	 * Step the NCO
+	 *
+	 * We are using the DMA sniffer to hold the accumulated phase.
+	 * Since our pregenerated phase data hold both cosine and sine
+	 * bits, we can manage with just one such accumulator.
+	 */
 	dma_conf = dma_channel_get_default_config(dma_ch_nco1);
 	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
 	channel_config_set_read_increment(&dma_conf, false);
@@ -352,10 +454,16 @@ static void rf_rx_start()
 	channel_config_set_chain_to(&dma_conf, dma_ch_nco2);
 	dma_channel_configure(dma_ch_nco1, &dma_conf, &nco_null, &nco_step, 1, false);
 
-	/* DMA above will increment the phase accumulator. */
+	/* DMA above shall increment the phase accumulator. */
 	dma_sniffer_enable(dma_ch_nco1, DMA_SNIFF_CTRL_CALC_VALUE_SUM, true);
 
-	/* Prepare the phase address. */
+	/*
+	 * Prepare the phase address
+	 *
+	 * We cannot use 1-byte write to modify the trigger register directly,
+	 * because the logic would distribute the byte across the whole word.
+	 * We can target a single byte inside RAM, though.
+	 */
 	dma_conf = dma_channel_get_default_config(dma_ch_nco2);
 	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_8);
 	channel_config_set_read_increment(&dma_conf, false);
@@ -364,215 +472,221 @@ static void rf_rx_start()
 	dma_channel_configure(dma_ch_nco2, &dma_conf, (uint8_t *)(&nco_addr) + 1,
 			      ((uint8_t *)&dma_hw->sniff_data) + 3, 1, false);
 
-	/* Trigger LO using the address. */
+	/* Trigger LO using the generated address */
 	dma_conf = dma_channel_get_default_config(dma_ch_nco3);
 	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
 	channel_config_set_read_increment(&dma_conf, false);
 	channel_config_set_write_increment(&dma_conf, false);
-	dma_channel_configure(dma_ch_nco3, &dma_conf, &dma_hw->ch[dma_ch_mix].al3_read_addr_trig,
+	dma_channel_configure(dma_ch_nco3, &dma_conf, &dma_hw->ch[dma_ch_lo].al3_read_addr_trig,
 			      &nco_addr, 1, false);
 
-	/* Drive the LO capacitor. */
-	dma_conf = dma_channel_get_default_config(dma_ch_mix);
+	/*
+	 * Drive the LO bits
+	 *
+	 * We are driving the quadrature mixer, actually, but I guess this
+	 * arrangement counts as LO too. We output 2 bits per tick. Once we
+	 * finish outputting the whole phase buffer (in 1024 ticks), we circle
+	 * back to the NCO and ask for a new one.
+	 */
+	dma_conf = dma_channel_get_default_config(dma_ch_lo);
 	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
 	channel_config_set_read_increment(&dma_conf, true);
 	channel_config_set_write_increment(&dma_conf, false);
-	channel_config_set_dreq(&dma_conf, pio_get_dreq(PIO, SM_LO, GPIO_OUT));
+	channel_config_set_dreq(&dma_conf, pio_get_dreq(pio0, SM_LO, GPIO_OUT));
 	channel_config_set_chain_to(&dma_conf, dma_ch_nco1);
-	dma_channel_configure(dma_ch_mix, &dma_conf, &PIO->txf[SM_LO], lo_phase, LO_PHASE_WORDS,
+	dma_channel_configure(dma_ch_lo, &dma_conf, &pio0->txf[SM_LO], NULL, NCO_PHASE_WORDS,
 			      false);
 
-	/* Trigger accumulator values push. */
-	dma_conf = dma_channel_get_default_config(dma_ch_samp_cos);
+	/*
+	 * Read the incoming bits
+	 *
+	 * We have a single PIO state machine per channel, reading 2 bits per tick.
+	 * Our adder cannot cope with that, because it processes at worst
+	 * 4 bits per 3 ticks. Thus we use two accumulators per channel.
+	 *
+	 * To save on DMA channels, we make use of the fact that the FIFOs are
+	 * arranged sequentially in memory and interleave the accumulators.
+	 * Since we are sure that accumulators are faster than receivers,
+	 * we can safely block on receiver DREQ.
+	 */
+	dma_conf = dma_channel_get_default_config(dma_ch_rxi);
+	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
+	channel_config_set_read_increment(&dma_conf, false);
+	channel_config_set_write_increment(&dma_conf, true);
+	channel_config_set_ring(&dma_conf, true, 3);
+	channel_config_set_dreq(&dma_conf, pio_get_dreq(pio0, SM_RXI, GPIO_IN));
+	channel_config_set_chain_to(&dma_conf, dma_ch_rxq);
+	dma_channel_configure(dma_ch_rxi, &dma_conf, &pio1->txf[SM_ACCI0], &pio0->rxf[SM_RXI], 1,
+			      false);
+
+	dma_conf = dma_channel_get_default_config(dma_ch_rxq);
+	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
+	channel_config_set_read_increment(&dma_conf, false);
+	channel_config_set_write_increment(&dma_conf, true);
+	channel_config_set_ring(&dma_conf, true, 3);
+	channel_config_set_dreq(&dma_conf, pio_get_dreq(pio0, SM_RXQ, GPIO_IN));
+	channel_config_set_chain_to(&dma_conf, dma_ch_rxi);
+	dma_channel_configure(dma_ch_rxq, &dma_conf, &pio1->txf[SM_ACCQ0], &pio0->rxf[SM_RXQ], 1,
+			      false);
+
+	/*
+	 * Trigger accumulator values push
+	 *
+	 * We need to inject a jump instruction stored inside samp_insn to all
+	 * four accumulators at the pace given by the timer to make them emit
+	 * current totals and zero the counters.
+	 */
+	dma_conf = dma_channel_get_default_config(dma_ch_samp_i0);
+	channel_config_set_high_priority(&dma_conf, true);
 	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
 	channel_config_set_read_increment(&dma_conf, false);
 	channel_config_set_write_increment(&dma_conf, false);
-	channel_config_set_high_priority(&dma_conf, true);
 	channel_config_set_dreq(&dma_conf, dma_get_timer_dreq(dma_t_samp));
-	dma_channel_configure(dma_ch_samp_cos, &dma_conf, &PIO->sm[SM_AD].instr, &samp_insn,
-			      UINT_MAX, false);
+	channel_config_set_chain_to(&dma_conf, dma_ch_samp_i1);
+	dma_channel_configure(dma_ch_samp_i0, &dma_conf, &pio1->sm[SM_ACCI0].instr, &samp_insn, 1,
+			      false);
 
-	init_ad();
+	dma_conf = dma_channel_get_default_config(dma_ch_samp_i1);
+	channel_config_set_high_priority(&dma_conf, true);
+	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
+	channel_config_set_read_increment(&dma_conf, false);
+	channel_config_set_write_increment(&dma_conf, false);
+	channel_config_set_chain_to(&dma_conf, dma_ch_samp_i0);
+	dma_channel_configure(dma_ch_samp_i1, &dma_conf, &pio1->sm[SM_ACCI1].instr, &samp_insn, 1,
+			      false);
+
+	dma_conf = dma_channel_get_default_config(dma_ch_samp_q0);
+	channel_config_set_high_priority(&dma_conf, true);
+	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
+	channel_config_set_read_increment(&dma_conf, false);
+	channel_config_set_write_increment(&dma_conf, false);
+	channel_config_set_dreq(&dma_conf, dma_get_timer_dreq(dma_t_samp));
+	channel_config_set_chain_to(&dma_conf, dma_ch_samp_q1);
+	dma_channel_configure(dma_ch_samp_q0, &dma_conf, &pio1->sm[SM_ACCQ0].instr, &samp_insn, 1,
+			      false);
+
+	dma_conf = dma_channel_get_default_config(dma_ch_samp_q1);
+	channel_config_set_high_priority(&dma_conf, true);
+	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
+	channel_config_set_read_increment(&dma_conf, false);
+	channel_config_set_write_increment(&dma_conf, false);
+	channel_config_set_chain_to(&dma_conf, dma_ch_samp_q0);
+	dma_channel_configure(dma_ch_samp_q1, &dma_conf, &pio1->sm[SM_ACCQ1].instr, &samp_insn, 1,
+			      false);
+
+	init_iq();
 	init_lo();
-	init_fb();
 	init_rx();
+	init_fb();
+
+	pio_set_sm_mask_enabled(pio1, 0x0f, true);
+	pio_set_sm_mask_enabled(pio0, 0x0f, true);
 
-	dma_channel_start(dma_ch_rx1);
 	dma_channel_start(dma_ch_nco1);
-	dma_channel_start(dma_ch_samp_cos);
-
-	pio_set_sm_mask_enabled(PIO, 0x0f, true);
+	dma_channel_start(dma_ch_rxi);
+	dma_channel_start(dma_ch_samp_i0);
+	dma_channel_start(dma_ch_samp_q0);
 }
 
 static void rf_rx_stop(void)
 {
-	pio_set_sm_mask_enabled(PIO, 0x0f, false);
+	pio_set_sm_mask_enabled(pio0, 0x0f, false);
+	pio_set_sm_mask_enabled(pio1, 0x0f, false);
 
 	sleep_us(10);
 
-	dma_channel_clear_chain_to(dma_ch_rx1);
-	dma_channel_clear_chain_to(dma_ch_rx2);
 	dma_channel_clear_chain_to(dma_ch_nco1);
 	dma_channel_clear_chain_to(dma_ch_nco2);
 	dma_channel_clear_chain_to(dma_ch_nco3);
-	dma_channel_clear_chain_to(dma_ch_mix);
-	dma_channel_clear_chain_to(dma_ch_samp_cos);
+	dma_channel_clear_chain_to(dma_ch_lo);
+	dma_channel_clear_chain_to(dma_ch_rxi);
+	dma_channel_clear_chain_to(dma_ch_rxq);
+	dma_channel_clear_chain_to(dma_ch_samp_i0);
+	dma_channel_clear_chain_to(dma_ch_samp_i1);
+	dma_channel_clear_chain_to(dma_ch_samp_q0);
+	dma_channel_clear_chain_to(dma_ch_samp_q1);
 
-	dma_channel_abort(dma_ch_rx1);
-	dma_channel_abort(dma_ch_rx2);
 	dma_channel_abort(dma_ch_nco1);
 	dma_channel_abort(dma_ch_nco2);
 	dma_channel_abort(dma_ch_nco3);
-	dma_channel_abort(dma_ch_mix);
-	dma_channel_abort(dma_ch_samp_cos);
+	dma_channel_abort(dma_ch_lo);
+	dma_channel_abort(dma_ch_rxi);
+	dma_channel_abort(dma_ch_rxq);
+	dma_channel_abort(dma_ch_samp_i0);
+	dma_channel_abort(dma_ch_samp_i1);
+	dma_channel_abort(dma_ch_samp_q0);
+	dma_channel_abort(dma_ch_samp_q1);
 
-	dma_channel_cleanup(dma_ch_rx1);
-	dma_channel_cleanup(dma_ch_rx2);
 	dma_channel_cleanup(dma_ch_nco1);
 	dma_channel_cleanup(dma_ch_nco2);
 	dma_channel_cleanup(dma_ch_nco3);
-	dma_channel_cleanup(dma_ch_mix);
-	dma_channel_cleanup(dma_ch_samp_cos);
+	dma_channel_cleanup(dma_ch_lo);
+	dma_channel_cleanup(dma_ch_rxi);
+	dma_channel_cleanup(dma_ch_rxq);
+	dma_channel_cleanup(dma_ch_samp_i0);
+	dma_channel_cleanup(dma_ch_samp_i1);
+	dma_channel_cleanup(dma_ch_samp_q0);
+	dma_channel_cleanup(dma_ch_samp_q1);
 
-	dma_channel_unclaim(dma_ch_rx1);
-	dma_channel_unclaim(dma_ch_rx2);
 	dma_channel_unclaim(dma_ch_nco1);
 	dma_channel_unclaim(dma_ch_nco2);
 	dma_channel_unclaim(dma_ch_nco3);
-	dma_channel_unclaim(dma_ch_mix);
-	dma_channel_unclaim(dma_ch_samp_cos);
+	dma_channel_unclaim(dma_ch_lo);
+	dma_channel_unclaim(dma_ch_rxi);
+	dma_channel_unclaim(dma_ch_rxq);
+	dma_channel_unclaim(dma_ch_samp_i0);
+	dma_channel_unclaim(dma_ch_samp_i1);
+	dma_channel_unclaim(dma_ch_samp_q0);
+	dma_channel_unclaim(dma_ch_samp_q1);
 
-	dma_ch_rx1 = -1;
-	dma_ch_rx2 = -1;
 	dma_ch_nco1 = -1;
 	dma_ch_nco2 = -1;
 	dma_ch_nco3 = -1;
-	dma_ch_mix = -1;
-	dma_ch_samp_cos = -1;
+	dma_ch_lo = -1;
+	dma_ch_rxi = -1;
+	dma_ch_rxq = -1;
+	dma_ch_samp_i0 = -1;
+	dma_ch_samp_i1 = -1;
+	dma_ch_samp_q0 = -1;
+	dma_ch_samp_q1 = -1;
 }
 
-struct IQ {
-	int I, Q;
-};
-
-inline static const uint32_t *next_stride()
+inline static int nextI()
 {
-	static int tail = 0;
+	int I = 0;
 
-	int head, delta;
+	I -= 2 * pio_sm_get_blocking_unsafe(pio1, SM_ACCI0);
+	I -= pio_sm_get_blocking_unsafe(pio1, SM_ACCI0);
 
-loop:
-	head = (dma_hw->ch[dma_ch_in_cos].write_addr >> 2) & (RX_WORDS - 1);
-	delta = head - tail;
+	I -= 2 * pio_sm_get_blocking_unsafe(pio1, SM_ACCI1);
+	I -= pio_sm_get_blocking_unsafe(pio1, SM_ACCI1);
 
-	if (delta < 0)
-		delta += RX_WORDS;
-
-	if (delta < RX_STRIDE)
-		goto loop;
-
-	const uint32_t *stride = rx_cos + tail;
-
-	tail = (tail + RX_STRIDE) & (RX_WORDS - 1);
-
-	return stride;
+	return I;
 }
 
-inline static int nextQ(const uint32_t **stride)
+inline static int nextQ()
 {
-	int x2 = *(*stride)++;
-	int x1 = *(*stride)++;
+	int Q = 0;
 
-	return x2 + x2 + x1 + max_amplitude;
-}
+	Q -= 2 * pio_sm_get_blocking_unsafe(pio1, SM_ACCQ0);
+	Q -= pio_sm_get_blocking_unsafe(pio1, SM_ACCQ0);
 
-inline static struct IQ next_sample()
-{
-	int I = 0, Q = 0;
+	Q -= 2 * pio_sm_get_blocking_unsafe(pio1, SM_ACCQ1);
+	Q -= pio_sm_get_blocking_unsafe(pio1, SM_ACCQ1);
 
-	const uint32_t *stride = next_stride();
-
-	int x15 = nextQ(&stride);
-	I += 93 * x15;
-	Q += 39 * x15;
-
-	int x14 = nextQ(&stride);
-	I += 71 * x14;
-	Q += 71 * x14;
-
-	int x13 = nextQ(&stride);
-	I += 39 * x13;
-	Q += 93 * x13;
-
-	int x12 = nextQ(&stride);
-	I += 0 * x12;
-	Q += 101 * x12;
-
-	int x11 = nextQ(&stride);
-	I += -39 * x11;
-	Q += 93 * x11;
-
-	int x10 = nextQ(&stride);
-	I += -71 * x10;
-	Q += 71 * x10;
-
-	int x09 = nextQ(&stride);
-	I += -93 * x09;
-	Q += 39 * x09;
-
-	int x08 = nextQ(&stride);
-	I += -101 * x08;
-	Q += 0 * x08;
-
-	int x07 = nextQ(&stride);
-	I += -93 * x07;
-	Q += -39 * x07;
-
-	int x06 = nextQ(&stride);
-	I += -71 * x06;
-	Q += -71 * x06;
-
-	int x05 = nextQ(&stride);
-	I += -39 * x05;
-	Q += -93 * x05;
-
-	int x04 = nextQ(&stride);
-	I += 0 * x04;
-	Q += -101 * x04;
-
-	int x03 = nextQ(&stride);
-	I += 39 * x03;
-	Q += -93 * x03;
-
-	int x02 = nextQ(&stride);
-	I += 71 * x02;
-	Q += -71 * x02;
-
-	int x01 = nextQ(&stride);
-	I += 93 * x01;
-	Q += -39 * x01;
-
-	int x00 = nextQ(&stride);
-	I += 101 * x00;
-	Q += 0 * x00;
-
-	I *= gain;
-	I /= 1024;
-	I *= max_amplitude_mul;
-	I += 127.4 * (1 << 16);
-	I /= (1 << 16);
-
-	Q *= gain;
-	Q /= 1024;
-	Q *= max_amplitude_mul;
-	Q += 127.4 * (1 << 16);
-	Q /= (1 << 16);
-
-	return (struct IQ){ I, Q };
+	return Q;
 }
 
 static void rf_rx(void)
 {
+	int dcI = 0, dcQ = 0;
+
+	int Ih1[DECIMATE] = { 0 };
+	int Ia1 = 0;
+
+	int Qh1[DECIMATE] = { 0 };
+	int Qa1 = 0;
+
 	while (true) {
 		if (multicore_fifo_rvalid()) {
 			multicore_fifo_pop_blocking();
@@ -584,9 +698,34 @@ static void rf_rx(void)
 		uint8_t *blockptr = block;
 
 		for (int i = 0; i < IQ_SAMPLES; i++) {
-			struct IQ IQ = next_sample();
-			int64_t I = IQ.I;
-			int64_t Q = IQ.Q;
+			/* Randomize the NCO step around its base value. */
+			nco_step = nco_step_base + (int)rnd_next() / 256 + (int)rnd_next() / 256;
+			int I = 0, Q = 0;
+
+			for (int d = 0; d < DECIMATE; d++) {
+				int Is = nextI();
+				Ia1 += Is - Ih1[d];
+				Ih1[d] = Is;
+				I += Ia1;
+
+				int Qs = nextQ();
+				Qa1 += Qs - Qh1[d];
+				Qh1[d] = Qs;
+				Q += Qa1;
+			}
+
+			I /= DECIMATE;
+			Q /= DECIMATE;
+
+			I = ((I << 14) - dcI) / (1 << 14);
+			dcI += I;
+
+			Q = ((Q << 14) - dcQ) / (1 << 14);
+			dcQ += Q;
+
+			I *= gain;
+			I /= max_amplitude;
+			I += 128;
 
 			if (I < 0)
 				I = 0;
@@ -595,6 +734,10 @@ static void rf_rx(void)
 
 			*blockptr++ = I;
 
+			Q *= gain;
+			Q /= max_amplitude;
+			Q += 128;
+
 			if (Q < 0)
 				Q = 0;
 			else if (Q > 255)
@@ -614,17 +757,20 @@ static void run_command(uint8_t cmd, uint32_t arg)
 	if (0x01 == cmd) {
 		/* Tune to a new center frequency */
 		frequency = arg;
-		rx_lo_init(frequency + sample_rate);
+		rx_lo_init(frequency);
 	} else if (0x02 == cmd) {
 		/* Set the rate at which IQ sample pairs are sent */
 		sample_rate = arg;
-		max_amplitude = CLK_SYS_HZ / sample_rate / 2;
-		max_amplitude_mul = 65536 / max_amplitude;
+		max_amplitude = CLK_SYS_HZ / sample_rate;
 		dma_timer_set_fraction(dma_t_samp, 1, CLK_SYS_HZ / (sample_rate * DECIMATE));
-		rx_lo_init(frequency + sample_rate);
+		rx_lo_init(frequency);
 	} else if (0x04 == cmd) {
 		/* Set the tuner gain level */
 		gain = INIT_GAIN * powf(10.0f, arg / 200.0f);
+	} else if (0x05 == cmd) {
+		/* Normally PPM, but we use it for whatever we need atm. */
+		tweak = (int)arg;
+		rx_lo_init(frequency);
 	} else if (0x0d == cmd) {
 		/* Set tuner gain by the tuner's gain index */
 
@@ -664,18 +810,6 @@ static void do_rx()
 	rf_rx_start();
 	sleep_us(100);
 
-	dma_ch_in_cos = dma_claim_unused_channel(true);
-
-	dma_channel_config dma_conf;
-
-	dma_conf = dma_channel_get_default_config(dma_ch_in_cos);
-	channel_config_set_transfer_data_size(&dma_conf, DMA_SIZE_32);
-	channel_config_set_read_increment(&dma_conf, false);
-	channel_config_set_write_increment(&dma_conf, true);
-	channel_config_set_ring(&dma_conf, GPIO_OUT, RX_BITS_DEPTH);
-	channel_config_set_dreq(&dma_conf, pio_get_dreq(PIO, SM_AD, false));
-	dma_channel_configure(dma_ch_in_cos, &dma_conf, rx_cos, &PIO->rxf[SM_AD], UINT_MAX, true);
-
 	multicore_launch_core1(rf_rx);
 
 	const uint8_t *block;
@@ -708,12 +842,6 @@ done:
 	multicore_reset_core1();
 
 	rf_rx_stop();
-
-	dma_channel_clear_chain_to(dma_ch_in_cos);
-	dma_channel_abort(dma_ch_in_cos);
-	dma_channel_cleanup(dma_ch_in_cos);
-	dma_channel_unclaim(dma_ch_in_cos);
-	dma_ch_in_cos = -1;
 }
 
 int main()
@@ -735,7 +863,7 @@ int main()
 
 	queue_init(&iq_queue, sizeof(uint8_t *), IQ_QUEUE_LEN);
 
-	rx_lo_init(frequency + sample_rate);
+	rx_lo_init(frequency);
 
 	dma_t_samp = dma_claim_unused_timer(true);
 	dma_timer_set_fraction(dma_t_samp, 1, CLK_SYS_HZ / (sample_rate * DECIMATE));
@@ -749,6 +877,13 @@ int main()
 			fflush(stdout);
 
 			do_rx();
+
+			gain = INIT_GAIN;
+			frequency = INIT_FREQ;
+			sample_rate = INIT_SAMPLE_RATE;
+			max_amplitude = CLK_SYS_HZ / INIT_SAMPLE_RATE;
+			tweak = 0;
+			rx_lo_init(frequency);
 		}
 
 		sleep_ms(10);
diff --git a/util/bridge.py b/util/bridge.py
index 0367077..39d5054 100755
--- a/util/bridge.py
+++ b/util/bridge.py
@@ -62,6 +62,10 @@ def bridge(frequency, device):
 
             print("Begin")
 
+            header = fp.read(12)
+            if header:
+                peer.send(header)
+
             try:
                 cmd = b""