It's dirty but I think I figured out a hack: of the 11-byte payload that I get from the SPI device, I only actually care about 4 of those bytes. That just barely fits into a "single", as the DMA describes it, which allows me to hack towards what I need using TRIGM.
I get what I need using 3 LPDMA channels:
- A loop that reads 11 bytes from SPI and dumps them in a buffer in RAM upon request (activated by SPI RX request, no formal trigger configuration).
- A memory-to-memory DMA that copies a "single" from the SPI buffer fed by LPDMA channel 1 and puts it into a much larger buffer. The magic that makes this work is that setting TR1 SDW and DDW to 0b10 (32-bit word) and setting TR2 TRIGM to 0b11 results in the LPDMA copying exactly 4 bytes per transfer-complete trigger, instead of the entire BNDT.
- An SPI reset procedure triggered by transfer complete from channel 1, which re-arms the SPI so that it can read again on the next trigger.
This is heavily inspired by the sens_acquisition demonstration module from the firmware package GitHub repo, which uses a similar hack.
Later I'll be setting up an interrupt from channel 2 to wake the CPU and have the GPDMA copy the buffer from SRAM4 into the other SRAMs. Eventually I'll need to write the data out to SDMMC as well and the question will be what's more power efficient: wake the SDMMC frequently so that RAM can stay asleep or power on RAM so the SDMMC can stay asleep.
If I could make a wish: LPDMA should have two "BNDT" type things: one for how many bytes to read before signalling transfer complete and one for how many bytes to read per trigger.
Also for anyone else who's new and runs into similar problems: forget Cube and the shiny GUI tools. Read the reference manual. It's huge and intimidating but where Cube is confusing and opaque, the reference manual is clear and straightforward. For example Cube doesn't tell you why there are two "trigger" values when you're setting up an SPI receive in an LPBAM queue: one is for the LPDMA LLI, the other is for SPI autonomous mode. Cube also doesn't tell you that it's possible to enable an EXTI line with neither an event nor an interrupt and you'll be able to use it to trigger autonomous peripherals without waking the CPU.
Anywho, example code for my terrible hack:
/* USER CODE BEGIN PV */
// All objects below are placed in the ".lpbamSection" linker section and are
// 4-byte aligned. NOTE(review): presumably the linker script maps this section
// to SRAM4 so the LPDMA can reach it in low-power modes - confirm.

// Scratch buffer for one SPI transaction. LPDMA channel 0 writes the 11
// received bytes starting at index 1, and channel 1 copies one 32-bit word
// starting at index 4.
static uint8_t __attribute__((section(".lpbamSection"), aligned(4))) single_reading_buffer[12] =
{ 0 };
// Linked-list item for channel 0: [0] is the CBR1 reload value (block size),
// [1] is the CDAR reload value (destination address).
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) channel0_llis[2] =
{ 0 };
// Accumulation buffer filled by channel 1, one 32-bit word per trigger.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) lp_readings_buffer[4000] =
{ 0 };
// Linked-list item for channel 1: [0] is the CBR1 reload value, [1] is the
// CDAR reload value.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) channel1_llis[2] =
{ 0 };
// Prepared SPI3 CR1 value with SPE cleared; channel 2 copies it into SPI3->CR1.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) spi_cr1_disable =
0;
// Prepared SPI3 CR1 value with SPE set; channel 2 copies it into SPI3->CR1.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) spi_cr1_enable =
0;
// Prepared flag-clear value; channel 2 copies it into SPI3->IFCR.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) spi_fcr_clear =
0;
// Prepared transfer size; channel 2 copies it into SPI3->CR2.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) spi_cr2_size =
0;
// Four linked-list items for channel 2, five words each, stored in the order
// CTR2, CBR1, CSAR, CDAR, CLLR.
static uint32_t __attribute__((section(".lpbamSection"), aligned(4))) spi_reset_llis[4][5] =
{ 0 };
/* USER CODE END PV */
// Sends the one-byte command 0x08 over hspi3 with a blocking HAL transmit
// (timeout argument 499) and calls Error_Handler() on any result other than
// HAL_OK.
void sendSpiStart() {
    const uint8_t start_command[1] = { 0x08 };

    if (HAL_SPI_Transmit(&hspi3, start_command, sizeof(start_command), 499)
            == HAL_OK) {
        return;
    }
    Error_Handler();
}
// Reprograms SPI3 for autonomous reception: the peripheral is disabled first,
// then CFG1, CFG2, CR2 and AUTOCR are overwritten with fixed values. SPI3 is
// left disabled; the caller enables it afterwards.
void setupSpiForLpbam() {
    // Pre-scaler bypass, RX DMA requests, 8-bit frame size (DSIZE = 7).
    const uint32_t cfg1_value = SPI_CFG1_BPASS | SPI_CFG1_RXDMAEN | 0x7U;
    // SS output management, clock phase, master, simplex receiver,
    // 4-clock SS idle.
    const uint32_t cfg2_value = SPI_CFG2_SSOM | SPI_CFG2_SSOE | SPI_CFG2_CPHA
            | SPI_CFG2_MASTER | SPI_CFG2_COMM_1 | SPI_CFG2_MSSI_2;
    // One transaction is 11 bytes long.
    const uint32_t transfer_size = 11;
    // Hardware trigger enabled, EXTI4 as the source, falling edge.
    const uint32_t autocr_value = SPI_AUTOCR_TRIGEN | SPI_AUTOCR_TRIGPOL
            | SPI_AUTOCR_TRIGSEL_2;

    __HAL_SPI_DISABLE(&hspi3);
    SPI3->CFG1 = cfg1_value;
    SPI3->CFG2 = cfg2_value;
    SPI3->CR2 = transfer_size;
    SPI3->AUTOCR = autocr_value;
}
// Channel 0 takes SPI readings and puts them in a temporary buffer. Should output transfer complete when the block (11 bytes) is full.
void setupLpbamChannel0() {
// Reset LPDMA channel 0.
LPDMA1_Channel0->CCR |= DMA_CCR_RESET;
while (LPDMA1_Channel0->CCR & DMA_CCR_EN) {
}
// Enable error interrupts and high priority.
LPDMA1_Channel0->CCR = DMA_CCR_TOIE | DMA_CCR_SUSPIE | DMA_CCR_USEIE
| DMA_CCR_ULEIE | DMA_CCR_DTEIE | DMA_CCR_PRIO_0 | DMA_CCR_PRIO_1;
// DINC, everything else default.
LPDMA1_Channel0->CTR1 = DMA_CTR1_DINC;
// SPI requests, everything else default.
LPDMA1_Channel0->CTR2 = 2;
// Set source to SPI3 RX register.
LPDMA1_Channel0->CSAR = (uint32_t) &SPI3->RXDR;
// Set up an LLI that resets the block size and destination address.
const uint32_t channel0_llis_addr = (uint32_t) &channel0_llis;
// LLIs use a 64K-aligned base address plus a 32-bit aligned offset from that base.
const uint32_t channel0_llis_addr_64k_aligned_base = channel0_llis_addr
& 0xFFFF0000;
const uint32_t channel0_llis_addr_offset_from_base = channel0_llis_addr
& 0x0000FFFF;
// Set size and destination of buffer.
LPDMA1_Channel0->CBR1 = channel0_llis[0] = 11;
// Set the destination to the second element, not the first. Since the entire buffer is 4-byte aligned, the 3 status bytes will end up out of alignment but the four 16-bit channel readings that come after will be aligned.
LPDMA1_Channel0->CDAR = channel0_llis[1] =
(uint32_t) &single_reading_buffer[1];
// Set up LLI offset and which registers to update. The 0xFFFC drops the last two bits from the offset as they're ignored.
LPDMA1_Channel0->CLBAR = channel0_llis_addr_64k_aligned_base;
LPDMA1_Channel0->CLLR = DMA_CLLR_UB1 | DMA_CLLR_UDA
| (0xFFFC & channel0_llis_addr_offset_from_base);
}
// Channel 1 copies one reading at a time from the channel 0 scratch buffer
// into lp_readings_buffer. Half-transfer and transfer-complete interrupts are
// enabled so the CPU can be woken when the large buffer is half full or full.
void setupLpbamChannel1() {
    // Put the channel back into its reset state and wait until it is idle.
    LPDMA1_Channel1->CCR |= DMA_CCR_RESET;
    while ((LPDMA1_Channel1->CCR & DMA_CCR_EN) != 0) {
    }

    // High priority, error interrupts plus half/complete transfer interrupts.
    LPDMA1_Channel1->CCR = DMA_CCR_PRIO_1 | DMA_CCR_PRIO_0 | DMA_CCR_TCIE
            | DMA_CCR_HTIE | DMA_CCR_DTEIE | DMA_CCR_ULEIE | DMA_CCR_USEIE
            | DMA_CCR_SUSPIE | DMA_CCR_TOIE;

    // Word-wide (32-bit) source and destination with destination increment.
    // The word width is the hack: one word holds 2 channels at 16 bits each,
    // i.e. a full logical sample, yet still counts as one "single" that TRIGM
    // in CTR2 can gate.
    LPDMA1_Channel1->CTR1 = DMA_CTR1_SDW_LOG2_1 | DMA_CTR1_DDW_LOG2_1
            | DMA_CTR1_DINC;

    // "Software" request (hopefully memory-to-memory), triggered on the rising
    // edge of trigger 18 (channel 0 transfer complete).
    // Important: TCEM is left at 00, so (half) transfer complete is relative to
    // the block, not the LLI.
    // Important: TRIGM = 11, so each trigger moves exactly one "single", which
    // is 4 bytes given the SDW/DDW setting above.
    LPDMA1_Channel1->CTR2 = DMA_CTR2_SWREQ | (18 << DMA_CTR2_TRIGSEL_Pos)
            | DMA_CTR2_TRIGPOL_0 | DMA_CTR2_TRIGM_1 | DMA_CTR2_TRIGM_0;

    // Source is the first channel value in the scratch buffer, which sits
    // after one dummy byte and the 3 status bytes.
    LPDMA1_Channel1->CSAR = (uint32_t) &single_reading_buffer[4];

    // Linked-list item: block size followed by destination address.
    channel1_llis[0] = sizeof(lp_readings_buffer);
    channel1_llis[1] = (uint32_t) lp_readings_buffer;

    // Start the first block with the same values the list reloads later.
    LPDMA1_Channel1->CBR1 = channel1_llis[0];
    LPDMA1_Channel1->CDAR = channel1_llis[1];

    // The item is addressed as a 64K-aligned base plus a 32-bit aligned
    // offset; the 0xFFFC mask drops the two ignored low offset bits.
    const uint32_t lli_location = (uint32_t) &channel1_llis;
    LPDMA1_Channel1->CLBAR = lli_location & 0xFFFF0000;
    LPDMA1_Channel1->CLLR = DMA_CLLR_UB1 | DMA_CLLR_UDA
            | ((lli_location & 0x0000FFFF) & 0xFFFC);
}
// Channel 2 resets the SPI peripheral after each read so that it's ready for the next one.
void setupLpbamChannel2() {
spi_cr1_disable = SPI3->CR1 & (~SPI_CR1_SPE);
spi_cr1_enable = SPI3->CR1 | SPI_CR1_SPE;
spi_fcr_clear = 0x007F00;
spi_cr2_size = 11;
// Reset LPDMA channel 2.
LPDMA1_Channel2->CCR |= DMA_CCR_RESET;
while (LPDMA1_Channel2->CCR & DMA_CCR_EN) {
}
// Enable error interrupts and high priority.
LPDMA1_Channel2->CCR = DMA_CCR_TOIE | DMA_CCR_SUSPIE | DMA_CCR_USEIE
| DMA_CCR_ULEIE | DMA_CCR_DTEIE | DMA_CCR_PRIO_0 | DMA_CCR_PRIO_1;
// Everything default, including source/dest address increment.
LPDMA1_Channel2->CTR1 = 0;
// Set up an LLI that resets the block size and destination address.
const uint32_t llis_addr = (uint32_t) &spi_reset_llis;
// LLIs use a 64K-aligned base address plus a 32-bit aligned offset from that base.
const uint32_t llis_addr_64k_aligned_base = llis_addr & 0xFFFF0000;
const uint32_t llis_addr_offset_from_base = llis_addr & 0x0000FFFF;
// ----- Disable SPI -----
// CTR2: Trigger on rising edge of channel 0 transfer complete, set "software" request (hopefully memory-to-memory transfer).
spi_reset_llis[0][0] = DMA_CTR2_TRIGPOL_0 | (18 << DMA_CTR2_TRIGSEL_Pos)
| DMA_CTR2_SWREQ;
// CBR1: Set transfer size to 4 bytes (size of SPI CR1 register)
spi_reset_llis[0][1] = 4;
// CSAR: Set source to prepared CR1 register.
spi_reset_llis[0][2] = (uint32_t) &spi_cr1_disable;
// CDAR: Set destination to SPI CR1
spi_reset_llis[0][3] = (uint32_t) &SPI3->CR1;
// Set the relevant registers to load and the address of the next LLI in the chain.
spi_reset_llis[0][4] =
DMA_CLLR_UT2 | DMA_CLLR_UB1 | DMA_CLLR_USA | DMA_CLLR_UDA
| DMA_CLLR_ULL
| (0xFFFC
& (llis_addr_offset_from_base
+ sizeof(spi_reset_llis[0])));
// ----- Clear flags -----
// CTR2: No trigger, memory-to-memory
spi_reset_llis[1][0] = DMA_CTR2_SWREQ;
// CBR1: Set transfer size to 4 bytes (size of SPI CR1 register)
spi_reset_llis[1][1] = 4;
// CSAR: Set source to prepared FCR register.
spi_reset_llis[1][2] = (uint32_t) &spi_fcr_clear;
// CDAR: Set destination to SPI FCR
spi_reset_llis[1][3] = (uint32_t) &SPI3->IFCR;
// Set the relevant registers to load and the address of the next LLI in the chain.
spi_reset_llis[1][4] =
DMA_CLLR_UT2 | DMA_CLLR_UB1 | DMA_CLLR_USA | DMA_CLLR_UDA | DMA_CLLR_ULL
| (0xFFFC
& (llis_addr_offset_from_base
+ sizeof(spi_reset_llis[0]) * 2));
// ----- Set size -----
// CTR2: No trigger, memory-to-memory
spi_reset_llis[2][0] = DMA_CTR2_SWREQ;
// CBR1: Set transfer size to 4 bytes (size of SPI CR1 register)
spi_reset_llis[2][1] = 4;
// CSAR: Set source to prepared FCR register.
spi_reset_llis[2][2] = (uint32_t) &spi_cr2_size;
// CDAR: Set destination to SPI FCR
spi_reset_llis[2][3] = (uint32_t) &SPI3->CR2;
// Set the relevant registers to load and the address of the next LLI in the chain.
spi_reset_llis[2][4] =
DMA_CLLR_UT2 | DMA_CLLR_UB1 | DMA_CLLR_USA | DMA_CLLR_UDA | DMA_CLLR_ULL
| (0xFFFC
& (llis_addr_offset_from_base
+ sizeof(spi_reset_llis[0]) * 3));
// ----- Enable SPI -----
// CTR2: No trigger, memory-to-memory
spi_reset_llis[3][0] = DMA_CTR2_SWREQ;
// CBR1: Set transfer size to 4 bytes (size of SPI CR1 register)
spi_reset_llis[3][1] = 4;
// CSAR: Set source to prepared FCR register.
spi_reset_llis[3][2] = (uint32_t) &spi_cr1_enable;
// CDAR: Set destination to SPI FCR
spi_reset_llis[3][3] = (uint32_t) &SPI3->CR1;
// Set the relevant registers to load and the address of first LLI.
spi_reset_llis[3][4] =
DMA_CLLR_UT2 | DMA_CLLR_UB1 | DMA_CLLR_USA | DMA_CLLR_UDA | DMA_CLLR_ULL
| (0xFFFC & llis_addr_offset_from_base);
// Set base address
LPDMA1_Channel2->CLBAR = llis_addr_64k_aligned_base;
// Set up first transfer
LPDMA1_Channel2->CTR2 = spi_reset_llis[0][0];
LPDMA1_Channel2->CBR1 = spi_reset_llis[0][1];
LPDMA1_Channel2->CSAR = spi_reset_llis[0][2];
LPDMA1_Channel2->CDAR = spi_reset_llis[0][3];
LPDMA1_Channel2->CLLR = spi_reset_llis[0][4];
}
// Brings up the whole autonomous acquisition chain: enables the clocks for
// LPDMA1, SPI3 and SRAM4, selects MSI as the wake-up clock, configures SPI3 and
// LPDMA channels 0-2, then enables the channels and finally SPI3.
void runLpbamLowLevel() {
// Allow LPDMA and SPI3 in sleep/stop modes.
__HAL_RCC_LPDMA1_CLK_ENABLE();
__HAL_RCC_LPDMA1_CLKAM_ENABLE();
__HAL_RCC_LPDMA1_CLK_SLEEP_ENABLE();
__HAL_RCC_SPI3_CLK_ENABLE();
__HAL_RCC_SPI3_CLKAM_ENABLE();
__HAL_RCC_SPI3_CLK_SLEEP_ENABLE();
// Same three clock enables for SRAM4.
__HAL_RCC_SRAM4_CLK_ENABLE();
__HAL_RCC_SRAM4_CLKAM_ENABLE();
__HAL_RCC_SRAM4_CLK_SLEEP_ENABLE();
// Set wake up clock to MSI (which is configured to 4MHz and already present during STOP modes when the CPU is likely to be woken).
__HAL_RCC_WAKEUPSTOP_CLK_CONFIG(RCC_STOP_WAKEUPCLOCK_MSI);
// Configure SPI3 first: setupLpbamChannel2() captures SPI3->CR1 to build the register images it writes back later.
setupSpiForLpbam();
setupLpbamChannel0();
setupLpbamChannel1();
setupLpbamChannel2();
// Enable LPDMA channels 2 and 1, the two channels triggered by channel 0 transfer complete.
// NOTE(review): presumably they are enabled before channel 0 so that they are armed before the first trigger can occur - confirm.
LPDMA1_Channel2->CCR |= DMA_CCR_EN;
LPDMA1_Channel1->CCR |= DMA_CCR_EN;
// Enable LPDMA Channel 0.
LPDMA1_Channel0->CCR |= DMA_CCR_EN;
// Enable SPI3.
__HAL_SPI_ENABLE(&hspi3);
}