Skip to main content
Graduate II
December 28, 2023
Solved

STM32H7A3 LPUART and BDMA

  • December 28, 2023
  • 2 replies
  • 5776 views

I'm trying to get the LPUART working with BDMA on a STM32H7A3 MCU.  Eventually I'd like to set it up so the MCU goes into a low power state and wakes up when it receives a LF character match in the incoming message, but for now I'm just trying to get a small test program working starting with just transmitting some bytes using BDMA.  I have DMA with character match working on a regular UART with DMA and I'm using that code as an example along with the reference manual sections on the LPUART and BDMA. I think I'm doing everything the reference manual says to do but maybe I missed or misunderstood something. I can send characters out the LPUART without DMA and see them show up on my logic analyzer but when I try to do a BDMA transmit, nothing shows up on the LPUART_TX line to trigger the logic analyzer.

I've read this article ( https://community.st.com/t5/stm32-mcus/dma-is-not-working-on-stm32h7-devices/ta-p/49498 ) many times and it seems pretty clear I should be making sure my tx buffer is in SRAM4 but I haven't figured out how to do that yet. 

I use CubeMX to generate the startup code and the VisualGDB development environment to develop the application.  I attached the .ioc file for reference.

Thanks for the help

Here's the test code

 

 

TEST(CPFTestGroup, LPUARTTest)
{
	const ALIGN_32BYTES(std::uint8_t getFPCmd[]) = "abcdefghijklmnop";
	const uint32_t getFPCmdSize = sizeof(getFPCmd);

	SystemClock::configSystemClock();
	MX_LPUART1_UART_Init();
	MX_BDMA2_Init();

//Sending characters byte by byte works
	for (int i = 0; i < 128; i++)
	{
		LL_LPUART_TransmitData8(LPUART1, i);
		LL_mDelay(1);
	}

//Using BDMA doesn't work
 //Setup TX
	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, 16);

	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
	LL_LPUART_EnableDMAReq_TX(LPUART1);
}

 

 

Here's the MX_LPUART1_UART_Init  and MX_BDMA2_Init() code generated by CubeMX

 

 

void MX_LPUART1_UART_Init(void)
{

 /* USER CODE BEGIN LPUART1_Init 0 */

 /* USER CODE END LPUART1_Init 0 */

 LL_LPUART_InitTypeDef LPUART_InitStruct = {0};

 LL_GPIO_InitTypeDef GPIO_InitStruct = {0};

 LL_RCC_SetLPUARTClockSource(LL_RCC_LPUART1_CLKSOURCE_LSE);

 /* Peripheral clock enable */
 LL_APB4_GRP1_EnableClock(LL_APB4_GRP1_PERIPH_LPUART1);

 LL_AHB4_GRP1_EnableClock(LL_AHB4_GRP1_PERIPH_GPIOB);
 /**LPUART1 GPIO Configuration
 PB6 ------> LPUART1_TX
 PB7 ------> LPUART1_RX
 */
 GPIO_InitStruct.Pin = LL_GPIO_PIN_6|LL_GPIO_PIN_7;
 GPIO_InitStruct.Mode = LL_GPIO_MODE_ALTERNATE;
 GPIO_InitStruct.Speed = LL_GPIO_SPEED_FREQ_LOW;
 GPIO_InitStruct.OutputType = LL_GPIO_OUTPUT_PUSHPULL;
 GPIO_InitStruct.Pull = LL_GPIO_PULL_NO;
 GPIO_InitStruct.Alternate = LL_GPIO_AF_8;
 LL_GPIO_Init(GPIOB, &GPIO_InitStruct);

 /* LPUART1 DMA Init */

 /* LPUART1_RX Init */
 LL_BDMA_SetPeriphRequest(BDMA2, LL_BDMA_CHANNEL_0, LL_DMAMUX2_REQ_LPUART1_RX);

 LL_BDMA_SetDataTransferDirection(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_DIRECTION_PERIPH_TO_MEMORY);

 LL_BDMA_SetChannelPriorityLevel(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PRIORITY_LOW);

 LL_BDMA_SetMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MODE_NORMAL);

 LL_BDMA_SetPeriphIncMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PERIPH_NOINCREMENT);

 LL_BDMA_SetMemoryIncMode(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MEMORY_INCREMENT);

 LL_BDMA_SetPeriphSize(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_PDATAALIGN_BYTE);

 LL_BDMA_SetMemorySize(BDMA2, LL_BDMA_CHANNEL_0, LL_BDMA_MDATAALIGN_BYTE);

 /* LPUART1_TX Init */
 LL_BDMA_SetPeriphRequest(BDMA2, LL_BDMA_CHANNEL_1, LL_DMAMUX2_REQ_LPUART1_TX);

 LL_BDMA_SetDataTransferDirection(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);

 LL_BDMA_SetChannelPriorityLevel(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PRIORITY_LOW);

 LL_BDMA_SetMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MODE_NORMAL);

 LL_BDMA_SetPeriphIncMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PERIPH_NOINCREMENT);

 LL_BDMA_SetMemoryIncMode(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MEMORY_INCREMENT);

 LL_BDMA_SetPeriphSize(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_PDATAALIGN_BYTE);

 LL_BDMA_SetMemorySize(BDMA2, LL_BDMA_CHANNEL_1, LL_BDMA_MDATAALIGN_BYTE);

 /* LPUART1 interrupt Init */
 NVIC_SetPriority(LPUART1_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
 NVIC_EnableIRQ(LPUART1_IRQn);

 /* USER CODE BEGIN LPUART1_Init 1 */

 /* USER CODE END LPUART1_Init 1 */
 LPUART_InitStruct.PrescalerValue = LL_LPUART_PRESCALER_DIV1;
 LPUART_InitStruct.BaudRate = 9600;
 LPUART_InitStruct.DataWidth = LL_LPUART_DATAWIDTH_8B;
 LPUART_InitStruct.StopBits = LL_LPUART_STOPBITS_1;
 LPUART_InitStruct.Parity = LL_LPUART_PARITY_NONE;
 LPUART_InitStruct.TransferDirection = LL_LPUART_DIRECTION_TX_RX;
 LPUART_InitStruct.HardwareFlowControl = LL_LPUART_HWCONTROL_NONE;
 LL_LPUART_Init(LPUART1, &LPUART_InitStruct);
 LL_LPUART_SetTXFIFOThreshold(LPUART1, LL_LPUART_FIFOTHRESHOLD_1_8);
 LL_LPUART_SetRXFIFOThreshold(LPUART1, LL_LPUART_FIFOTHRESHOLD_1_8);
 LL_LPUART_DisableFIFO(LPUART1);

 /* USER CODE BEGIN WKUPType LPUART1 */

 /* USER CODE END WKUPType LPUART1 */

 LL_LPUART_Enable(LPUART1);

 /* Polling LPUART1 initialisation */
 while((!(LL_LPUART_IsActiveFlag_TEACK(LPUART1))) || (!(LL_LPUART_IsActiveFlag_REACK(LPUART1))))
 {
 }
 /* USER CODE BEGIN LPUART1_Init 2 */

 /* USER CODE END LPUART1_Init 2 */

}
void MX_BDMA2_Init(void)
{

 /* Init with LL driver */
 /* DMA controller clock enable */
 __HAL_RCC_BDMA2_CLK_ENABLE();

 /* DMA interrupt init */
 /* BDMA2_Channel0_IRQn interrupt configuration */
 NVIC_SetPriority(BDMA2_Channel0_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
 NVIC_EnableIRQ(BDMA2_Channel0_IRQn);
 /* BDMA2_Channel1_IRQn interrupt configuration */
 NVIC_SetPriority(BDMA2_Channel1_IRQn, NVIC_EncodePriority(NVIC_GetPriorityGrouping(),0, 0));
 NVIC_EnableIRQ(BDMA2_Channel1_IRQn);

}

 

 

Here's configSystemClock and the SystemClock_Config generated by CubeMX

 

 

void SystemClock::configSystemClock()
{
	SystemClock_Config();
}

void SystemClock_Config(void)
{
	/*AXI clock gating */
	RCC->CKGAENR = 0xFFFFFFFF;

	LL_FLASH_SetLatency(LL_FLASH_LATENCY_3);
	while (LL_FLASH_GetLatency() != LL_FLASH_LATENCY_3)
	{
	}
	LL_PWR_ConfigSupply(LL_PWR_DIRECT_SMPS_SUPPLY);
	LL_PWR_SetRegulVoltageScaling(LL_PWR_REGU_VOLTAGE_SCALE0);
	while (LL_PWR_IsActiveFlag_VOS() == 0)
	{
	}
	LL_RCC_HSE_Enable();

	/* Wait till HSE is ready */
	while (LL_RCC_HSE_IsReady() != 1)
	{

	}
	LL_PWR_EnableBkUpAccess();
	LL_RCC_LSE_SetDriveCapability(LL_RCC_LSEDRIVE_LOW);
	LL_RCC_LSE_Enable();

	/* Wait till LSE is ready */
	while (LL_RCC_LSE_IsReady() != 1)
	{

	}
	LL_RCC_HSE_EnableCSS();
	LL_RCC_LSE_EnableCSS();
	LL_RCC_PLL_SetSource(LL_RCC_PLLSOURCE_HSE);
	LL_RCC_PLL1P_Enable();
	LL_RCC_PLL1Q_Enable();
	LL_RCC_PLL1_SetVCOInputRange(LL_RCC_PLLINPUTRANGE_8_16);
	LL_RCC_PLL1_SetVCOOutputRange(LL_RCC_PLLVCORANGE_WIDE);
	LL_RCC_PLL1_SetM(3);
	LL_RCC_PLL1_SetN(70);
	LL_RCC_PLL1_SetP(2);
	LL_RCC_PLL1_SetQ(35);
	LL_RCC_PLL1_SetR(2);
	LL_RCC_PLL1_Enable();

	/* Wait till PLL is ready */
	while (LL_RCC_PLL1_IsReady() != 1)
	{
	}

	/* Intermediate AHB prescaler 2 when target frequency clock is higher than 80 MHz */
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);

	LL_RCC_SetSysClkSource(LL_RCC_SYS_CLKSOURCE_PLL1);

	/* Wait till System clock is ready */
	while (LL_RCC_GetSysClkSource() != LL_RCC_SYS_CLKSOURCE_STATUS_PLL1)
	{

	}
	LL_RCC_SetAHBPrescaler(LL_RCC_AHB_DIV_2);
	LL_RCC_SetAPB1Prescaler(LL_RCC_APB1_DIV_1);
	LL_RCC_SetAPB2Prescaler(LL_RCC_APB2_DIV_1);
	LL_RCC_SetAPB3Prescaler(LL_RCC_APB3_DIV_1);
	LL_RCC_SetAPB4Prescaler(LL_RCC_APB4_DIV_1);
	LL_SetSystemCoreClock(280000000);

	/* Update the time base */
	if (HAL_InitTick(TICK_INT_PRIORITY) != HAL_OK)
	{
		SystemClock_Error_Handler();
	}
	LL_RCC_HSE_EnableCSS();
	LL_RCC_LSE_EnableCSS();
}

 

 

 

    This topic has been closed for replies.
    Best answer by AScha.3

    D-cache should not make things more difficult , so i recommend to not use it.

    Just in Cube do not enable it, later can enable and do the cache management - was my idea, to make it more easy.

    ->

    AScha3_0-1703841692534.png

    I forgot: you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

     .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
     {
     	. = ALIGN(32);
     *(.RAM_SRD_section) 
     . = ALIGN(32);
     } >RAM_SRD
    
     /* Remove information from the compiler libraries */

    And after mod the xxx_flash.ld file you see it is used ? ->

    AScha3_1-1703843164263.png

    Now it should work.   8)

     

    2 replies

    Super User
    December 28, 2023

    H7A3 has D-cache - how you manage this /with dma ? (see nothing in your text)

    Simple solution: switch off (or not enable) D-cache . (NOW ! if bdma working...can work on this, cache management.)

    If you have your data in ram, bdma cannot work:

    AScha3_0-1703788868192.png

    Then to put your data array to a certain ram , write (example for my H7 :(

     

     

    __attribute__((section(".RAM_SRD_section"))) int32_t playbuf[4096];

     

    If the ram section to use is not in the linker script, you have to modify it, to have RAM_SRD , + give it a name.

    For bdma only SRD area is useful:

    AScha3_1-1703788969581.png

    Now look at your xxx_FLASH.ld file and edit...add new section, if not there (i have no H7A3 with this, so i cannot just give you one);

    My H7A3 xx.ld file just has:

     

    /* Memories definition */
    MEMORY
    {
     DTCMRAM (xrw) : ORIGIN = 0x20000000, LENGTH = 128K
     ITCMRAM (xrw) : ORIGIN = 0x00000000, LENGTH = 64K
     RAM (xrw) : ORIGIN = 0x24000000, LENGTH = 1024K
     FLASH (rx) : ORIGIN = 0x8000000, LENGTH = 2048K
    }

     

     

    -> modify -> add srd ram

     

    /* Memories definition */
    MEMORY
    {
     DTCMRAM (xrw) : ORIGIN = 0x20000000, LENGTH = 128K
     ITCMRAM (xrw) : ORIGIN = 0x00000000, LENGTH = 64K
     RAM (xrw) : ORIGIN = 0x24000000, LENGTH = 1024K
     RAM_SRD (xrw) : ORIGIN = 0x38000000, LENGTH = 32K
     FLASH (rx) : ORIGIN = 0x8000000, LENGTH = 2048K
    }

     

     

    ed.: I forgot:

     you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

     .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
     {
     	. = ALIGN(32);
     *(.RAM_SRD_section) 
     . = ALIGN(32);
     } >RAM_SRD
    
     /* Remove information from the compiler libraries */

     

     ...maybe like this ... then try.

    mageneAuthor
    Graduate II
    December 29, 2023

    @AScha.3 That helps a lot but I'm not there yet.  I modified my test code to look like this:

    //Per the reference manual (RM0455), BDMA only has access to SRD ram section
    __attribute__((section(".RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";
    
    TEST(CPFTestGroup, LPUARTTest)
    {
    	const uint32_t getFPCmdSize = sizeof(getFPCmd);
    
    	SystemClock::configSystemClock();
    	MX_LPUART1_UART_Init();
    	MX_BDMA2_Init();
    
    	for (int i = 0; i < 128; i++)
    	{
    		LL_LPUART_TransmitData8(LPUART1, i);
    		LL_mDelay(1);
    	}
    
     //Setup TX
    	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
    		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
    	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, 16);
    
    	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
    	LL_LPUART_EnableDMAReq_TX(LPUART1);
    
    	LL_mDelay(2000);
    	std::cout << "LPUARTTest Done" << std::endl;
    }

    and modified the STM32H7A3LI_flash.lds file using VisualGDB to look like this

    /* Generated by LinkerScriptGenerator [http://visualgdb.com/tools/LinkerScriptGenerator]
     * Target: STM32H7A3LI
     * The file is provided under the BSD license.
     */
    
    ENTRY(Reset_Handler)
    
    MEMORY
    {
    	FLASH (RX) : ORIGIN = 0x08000000, LENGTH = 2M
    	SRAM (RWX) : ORIGIN = 0x24000000, LENGTH = 1M
    	DTCMRAM (RWX) : ORIGIN = 0x20000000, LENGTH = 128K
    	ITCMRAM (RWX) : ORIGIN = 0x00000000, LENGTH = 64K
    	/* --- begin generated external memories -- */
    	RAM_SRD (RWX) : ORIGIN = 0x38000000, LENGTH = 32K
    	/* --- end generated external memories -- */
    }

    but when I try to disable cache like this

    __attribute__((section(".RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";
    
    TEST(CPFTestGroup, LPUARTTest)
    {
    	SCB_DisableDCache();
    	const uint32_t getFPCmdSize = sizeof(getFPCmd);
    
    	SystemClock::configSystemClock();
    	MX_LPUART1_UART_Init();
    	MX_BDMA2_Init();

    the program immediately throws a "Received a SIGTRAP: Trace/breakpoint trap" exception, seemingly before any code runs, and jumps into the hard fault handler here:

    void __attribute__ ((weak, naked)) HardFault_Handler() 
    {
    	//If you hit the breakpoint below, one of the interrupts was unhandled in your code. 
    	//Define the following function in your code to handle it:
    	//	extern "C" void HardFault_Handler();
    	__asm("bkpt 255");
    	__asm("bx lr");
    }

    Thanks again for the help.  Seems like ST might want to consider an example for this.

    mageneAuthor
    Graduate II
    December 29, 2023

    I got past the hard fault exception by replacing SCB_DisableDCache(); with SCB_InvalidateDCache(); like this but I'm still not seeing any characters on the logic analyzer when I try to transmit with the BDMA module.  Any help will be greatly appreciated.

    Thanks

    //Per the reference manual (RM0455), BDMA only has access to SRD ram section
    __attribute__((section("RAM_SRD_section"))) ALIGN_32BYTES(std::uint8_t getFPCmd[]) = "abcdefghijklmnopqrstuvwxyz";
    
    TEST(CPFTestGroup, LPUARTTest)
    {
    	SCB_InvalidateDCache();
    	const uint32_t getFPCmdSize = sizeof(getFPCmd);
    
    	SystemClock::configSystemClock();
    	MX_LPUART1_UART_Init();
    	MX_BDMA2_Init();
    
    	for (int i = 0; i < 128; i++)
    	{
    		LL_LPUART_TransmitData8(LPUART1, i);
    		LL_mDelay(1);
    	}
    
     //Setup TX
    	LL_BDMA_ConfigAddresses(BDMA2, LL_BDMA_CHANNEL_1, reinterpret_cast<uint32_t>(getFPCmd),
    		LL_LPUART_DMA_GetRegAddr(LPUART1, LL_LPUART_DMA_REG_DATA_TRANSMIT),	LL_BDMA_DIRECTION_MEMORY_TO_PERIPH);
    	LL_BDMA_SetDataLength(BDMA2, LL_BDMA_CHANNEL_1, 16);
    
    	LL_BDMA_EnableChannel(BDMA2, LL_BDMA_CHANNEL_1);
    	LL_LPUART_EnableDMAReq_TX(LPUART1);
    
    	LL_mDelay(2000);
    	std::cout << "LPUARTTest Done" << std::endl;
    }

     

    AScha.3Answer
    Super User
    December 29, 2023

    D-cache should not make things more difficult , so i recommend to not use it.

    Just in Cube do not enable it, later can enable and do the cache management - was my idea, to make it more easy.

    ->

    AScha3_0-1703841692534.png

    I forgot: you have to add this to the xxx_flash.ld file   (just before the comment line : /*Remove... )

     .RAM_SRD_sec :					/* (NOLOAD) <- no initialize */ 
     {
     	. = ALIGN(32);
     *(.RAM_SRD_section) 
     . = ALIGN(32);
     } >RAM_SRD
    
     /* Remove information from the compiler libraries */

    And after mod the xxx_flash.ld file you see it is used ? ->

    AScha3_1-1703843164263.png

    Now it should work.   8)