Skip to main content
Graduate
December 23, 2024
Question

STM32H750 assembly: same delay loops, but one takes 5x longer?

  • December 23, 2024
  • 2 replies
  • 1030 views

Hello,

I have simple project in assembly and can't understand/explain why one time delay loop takes much longer than the other one (LEDs should be blinking one on, second off and vice versa). But one such state takes approx. 5 times more time than the other, although loops seem to be written equally demanding

Maybe I'm doing something obvious wrong. BTW, we have also spotted such behaviour also on some other projects - one loop taking much more time, although it should take same time. I've tried running the code from Flash or RAM and same happens in both situations.

Any hint, advice? Thanks... 

 

Short description: main loop has two identical SW delay loops, in between on/off state is written to 3 variables. SysTick Handler is triggered each ms and reads the state of variables and reflects to real outputs. Green LED is on approx. 5 times longer that red one.

 

/*
 * Main.s
 */


 .syntax unified
 .cpu cortex-m7
 .thumb


///////////////////////////////////////////////////////////////////////////////
// Definitions
///////////////////////////////////////////////////////////////////////////////
// Definitions section. Define all the registers and
// constants here for code readability.

// Constants

	.equ LEDDELAY, 6400

// For LOOPTC Software delay
// By default 64MHz internal HSI clock is enabled
// Internal loop takes N cycles

// Register Addresses
// You can find the base addresses for all peripherals from Memory Map section 2.3.2
// RM0433 on page 131. Then the offsets can be found on their relevant sections.

// RCC base address is 0x58024400
// AHB4ENR register offset is 0xE0
	.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg

// GPIOA base address is 0x58020000
	.equ GPIOA_BASE, 0x58020000 // GPIOI base address)

// GPIOI base address is 0x58022000
	.equ GPIOI_BASE, 0x58022000 // GPIOI base address)

// GPIOJ base address is 0x58022000
	.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)

// MODER register offset is 0x00
	.equ GPIOx_MODER, 0x00 // GPIOx port mode register
// ODR register offset is 0x14
	.equ GPIOx_ODR, 0x14 // GPIOx output data register
// BSSR register offset is 0x18
	.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register


// Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
	.equ LED2_OFF, 0x00002000 	// Setting pin to 1 -> LED is off
	.equ LED2_ON, 	 0x20000000 	// Setting pin to 0 -> LED is on

// Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
	.equ LED1_OFF, 0x00000004 	// Setting pin to 1 -> LED is off
	.equ LED1_ON, 	 0x00040000 	// Setting pin to 0 -> LED is on

// Values for BSRR register - pin PA3: PA3
	.equ PA3_ON, 0x00000008 	// Setting pin to 1
	.equ PA3_OFF, 0x00080000 	// Setting pin to 0

// Vector table offset register definition
// Important for relocated Vector table on running from RAM
	.equ VTOR,0xE000ED08

// SysTick Timer definitions
	.equ SCS_BASE,0xe000e000
	.equ SCS_SYST_CSR,0x10// Control/Status register
	.equ SCS_SYST_RVR,0x14// Value to countdown from
	.equ SCS_SYST_CVR,0x18// Current value

	.equ	 SYSTICK_RELOAD_1MS,	63999 //1 msec at 64MHz ...


// Start of data section
 		.data

 		.align

LED1: .word 0		// LED1 State (Green)
LED2: .word 0		// LED2 State (Red)
PA3: .word 0		// PA3 pin State



// Start of text section
 .text

 .type main, %function
 .global main

 	 	.align
main:
 
	 bl 	INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3

 ldr r1,=LED1
 ldr r2,=LED2
 ldr r3,=PA3

 mov r4,#0xff // LED(Pin) On value
 mov r5,#0 // LED(Pin) Off value

loop:

		str r4,[r1] // Vklop LED1 diode (Green)
		str r5,[r2] // Izklop LED2 diode (Red)
		str r4,[r3] // Vklop PA3
// bl WRITEOUT // Prenesi na prikljucke

@ delay half cycle
 mov r0,#500
ZAN1: ldr r6, =LEDDELAY
ZAN1n: subs r6, r6,#1
 bne ZAN1n
 subs r0,r0,#1
 bne ZAN1

		str r5,[r1] // Izklop LED1 diode (Green)
		str r4,[r2] // Vklop LED2 diode (Red)
		str r5,[r3] // Izklop PA3
// bl WRITEOUT // Prenesi na prikljucke

@ delay half cycle
 mov r0,#500
ZAN2: ldr r6, =LEDDELAY
ZAN2n: subs r6, r6,#1
 bne ZAN2n
 subs r0,r0,#1
 bne ZAN2

		b loop // skok na vrstico loop:


__end: 	b 	__end


INIT:
 		push {r0,r1,lr}

 bl INIT_IO

// If running code from FLASH comment next 3 lines!!!
 ldr r1, =VTOR // Set Vector table addr. to 0x24000000
		ldr r0, =0x24000000
		str r0, [r1]

		bl INIT_TC_PSP // Priprava SysTick časovnika s prek

	 	pop {r0,r1,pc}

INIT_IO:
 	push {r5, r6, lr}

	// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
	ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
	ldr r5, [r6] // Read its content to r5
	orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
	orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
	str r5, [r6] // Store result in peripheral clock register

	// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
	ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
	ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
	and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
	orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
	str r5, [r6] // Store result in GPIO MODER register

	// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
	ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
	ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
	and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
	orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
	str r5, [r6] // Store result in GPIO MODER register

	// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
	ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
	ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
	and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
	orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
	str r5, [r6] // Store result in GPIO MODER register

 	pop {r5, r6, pc}

INIT_TC_PSP:
	 	push {r0, r1, lr}
		ldr r1, =SCS_BASE

		ldr r0, =SYSTICK_RELOAD_1MS
		str r0, [r1, #SCS_SYST_RVR]

		mov r0, #0
		str r0, [r1, #SCS_SYST_CVR]

		mov r0, #0b111 // Set TickInt to 1 as well
		str r0, [r1, #SCS_SYST_CSR]

	 	pop {r0, r1, pc}

.global SysTick_Handler
.section .text.SysTick_Handler,"ax",%progbits
.type SysTick_Handler, %function

SysTick_Handler:

		push {r3, r4, r5, r6, lr}

// -----------------------------------
// Set LED1 from LED1 variable
		ldr r3,=LED1 // Load LED1 value
		ldr r4,[r3]

		cmp r4,#0
		beq L1ON

		mov r5, #LED1_OFF
		b CONT1
L1ON: 	mov r5, #LED1_ON

CONT1: // Set GPIOJ Pins through BSRR register
		ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register

// -----------------------------------
// Set LED2 from LED2 variable
		ldr r3,=LED2 // Load LED1 value
		ldr r4,[r3]

		cmp r4,#0
		beq L2ON

		mov r5, #LED2_OFF
		b CONT2
L2ON: 	mov r5, #LED2_ON

CONT2: // Set GPIOI Pins through BSRR register
		ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register

// -----------------------------------
// Set PA3 from PA3 variable
		ldr r3,=PA3 // Load PA3 value
		ldr r4,[r3]

		cmp r4,#0
		beq L3ON

		mov r5, #PA3_OFF
		b CONT3
L3ON: 	mov r5, #PA3_ON

CONT3: // Set GPIOA Pins through BSRR register
		ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register

RET: 	pop {r3, r4, r5, r6, pc }

 

    This topic has been closed for replies.

    2 replies

    Graduate II
    December 23, 2024

    When checking timing, drive the pins directly so you can scope them. I'm not sure of the value of establishing different beat frequencies from setting variables, and then actioning them on a 1 KHz tick.

    You can measure cycles via DWT CYCCNT

    Check alignments of branch targets.

    Perhaps put the delay in subroutine, to confirm that the same code in the same location has consistent behaviour.

    bullyAuthor
    Graduate
    December 24, 2024

    Hello,

    thanks for tips.

    Can I ask for more info about "alignments of branch targets"?

     

    I've extended the code with CYCCNT measurements, but I guess I'm doing something wrong, because it measures only for the first time, then it seems to repeat same value.

    Is there any more detailed docs about DWT counters? I'm also interested in other counters for pipeline performance. I have never suceeded to read others - only CYCCNT and only first time in the code...

     

    Thanks.

    /*
     * Main.s
     */
    
    
     .syntax unified
     .cpu cortex-m7
     .thumb
    
    
    ///////////////////////////////////////////////////////////////////////////////
    // Definitions
    ///////////////////////////////////////////////////////////////////////////////
    // Definitions section. Define all the registers and
    // constants here for code readability.
    
    // Constants
    
    	.equ LEDDELAY, 64000
    
    // For LOOPTC Software delay
    // By default 64MHz internal HSI clock is enabled
    // Internal loop takes N cycles
    
    // Register Addresses
    // You can find the base addresses for all peripherals from Memory Map section 2.3.2
    // RM0433 on page 131. Then the offsets can be found on their relevant sections.
    
    // RCC base address is 0x58024400
    // AHB4ENR register offset is 0xE0
    	.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg
    
    // GPIOA base address is 0x58020000
    	.equ GPIOA_BASE, 0x58020000 // GPIOI base address)
    
    // GPIOI base address is 0x58022000
    	.equ GPIOI_BASE, 0x58022000 // GPIOI base address)
    
    // GPIOJ base address is 0x58022000
    	.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)
    
    // MODER register offset is 0x00
    	.equ GPIOx_MODER, 0x00 // GPIOx port mode register
    // ODR register offset is 0x14
    	.equ GPIOx_ODR, 0x14 // GPIOx output data register
    // BSSR register offset is 0x18
    	.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register
    
    
    // Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
    	.equ LED2_OFF, 0x00002000 	// Setting pin to 1 -> LED is off
    	.equ LED2_ON, 	 0x20000000 	// Setting pin to 0 -> LED is on
    
    // Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
    	.equ LED1_OFF, 0x00000004 	// Setting pin to 1 -> LED is off
    	.equ LED1_ON, 	 0x00040000 	// Setting pin to 0 -> LED is on
    
    // Values for BSRR register - pin PA3: PA3
    	.equ PA3_ON, 0x00000008 	// Setting pin to 1
    	.equ PA3_OFF, 0x00080000 	// Setting pin to 0
    
    // Vector table offset register definition
    // Important for relocated Vector table on running from RAM
    	.equ VTOR,0xE000ED08
    
    // SysTick Timer definitions
    	.equ SCS_BASE,0xe000e000
    	.equ SCS_SYST_CSR,0x10// Control/Status register
    	.equ SCS_SYST_RVR,0x14// Value to countdown from
    	.equ SCS_SYST_CVR,0x18// Current value
    
    	.equ	 SYSTICK_RELOAD_1MS,	63999 //1 msec at 64MHz ...
    
    // Register Addresses
    
    	.equ DWT_BASE, 	0xE0001000 // DWT Base address
    
    	.equ DWT_CTRL, 	0x00 // DWT_CTRL reg (RM0433, pp.3209)
    	.equ DWT_CYCCNT, 	0x04 // increments on each clock cycle when the processor is not halted in debug state.
    	.equ DWT_CPICNT, 	0x08 // additional cycles required to execute multi-cycle instructions, and instruction fetch stalls
    	.equ DWT_EXCCNT, 	0x0C // count the total cycles spent in interrupt processing (cycles spent performing exception entry and exit procedures)
    	.equ DWT_SLPCNT, 	0x10 // count the total number of cycles during which the processor is sleeping (cycles spent sleeping)
    	.equ DWT_LSUCNT, 	0x14 // counts the total number of cycles that the processor is processing an LSU operation (cycles spent waiting for loads and stores to complete)
    								 // For example, an LDR that takes two cycles to complete increments this counter one cycle.
    								 // Equivalently, an LDR that stalls for two cycles (and so takes four cycles), increments counter three times.
    	.equ DWT_FOLDCNT, 0x18 // count the total number of folded instructions (cycles saved by instructions which execute in zero cycles)
    								 // This counts 1 for each instruction that takes 0 cycles.
    
    // If the processor configuration includes the DWT profiling counters, the instruction count can be calculated as:
    
    // instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
    
    
    	.equ DWT_LAR, 	0xFB0 // DWT_LAR DWT_LAR = 0xC5ACCE55; // unlock (CM7)
    	.equ DEMCR, 	 0xE000EDFC // SCB_DEMCR |= 0x01000000;
    
    // Start of data section
     		.data
    
     		.align
    
    LED1: .word 0		// LED1 State (Green)
    LED2: .word 0		// LED2 State (Red)
    PA3: .word 0		// PA3 pin State
    
    
    
    // Start of text section
     .text
    
     .type main, %function
     .global main
    
     	 	.align
    main:
     
    	 bl 	INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3
    
    		bl INIT_CNT
    		ldr r0, =DWT_BASE
    
     ldr r1,=LED1
     ldr r2,=LED2
     ldr r3,=PA3
    
     mov r4,#0xff // LED(Pin) On value
     mov r5,#0 // LED(Pin) Off value
    
    loop:
    
    		str r4,[r1] // Vklop LED1 diode (Green)
    		str r5,[r2] // Izklop LED2 diode (Red)
    		str r4,[r3] // Vklop PA3
    // bl WRITEOUT // Prenesi na prikljucke
    
    		bl INIT_CNT
    		bl RESET_CNT
    		bl ENABLE_CNT
    
    		// Read DWT Counter before value
    		ldr r8, [r0,#DWT_CYCCNT]
    
    @ delay half cycle
     mov r0,#500
    ZAN1: ldr r6, =LEDDELAY
    ZAN1n: subs r6, r6,#1
     bne ZAN1n
     subs r0,r0,#1
     bne ZAN1
    
    // Read DWT Counter after value
    		ldr r10, [r0,#DWT_CYCCNT]
    
    		sub r8,r10,r8 // Difference in r0
    
    
    		str r5,[r1] // Izklop LED1 diode (Green)
    		str r4,[r2] // Vklop LED2 diode (Red)
    		str r5,[r3] // Izklop PA3
    // bl WRITEOUT // Prenesi na prikljucke
    
    
    		bl INIT_CNT
    		bl RESET_CNT
    		bl ENABLE_CNT
    
    		// Read DWT Counter before value
    		ldr r9, [r0,#DWT_CYCCNT]
    
    @ delay half cycle
     mov r0,#500
    ZAN2: ldr r6, =LEDDELAY
    ZAN2n: subs r6, r6,#1
     bne ZAN2n
     subs r0,r0,#1
     bne ZAN2
    
     // Read DWT Counter after value
    		ldr r10, [r0,#DWT_CYCCNT]
    
    		sub r11,r10,r9 // Difference in r0
    
    
    		b loop // skok na vrstico loop:
    
    
    __end: 	b 	__end
    
    
    INIT:
     		push {r0,r1,lr}
    
     bl INIT_IO
    
    // If running code from FLASH comment next 3 lines!!!
     ldr r1, =VTOR // Set Vector table addr. to 0x24000000
    		ldr r0, =0x24000000
    		str r0, [r1]
    
    		bl INIT_TC_PSP // Priprava SysTick časovnika s prek
    
    	 	pop {r0,r1,pc}
    
    INIT_IO:
     	push {r5, r6, lr}
    
    	// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
    	ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
    	ldr r5, [r6] // Read its content to r5
    	orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
    	orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
    	str r5, [r6] // Store result in peripheral clock register
    
    	// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
    	ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
    	and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
    	orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
    	str r5, [r6] // Store result in GPIO MODER register
    
    	// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
    	ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
    	and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
    	orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
    	str r5, [r6] // Store result in GPIO MODER register
    
    	// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
    	ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
    	and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
    	orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
    	str r5, [r6] // Store result in GPIO MODER register
    
     	pop {r5, r6, pc}
    
    INIT_TC_PSP:
    	 	push {r0, r1, lr}
    		ldr r1, =SCS_BASE
    
    		ldr r0, =SYSTICK_RELOAD_1MS
    		str r0, [r1, #SCS_SYST_RVR]
    
    		mov r0, #0
    		str r0, [r1, #SCS_SYST_CVR]
    
    		mov r0, #0b111 // Set TickInt to 1 as well
    		str r0, [r1, #SCS_SYST_CSR]
    
    	 	pop {r0, r1, pc}
    
    .global SysTick_Handler
    .section .text.SysTick_Handler,"ax",%progbits
    .type SysTick_Handler, %function
    
    SysTick_Handler:
    
    		push {r3, r4, r5, r6, lr}
    
    // -----------------------------------
    // Set LED1 from LED1 variable
    		ldr r3,=LED1 // Load LED1 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L1ON
    
    		mov r5, #LED1_OFF
    		b CONT1
    L1ON: 	mov r5, #LED1_ON
    
    CONT1: // Set GPIOJ Pins through BSRR register
    		ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    // -----------------------------------
    // Set LED2 from LED2 variable
    		ldr r3,=LED2 // Load LED1 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L2ON
    
    		mov r5, #LED2_OFF
    		b CONT2
    L2ON: 	mov r5, #LED2_ON
    
    CONT2: // Set GPIOI Pins through BSRR register
    		ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    // -----------------------------------
    // Set PA3 from PA3 variable
    		ldr r3,=PA3 // Load PA3 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L3ON
    
    		mov r5, #PA3_OFF
    		b CONT3
    L3ON: 	mov r5, #PA3_ON
    
    CONT3: // Set GPIOA Pins through BSRR register
    		ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    RET: 	pop {r3, r4, r5, r6, pc }
    
    INIT_CNT:
    	 	push {r0-r2, lr}
    
    
    		// Added in 2024 :
    		ldr r1,=DWT_BASE
    		ldr r0,=0xC5ACCE55
    		str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
    
    		ldr r1,=DEMCR
    		ldr r0,[r1]
    		orr r0,r0,#0x01000000
    		str r0,[r1] // *SCB_DEMCR |= 0x01000000;
    		// End: Added in 2024 :
    
    
     	mov r0,#0
     	ldr r1, =DWT_BASE
    
    // Disable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		bic r2,r2,#1 // Disabling CYCCNTENA bit
    		str r2, [r1,#DWT_CTRL]
    
    // Reset DWT Counters
    		str r0, [r1,#DWT_CYCCNT]
    		str r0, [r1,#DWT_CPICNT]
    		str r0, [r1,#DWT_EXCCNT]
    		str r0, [r1,#DWT_SLPCNT]
    		str r0, [r1,#DWT_LSUCNT]
    		str r0, [r1,#DWT_FOLDCNT]
    
    	 	pop {r0-r2, pc}
    
    RESET_CNT:
    	 	push {r0-r2, lr}
    
    	 	mov r0,#0
    	 	ldr r1, =DWT_BASE
    
    
    // Disable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		bic r2,r2,#1 // Disabling CYCCNTENA bit
    		str r2, [r1,#DWT_CTRL]
    
    // Reset DWT Counters
    		str r0, [r1,#DWT_CYCCNT]
    		str r0, [r1,#DWT_CPICNT]
    		str r0, [r1,#DWT_EXCCNT]
    		str r0, [r1,#DWT_SLPCNT]
    		str r0, [r1,#DWT_LSUCNT]
    		str r0, [r1,#DWT_FOLDCNT]
    
    	 	pop {r0-r2, pc}
    
    ENABLE_CNT:
    	 	push {r0-r2, lr}
    
    		ldr r0,=0xC5ACCE55
    		str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
    
    	 	ldr r1, =DWT_BASE
    
    // Enable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		orr r2,r2,#1 // Enabling CYCCNTENA bit
    		str r2, [r1,#DWT_CTRL]
    
    	 	pop {r0-r2, pc}

     

    bullyAuthor
    Graduate
    December 27, 2024

    Hello,

    I've found some errors in my code, but still cannot explain weird behaviour.

    If I uncomment two lines of reading CYCCNT counter before entering SW delay nested loop (there are two such loops in program), then both LEDs blink for the same time.

    If I comment thoise sections, green LED is on much longer (aprox. 8x) than red LED (which stays in the same time interval).

    I really can't understand or explain such behaviour?

    Also, I've managed to activate also other counters and get surprising results.

    I've measured 32 050 210 cycles on second delay loop and have calculated number of executed instructions according to formula:



     

    instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT

     

    and get 32 049 740 instructions, which is approx. CPI of 1.

    But if I calculate briefly, the number in one nested delay loop should be above 64 000 000.

    Is this difference because of two-issue pipeline?

     

    Anyone has an idea what might be wrong?

    Thanks in advance.

     

    I'm including corrected code :

     

    /*
     * Main.s
     *
     * Comment: both time delays appear equal if
     * at least one of CYCCNT reading two lines codes are uncommented:
     * Read DWT Counter before value 1 or Read DWT Counter before value 2.
     *
     * Otherwise: Green LED is switched on much more time that Red LED.
     */
    
    
     .syntax unified
     .cpu cortex-m7
     .thumb
    
    
    ///////////////////////////////////////////////////////////////////////////////
    // Definitions
    ///////////////////////////////////////////////////////////////////////////////
    // Definitions section. Define all the registers and
    // constants here for code readability.
    
    // Constants
    
    	.equ LEDDELAY, 64000
    
    // For LOOPTC Software delay
    // By default 64MHz internal HSI clock is enabled
    // Internal loop takes N cycles
    
    // Register Addresses
    // You can find the base addresses for all peripherals from Memory Map section 2.3.2
    // RM0433 on page 131. Then the offsets can be found on their relevant sections.
    
    // RCC base address is 0x58024400
    // AHB4ENR register offset is 0xE0
    	.equ RCC_AHB4ENR, 0x580244E0 // RCC AHB4 peripheral clock reg
    
    // GPIOA base address is 0x58020000
    	.equ GPIOA_BASE, 0x58020000 // GPIOI base address)
    
    // GPIOI base address is 0x58022000
    	.equ GPIOI_BASE, 0x58022000 // GPIOI base address)
    
    // GPIOJ base address is 0x58022000
    	.equ GPIOJ_BASE, 0x58022400 // GPIOJ base address)
    
    // MODER register offset is 0x00
    	.equ GPIOx_MODER, 0x00 // GPIOx port mode register
    // ODR register offset is 0x14
    	.equ GPIOx_ODR, 0x14 // GPIOx output data register
    // BSSR register offset is 0x18
    	.equ GPIOx_BSRR, 0x18 // GPIOx port set/reset register
    
    
    // Values for BSRR register - pin PI13: LED is on, when GPIO is off (Red)
    	.equ LED2_OFF, 0x00002000 	// Setting pin to 1 -> LED is off
    	.equ LED2_ON, 	 0x20000000 	// Setting pin to 0 -> LED is on
    
    // Values for BSRR register - pin PJ2: LED is on, when GPIO is off (Green)
    	.equ LED1_OFF, 0x00000004 	// Setting pin to 1 -> LED is off
    	.equ LED1_ON, 	 0x00040000 	// Setting pin to 0 -> LED is on
    
    // Values for BSRR register - pin PA3: PA3
    	.equ PA3_ON, 0x00000008 	// Setting pin to 1
    	.equ PA3_OFF, 0x00080000 	// Setting pin to 0
    
    // Vector table offset register definition
    // Important for relocated Vector table on running from RAM
    	.equ VTOR,0xE000ED08
    
    // SysTick Timer definitions
    	.equ SCS_BASE,0xe000e000
    	.equ SCS_SYST_CSR,0x10// Control/Status register
    	.equ SCS_SYST_RVR,0x14// Value to countdown from
    	.equ SCS_SYST_CVR,0x18// Current value
    
    	.equ	 SYSTICK_RELOAD_1MS,	63999 //1 msec at 64MHz ...
    
    // Register Addresses
    
    	.equ DWT_BASE, 	0xE0001000 // DWT Base address
    
    	.equ DWT_CTRL, 	0x00 // DWT_CTRL reg (RM0433, pp.3209)
    	.equ DWT_CYCCNT, 	0x04 // increments on each clock cycle when the processor is not halted in debug state.
    	.equ DWT_CPICNT, 	0x08 // additional cycles required to execute multi-cycle instructions, and instruction fetch stalls
    	.equ DWT_EXCCNT, 	0x0C // count the total cycles spent in interrupt processing (cycles spent performing exception entry and exit procedures)
    	.equ DWT_SLPCNT, 	0x10 // count the total number of cycles during which the processor is sleeping (cycles spent sleeping)
    	.equ DWT_LSUCNT, 	0x14 // counts the total number of cycles that the processor is processing an LSU operation (cycles spent waiting for loads and stores to complete)
    								 // For example, an LDR that takes two cycles to complete increments this counter one cycle.
    								 // Equivalently, an LDR that stalls for two cycles (and so takes four cycles), increments counter three times.
    	.equ DWT_FOLDCNT, 0x18 // count the total number of folded instructions (cycles saved by instructions which execute in zero cycles)
    								 // This counts 1 for each instruction that takes 0 cycles.
    
    	.equ DWT_CTRL_ENABLE_CNTs, 0x003f0001 // Enable bits 16-21 and 1
    // If the processor configuration includes the DWT profiling counters, the instruction count can be calculated as:
    
    // instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
    	.equ DWT_LAR, 	0xFB0 // DWT_LAR DWT_LAR = 0xC5ACCE55; // unlock (CM7)
    	.equ DEMCR, 	 0xE000EDFC // SCB_DEMCR |= 0x01000000;
    
    // Start of data section
     		.data
    
     		.align
    LED1: .word 0		// LED1 State (Green)
    LED2: .word 0		// LED2 State (Red)
    PA3: .word 0		// PA3 pin State
    
    
    
    // Start of text section
     .text
    
     .type main, %function
     .global main
    
     	 	.align
    main:
    
    	 bl 	INIT // Priprava V/I in sistemskih naprav za kontrolo LED diod in PA3
    
    		//bl INIT_CNT
    		//bl RESET_CNT
    
     ldr r1,=LED1
     ldr r2,=LED2
     ldr r3,=PA3
    
     mov r4,#0xff // LED(Pin) On value
     mov r5,#0 // LED(Pin) Off value
    
    loop:
    
    		str r4,[r1] // Izklop LED1 diode (Green)
    		str r5,[r2] // Vklop LED2 diode (Red)
    		str r4,[r3] // Vklop PA3 High
    // bl WRITEOUT // Prenesi na prikljucke
    
    		//bl RESET_CNT
    
    		// Read DWT Counter before value 1
    		// Uncommenting following 2 lines code works (both delays are same,
    		//											otherwise one delay takes much longer ???
    		//ldr r0, =DWT_BASE
    		//ldr r8, [r0,#DWT_CYCCNT]
    
    @ delay half cycle
     mov r0,#500
    ZAN1: ldr r6, =LEDDELAY
    ZAN1n: subs r6, r6,#1
     bne ZAN1n
     subs r0,r0,#1
     bne ZAN1
    
    // Read DWT Counter after value
    		//ldr r0, =DWT_BASE
    		//ldr r10, [r0,#DWT_CYCCNT]
    		//sub r8,r10,r8 // Difference in r8
    
    // Calculate and read other counters
    		//mov r0,r8
    		//bl CALC_CNT
    		//mov r11,r0 // Number of instr. in r11
    
    
    		str r5,[r1] // Vklop LED1 diode (Green)
    		str r4,[r2] // Izklop LED2 diode (Red)
    		str r5,[r3] // Izklop PA3 Low
    // bl WRITEOUT // Prenesi na prikljucke
    
    
    		// bl RESET_CNT
    
    		// Read DWT Counter before value 2
    		// Uncommenting following 2 lines code works (both delays are same,
    		//											otherwise one delay takes much longer ???
    		ldr r0, =DWT_BASE
    		ldr r9, [r0,#DWT_CYCCNT]
    
    @ delay half cycle
     mov r0,#500
    ZAN2: ldr r6, =LEDDELAY
    ZAN2n: subs r6, r6,#1
     bne ZAN2n
     subs r0,r0,#1
     bne ZAN2
    
     // Read DWT Counter after value
    //		ldr r0, =DWT_BASE
    //		ldr r10, [r0,#DWT_CYCCNT]
    
    // 	sub r10,r10,r9 // Difference in r10
    
    // Calculate and read other counters
    //		mov r0,r10
    //		bl CALC_CNT
    //		mov r12,r0 // Number of instr. in r12
    
    		b loop // skok na vrstico loop:
    
    
    __end: 	b 	__end
    
    
    INIT:
     		push {r0,r1,lr}
    
     bl INIT_IO
    
    // If running code from FLASH comment next 3 lines!!!
     ldr r1, =VTOR // Set Vector table addr. to 0x24000000
    		ldr r0, =0x24000000
    		str r0, [r1]
    
    		bl INIT_TC_PSP // Priprava SysTick časovnika s prek
    
    	 	pop {r0,r1,pc}
    
    INIT_IO:
     	push {r5, r6, lr}
    
    	// Enable GPIOA,I,J Peripheral Clock (bit 8 in AHB4ENR register)
    	ldr r6, = RCC_AHB4ENR // Load peripheral clock reg address to r6
    	ldr r5, [r6] // Read its content to r5
    	orr r5, #0x00000300 // Set bits 8 and 9 to enable GPIOI,J clock
    	orr r5, #0x00000001 // Set bits 1 to enable GPIOA clock
    	str r5, [r6] // Store result in peripheral clock register
    
    	// Make GPIOA Pin3 as output pin (bits 7:6 in MODER register)
    	ldr r6, =GPIOA_BASE // Load GPIOA BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOA_MODER content to r5
    	and r5, #0xFFFFFF3F // Clear bits 7-6 for PA3
    	orr r5, #0x00000040 // Write 01 to bits 7-6 for PA3
    	str r5, [r6] // Store result in GPIO MODER register
    
    	// Make GPIOI Pin13 as output pin (bits 27:26 in MODER register)
    	ldr r6, =GPIOI_BASE // Load GPIOI BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOI_MODER content to r5
    	and r5, #0xF3FFFFFF // Clear bits 27-26 for P13
    	orr r5, #0x04000000 // Write 01 to bits 27-26 for P13
    	str r5, [r6] // Store result in GPIO MODER register
    
    	// Make GPIOJ Pin2 as output pin (bits 5:4 in MODER register)
    	ldr r6, =GPIOJ_BASE // Load GPIOJ BASE address to r6
    	ldr r5, [r6,#GPIOx_MODER] // Read GPIOJ_MODER content to r5
    	and r5, #0xFFFFFFCF // Clear bits 5-4 for P2
    	orr r5, #0x00000010 // Write 01 to bits 5-4 for PJ2
    	str r5, [r6] // Store result in GPIO MODER register
    
     	pop {r5, r6, pc}
    
    INIT_TC_PSP:
    	 	push {r0, r1, lr}
    		ldr r1, =SCS_BASE
    
    		ldr r0, =SYSTICK_RELOAD_1MS
    		str r0, [r1, #SCS_SYST_RVR]
    
    		mov r0, #0
    		str r0, [r1, #SCS_SYST_CVR]
    
    		mov r0, #0b111 // Set TickInt to 1 as well
    		str r0, [r1, #SCS_SYST_CSR]
    
    	 	pop {r0, r1, pc}
    
    .global SysTick_Handler
    .section .text.SysTick_Handler,"ax",%progbits
    .type SysTick_Handler, %function
    
    SysTick_Handler:
    
    		push {r3, r4, r5, r6, lr}
    
    // -----------------------------------
    // Set LED1 from LED1 variable
    		ldr r3,=LED1 // Load LED1 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L1ON
    
    		mov r5, #LED1_OFF
    		b CONT1
    L1ON: 	mov r5, #LED1_ON
    
    CONT1: // Set GPIOJ Pins through BSRR register
    		ldr r6, =GPIOJ_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    // -----------------------------------
    // Set LED2 from LED2 variable
    		ldr r3,=LED2 // Load LED1 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L2ON
    
    		mov r5, #LED2_OFF
    		b CONT2
    L2ON: 	mov r5, #LED2_ON
    
    CONT2: // Set GPIOI Pins through BSRR register
    		ldr r6, =GPIOI_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    // -----------------------------------
    // Set PA3 from PA3 variable
    		ldr r3,=PA3 // Load PA3 value
    		ldr r4,[r3]
    
    		cmp r4,#0
    		beq L3ON
    
    		mov r5, #PA3_OFF
    		b CONT3
    L3ON: 	mov r5, #PA3_ON
    
    CONT3: // Set GPIOA Pins through BSRR register
    		ldr r6, =GPIOA_BASE // Load GPIOD BASE address to r6
    		str r5, [r6,#GPIOx_BSRR] // Write to BSRR register
    
    RET: 	pop {r3, r4, r5, r6, pc }
    
    INIT_CNT:
    	 	push {r0-r2, lr}
    
    
    		// Added in 2024 (but it seems not needed for H7):
    //		ldr r1,=DWT_BASE
    //		ldr r0,=0xC5ACCE55
    //		str r0,[r1,#DWT_LAR] // *DWT_LAR = 0xC5ACCE55; // unlock (CM7)
    
    //		bit [24]	TRCENA Global enable for all DWT and ITM features:
    //				0 = DWT and ITM blocks disabled.
    //				1 = DWT and ITM blocks enabled.
    		ldr r1,=DEMCR
    		ldr r0,[r1]
    		orr r0,r0,#0x01000000
    		str r0,[r1] // *SCB_DEMCR |= 0x01000000;
    		// End: Added in 2024 :
    
    
     	ldr r1, =DWT_BASE
    
    // Disable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		// bic r2,r2,#1 // Disabling CYCCNTENA bit
    		ldr r0,=DWT_CTRL_ENABLE_CNTs // Mask for enabling all Counters bits
    		bic r2,r2,r0 // Disabling all counter bits
    		str r2, [r1,#DWT_CTRL]
    
    // Reset DWT Counters
     	mov r0,#0
    		str r0, [r1,#DWT_CYCCNT]
    		str r0, [r1,#DWT_CPICNT]
    		str r0, [r1,#DWT_EXCCNT]
    		str r0, [r1,#DWT_SLPCNT]
    		str r0, [r1,#DWT_LSUCNT]
    		str r0, [r1,#DWT_FOLDCNT]
    
    	 	pop {r0-r2, pc}
    
    RESET_CNT:
    	 	push {r0-r2, lr}
    
    	 	ldr r1, =DWT_BASE
    
    
    // Disable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		// bic r2,r2,#1 // Disabling CYCCNTENA bit
    		ldr r0,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
    		bic r2,r2,r0 // Disabling all counter bits
    		str r2, [r1,#DWT_CTRL]
    
    // Reset DWT Counters
    	 	mov r0,#0
    		str r0, [r1,#DWT_CYCCNT]
    		str r0, [r1,#DWT_CPICNT]
    		str r0, [r1,#DWT_EXCCNT]
    		str r0, [r1,#DWT_SLPCNT]
    		str r0, [r1,#DWT_LSUCNT]
    		str r0, [r1,#DWT_FOLDCNT]
    
     bl ENABLE_CNT
    	 	pop {r0-r2, pc}
    
    CALC_CNT: // DWT Counter is in r0
    	 	push {r1-r6,r8, lr}
    
     	ldr r1, =DWT_BASE
    
    // Disable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    		// bic r2,r2,#1 // Disabling CYCCNTENA bit
    		ldr r3,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
    		bic r2,r2,r3 // Disabling all counter bits
    		str r2, [r1,#DWT_CTRL]
    
    // instructions executed = DWT_CYCCNT - DWT_CPICNT - DWT_EXCCNT - DWT_SLEEPCNT - DWT_LSUCNT + DWT_FOLDCNT
    // Read other DWT Counters
    
    		ldr r2, [r1,#DWT_CPICNT]
    		sub r8,r0,r2
    
    		ldr r3, [r1,#DWT_EXCCNT]
    		sub r8,r8,r3
    
    		ldr r4, [r1,#DWT_SLPCNT]
    		sub r8,r8,r4
    
    		ldr r5, [r1,#DWT_LSUCNT]
    		sub r8,r8,r5
    
    		ldr r6, [r1,#DWT_FOLDCNT]
    		add r8,r8,r6
    
    		// r8 contains number of instructions
    		mov r0,r8
    
    	 	pop {r1-r6,r8, pc}
    
    
    ENABLE_CNT:
    	 	push {r0-r2, lr}
    
    	 	ldr r1, =DWT_BASE
    
    // Enable DWT Counters
    		ldr r2, [r1,#DWT_CTRL]
    //		orr r2,r2,#1 // Enabling CYCCNTENA bit
    		ldr r0,=DWT_CTRL_ENABLE_CNTs // Enabling all Counters bits
    		orr r2,r2,r0
    		str r2, [r1,#DWT_CTRL]
    
    	 	pop {r0-r2, pc}