Skip to main content
Graduate II
October 16, 2024
Question

H7 OCTOSPI HyperRAM data throughput changing with compilation

  • October 16, 2024
  • 13 replies
  • 3073 views

Heyho,

I'm using the H733 (custom board) / H735 (eval kit) with Infineon's HyperRAM S70KL1281 / S70KL1282 at 100 MHz for some time now, all working great, except for one thing that is very annoying:

  • the data throughput from / to HyperRAM seems to depend on compilation, even though the OCTOSPI peripheral was not changed
  • after some compilations it's about 178 MB/s, after others only 54 MB/s.
  • data throughput is constant for one compilation, no matter if I call the test function at MCU power up or while operating with all other peripherals running
  • no caching anywhere

I'm pretty sure the timing measurements themselves are not faulty: I use the cycle counter and disable all interrupts around the for loops.

  • Is there something wrong in my test function?
  • Is it maybe "only" how the for loop / iteration is compiled?
  • right now I can't get it back to the high speed, so no map / list file
  • my scope here is too old and slow to check the signal lines

Here's the test function, first writing to HyperRAM, then reading:

/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* OCTOSPI HyperRAM test (memory mapped)
 *
 * Fills the complete memory-mapped HyperRAM with a 32-bit pattern, reads it
 * back and compares it, and times both passes with the DWT cycle counter
 * while interrupts are masked.
 *
 * u8CountDown	0    : word i holds the value i ("UP")
 *		else : word i holds 0xFFFFFFFF - i ("DOWN")
 *
 * The measured throughput is stored in flOspiRamSpeedMBpsMmWr (write pass)
 * and flOspiRamSpeedMBpsMmRd (read pass); with HYPER_TEST_UART set the
 * results are also printed via uart_printf().
 *
 * returns	number of mismatching 32-bit words, or HYPER_TEST_NO_INSTANCE
 *		if pOspiHypRam is neither OCTOSPI1 nor OCTOSPI2 (in that case
 *		no memory is touched)
 *
 * NOTE: DWT->CYCCNT is only read here, it must already be running.
 * NOTE: interrupts are enabled unconditionally after each timed pass, even
 *       if they were masked when the function was entered.
 * NOTE(review): if uart_printf() transmits in the background, that traffic
 *       may overlap with the timed loops - TODO confirm and wait for the
 *       transmission to finish before the measurement if so.
 */
#define HYPER_TEST_UART		1

/* returned instead of an error count when no HyperRAM base address could be selected */
#define HYPER_TEST_NO_INSTANCE	0xFFFFFFFFUL

uint32_t OspiHypRamTest(uint8_t u8CountDown)
{
	uint32_t i = 0;
	uint32_t u32Val = 0xFFFFFFFF;
	uint32_t u32MaxLen = (uint32_t)((uint32_t)OSPI_HYPERRAM_END_ADDR / 4);
	uint32_t u32Errors = 0;
	uint32_t u32CycStart = 0;
	uint32_t u32Cycles = 0;
	float flClockMHz = (float)HAL_RCC_GetSysClockFreq() / 1E6;
	float flVal = 0.0f;
	uint32_t *pu32MemAddr = NULL;

	/* select the memory-mapped base address of the OCTOSPI instance in use */
	if( OCTOSPI1 == pOspiHypRam )
	{
		pu32MemAddr = (uint32_t *)OCTOSPI1_BASE;
	}
	else if( OCTOSPI2 == pOspiHypRam )
	{
		pu32MemAddr = (uint32_t *)OCTOSPI2_BASE;
	}
	else
	{
		/* unknown instance: the loops below would otherwise run over
		 * the complete HyperRAM size starting at address 0
		 */
#if HYPER_TEST_UART
		uart_printf("\n\r# ERR: pOspiHypRam is neither OCTOSPI1 nor OCTOSPI2\n\r");
#endif
		return HYPER_TEST_NO_INSTANCE;
	}

#if HYPER_TEST_UART
	uart_printf("\n\r+++++++++++++++++++++++++++++++++++++++++++++++++\n\r");
	uart_printf("OCTOSPI HyperRAM test, memory mapped, IRQs OFF\n\rcounting ");
	if( 0 == u8CountDown ) uart_printf("UP, start with 0\n\r\n\r");
	else uart_printf("DOWN, start with %08lX\n\r\n\r", u32Val);

	uart_printf("writing bytes: %lu\n\r", (uint32_t)OSPI_HYPERRAM_END_ADDR);
#endif

	/* ++++++++++++++++++++++++++++++++++++++ */
	/* WRITE complete HyperRAM */

	__DSB();
	__disable_irq();

	if( 0 == u8CountDown )
	{
		/* UP: word i = i */
		u32CycStart = DWT->CYCCNT;
		for( i = 0; i < u32MaxLen; i++ )
		{
			pu32MemAddr[i] = i;
		}
	}
	else
	{
		/* DOWN: word i = 0xFFFFFFFF - i */
		u32Val = 0xFFFFFFFF;
		u32CycStart = DWT->CYCCNT;
		for( i = 0; i < u32MaxLen; i++ )
		{
			pu32MemAddr[i] = u32Val;
			u32Val--;
		}
	}
	/* barriers before the end time stamp is taken */
	__DMB();
	__DSB();
	u32Cycles = DWT->CYCCNT;

	__enable_irq();
	__DSB();

	/* WRITE speed calculation: flVal = elapsed time in us (cycles / MHz) */
	u32Cycles -= u32CycStart;

	flVal = (float)u32Cycles / flClockMHz;
	flOspiRamSpeedMBpsMmWr = (float)OSPI_HYPERRAM_END_ADDR / flVal;
	flOspiRamSpeedMBpsMmWr *= (float)MEGA_CORRECTION;

#if HYPER_TEST_UART
	uart_printf("%lu CPU cycles = %.1f ms\n\r", u32Cycles, (flVal / 1000.0f));
	uart_printf("\n\r-> %.2f MB/s (%.0f Mbit/s) WRITE\n\r\n\r", flOspiRamSpeedMBpsMmWr, (8.0f * flOspiRamSpeedMBpsMmWr));

	uart_printf("reading & comparing bytes: %lu\n\r", (uint32_t)OSPI_HYPERRAM_END_ADDR);
#endif

	/* ++++++++++++++++++++++++++++++++++++++ */
	/* READ complete HyperRAM and compare */

	__DSB();
	__disable_irq();
	__DSB();

	if( 0 == u8CountDown )
	{
		/* UP: expect word i == i */
		u32CycStart = DWT->CYCCNT;
		for( i = 0; i < u32MaxLen; i++ )
		{
			if( pu32MemAddr[i] != i ) u32Errors++;
		}
	}
	else
	{
		/* DOWN: expect word i == 0xFFFFFFFF - i */
		u32Val = 0xFFFFFFFF;
		u32CycStart = DWT->CYCCNT;
		for( i = 0; i < u32MaxLen; i++ )
		{
			if( pu32MemAddr[i] != (u32Val - i) ) u32Errors++;
		}
	}
	/* barriers before the end time stamp is taken */
	__DMB();
	__DSB();
	u32Cycles = DWT->CYCCNT;

	__enable_irq();

	/* READ speed calculation, same scheme as for the write pass */
	u32Cycles -= u32CycStart;

	flVal = (float)u32Cycles / flClockMHz;
	flOspiRamSpeedMBpsMmRd = (float)OSPI_HYPERRAM_END_ADDR / flVal;
	flOspiRamSpeedMBpsMmRd *= (float)MEGA_CORRECTION;

#if HYPER_TEST_UART
	uart_printf("%lu CPU cycles = %.1f ms\n\r", u32Cycles, (flVal / 1000.0f));
	uart_printf("\n\r-> %.2f MB/s (%.0f Mbit/s) READ\n\r", flOspiRamSpeedMBpsMmRd, (8.0f * flOspiRamSpeedMBpsMmRd));

	if( 0 == u32Errors ) uart_printf("\n\rNULL errors\n\r");
	else uart_printf("\n\r# ERR: u32Errors = %lu\n\r", u32Errors);
	uart_printf("-------------------------------------------------\n\r");
#endif

	return u32Errors;
}

Anybody any ideas?

Thanks in advance!

    This topic has been closed for replies.

    13 replies

    LCEAuthor
    Graduate II
    October 21, 2024

    @STOne-32 

    so here are the results of the 64 bit version (including ALIGN(8) in .ld):

    HyperRAM speed:

    UP:
    Read = 113.10 MB/s
    Write = 117.38 MB/s

     

    DOWN:
    Read = 105.42 MB/s
    Write = 179.32 MB/s

    /* 64 bit version: same write / read-back test, but the memory is
     * accessed through a uint64_t pointer (8 bytes per loop iteration)
     */
    	/* NOTE(review): the loop counter itself is 64 bit wide as well; the
    	 * list file shows an extra adc + orrs per iteration for it
    	 */
    	uint64_t i = 0;
    	uint64_t u64Val = 0xFFFFFFFF;
    	/* number of 64-bit words to write / read */
    	uint64_t u64MaxLen = (uint64_t)((uint64_t)OSPI_HYPERRAM_END_ADDR / 8);
    	uint64_t u64Data = 0;
    	uint64_t *pu64MemAddr = NULL;
    
    	uint32_t u32CycStart = 0;
    	uint32_t u32Cycles = 0;
    	float flClockMHz = (float)HAL_RCC_GetSysClockFreq() / 1E6;
    	float flVal = 0.0f;
    	uint32_t u32Errors = 0;
    
    	/* select the memory-mapped base address of the OCTOSPI instance in use */
    	/* NOTE(review): pu64MemAddr stays NULL if pOspiHypRam matches neither
    	 * instance; the loops below would then access memory from address 0
    	 */
    	if( 	 OCTOSPI1 == pOspiHypRam ) pu64MemAddr = (uint64_t *)OCTOSPI1_BASE;
    	else if( OCTOSPI2 == pOspiHypRam ) pu64MemAddr = (uint64_t *)OCTOSPI2_BASE;
    
    /* ++++++++++++++++++++++++++++++++++++++ */
    /* WRITE complete HyperRAM */
    
    /* interrupts are masked for the timed section */
    __DSB();
    __disable_irq();
    
    	/* UP - should be faster */
    	/* word i holds the value i */
    	if( 0 == u8CountDown )
    	{
    		u32CycStart = DWT->CYCCNT;
    		for( i = 0; i < u64MaxLen; i++ )
    		{
    			pu64MemAddr[i] = i;
    		}
    	}
    	/* DOWN */
    	/* word i holds 0xFFFFFFFF - i */
    	else
    	{
    		u64Val = 0xFFFFFFFF;
    		u32CycStart = DWT->CYCCNT;
    		for( i = 0; i < u64MaxLen; i++ )
    		{
    			pu64MemAddr[i] = u64Val;
    			u64Val--;
    		}
    	}
    	/* barriers before the end time stamp is taken */
    	__DMB();
    	__DSB();
    
    	u32Cycles = DWT->CYCCNT;
    
    /* interrupts are enabled unconditionally here */
    __enable_irq();
    __DSB();
    
    	/* WRITE speed calculation */
    	/* flVal = elapsed time in us (CPU cycles / clock in MHz) */
    	u32Cycles -= u32CycStart;
    	flVal = (float)u32Cycles / flClockMHz;
    	flOspiRamSpeedMBpsMmWr = (float)OSPI_HYPERRAM_END_ADDR / flVal;
    	flOspiRamSpeedMBpsMmWr *= (float)MEGA_CORRECTION;
    
    __DSB();
    
    	/* base address is selected again; it was not modified by the write pass */
    	if( 	 OCTOSPI1 == pOspiHypRam ) pu64MemAddr = (uint64_t *)OCTOSPI1_BASE;
    	else if( OCTOSPI2 == pOspiHypRam ) pu64MemAddr = (uint64_t *)OCTOSPI2_BASE;
    
    /* ++++++++++++++++++++++++++++++++++++++ */
    /* READ & CHECK */
    
    __disable_irq();
    __DSB();
    
    	/* UP - should be faster */
    	/* expect word i == i, every mismatch is counted in u32Errors */
    	if( 0 == u8CountDown )
    	{
    		u32CycStart = DWT->CYCCNT;
    		for( i = 0; i < u64MaxLen; i++ )
    		{
    			u64Data = pu64MemAddr[i];
    			if( u64Data != i ) u32Errors++;
    		}
    	}
    	/* DOWN */
    	/* expect word i == 0xFFFFFFFF - i */
    	else
    	{
    		u64Val = 0xFFFFFFFF;
    		u32CycStart = DWT->CYCCNT;
    		for( i = 0; i < u64MaxLen; i++ )
    		{
    			u64Data = pu64MemAddr[i];
    			if( u64Data != (u64Val - i) ) u32Errors++;
    		}
    	}
    	/* barriers before the end time stamp is taken */
    	__DMB();
    	__DSB();
    
    	u32Cycles = DWT->CYCCNT;
    
    __enable_irq();
    
    	/* READ speed calculation */
    	/* same scheme as for the write pass */
    	u32Cycles -= u32CycStart;
    	flVal = (float)u32Cycles / flClockMHz;
    	flOspiRamSpeedMBpsMmRd = (float)OSPI_HYPERRAM_END_ADDR / flVal;
    	flOspiRamSpeedMBpsMmRd *= (float)MEGA_CORRECTION;
    LCEAuthor
    Graduate II
    October 21, 2024

    And here's the list file:

    080523b0 <OspiHypRamTest>:
     80523b0:	b5f8 	push	{r3, r4, r5, r6, r7, lr}
     80523b2:	4605 	mov	r5, r0
     80523b4:	f01d fa94 	bl	806f8e0 <HAL_RCC_GetSysClockFreq>
     80523b8:	ee07 0a90 	vmov	s15, r0
     80523bc:	4e5c 	ldr	r6, [pc, #368]	; (8052530 <OspiHypRamTest+0x180>)
     80523be:	4a5d 	ldr	r2, [pc, #372]	; (8052534 <OspiHypRamTest+0x184>)
     80523c0:	eeb8 7a67 	vcvt.f32.u32	s14, s15
     80523c4:	6833 	ldr	r3, [r6, #0]
     80523c6:	ed9f 6b58 	vldr	d6, [pc, #352]	; 8052528 <OspiHypRamTest+0x178>
     80523ca:	eeb7 7ac7 	vcvt.f64.f32	d7, s14
     80523ce:	4293 	cmp	r3, r2
     80523d0:	ee27 7b06 	vmul.f64	d7, d7, d6
     80523d4:	eeb7 7bc7 	vcvt.f32.f64	s14, d7
     80523d8:	f000 809f 	beq.w	805251a <OspiHypRamTest+0x16a>
     80523dc:	4956 	ldr	r1, [pc, #344]	; (8052538 <OspiHypRamTest+0x188>)
     80523de:	428b 	cmp	r3, r1
     80523e0:	bf0c 	ite	eq
     80523e2:	f04f 41e0 	moveq.w	r1, #1879048192	; 0x70000000
     80523e6:	2100 	movne	r1, #0
     80523e8:	f3bf 8f4f 	dsb	sy
     80523ec:	b672 	cpsid	i
     80523ee:	4b53 	ldr	r3, [pc, #332]	; (805253c <OspiHypRamTest+0x18c>)
     80523f0:	3908 	subs	r1, #8
     80523f2:	685f 	ldr	r7, [r3, #4]
     80523f4:	468c 	mov	ip, r1
     80523f6:	2d00 	cmp	r5, #0
     80523f8:	d17c 	bne.n	80524f4 <OspiHypRamTest+0x144>
     80523fa:	462b 	mov	r3, r5
     80523fc:	462a 	mov	r2, r5
     80523fe:	1c5c 	adds	r4, r3, #1
     8052400:	f84c 3f08 	str.w	r3, [ip, #8]!
     8052404:	f8cc 2004 	str.w	r2, [ip, #4]
     8052408:	4623 	mov	r3, r4
     805240a:	f142 0200 	adc.w	r2, r2, #0
     805240e:	f5a4 1400 	sub.w	r4, r4, #2097152	; 0x200000
     8052412:	ea54 0002 	orrs.w	r0, r4, r2
     8052416:	d1f2 	bne.n	80523fe <OspiHypRamTest+0x4e>
     8052418:	f3bf 8f5f 	dmb	sy
     805241c:	f3bf 8f4f 	dsb	sy
     8052420:	4b46 	ldr	r3, [pc, #280]	; (805253c <OspiHypRamTest+0x18c>)
     8052422:	685b 	ldr	r3, [r3, #4]
     8052424:	b662 	cpsie	i
     8052426:	f3bf 8f4f 	dsb	sy
     805242a:	1bdb 	subs	r3, r3, r7
     805242c:	ed9f 6a44 	vldr	s12, [pc, #272]	; 8052540 <OspiHypRamTest+0x190>
     8052430:	eddf 6a44 	vldr	s13, [pc, #272]	; 8052544 <OspiHypRamTest+0x194>
     8052434:	ee07 3a90 	vmov	s15, r3
     8052438:	4a43 	ldr	r2, [pc, #268]	; (8052548 <OspiHypRamTest+0x198>)
     805243a:	ee27 7a26 	vmul.f32	s14, s14, s13
     805243e:	eef8 7a67 	vcvt.f32.u32	s15, s15
     8052442:	eec6 6a27 	vdiv.f32	s13, s12, s15
     8052446:	ee66 7a87 	vmul.f32	s15, s13, s14
     805244a:	edc2 7a00 	vstr	s15, [r2]
     805244e:	f3bf 8f4f 	dsb	sy
     8052452:	4a38 	ldr	r2, [pc, #224]	; (8052534 <OspiHypRamTest+0x184>)
     8052454:	6833 	ldr	r3, [r6, #0]
     8052456:	4293 	cmp	r3, r2
     8052458:	d062 	beq.n	8052520 <OspiHypRamTest+0x170>
     805245a:	f502 42a0 	add.w	r2, r2, #20480	; 0x5000
     805245e:	483b 	ldr	r0, [pc, #236]	; (805254c <OspiHypRamTest+0x19c>)
     8052460:	4293 	cmp	r3, r2
     8052462:	bf08 	it	eq
     8052464:	4601 	moveq	r1, r0
     8052466:	b672 	cpsid	i
     8052468:	f3bf 8f4f 	dsb	sy
     805246c:	bb4d 	cbnz	r5, 80524c2 <OspiHypRamTest+0x112>
     805246e:	4628 	mov	r0, r5
     8052470:	4b32 	ldr	r3, [pc, #200]	; (805253c <OspiHypRamTest+0x18c>)
     8052472:	4602 	mov	r2, r0
     8052474:	685d 	ldr	r5, [r3, #4]
     8052476:	4603 	mov	r3, r0
     8052478:	f851 6f08 	ldr.w	r6, [r1, #8]!
     805247c:	684c 	ldr	r4, [r1, #4]
     805247e:	42a2 	cmp	r2, r4
     8052480:	bf08 	it	eq
     8052482:	42b3 	cmpeq	r3, r6
     8052484:	bf18 	it	ne
     8052486:	3001 	addne	r0, #1
     8052488:	3301 	adds	r3, #1
     805248a:	f142 0200 	adc.w	r2, r2, #0
     805248e:	f5a3 1400 	sub.w	r4, r3, #2097152	; 0x200000
     8052492:	4314 	orrs	r4, r2
     8052494:	d1f0 	bne.n	8052478 <OspiHypRamTest+0xc8>
     8052496:	f3bf 8f5f 	dmb	sy
     805249a:	f3bf 8f4f 	dsb	sy
     805249e:	4b27 	ldr	r3, [pc, #156]	; (805253c <OspiHypRamTest+0x18c>)
     80524a0:	685b 	ldr	r3, [r3, #4]
     80524a2:	b662 	cpsie	i
     80524a4:	1b5b 	subs	r3, r3, r5
     80524a6:	ed9f 6a26 	vldr	s12, [pc, #152]	; 8052540 <OspiHypRamTest+0x190>
     80524aa:	4a29 	ldr	r2, [pc, #164]	; (8052550 <OspiHypRamTest+0x1a0>)
     80524ac:	ee07 3a90 	vmov	s15, r3
     80524b0:	eef8 7a67 	vcvt.f32.u32	s15, s15
     80524b4:	eec6 6a27 	vdiv.f32	s13, s12, s15
     80524b8:	ee26 7a87 	vmul.f32	s14, s13, s14
     80524bc:	ed82 7a00 	vstr	s14, [r2]
     80524c0:	bdf8 	pop	{r3, r4, r5, r6, r7, pc}
     80524c2:	2200 	movs	r2, #0
     80524c4:	481d 	ldr	r0, [pc, #116]	; (805253c <OspiHypRamTest+0x18c>)
     80524c6:	f04f 33ff 	mov.w	r3, #4294967295
     80524ca:	f46f 1c00 	mvn.w	ip, #2097152	; 0x200000
     80524ce:	6845 	ldr	r5, [r0, #4]
     80524d0:	4616 	mov	r6, r2
     80524d2:	4610 	mov	r0, r2
     80524d4:	f851 4f08 	ldr.w	r4, [r1, #8]!
     80524d8:	684f 	ldr	r7, [r1, #4]
     80524da:	42ba 	cmp	r2, r7
     80524dc:	bf08 	it	eq
     80524de:	42a3 	cmpeq	r3, r4
     80524e0:	bf18 	it	ne
     80524e2:	3001 	addne	r0, #1
     80524e4:	3b01 	subs	r3, #1
     80524e6:	f162 0200 	sbc.w	r2, r2, #0
     80524ea:	42b2 	cmp	r2, r6
     80524ec:	bf08 	it	eq
     80524ee:	4563 	cmpeq	r3, ip
     80524f0:	d1f0 	bne.n	80524d4 <OspiHypRamTest+0x124>
     80524f2:	e7d0 	b.n	8052496 <OspiHypRamTest+0xe6>
     80524f4:	2200 	movs	r2, #0
     80524f6:	f04f 33ff 	mov.w	r3, #4294967295
     80524fa:	f46f 1e00 	mvn.w	lr, #2097152	; 0x200000
     80524fe:	4610 	mov	r0, r2
     8052500:	1e5c 	subs	r4, r3, #1
     8052502:	f84c 3f08 	str.w	r3, [ip, #8]!
     8052506:	f8cc 2004 	str.w	r2, [ip, #4]
     805250a:	f162 0200 	sbc.w	r2, r2, #0
     805250e:	4623 	mov	r3, r4
     8052510:	4282 	cmp	r2, r0
     8052512:	bf08 	it	eq
     8052514:	4574 	cmpeq	r4, lr
     8052516:	d1f3 	bne.n	8052500 <OspiHypRamTest+0x150>
     8052518:	e77e 	b.n	8052418 <OspiHypRamTest+0x68>
     805251a:	f04f 4110 	mov.w	r1, #2415919104	; 0x90000000
     805251e:	e763 	b.n	80523e8 <OspiHypRamTest+0x38>
     8052520:	490c 	ldr	r1, [pc, #48]	; (8052554 <OspiHypRamTest+0x1a4>)
     8052522:	e7a0 	b.n	8052466 <OspiHypRamTest+0xb6>
     8052524:	f3af 8000 	nop.w
     8052528:	a0b5ed8d 	.word	0xa0b5ed8d
     805252c:	3eb0c6f7 	.word	0x3eb0c6f7
     8052530:	24002cbc 	.word	0x24002cbc
     8052534:	52005000 	.word	0x52005000
     8052538:	5200a000 	.word	0x5200a000
     805253c:	e0001000 	.word	0xe0001000
     8052540:	4b800000 	.word	0x4b800000
     8052544:	3f742400 	.word	0x3f742400
     8052548:	24002bfc 	.word	0x24002bfc
     805254c:	6ffffff8 	.word	0x6ffffff8
     8052550:	24002bf8 	.word	0x24002bf8
     8052554:	8ffffff8 	.word	0x8ffffff8
    LCEAuthor
    Graduate II
    October 21, 2024

    My biggest worries were actually the different results after compilations, when I didn't touch anything concerning HyperRAM.

    This is actually stable since I added that UART wait before starting the test loops.