From 4e56308087666fa412bc442038b5f3032aa29408 Mon Sep 17 00:00:00 2001 From: David Welch Date: Wed, 6 Jun 2012 14:14:57 -0400 Subject: [PATCH] adding second benchmark --- bench02/Makefile | 35 ++++++ bench02/README | 112 ++++++++++++++++++ bench02/bench02.c | 86 ++++++++++++++ bench02/memmap | 12 ++ bench02/novectors.s | 282 ++++++++++++++++++++++++++++++++++++++++++++ bench02/uart.c | 116 ++++++++++++++++++ 6 files changed, 643 insertions(+) create mode 100644 bench02/Makefile create mode 100644 bench02/README create mode 100644 bench02/bench02.c create mode 100644 bench02/memmap create mode 100644 bench02/novectors.s create mode 100644 bench02/uart.c diff --git a/bench02/Makefile b/bench02/Makefile new file mode 100644 index 0000000..9e9548a --- /dev/null +++ b/bench02/Makefile @@ -0,0 +1,35 @@ + +ARMGNU ?= arm-none-eabi + +COPS = -mthumb-interwork -Wall -O2 -nostdlib -nostartfiles -ffreestanding + +all : bench02.hex bench02.bin + +clean : + rm -f *.o + rm -f *.bin + rm -f *.hex + rm -f *.elf + rm -f *.list + rm -f *.img + +novectors.o : novectors.s + $(ARMGNU)-as novectors.s -o novectors.o + +bench02.o : bench02.c + $(ARMGNU)-gcc $(COPS) -c bench02.c -o bench02.o + +uart.o : uart.c + $(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o + +bench02.elf : memmap novectors.o bench02.o uart.o + $(ARMGNU)-ld -T memmap novectors.o bench02.o uart.o -o bench02.elf + $(ARMGNU)-objdump -D bench02.elf > bench02.list + +bench02.bin : bench02.elf + $(ARMGNU)-objcopy bench02.elf -O binary bench02.bin + +bench02.hex : bench02.elf + $(ARMGNU)-objcopy bench02.elf -O ihex bench02.hex + + diff --git a/bench02/README b/bench02/README new file mode 100644 index 0000000..93d709f --- /dev/null +++ b/bench02/README @@ -0,0 +1,112 @@ + +See the top level README for information on where to find the +schematic and programmers reference manual for the ARM processor +on the raspberry pi. Also find information on how to load and run +these programs. 
+ +This is actually the second benchmark program, twain is the first but +called it twain not bench01. + +Just experimenting with the execution time of the processor. The +raspberry pi faq says + + +What SoC are you using? + +The SoC is a Broadcom BCM2835. This contains an ARM1176JZFS, with +floating point, running at 700Mhz... + + +The BCM2835 docs provided plus the blinker experiments lean heavily +toward the system clock running at 250MHz. What is the ARM doing? +Is it really 700MHz? Let's see what we see. + + +12345678 +02DB6DF3 ARMTEST0 +02DB6E15 ARMTEST0 +00AB6E32 ARMTEST1 +00836E31 ARMTEST2 +037000D7 ARMTEST3 +02DB6E25 THUMBTEST0 +00AB6E26 THUMBTEST1 +037000A7 THUMBTEST2 + +ARMTEST0 +0x01000000 subs instructions +0x01000000 bne instructions +0x02000000 instructions +02DB6E15 system clocks +1.43 clocks per instruction. 175Mips. + +ARMTEST1 +0x01000000 sub instructions +0x00200000 bne instructions +0x01200000 instructions +00AB6E32 system clocks +1.68 instructions per clock. 420Mips + +ARMTEST2 +0x01000000 sub instructions +0x00100000 bne instructions +0x01100000 instructions +00836E31 system clocks +2.07 instructions per clock. 517Mips + +ARMTEST3 +0x01000000 sub instructions +0x08000000 nop instructions +0x00100000 bne instructions +0x09100000 instructions +037000D7 system clocks +2.64 instructions per clock. 659Mips + +THUMBTEST0 +0x01000000 subs instructions +0x01000000 bne instructions +0x02000000 instructions +02DB6E25 system clocks +1.43 clocks per instruction. 175Mips. + +THUMBTEST1 +0x01000000 sub instructions +0x00200000 bne instructions +0x01200000 instructions +00AB6E26 system clocks +1.68 instructions per clock. 420Mips + +THUMBTEST2 +0x01000000 sub instructions +0x08000000 nop instructions +0x00100000 bne instructions +0x09100000 instructions +037000A7 system clocks +2.64 instructions per clock. 659Mips + +Being super scalar I was hoping to find more than 700 million instructions +per second. So far no luck. 
Since the thumb instructions I am using +are mostly 16 bit, was curious to know if because you can fetch twice +as many thumb instructions in the same number of memory cycles vs arm +instructions is there a thumb performance boost, so far have not seen +any of that either. This could still be gated by execution and not +fetching from cache. Or they could be fetching each thumb instruction +separately even if it is the same memory location read twice. + +It appears that the gpu bootloader along with loading the arm and +releasing reset, you can use a config.txt file to make some adjustments +before the arm boots. The arm frequency is one of those. Now the +web pages talk about /boot/config.txt from an on chip linux viewpoint +if you actually boot linux and go to /boot you find it is the fat +partition on the sd card with start.elf and the other gpu bootloader +files along with the arm boot file kernel.img. So you don't need to +go through all of that go to the place where you have kernel.img, do +NOT make a boot dir, stay in that dir and create/modify the file +config.txt, create/modify a line that says: + +arm_freq=200 + +Where the number is in megahertz. You can re-run these or other tests +and see that you did in fact change the frequency. Remove the config.txt +file, comment the line with a # or change it to 700 and you will get +700MHz, the default. I would still like to see 700 million instructions +per second that I can measure, will see what happens. 
diff --git a/bench02/bench02.c b/bench02/bench02.c new file mode 100644 index 0000000..f0c4b5b --- /dev/null +++ b/bench02/bench02.c @@ -0,0 +1,86 @@ + +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- + +extern void PUT32 ( unsigned int, unsigned int ); +extern unsigned int GET32 ( unsigned int ); +extern void dummy ( unsigned int ); +extern void uart_init ( void ); +extern void hexstrings ( unsigned int d ); +extern void hexstring ( unsigned int d ); +extern void init_timer ( void ); +extern unsigned int timer_tick ( void ); +extern void start_l1cache ( void ); +extern void stop_l1cache ( void ); +extern void ARMTEST0 ( unsigned int ); +extern void ARMTEST1 ( unsigned int ); +extern void ARMTEST2 ( unsigned int ); +extern void ARMTEST3 ( unsigned int ); +extern void THUMBTEST0 ( unsigned int ); +extern void THUMBTEST1 ( unsigned int ); +extern void THUMBTEST2 ( unsigned int ); + +//------------------------------------------------------------------------ +int notmain ( void ) +{ + unsigned int beg,end; + + uart_init(); + hexstring(0x12345678); + + start_l1cache(); + + init_timer(); + beg=timer_tick(); + ARMTEST0(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + ARMTEST0(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + ARMTEST1(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + ARMTEST2(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + ARMTEST3(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + THUMBTEST0(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + THUMBTEST1(0x01000000); + end=timer_tick(); + hexstring(end-beg); + + init_timer(); + beg=timer_tick(); + THUMBTEST2(0x01000000); + 
end=timer_tick(); + hexstring(end-beg); + + stop_l1cache(); + + return(0); +} +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- diff --git a/bench02/memmap b/bench02/memmap new file mode 100644 index 0000000..8d9566d --- /dev/null +++ b/bench02/memmap @@ -0,0 +1,12 @@ + +MEMORY +{ + ram : ORIGIN = 0x00000000, LENGTH = 0x1000 +} + +SECTIONS +{ + .text : { *(.text*) } > ram + .bss : { *(.bss*) } > ram +} + diff --git a/bench02/novectors.s b/bench02/novectors.s new file mode 100644 index 0000000..f66bf30 --- /dev/null +++ b/bench02/novectors.s @@ -0,0 +1,282 @@ + +.code 32 + +.globl _start +_start: + b reset +reset: + ldr sp,stack_start + ldr r0,thumb_start_add + bx r0 + +stack_start: .word 0x10000 +thumb_start_add: .word thumb_start + +.globl start_l1cache +start_l1cache: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches + mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb + mrc p15, 0, r0, c1, c0, 0 + orr r0,r0,#0x1000 ;@ instruction + orr r0,r0,#0x0004 ;@ data + mcr p15, 0, r0, c1, c0, 0 + bx lr + +.globl stop_l1cache +stop_l1cache: + mrc p15, 0, r0, c1, c0, 0 + bic r0,r0,#0x1000 ;@ instruction + bic r0,r0,#0x0004 ;@ data + mcr p15, 0, r0, c1, c0, 0 + bx lr + + nop + +.globl ARMTEST0 +ARMTEST0: + subs r0,r0,#1 + bne ARMTEST0 + bx lr + + nop + +.globl ARMTEST1 +ARMTEST1: + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + bne ARMTEST1 + bx lr + +.globl ARMTEST2 +ARMTEST2: + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + subs r0,r0,#1 + bne ARMTEST2 + bx lr + +.globl ARMTEST3 +ARMTEST3: + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop 
+ nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + subs r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + bne ARMTEST3 + bx lr + +;@ ----------------------------------------------------- + +.thumb + +.thumb_func +thumb_start: + bl notmain +hang: b hang + +.thumb_func +.globl PUT32 +PUT32: + str r1,[r0] + bx lr + +.thumb_func +.globl GET32 +GET32: + ldr r0,[r0] + bx lr + +.thumb_func +.globl dummy +dummy: + bx lr + +.thumb_func +.globl THUMBTEST0 +THUMBTEST0: + sub r0,r0,#1 + bne THUMBTEST0 + bx lr + + +.thumb_func +.globl THUMBTEST1 +THUMBTEST1: + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + sub r0,r0,#1 + bne THUMBTEST1 + bx lr + + +.thumb_func +.globl THUMBTEST2 +THUMBTEST2: + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + sub r0,r0,#1 + nop + nop + nop + nop + nop + nop + nop + nop + bne THUMBTEST2 + bx lr + + diff --git a/bench02/uart.c b/bench02/uart.c new file mode 100644 index 0000000..43e357a --- /dev/null +++ b/bench02/uart.c @@ -0,0 +1,116 @@ + +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- + +extern void PUT32 ( unsigned int, unsigned int ); +extern unsigned int GET32 ( unsigned int ); +extern void dummy ( 
unsigned int ); + +#define GPFSEL1 0x20200004 +#define GPSET0 0x2020001C +#define GPCLR0 0x20200028 +#define GPPUD 0x20200094 +#define GPPUDCLK0 0x20200098 + +#define AUX_ENABLES 0x20215004 +#define AUX_MU_IO_REG 0x20215040 +#define AUX_MU_IER_REG 0x20215044 +#define AUX_MU_IIR_REG 0x20215048 +#define AUX_MU_LCR_REG 0x2021504C +#define AUX_MU_MCR_REG 0x20215050 +#define AUX_MU_LSR_REG 0x20215054 +#define AUX_MU_MSR_REG 0x20215058 +#define AUX_MU_SCRATCH 0x2021505C +#define AUX_MU_CNTL_REG 0x20215060 +#define AUX_MU_STAT_REG 0x20215064 +#define AUX_MU_BAUD_REG 0x20215068 + +#define ARM_TIMER_LOD 0x2000B400 +#define ARM_TIMER_VAL 0x2000B404 +#define ARM_TIMER_CTL 0x2000B408 +#define ARM_TIMER_DIV 0x2000B41C +#define ARM_TIMER_CNT 0x2000B420 + +//GPIO14 TXD0 and TXD1 +//GPIO15 RXD0 and RXD1 +//alt function 5 for uart1 +//alt function 0 for uart0 + +//((250,000,000/115200)/8)-1 = 270 +//------------------------------------------------------------------------ +void uart_putc ( unsigned int c ) +{ + while(1) + { + if(GET32(AUX_MU_LSR_REG)&0x20) break; + } + PUT32(AUX_MU_IO_REG,c); +} +//------------------------------------------------------------------------ +void hexstrings ( unsigned int d ) +{ + //unsigned int ra; + unsigned int rb; + unsigned int rc; + + rb=32; + while(1) + { + rb-=4; + rc=(d>>rb)&0xF; + if(rc>9) rc+=0x37; else rc+=0x30; + uart_putc(rc); + if(rb==0) break; + } + uart_putc(0x20); +} +//------------------------------------------------------------------------ +void hexstring ( unsigned int d ) +{ + hexstrings(d); + uart_putc(0x0D); + uart_putc(0x0A); +} +//------------------------------------------------------------------------ +int uart_init ( void ) +{ + unsigned int ra; + + PUT32(AUX_ENABLES,1); + PUT32(AUX_MU_IER_REG,0); + PUT32(AUX_MU_CNTL_REG,0); + PUT32(AUX_MU_LCR_REG,3); + PUT32(AUX_MU_MCR_REG,0); + PUT32(AUX_MU_IER_REG,0); + PUT32(AUX_MU_IIR_REG,0xC6); + PUT32(AUX_MU_BAUD_REG,270); + + ra=GET32(GPFSEL1); + ra&=~(7<<12); //gpio14 + ra|=2<<12; 
//alt5 + ra&=~(7<<15); //gpio15 + ra|=2<<15; //alt5 + PUT32(GPFSEL1,ra); + + PUT32(GPPUD,0); + for(ra=0;ra<150;ra++) dummy(ra); + PUT32(GPPUDCLK0,(1<<14)|(1<<15)); + for(ra=0;ra<150;ra++) dummy(ra); + PUT32(GPPUDCLK0,0); + + PUT32(AUX_MU_CNTL_REG,3); + return(0); +} +//------------------------------------------------------------------------- +void init_timer ( void ) +{ + PUT32(ARM_TIMER_CTL,0x00000000); + PUT32(ARM_TIMER_CTL,0x00000200); +} +//------------------------------------------------------------------------- +unsigned int timer_tick ( void ) +{ + return(GET32(ARM_TIMER_CNT)); +} +//------------------------------------------------------------------------- +//-------------------------------------------------------------------------