adding second benchmark

This commit is contained in:
David Welch
2012-06-06 14:14:57 -04:00
parent f4b7737ae3
commit 4e56308087
6 changed files with 643 additions and 0 deletions

35
bench02/Makefile Normal file
View File

@@ -0,0 +1,35 @@
# Cross toolchain prefix. Override from the command line if needed,
# for example: make ARMGNU=arm-linux-gnueabi
ARMGNU ?= arm-none-eabi

# Compiler options shared by every C file in this directory.
COPS = -mthumb-interwork -Wall -O2 -nostdlib -nostartfiles -ffreestanding

# all and clean are actions, not files; declaring them phony keeps them
# working even if a file with one of those names shows up in this directory.
.PHONY : all clean

all : bench02.hex bench02.bin

clean :
	rm -f *.o
	rm -f *.bin
	rm -f *.hex
	rm -f *.elf
	rm -f *.list
	rm -f *.img

novectors.o : novectors.s
	$(ARMGNU)-as novectors.s -o novectors.o

bench02.o : bench02.c
	$(ARMGNU)-gcc $(COPS) -c bench02.c -o bench02.o

uart.o : uart.c
	$(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o

# novectors.o is listed first on the link line; the disassembly listing is
# written as a side effect of linking.
bench02.elf : memmap novectors.o bench02.o uart.o
	$(ARMGNU)-ld -T memmap novectors.o bench02.o uart.o -o bench02.elf
	$(ARMGNU)-objdump -D bench02.elf > bench02.list

bench02.bin : bench02.elf
	$(ARMGNU)-objcopy bench02.elf -O binary bench02.bin

bench02.hex : bench02.elf
	$(ARMGNU)-objcopy bench02.elf -O ihex bench02.hex

112
bench02/README Normal file
View File

@@ -0,0 +1,112 @@
See the top level README for information on where to find the
schematic and programmers reference manual for the ARM processor
on the raspberry pi. Also find information on how to load and run
these programs.
This is actually the second benchmark program; twain was the first, but
it was named twain rather than bench01.
Just experimenting with the execution time of the processor. The
raspberry pi faq says
What SoC are you using?
The SoC is a Broadcom BCM2835. This contains an ARM1176JZFS, with
floating point, running at 700Mhz...
The BCM2835 docs provided plus the blinker experiments lean heavily
toward the system clock running at 250MHz. What is the ARM doing?
Is it really 700MHz? Let's see what we see.
12345678
02DB6DF3 ARMTEST0
02DB6E15 ARMTEST0
00AB6E32 ARMTEST1
00836E31 ARMTEST2
037000D7 ARMTEST3
02DB6E25 THUMBTEST0
00AB6E26 THUMBTEST1
037000A7 THUMBTEST2
ARMTEST0
0x01000000 subs instructions
0x01000000 bne instructions
0x02000000 instructions
02DB6E15 system clocks
1.43 clocks per instruction. 175Mips.
ARMTEST1
0x01000000 sub instructions
0x00200000 bne instructions
0x01200000 instructions
00AB6E32 system clocks
1.68 instructions per clock. 420Mips
ARMTEST2
0x01000000 sub instructions
0x00100000 bne instructions
0x01100000 instructions
00836E31 system clocks
2.07 instructions per clock. 517Mips
ARMTEST3
0x01000000 sub instructions
0x08000000 nop instructions
0x00200000 bne instructions
0x09200000 instructions
037000D7 system clocks
2.65 instructions per clock. 664Mips
THUMBTEST0
0x01000000 subs instructions
0x01000000 bne instructions
0x02000000 instructions
02DB6E25 system clocks
1.43 clocks per instruction. 175Mips.
THUMBTEST1
0x01000000 sub instructions
0x00200000 bne instructions
0x01200000 instructions
00AB6E26 system clocks
1.68 instructions per clock. 420Mips
THUMBTEST2
0x01000000 sub instructions
0x08000000 nop instructions
0x00200000 bne instructions
0x09200000 instructions
037000A7 system clocks
2.65 instructions per clock. 664Mips
With the core being superscalar I was hoping to find more than 700 million
instructions per second. So far no luck. Since the thumb instructions I am
using are mostly 16 bit, I was curious to know whether there is a thumb
performance boost, because twice as many thumb instructions as arm
instructions can be fetched in the same number of memory cycles. So far I
have not seen any of that either. This could still be gated by execution
rather than by fetching from cache. Or each thumb instruction could be
fetched separately, even if that means reading the same memory location twice.
It appears that the gpu bootloader, along with loading the arm program and
releasing reset, lets you use a config.txt file to make some adjustments
before the arm boots. The arm frequency is one of those. The web pages
talk about /boot/config.txt from a linux-on-the-chip viewpoint; if you
actually boot linux and go to /boot you find it is the fat partition on
the sd card, with start.elf and the other gpu bootloader files along with
the arm boot file kernel.img. So you don't need to go through all of that:
go to the place where you have kernel.img, do NOT make a boot dir, stay in
that dir and create/modify the file config.txt so that it has a line
that says:
arm_freq=200
Where the number is in megahertz. You can re-run these or other tests
and see that you did in fact change the frequency. Remove the config.txt
file, comment the line out with a # or change it to 700 and you will get
700MHz, the default. I would still like to see 700 million instructions
per second that I can measure; will see what happens.

86
bench02/bench02.c Normal file
View File

@@ -0,0 +1,86 @@
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// Word store/load helpers and the empty dummy() routine, implemented in
// novectors.s. PUT32 takes the address first and the value second.
extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );
extern void dummy ( unsigned int );
// Uart output and timer helpers, implemented in uart.c.
// uart_init is defined there as returning int, so it is declared the same
// way here; a void declaration would not match the definition.
extern int uart_init ( void );
extern void hexstrings ( unsigned int d );
extern void hexstring ( unsigned int d );
extern void init_timer ( void );
extern unsigned int timer_tick ( void );
// Cache control, implemented in novectors.s.
extern void start_l1cache ( void );
extern void stop_l1cache ( void );
// Timed loops, implemented in novectors.s. The argument is the loop counter.
extern void ARMTEST0 ( unsigned int );
extern void ARMTEST1 ( unsigned int );
extern void ARMTEST2 ( unsigned int );
extern void ARMTEST3 ( unsigned int );
extern void THUMBTEST0 ( unsigned int );
extern void THUMBTEST1 ( unsigned int );
extern void THUMBTEST2 ( unsigned int );
//------------------------------------------------------------------------
int notmain ( void )
{
unsigned int beg,end;
uart_init();
hexstring(0x12345678);
start_l1cache();
init_timer();
beg=timer_tick();
ARMTEST0(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
ARMTEST0(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
ARMTEST1(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
ARMTEST2(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
ARMTEST3(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
THUMBTEST0(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
THUMBTEST1(0x01000000);
end=timer_tick();
hexstring(end-beg);
init_timer();
beg=timer_tick();
THUMBTEST2(0x01000000);
end=timer_tick();
hexstring(end-beg);
stop_l1cache();
return(0);
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------

12
bench02/memmap Normal file
View File

@@ -0,0 +1,12 @@
/* Linker script for bench02: a single memory region named ram that starts
   at address 0x00000000 and is 0x1000 bytes long. */
MEMORY
{
ram : ORIGIN = 0x00000000, LENGTH = 0x1000
}
/* All .text* input sections are placed in ram first, followed by all
   .bss* input sections. No other output sections are described here. */
SECTIONS
{
.text : { *(.text*) } > ram
.bss : { *(.bss*) } > ram
}

282
bench02/novectors.s Normal file
View File

@@ -0,0 +1,282 @@
;@ Entry point, assembled as 32 bit ARM code. Loads the stack pointer from
;@ the stack_start word and then does a bx to the address stored in
;@ thumb_start_add, which is the thumb_start label further down this file.
.code 32
.globl _start
_start:
b reset
reset:
ldr sp,stack_start
ldr r0,thumb_start_add
bx r0 ;@ thumb_start is marked .thumb_func, so this is expected to enter Thumb state
stack_start: .word 0x10000 ;@ initial stack pointer value
thumb_start_add: .word thumb_start ;@ address of the Thumb entry stub
;@ start_l1cache: issues the two cp15 invalidate operations below, then
;@ reads the cp15 c1,c0,0 register, sets bits 0x1000 and 0x0004 (tagged
;@ below as the instruction and data enables) and writes it back.
;@ Takes no arguments, clobbers r0, returns with bx lr.
.globl start_l1cache
start_l1cache:
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches
mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb
mrc p15, 0, r0, c1, c0, 0
orr r0,r0,#0x1000 ;@ instruction
orr r0,r0,#0x0004 ;@ data
mcr p15, 0, r0, c1, c0, 0
bx lr
;@ stop_l1cache: reads the cp15 c1,c0,0 register, clears bits 0x1000 and
;@ 0x0004 (tagged below as the instruction and data enables) and writes it
;@ back. Takes no arguments, clobbers r0, returns with bx lr.
.globl stop_l1cache
stop_l1cache:
mrc p15, 0, r0, c1, c0, 0
bic r0,r0,#0x1000 ;@ instruction
bic r0,r0,#0x0004 ;@ data
mcr p15, 0, r0, c1, c0, 0
bx lr
nop ;@ NOTE(review): never reached; presumably padding to adjust the address of the next label - confirm
;@ ARMTEST0 (ARM code): r0 is the loop counter. Each pass executes one
;@ subs and one bne, and the loop ends when r0 reaches zero. A counter of
;@ zero on entry would wrap around and run 2^32 passes.
.globl ARMTEST0
ARMTEST0:
subs r0,r0,#1
bne ARMTEST0
bx lr
nop ;@ NOTE(review): never reached; presumably padding to adjust the address of the next label - confirm
;@ ARMTEST1 (ARM code): r0 is the loop counter. Each pass executes eight
;@ subs and one bne. The bne tests the flags of the eighth subs only, so
;@ the loop ends only if r0 is a non-zero multiple of 8 on entry.
.globl ARMTEST1
ARMTEST1:
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
bne ARMTEST1
bx lr
.globl ARMTEST2
ARMTEST2:
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
subs r0,r0,#1
bne ARMTEST2
bx lr
;@ ARMTEST3 (ARM code): r0 is the loop counter. Each pass executes eight
;@ groups of one subs followed by eight nops (8 subs and 64 nops in total)
;@ and then one bne. The bne tests the flags of the last subs only, so the
;@ loop ends only if r0 is a non-zero multiple of 8 on entry.
.globl ARMTEST3
ARMTEST3:
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
subs r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
bne ARMTEST3
bx lr
;@ -----------------------------------------------------
;@ Everything from here on is assembled as Thumb code.
;@ thumb_start is the target of the bx in the reset code: it calls the C
;@ function notmain and, if that ever returns, spins forever at hang.
.thumb
.thumb_func
thumb_start:
bl notmain
hang: b hang
;@ PUT32: stores the word in r1 to the address held in r0, then returns.
.thumb_func
.globl PUT32
PUT32:
str r1,[r0]
bx lr
;@ GET32: loads the word at the address held in r0 and returns it in r0.
.thumb_func
.globl GET32
GET32:
ldr r0,[r0]
bx lr
;@ dummy: returns immediately without touching any register. uart.c calls
;@ it in counted loops.
.thumb_func
.globl dummy
dummy:
bx lr
;@ THUMBTEST0 (Thumb code): r0 is the loop counter. Each pass executes one
;@ sub and one bne, and the loop ends when r0 reaches zero.
;@ NOTE(review): the bne relies on the 16 bit Thumb sub updating the flags
;@ (the README counts these as subs) - confirm in the generated listing.
.thumb_func
.globl THUMBTEST0
THUMBTEST0:
sub r0,r0,#1
bne THUMBTEST0
bx lr
;@ THUMBTEST1 (Thumb code): r0 is the loop counter. Each pass executes
;@ eight sub instructions and one bne. The bne tests the result of the
;@ eighth sub only, so the loop ends only if r0 is a non-zero multiple of 8
;@ on entry.
.thumb_func
.globl THUMBTEST1
THUMBTEST1:
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
sub r0,r0,#1
bne THUMBTEST1
bx lr
;@ THUMBTEST2 (Thumb code): r0 is the loop counter. Each pass executes
;@ eight groups of one sub followed by eight nops (8 subs and 64 nops in
;@ total) and then one bne. The bne tests the result of the last sub only,
;@ so the loop ends only if r0 is a non-zero multiple of 8 on entry.
.thumb_func
.globl THUMBTEST2
THUMBTEST2:
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
sub r0,r0,#1
nop
nop
nop
nop
nop
nop
nop
nop
bne THUMBTEST2
bx lr

116
bench02/uart.c Normal file
View File

@@ -0,0 +1,116 @@
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// Word store/load helpers and the empty dummy() routine; they are not
// defined in this file.
extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );
extern void dummy ( unsigned int );
// GPIO registers: function select, set/clear and pull up/down control.
#define GPFSEL1 0x20200004
#define GPSET0 0x2020001C
#define GPCLR0 0x20200028
#define GPPUD 0x20200094
#define GPPUDCLK0 0x20200098
// AUX block registers; the AUX_MU_* group drives the uart used below.
#define AUX_ENABLES 0x20215004
#define AUX_MU_IO_REG 0x20215040
#define AUX_MU_IER_REG 0x20215044
#define AUX_MU_IIR_REG 0x20215048
#define AUX_MU_LCR_REG 0x2021504C
#define AUX_MU_MCR_REG 0x20215050
#define AUX_MU_LSR_REG 0x20215054
#define AUX_MU_MSR_REG 0x20215058
#define AUX_MU_SCRATCH 0x2021505C
#define AUX_MU_CNTL_REG 0x20215060
#define AUX_MU_STAT_REG 0x20215064
#define AUX_MU_BAUD_REG 0x20215068
// ARM timer registers; only CTL and CNT are used in this file.
#define ARM_TIMER_LOD 0x2000B400
#define ARM_TIMER_VAL 0x2000B404
#define ARM_TIMER_CTL 0x2000B408
#define ARM_TIMER_DIV 0x2000B41C
#define ARM_TIMER_CNT 0x2000B420
//GPIO14 TXD0 and TXD1
//GPIO15 RXD0 and RXD1
//alt function 5 for uart1
//alt function 0 for uart0
// Baud rate divisor written by uart_init, for 115200 baud from 250MHz:
//((250,000,000/115200)/8)-1 = 270
//------------------------------------------------------------------------
// Sends one character: spins until bit 0x20 of AUX_MU_LSR_REG reads as
// set, then writes c to AUX_MU_IO_REG.
void uart_putc ( unsigned int c )
{
    while((GET32(AUX_MU_LSR_REG)&0x20)==0)
    {
        // keep polling until the bit comes up
    }
    PUT32(AUX_MU_IO_REG,c);
}
//------------------------------------------------------------------------
// Prints d as eight upper case hex digits, most significant nibble first,
// followed by a single space (0x20). No line ending is sent.
void hexstrings ( unsigned int d )
{
    unsigned int shift;
    unsigned int digit;

    shift=32;
    do
    {
        shift-=4;
        digit=(d>>shift)&0xF;
        // 0..9 map to '0'..'9' (0x30 up), 10..15 map to 'A'..'F' (0x41 up)
        digit+=(digit>9)?0x37:0x30;
        uart_putc(digit);
    } while(shift!=0);
    uart_putc(0x20);
}
//------------------------------------------------------------------------
// Prints d through hexstrings() (eight hex digits and a space) and then
// finishes the line with carriage return and line feed.
void hexstring ( unsigned int d )
{
    hexstrings(d);
    uart_putc('\r'); // 0x0D
    uart_putc('\n'); // 0x0A
}
//------------------------------------------------------------------------
// Sets up the AUX uart and routes it to GPIO14/GPIO15. The register
// writes happen in the order listed and that order is kept as is.
// Always returns 0.
int uart_init ( void )
{
unsigned int ra;
// NOTE(review): presumably bit 0 of AUX_ENABLES switches the uart block on - confirm
PUT32(AUX_ENABLES,1);
PUT32(AUX_MU_IER_REG,0);
// control register is zeroed during setup and written with 3 at the end
PUT32(AUX_MU_CNTL_REG,0);
PUT32(AUX_MU_LCR_REG,3);
PUT32(AUX_MU_MCR_REG,0);
PUT32(AUX_MU_IER_REG,0);
PUT32(AUX_MU_IIR_REG,0xC6);
// 270 = ((250,000,000/115200)/8)-1, the divisor for 115200 baud
PUT32(AUX_MU_BAUD_REG,270);
// read-modify-write of GPFSEL1: replace the 3 bit fields at bit 12 and
// bit 15 with the value 2, leaving the other fields untouched
ra=GET32(GPFSEL1);
ra&=~(7<<12); //gpio14
ra|=2<<12; //alt5
ra&=~(7<<15); //gpio15
ra|=2<<15; //alt5
PUT32(GPFSEL1,ra);
// pull up/down sequence for pins 14 and 15: write 0 to GPPUD, wait 150
// dummy() calls, set the two clock bits, wait again, clear the clock bits
PUT32(GPPUD,0);
for(ra=0;ra<150;ra++) dummy(ra);
PUT32(GPPUDCLK0,(1<<14)|(1<<15));
for(ra=0;ra<150;ra++) dummy(ra);
PUT32(GPPUDCLK0,0);
// NOTE(review): presumably 3 enables both receiver and transmitter - confirm
PUT32(AUX_MU_CNTL_REG,3);
return(0);
}
//-------------------------------------------------------------------------
// Restarts the timer used for the measurements: writes zero to
// ARM_TIMER_CTL and then writes 0x200 to it.
// NOTE(review): 0x200 is presumably the bit that enables the free running
// counter read back through ARM_TIMER_CNT - confirm against the timer
// documentation.
void init_timer ( void )
{
    const unsigned int ctl_cleared=0x00000000;
    const unsigned int ctl_counter_on=0x00000200;

    PUT32(ARM_TIMER_CTL,ctl_cleared);
    PUT32(ARM_TIMER_CTL,ctl_counter_on);
}
//-------------------------------------------------------------------------
// Returns the current value read from ARM_TIMER_CNT.
unsigned int timer_tick ( void )
{
    unsigned int now;

    now=GET32(ARM_TIMER_CNT);
    return(now);
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------