adding second benchmark
This commit is contained in:
35
bench02/Makefile
Normal file
35
bench02/Makefile
Normal file
@@ -0,0 +1,35 @@
|
||||
|
||||
# Toolchain prefix; override on the command line (make ARMGNU=...).
ARMGNU ?= arm-none-eabi

# Freestanding build flags: no C library and no startup files are linked.
COPS = -mthumb-interwork -Wall -O2 -nostdlib -nostartfiles -ffreestanding

# all/clean name actions, not files. Without .PHONY a file called "all"
# or "clean" in this directory would make the rule look up to date.
.PHONY : all clean

all : bench02.hex bench02.bin

clean :
	rm -f *.o
	rm -f *.bin
	rm -f *.hex
	rm -f *.elf
	rm -f *.list
	rm -f *.img

novectors.o : novectors.s
	$(ARMGNU)-as novectors.s -o novectors.o

bench02.o : bench02.c
	$(ARMGNU)-gcc $(COPS) -c bench02.c -o bench02.o

uart.o : uart.c
	$(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o

# Link with the memmap linker script; also writes a disassembly listing.
bench02.elf : memmap novectors.o bench02.o uart.o
	$(ARMGNU)-ld -T memmap novectors.o bench02.o uart.o -o bench02.elf
	$(ARMGNU)-objdump -D bench02.elf > bench02.list

bench02.bin : bench02.elf
	$(ARMGNU)-objcopy bench02.elf -O binary bench02.bin

bench02.hex : bench02.elf
	$(ARMGNU)-objcopy bench02.elf -O ihex bench02.hex
|
||||
|
||||
|
||||
112
bench02/README
Normal file
112
bench02/README
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
See the top level README for information on where to find the
|
||||
schematic and programmers reference manual for the ARM processor
|
||||
on the raspberry pi. Also find information on how to load and run
|
||||
these programs.
|
||||
|
||||
This is actually the second benchmark program; twain is the first, but
it was named twain rather than bench01.
|
||||
|
||||
Just experimenting with the execution time of the processor. The
|
||||
raspberry pi faq says
|
||||
|
||||
|
||||
What SoC are you using?
|
||||
|
||||
The SoC is a Broadcom BCM2835. This contains an ARM1176JZFS, with
|
||||
floating point, running at 700Mhz...
|
||||
|
||||
|
||||
The BCM2835 docs provided plus the blinker experiments lean heavily
|
||||
toward the system clock running at 250MHz. What is the ARM doing?
|
||||
Is it really 700MHz? Let's see what we see.
|
||||
|
||||
|
||||
12345678
|
||||
02DB6DF3 ARMTEST0
|
||||
02DB6E15 ARMTEST0
|
||||
00AB6E32 ARMTEST1
|
||||
00836E31 ARMTEST2
|
||||
037000D7 ARMTEST3
|
||||
02DB6E25 THUMBTEST0
|
||||
00AB6E26 THUMBTEST1
|
||||
037000A7 THUMBTEST2
|
||||
|
||||
ARMTEST0
|
||||
0x01000000 subs instructions
|
||||
0x01000000 bne instructions
|
||||
0x02000000 instructions
|
||||
02DB6E15 system clocks
|
||||
1.43 clocks per instruction. 175Mips.
|
||||
|
||||
ARMTEST1
|
||||
0x01000000 sub instructions
|
||||
0x00200000 bne instructions
|
||||
0x01200000 instructions
|
||||
00AB6E32 system clocks
|
||||
1.68 instructions per clock. 420Mips
|
||||
|
||||
ARMTEST2
|
||||
0x01000000 sub instructions
|
||||
0x00100000 bne instructions
|
||||
0x01100000 instructions
|
||||
00836E31 system clocks
|
||||
2.07 instructions per clock. 517Mips
|
||||
|
||||
ARMTEST3
|
||||
0x01000000 sub instructions
|
||||
0x08000000 nop instructions
|
||||
0x00200000 bne instructions
0x09200000 instructions
037000D7 system clocks
2.65 instructions per clock. 664Mips
|
||||
|
||||
THUMBTEST0
|
||||
0x01000000 subs instructions
|
||||
0x01000000 bne instructions
|
||||
0x02000000 instructions
|
||||
02DB6E25 system clocks
|
||||
1.43 clocks per instruction. 175Mips.
|
||||
|
||||
THUMBTEST1
|
||||
0x01000000 sub instructions
|
||||
0x00200000 bne instructions
|
||||
0x01200000 instructions
|
||||
00AB6E26 system clocks
|
||||
1.68 instructions per clock. 420Mips
|
||||
|
||||
THUMBTEST2
|
||||
0x01000000 sub instructions
|
||||
0x08000000 nop instructions
|
||||
0x00200000 bne instructions
0x09200000 instructions
037000A7 system clocks
2.65 instructions per clock. 664Mips
|
||||
|
||||
Being super scalar I was hoping to find more than 700 million instructions
|
||||
per second. So far no luck. Since the thumb instructions I am using
|
||||
are mostly 16 bit, was curious to know if because you can fetch twice
|
||||
as many thumb instructions in the same number of memory cycles vs arm
|
||||
instructions, is there a thumb performance boost? So far I have not seen
any of that either. This could still be gated by execution and not
fetching from cache. Or they could be fetching each thumb instruction
separately even if it is the same memory location read twice.
|
||||
|
||||
It appears that the gpu bootloader, along with loading the arm and
releasing reset, lets you use a config.txt file to make some adjustments
before the arm boots. The arm frequency is one of those. Now the
web pages talk about /boot/config.txt from an on-chip linux viewpoint.
If you actually boot linux and go to /boot you find it is the fat
partition on the sd card with start.elf and the other gpu bootloader
files along with the arm boot file kernel.img. So you don't need to
go through all of that: go to the place where you have kernel.img, do
NOT make a boot dir, stay in that dir and create/modify the file
config.txt, and create/modify a line that says:
|
||||
|
||||
arm_freq=200
|
||||
|
||||
Where the number is in megahertz. You can re-run these or other tests
and see that you did in fact change the frequency. Remove the config.txt
file, comment the line out with a # or change it to 700 and you will get
700MHz, the default. I would still like to see 700 million instructions
per second that I can measure; will see what happens.
|
||||
86
bench02/bench02.c
Normal file
86
bench02/bench02.c
Normal file
@@ -0,0 +1,86 @@
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
// Low-level helpers implemented in novectors.s.
extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );
extern void dummy ( unsigned int );
// UART and timer helpers implemented in uart.c.
// uart_init returns int in uart.c; the prototype here must agree with
// that definition.
extern int uart_init ( void );
extern void hexstrings ( unsigned int d );
extern void hexstring ( unsigned int d );
extern void init_timer ( void );
extern unsigned int timer_tick ( void );
// Cache control and benchmark loops implemented in novectors.s.
extern void start_l1cache ( void );
extern void stop_l1cache ( void );
extern void ARMTEST0 ( unsigned int );
extern void ARMTEST1 ( unsigned int );
extern void ARMTEST2 ( unsigned int );
extern void ARMTEST3 ( unsigned int );
extern void THUMBTEST0 ( unsigned int );
extern void THUMBTEST1 ( unsigned int );
extern void THUMBTEST2 ( unsigned int );
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
int notmain ( void )
|
||||
{
|
||||
unsigned int beg,end;
|
||||
|
||||
uart_init();
|
||||
hexstring(0x12345678);
|
||||
|
||||
start_l1cache();
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
ARMTEST0(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
ARMTEST0(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
ARMTEST1(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
ARMTEST2(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
ARMTEST3(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
THUMBTEST0(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
THUMBTEST1(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
init_timer();
|
||||
beg=timer_tick();
|
||||
THUMBTEST2(0x01000000);
|
||||
end=timer_tick();
|
||||
hexstring(end-beg);
|
||||
|
||||
stop_l1cache();
|
||||
|
||||
return(0);
|
||||
}
|
||||
//-------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------
|
||||
12
bench02/memmap
Normal file
12
bench02/memmap
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
/* One memory region named `ram`: 0x1000 bytes starting at address 0. */
MEMORY
{
    ram : ORIGIN = 0x00000000, LENGTH = 0x1000
}

/* All .text* input sections are placed first, followed by .bss*; both
   output sections are assigned to `ram`. */
SECTIONS
{
    .text : { *(.text*) } > ram
    .bss : { *(.bss*) } > ram
}
|
||||
|
||||
282
bench02/novectors.s
Normal file
282
bench02/novectors.s
Normal file
@@ -0,0 +1,282 @@
|
||||
|
||||
.code 32

;@ Entry point, assembled as ARM code. Loads the stack pointer from
;@ stack_start, then branches with bx to thumb_start (declared with
;@ .thumb_func further down), which calls notmain.
.globl _start
_start:
    b reset
reset:
    ldr sp,stack_start
    ldr r0,thumb_start_add
    bx r0

;@ Literal words read by the two ldr instructions above.
stack_start: .word 0x10000
thumb_start_add: .word thumb_start
|
||||
|
||||
;@ start_l1cache: invalidate the caches and the tlb, then set bits
;@ 0x1000 (instruction) and 0x0004 (data) in the value read from
;@ cp15 c1,c0,0 and write it back. Clobbers r0.
;@ NOTE(review): nothing in this file enables the MMU - confirm whether
;@ the data cache bit has any effect without it.
.globl start_l1cache
start_l1cache:
    mov r0, #0
    mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches
    mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb
    mrc p15, 0, r0, c1, c0, 0 ;@ read current value
    orr r0,r0,#0x1000 ;@ instruction
    orr r0,r0,#0x0004 ;@ data
    mcr p15, 0, r0, c1, c0, 0 ;@ write modified value back
    bx lr
|
||||
|
||||
;@ stop_l1cache: clear bits 0x1000 (instruction) and 0x0004 (data) in
;@ the value read from cp15 c1,c0,0 and write it back. Clobbers r0.
.globl stop_l1cache
stop_l1cache:
    mrc p15, 0, r0, c1, c0, 0 ;@ read current value
    bic r0,r0,#0x1000 ;@ instruction
    bic r0,r0,#0x0004 ;@ data
    mcr p15, 0, r0, c1, c0, 0 ;@ write modified value back
    bx lr
|
||||
|
||||
nop
|
||||
|
||||
;@ ARMTEST0(r0 = count): ARM-mode loop of one subs and one bne per
;@ iteration; returns once r0 has been decremented to zero.
.globl ARMTEST0
ARMTEST0:
    subs r0,r0,#1
    bne ARMTEST0
    bx lr
|
||||
|
||||
nop
|
||||
|
||||
;@ ARMTEST1(r0 = count): ARM-mode loop of eight subs followed by one
;@ bne per iteration. The .rept block expands to the same eight subs
;@ instructions as writing them out by hand. bne tests the flags left
;@ by the eighth subs.
.globl ARMTEST1
ARMTEST1:
    .rept 8
    subs r0,r0,#1
    .endr
    bne ARMTEST1
    bx lr
|
||||
|
||||
;@ ARMTEST2(r0 = count): ARM-mode loop of sixteen subs followed by one
;@ bne per iteration. The .rept block expands to the same sixteen subs
;@ instructions as writing them out by hand.
.globl ARMTEST2
ARMTEST2:
    .rept 16
    subs r0,r0,#1
    .endr
    bne ARMTEST2
    bx lr
|
||||
|
||||
;@ ARMTEST3(r0 = count): ARM-mode loop whose body is eight groups of
;@ (one subs + eight nop), followed by one bne. The nested .rept blocks
;@ expand to the same 72 instructions as writing them out by hand.
;@ bne tests the flags left by the last subs.
.globl ARMTEST3
ARMTEST3:
    .rept 8
    subs r0,r0,#1
    .rept 8
    nop
    .endr
    .endr
    bne ARMTEST3
    bx lr
|
||||
|
||||
;@ -----------------------------------------------------
|
||||
|
||||
.thumb

;@ Thumb-mode startup: call notmain, then spin forever if it returns.
.thumb_func
thumb_start:
    bl notmain
hang: b hang
|
||||
|
||||
;@ PUT32(r0 = address, r1 = value): store the word in r1 at [r0].
.thumb_func
.globl PUT32
PUT32:
    str r1,[r0]
    bx lr
|
||||
|
||||
;@ GET32(r0 = address): load the word at [r0] and return it in r0.
.thumb_func
.globl GET32
GET32:
    ldr r0,[r0]
    bx lr
|
||||
|
||||
;@ dummy(r0): returns immediately without touching anything. uart.c
;@ calls it from its delay loops.
.thumb_func
.globl dummy
dummy:
    bx lr
|
||||
|
||||
;@ THUMBTEST0(r0 = count): Thumb-mode loop of one sub and one bne per
;@ iteration; returns once r0 has been decremented to zero.
.thumb_func
.globl THUMBTEST0
THUMBTEST0:
    sub r0,r0,#1
    bne THUMBTEST0
    bx lr
|
||||
|
||||
|
||||
;@ THUMBTEST1(r0 = count): Thumb-mode loop of eight sub followed by one
;@ bne per iteration. The .rept block expands to the same eight sub
;@ instructions as writing them out by hand.
.thumb_func
.globl THUMBTEST1
THUMBTEST1:
    .rept 8
    sub r0,r0,#1
    .endr
    bne THUMBTEST1
    bx lr
|
||||
|
||||
|
||||
;@ THUMBTEST2(r0 = count): Thumb-mode loop whose body is eight groups
;@ of (one sub + eight nop), followed by one bne. The nested .rept
;@ blocks expand to the same 72 instructions as writing them out by
;@ hand.
.thumb_func
.globl THUMBTEST2
THUMBTEST2:
    .rept 8
    sub r0,r0,#1
    .rept 8
    nop
    .endr
    .endr
    bne THUMBTEST2
    bx lr
|
||||
|
||||
|
||||
116
bench02/uart.c
Normal file
116
bench02/uart.c
Normal file
@@ -0,0 +1,116 @@
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
// Word store/load and do-nothing helpers implemented in novectors.s.
extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );
extern void dummy ( unsigned int );

// GPIO register addresses.
#define GPFSEL1 0x20200004
#define GPSET0 0x2020001C
#define GPCLR0 0x20200028
#define GPPUD 0x20200094
#define GPPUDCLK0 0x20200098

// AUX / mini uart (AUX_MU_*) register addresses.
#define AUX_ENABLES 0x20215004
#define AUX_MU_IO_REG 0x20215040
#define AUX_MU_IER_REG 0x20215044
#define AUX_MU_IIR_REG 0x20215048
#define AUX_MU_LCR_REG 0x2021504C
#define AUX_MU_MCR_REG 0x20215050
#define AUX_MU_LSR_REG 0x20215054
#define AUX_MU_MSR_REG 0x20215058
#define AUX_MU_SCRATCH 0x2021505C
#define AUX_MU_CNTL_REG 0x20215060
#define AUX_MU_STAT_REG 0x20215064
#define AUX_MU_BAUD_REG 0x20215068

// ARM timer register addresses; only CTL and CNT are used in this file.
#define ARM_TIMER_LOD 0x2000B400
#define ARM_TIMER_VAL 0x2000B404
#define ARM_TIMER_CTL 0x2000B408
#define ARM_TIMER_DIV 0x2000B41C
#define ARM_TIMER_CNT 0x2000B420
|
||||
|
||||
//GPIO14 TXD0 and TXD1
|
||||
//GPIO15 RXD0 and RXD1
|
||||
//alt function 5 for uart1
|
||||
//alt function 0 for uart0
|
||||
|
||||
//((250,000,000/115200)/8)-1 = 270
|
||||
//------------------------------------------------------------------------
|
||||
void uart_putc ( unsigned int c )
|
||||
{
|
||||
while(1)
|
||||
{
|
||||
if(GET32(AUX_MU_LSR_REG)&0x20) break;
|
||||
}
|
||||
PUT32(AUX_MU_IO_REG,c);
|
||||
}
|
||||
//------------------------------------------------------------------------
|
||||
// Print d as eight upper-case hex digits, most significant nibble
// first, followed by a single space (0x20). No line ending is sent.
void hexstrings ( unsigned int d )
{
    unsigned int shift;
    unsigned int nibble;

    for(shift=32;shift!=0;)
    {
        shift-=4;
        nibble=(d>>shift)&0xF;
        // 0x30 is '0'; 0x37 is 'A'-10, so 10..15 map to 'A'..'F'.
        uart_putc(nibble+((nibble>9)?0x37:0x30));
    }
    uart_putc(0x20);
}
|
||||
//------------------------------------------------------------------------
|
||||
// Print d exactly as hexstrings does (eight hex digits plus a space)
// and finish the line with carriage return and line feed.
void hexstring ( unsigned int d )
{
    hexstrings(d);
    uart_putc('\r');
    uart_putc('\n');
}
|
||||
//------------------------------------------------------------------------
|
||||
int uart_init ( void )
|
||||
{
|
||||
unsigned int ra;
|
||||
|
||||
PUT32(AUX_ENABLES,1);
|
||||
PUT32(AUX_MU_IER_REG,0);
|
||||
PUT32(AUX_MU_CNTL_REG,0);
|
||||
PUT32(AUX_MU_LCR_REG,3);
|
||||
PUT32(AUX_MU_MCR_REG,0);
|
||||
PUT32(AUX_MU_IER_REG,0);
|
||||
PUT32(AUX_MU_IIR_REG,0xC6);
|
||||
PUT32(AUX_MU_BAUD_REG,270);
|
||||
|
||||
ra=GET32(GPFSEL1);
|
||||
ra&=~(7<<12); //gpio14
|
||||
ra|=2<<12; //alt5
|
||||
ra&=~(7<<15); //gpio15
|
||||
ra|=2<<15; //alt5
|
||||
PUT32(GPFSEL1,ra);
|
||||
|
||||
PUT32(GPPUD,0);
|
||||
for(ra=0;ra<150;ra++) dummy(ra);
|
||||
PUT32(GPPUDCLK0,(1<<14)|(1<<15));
|
||||
for(ra=0;ra<150;ra++) dummy(ra);
|
||||
PUT32(GPPUDCLK0,0);
|
||||
|
||||
PUT32(AUX_MU_CNTL_REG,3);
|
||||
return(0);
|
||||
}
|
||||
//-------------------------------------------------------------------------
|
||||
// Reset the timer control register: write 0 to ARM_TIMER_CTL, then
// write 0x200.
// NOTE(review): 0x200 presumably enables the free-running counter that
// timer_tick reads - confirm against the BCM2835 peripherals manual.
void init_timer ( void )
{
    PUT32(ARM_TIMER_CTL,0x00000000);
    PUT32(ARM_TIMER_CTL,0x00000200);
}
|
||||
//-------------------------------------------------------------------------
|
||||
// Return the current value read from ARM_TIMER_CNT.
unsigned int timer_tick ( void )
{
    return(GET32(ARM_TIMER_CNT));
}
|
||||
//-------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------
|
||||
Reference in New Issue
Block a user