adding an ldrex/strex test

This commit is contained in:
David Welch
2012-05-31 14:57:59 -04:00
parent 86dbb30d76
commit 442892d626
6 changed files with 431 additions and 0 deletions

36
extest/Makefile Normal file
View File

@@ -0,0 +1,36 @@
# Build notmain.bin, a raw binary image, from vectors.s, notmain.c and uart.c.
# NOTE(review): recipe lines must start with a tab in the real Makefile; the
# leading whitespace appears to have been lost in this view - confirm.

# Cross toolchain prefix; ?= lets the environment or command line override it.
ARMGNU ?= arm-none-linux-gnueabi
# Compiler options: no C library or startup files are linked in.
COPS = -Wall -O2 -nostdlib -nostartfiles -ffreestanding
# Default target.
all : notmain.bin
# Remove every generated file.
clean :
rm -f *.o
rm -f *.bin
rm -f *.elf
rm -f *.list
vectors.o : vectors.s
$(ARMGNU)-as vectors.s -o vectors.o
notmain.o : notmain.c
$(ARMGNU)-gcc $(COPS) -c notmain.c -o notmain.o
uart.o : uart.c
$(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o
# Link with vectors.o first, then write a disassembly listing and the raw
# binary.
notmain.bin : memmap vectors.o uart.o notmain.o
$(ARMGNU)-ld -T memmap vectors.o notmain.o uart.o -o notmain.elf
$(ARMGNU)-objdump -D notmain.elf > notmain.list
$(ARMGNU)-objcopy notmain.elf -O binary notmain.bin

45
extest/README Normal file
View File

@@ -0,0 +1,45 @@
See the top level README for information on where to find the
schematic and programmers reference manual for the ARM processor
on the raspberry pi. Also find information on how to load and run
these programs. And how to hook up your host system to the uart on the
raspberry pi.
This example does a couple of things. First it is a simple example of
using the mmu. It does it such that the physical address = virtual
address. Why would you do that? To turn on the data cache and not
have memory mapped I/O (uart ports, timers hardware, etc) be cached.
Second, the ldrex/strex instruction pair can be, and has been, improperly
used by programmers. The issue is that programmers are told in the
ARM ARM that ldrex/strex was added, among other reasons, to allow
for atomic protection of memory shared by multiple processors.
Programmers are warned that for uniprocessor systems the hardware may
not have the protection mechanism (which means DON'T use ldrex/strex).
In the AMBA/AXI spec hardware engineers are told that normal LDR/STR
type accesses should return OKAY, meaning it worked, no problems. Exclusive
accesses (LDREX/STREX) should return EXOKAY if the access was successfully
exclusive. But if you don't support exclusive accesses in your memory
controller then just return OKAY. Well, a return of OKAY for an STREX
is a fail as far as the exclusive access goes. And you will see in
very popular, open source software the LDREX/STREX instruction pair
used for any ARMv6 or newer processor, uniprocessor or multi. Dumb
luck prevails because the L1 cache in the ARMv6 and newer handles the
LDREX/STREX pair and makes it all work. But if L1 is off, the access
goes out on the AXI bus where the vendor's memory controller hardware
lives, and you are at the mercy of the hardware vendor. A very popular
ARMv6 or newer vendor doesn't necessarily support exclusive access for
a uniprocessor system.
Now when trying to remember and find all of these details, I figured it
would be trivial to test a new system to see if it has this problem.
Simple right, just two instructions, check the result right? Wrong,
you need the mmu on. So I had to go revisit all of that as well and
build this example for myself.
The good news is that, from what I can tell, the processor used here
does appear to work.

12
extest/memmap Normal file
View File

@@ -0,0 +1,12 @@
/* Linker script: one 0x10000 byte region starting at address 0.
   vectors.s sets the initial stack pointer to 0x10000, the end of this
   region. */
MEMORY
{
ram : ORIGIN = 0x00000000, LENGTH = 0x10000
}
/* Code goes first, followed by zero-initialised data, both in ram. */
SECTIONS
{
.text : { *(.text*) } > ram
.bss : { *(.bss*) } > ram
}

158
extest/notmain.c Normal file
View File

@@ -0,0 +1,158 @@
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
//Word access helpers implemented in vectors.s.
//PUT32(address,value) stores value at address.
//GET32(address) returns the word at address.
void PUT32 ( unsigned int, unsigned int );
unsigned int GET32 ( unsigned int );
//Serial output helpers implemented in uart.c.
//hexstrings prints 8 hex digits and a space, hexstring adds CR LF.
extern void uart_init ( void );
extern void hexstring ( unsigned int d );
extern void hexstrings ( unsigned int d );
//Cache and mmu control implemented in vectors.s.
//start_mmu(table base address, control register bits to set)
extern void start_l1cache ( void );
extern void stop_l1cache ( void );
extern void start_mmu ( unsigned int, unsigned int );
//Exclusive access helpers implemented in vectors.s. The first argument
//of each is ignored; it only occupies the register the result comes
//back in.
//LDREX(unused,address) returns the word loaded with ldrex
//STREX(unused,value,address) returns the strex status
//EXTEST(unused,value,address) does ldrex then strex back to back and
//returns the strex status
extern unsigned int LDREX ( unsigned int, unsigned int );
extern unsigned int STREX ( unsigned int, unsigned int, unsigned int );
extern unsigned int EXTEST ( unsigned int, unsigned int, unsigned int );
//Need a top level entry for every 1MB section, 20 bits.
//memory base addresses 0x100000, 0x200000, 0x300000, etc.
//making a one to one virtual to physical map
//virtual 0x20201234 -> 0x20201234 physical
//
//bits 31:20 of the virtual address index into the top level table
//1<<((31-20)+1) 4096 entries. 0x1000 32 bit entries, 0x4000 bytes.
//
//Bits 31:10 of the top level table entry point at the coarse page table
//bits 19:12 of the address index into this table.
//1<<((19-12)+1) 256 entries, 0x100 entries, 0x400 bytes per table
//
//Using a coarse entry
//
//the hardware looks in the first table and gets an entry. Bits in the
//entry determine what kind it is, coarse, section, super section,
//just using coarse here as it doesn't save any space using the others.
//You can put the coarse entry anywhere. Going to pack it in next to
//the top level table. going to limit the size of the table so only
//so many entries will be allowed
//
//Using small pages (4096 byte)
//
//bits 31:12 of the small page descriptor in the coarse table are the
//physical address in memory. bits 11:0 of the physical address come
//from the virtual address.
//Address where the translation tables are built. The top level table
//starts here and add_one places the coarse tables after it.
#define MMUTABLEBASE 0x00100000
//Limit that add_one checks before allocating another coarse table.
//NOTE(review): add_one compares a word offset against this value while
//MMUTABLEMASK treats it as a byte count - confirm the intended unit.
#define MMUTABLESIZE (0x8000)
//Not referenced by the code in this file.
#define MMUTABLEMASK ((MMUTABLESIZE-1)>>2)
//Entries in the top level table, one per 1MB section: 4096
#define TOP_LEVEL_WORDS (1<<((31-20)+1))
//Entries in one coarse table, one per 4096 byte page: 256
#define COARSE_TABLE_WORDS (1<<((19-12)+1))
//4096; not referenced by the code in this file.
#define SMALL_TABLE_WORDS (1<<((11-0)+1))
//Word offset from MMUTABLEBASE of the first table word not yet handed
//out. notmain leaves it at TOP_LEVEL_WORDS after clearing the top level
//table and add_one advances it.
unsigned int nextfree;
//-------------------------------------------------------------------
//Round a word offset up to the next coarse page table boundary.
//
//A top level entry keeps bits 31:10 of the coarse table address, so a
//coarse table has to start on a 1<<10 byte boundary, which is
//1<<(10-2) 32 bit words.
//
//x       word offset to align
//returns x if it is already a multiple of 256 words, otherwise the next
//        multiple of 256 above it (wrapping to 0 on unsigned overflow)
unsigned int next_coarse_offset ( unsigned int x )
{
    //build the mask from an unsigned constant; left shifting the signed
    //value ~0 (which is -1) is undefined behaviour in C
    const unsigned int mask=(1u<<(10-2))-1u;

    return((x+mask)&~mask);
}
//-------------------------------------------------------------------
//Identity map the 1MB section containing add using one coarse page
//table filled with 256 small page descriptors.
//
//add     any address inside the section to map
//flags   extra bits ORed into every small page descriptor
//returns 0 on success, 1 if the section already has a top level entry
//        or there is no room left for another coarse table. Diagnostic
//        values are printed on the uart in both failure cases.
unsigned int add_one ( unsigned int add, unsigned int flags )
{
    unsigned int section;
    unsigned int top_entry;
    unsigned int existing;
    unsigned int coarse_start;
    unsigned int coarse_end;
    unsigned int coarse_base;
    unsigned int page;

    //bits 31:20 of the address select the top level entry
    section=add>>20;
    top_entry=MMUTABLEBASE+(section<<2);
    existing=GET32(top_entry);
    if(existing!=0)
    {
        //section already allocated: address, entry address, entry value
        hexstring(add);
        hexstring(top_entry);
        hexstring(existing);
        hexstring(0xBADADD);
        return(1);
    }

    //claim the next aligned run of words for the coarse table
    //NOTE(review): coarse_end is a word offset while MMUTABLESIZE looks
    //like a byte count (see MMUTABLEMASK) - confirm the intended unit.
    coarse_start=next_coarse_offset(nextfree);
    coarse_end=coarse_start+COARSE_TABLE_WORDS;
    if(coarse_end>=MMUTABLESIZE)
    {
        //not enough room
        hexstring(0xBAD);
        return(1);
    }
    nextfree=coarse_end;
    coarse_base=MMUTABLEBASE+(coarse_start<<2);

    //top level entry is a coarse page table pointer
    PUT32(top_entry,coarse_base|0x00000001);

    //fill the coarse page table with small page entries, physical
    //address equal to virtual address
    add=section<<20;
    for(page=0;page<COARSE_TABLE_WORDS;page++)
    {
        PUT32(coarse_base+(page<<2),(add+(page<<12))|0x00000032|flags);
    }
    return(0);
}
//-------------------------------------------------------------------------
//C entry point called from vectors.s.
//
//Builds an identity mapped page table, turns the mmu on and then runs
//the LDREX / STREX / EXTEST helpers against one word of ram four times:
//twice with the caches off, once with the l1 cache on, and once more
//after turning it back off. Each round prints the ldrex value, the
//strex status and the EXTEST status on the uart.
//
//returns 0 normally, 1 if a page table entry could not be added.
int notmain ( void )
{
    const unsigned int testaddr=0x00009000;
    unsigned int count;
    unsigned int loaded;
    unsigned int stored;
    unsigned int paired;

    uart_init();
    for(count=0;count<0x20;count++)
    {
        hexstring(count);
    }
    hexstring(0x12345678);

    //zero the top level table; nextfree ends up at TOP_LEVEL_WORDS,
    //which is where the first coarse table will go
    nextfree=0;
    while(nextfree<TOP_LEVEL_WORDS)
    {
        PUT32(MMUTABLEBASE+(nextfree<<2),0);
        nextfree++;
    }

    //ram used by the stack and the program
    if(add_one(0x00000000,0x0000|8|4)) return(1);
    //memory mapped I/O used by the uart, etc, not cached
    if(add_one(0x20000000,0x0000)) return(1);
    if(add_one(0x20100000,0x0000)) return(1);
    if(add_one(0x20200000,0x0000)) return(1);

    //mmu on, data cache not enabled yet. See the processor reference
    //manual for the meaning of the control register bits.
    start_mmu(MMUTABLEBASE,0x00800001);
    hexstring(0x12345678);

    PUT32(testaddr,0x1234);

    //round one, caches off
    loaded=LDREX(0,testaddr);
    stored=STREX(0,0xABCD,testaddr);
    paired=EXTEST(0,0x1234,testaddr);
    hexstrings(loaded);
    hexstrings(stored);
    hexstring(paired);

    //round two, caches off
    loaded=LDREX(0,testaddr);
    stored=STREX(0,0xABCD,testaddr);
    paired=EXTEST(0,0x1234,testaddr);
    hexstrings(loaded);
    hexstrings(stored);
    hexstring(paired);

    //round three, l1 cache on; read the word once first and do the
    //printing only after the cache is off again
    start_l1cache();
    GET32(testaddr);
    loaded=LDREX(0,testaddr);
    stored=STREX(0,0x5678,testaddr);
    paired=EXTEST(0,0x2345,testaddr);
    stop_l1cache();
    hexstrings(loaded);
    hexstrings(stored);
    hexstring(paired);

    //round four, caches off again
    loaded=LDREX(0,testaddr);
    stored=STREX(0,0x1234,testaddr);
    paired=EXTEST(0,0x3456,testaddr);
    hexstrings(loaded);
    hexstrings(stored);
    hexstring(paired);

    hexstring(0x12345678);
    return(0);
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------

98
extest/uart.c Normal file
View File

@@ -0,0 +1,98 @@
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
//Helpers implemented in vectors.s: PUT32(address,value) stores a word,
//GET32(address) loads one, dummy does nothing and returns (used by
//uart_init as a delay loop body).
extern void PUT32 ( unsigned int, unsigned int );
extern unsigned int GET32 ( unsigned int );
extern void dummy ( unsigned int );
//GPIO register addresses. uart_init uses GPFSEL1 to select the pin
//functions and GPPUD/GPPUDCLK0 for the pull up/down sequence.
//GPSET0 and GPCLR0 are not referenced in this file.
#define GPFSEL1 0x20200004
#define GPSET0 0x2020001C
#define GPCLR0 0x20200028
#define GPPUD 0x20200094
#define GPPUDCLK0 0x20200098
//AUX / uart register addresses (presumably the mini uart, going by the
//MU in the names - see the peripheral manual for the register layouts).
#define AUX_ENABLES 0x20215004
#define AUX_MU_IO_REG 0x20215040
#define AUX_MU_IER_REG 0x20215044
#define AUX_MU_IIR_REG 0x20215048
#define AUX_MU_LCR_REG 0x2021504C
#define AUX_MU_MCR_REG 0x20215050
#define AUX_MU_LSR_REG 0x20215054
#define AUX_MU_MSR_REG 0x20215058
#define AUX_MU_SCRATCH 0x2021505C
#define AUX_MU_CNTL_REG 0x20215060
#define AUX_MU_STAT_REG 0x20215064
#define AUX_MU_BAUD_REG 0x20215068
//GPIO14 TXD0 and TXD1
//GPIO15 RXD0 and RXD1
//alt function 5 for uart1
//alt function 0 for uart0
//((250,000,000/115200)/8)-1 = 270
//------------------------------------------------------------------------
//Send one character on the uart.
//
//Busy waits until bit 5 (0x20) of the line status register is set
//(presumably "transmitter can take another byte" - see the peripheral
//manual), then writes c to the I/O register.
void uart_putc ( unsigned int c )
{
    while((GET32(AUX_MU_LSR_REG)&0x20)==0)
    {
        //spin
    }
    PUT32(AUX_MU_IO_REG,c);
}
//------------------------------------------------------------------------
//Print d as eight upper case hex digits, most significant first,
//followed by a single space.
void hexstrings ( unsigned int d )
{
    unsigned int shift;
    unsigned int nibble;

    //walk the nibbles from bits 31:28 down to bits 3:0
    for(shift=32;shift!=0;)
    {
        shift-=4;
        nibble=(d>>shift)&0xF;
        if(nibble>9)
        {
            nibble+=0x37; //10..15 become 'A'..'F'
        }
        else
        {
            nibble+=0x30; //0..9 become '0'..'9'
        }
        uart_putc(nibble);
    }
    uart_putc(0x20);
}
//------------------------------------------------------------------------
//Print d the same way hexstrings does (eight hex digits and a space)
//and finish the line with carriage return, line feed.
void hexstring ( unsigned int d )
{
    hexstrings(d);
    uart_putc('\r');
    uart_putc('\n');
}
//------------------------------------------------------------------------
//Set up the uart and route it to gpio14 / gpio15.
//
//The register writes are kept in one fixed order: program the uart
//with transmit and receive disabled, switch the pins to alt function 5,
//run the pull up/down sequence for both pins, then enable the uart.
void uart_init ( void )
{
    unsigned int fsel;
    unsigned int delay;

    PUT32(AUX_ENABLES,1);
    PUT32(AUX_MU_IER_REG,0);
    PUT32(AUX_MU_CNTL_REG,0);
    PUT32(AUX_MU_LCR_REG,3);
    PUT32(AUX_MU_MCR_REG,0);
    PUT32(AUX_MU_IER_REG,0);
    PUT32(AUX_MU_IIR_REG,0xC6);
    //((250,000,000/115200)/8)-1 = 270
    PUT32(AUX_MU_BAUD_REG,270);

    //gpio14 is the 3 bit field at bit 12, gpio15 the one at bit 15;
    //clear both fields and write 2 (alt5) into each
    fsel=GET32(GPFSEL1);
    fsel&=~((7<<12)|(7<<15));
    fsel|=(2<<12)|(2<<15);
    PUT32(GPFSEL1,fsel);

    //pull up/down sequence for the two pins, with a short delay after
    //each of the first two writes
    PUT32(GPPUD,0);
    for(delay=0;delay<150;delay++)
    {
        dummy(delay);
    }
    PUT32(GPPUDCLK0,(1<<14)|(1<<15));
    for(delay=0;delay<150;delay++)
    {
        dummy(delay);
    }
    PUT32(GPPUDCLK0,0);

    PUT32(AUX_MU_CNTL_REG,3);
}
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------

82
extest/vectors.s Normal file
View File

@@ -0,0 +1,82 @@
;@ Start of the image. vectors.o is linked first, so the branch below is
;@ the first instruction. reset sets the stack pointer to 0x10000, calls
;@ the C entry point notmain, and spins forever at hang if it returns.
;@.globl _start
;@_start:
b reset
.globl reset
reset:
mov sp,#0x10000
bl notmain
hang: b hang
;@ void PUT32 ( address, value )
;@ Store the word in r1 at the address in r0.
.globl PUT32
PUT32:
str r1,[r0]
bx lr
;@ unsigned int GET32 ( address )
;@ Load the word at the address in r0 and return it in r0.
.globl GET32
GET32:
ldr r0,[r0]
bx lr
;@ void dummy ( unsigned int )
;@ Does nothing and returns; uart.c calls it in its delay loops.
.globl dummy
dummy:
bx lr
;@ unsigned int LDREX ( unused, address )
;@ Exclusive load of the word at the address in r1, returned in r0.
;@ The value passed in r0 is ignored and overwritten.
.globl LDREX
LDREX:
ldrex r0,[r1]
bx lr
;@ unsigned int STREX ( unused, value, address )
;@ Exclusive store of r1 to the address in r2. The strex status is
;@ written to r0 and returned (per the ARM ARM, 0 means the store was
;@ performed and 1 means it was not). The value passed in r0 is ignored.
.globl STREX
STREX:
strex r0,r1,[r2]
bx lr
;@ unsigned int EXTEST ( unused, value, address )
;@ Back to back ldrex / strex on the address in r2. The loaded word
;@ goes to r3 and is discarded; r1 is then stored and the strex status
;@ is returned in r0. The value passed in r0 is ignored.
.globl EXTEST
EXTEST:
ldrex r3,[r2]
strex r0,r1,[r2]
bx lr
;@ void start_l1cache ( void )
;@ Invalidate the caches and the tlb, then set the instruction (0x1000)
;@ and data (0x0004) cache bits in the CP15 c1 control register with a
;@ read-modify-write. Uses r0 as scratch.
.globl start_l1cache
start_l1cache:
mov r0, #0
mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches
mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb
mrc p15, 0, r0, c1, c0, 0
orr r0,r0,#0x1000 ;@ instruction
orr r0,r0,#0x0004 ;@ data
mcr p15, 0, r0, c1, c0, 0
bx lr
;@ void stop_l1cache ( void )
;@ Clear the instruction (0x1000) and data (0x0004) cache bits in the
;@ CP15 c1 control register with a read-modify-write. Uses r0 as scratch.
;@ NOTE(review): no cache clean is done before the data cache is turned
;@ off, so data written while it was on may not have reached memory -
;@ confirm this is intended for the test.
.globl stop_l1cache
stop_l1cache:
mrc p15, 0, r0, c1, c0, 0
bic r0,r0,#0x1000 ;@ instruction
bic r0,r0,#0x0004 ;@ data
mcr p15, 0, r0, c1, c0, 0
bx lr
;@ void start_mmu ( table base address, control register bits to set )
;@ r0 = address of the top level translation table
;@ r1 = bits ORed into the CP15 c1 control register
;@ Invalidates the caches and tlb, writes all ones to the domain
;@ register, writes r0 to both translation table base registers, then
;@ sets the requested control bits. Uses r2 as scratch.
.globl start_mmu
start_mmu:
mov r2,#0
mcr p15,0,r2,c7,c7,0 ;@ invalidate caches
mcr p15,0,r2,c8,c7,0 ;@ invalidate tlb
mvn r2,#0
mcr p15,0,r2,c3,c0,0 ;@ domain
mcr p15,0,r0,c2,c0,0 ;@ tlb base
mcr p15,0,r0,c2,c0,1 ;@ tlb base
mrc p15,0,r2,c1,c0,0
orr r2,r2,r1
mcr p15,0,r2,c1,c0,0
bx lr