diff --git a/extest/Makefile b/extest/Makefile new file mode 100644 index 0000000..5b36607 --- /dev/null +++ b/extest/Makefile @@ -0,0 +1,36 @@ + + + +ARMGNU ?= arm-none-linux-gnueabi + +COPS = -Wall -O2 -nostdlib -nostartfiles -ffreestanding + +all : notmain.bin + +clean : + rm -f *.o + rm -f *.bin + rm -f *.elf + rm -f *.list + +vectors.o : vectors.s + $(ARMGNU)-as vectors.s -o vectors.o + +notmain.o : notmain.c + $(ARMGNU)-gcc $(COPS) -c notmain.c -o notmain.o + +uart.o : uart.c + $(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o + +notmain.bin : memmap vectors.o uart.o notmain.o + $(ARMGNU)-ld -T memmap vectors.o notmain.o uart.o -o notmain.elf + $(ARMGNU)-objdump -D notmain.elf > notmain.list + $(ARMGNU)-objcopy notmain.elf -O binary notmain.bin + + + + + + + + diff --git a/extest/README b/extest/README new file mode 100644 index 0000000..66bca16 --- /dev/null +++ b/extest/README @@ -0,0 +1,45 @@ + +See the top level README for information on where to find the +schematic and programmer's reference manual for the ARM processor +on the raspberry pi. Also find information on how to load and run +these programs, and how to hook up your host system to the uart on the +raspberry pi. + +This example does a couple of things. First, it is a simple example of +using the mmu. It does it such that the physical address = virtual +address. Why would you do that? To turn on the data cache and not +have memory mapped I/O (uart ports, timer hardware, etc.) be cached. + +Second, the ldrex/strex instruction pair can be, and has been, improperly +used by programmers. The issue is that programmers are told in the +ARM ARM that ldrex/strex has been added, among other reasons, to allow +for atomic protection in memory shared by multiple processors. +Programmers are warned that for uniprocessor systems the hardware may +not have the protection mechanism (which means DON'T use ldrex/strex). 
+In the AMBA/AXI spec hardware engineers are told that normal LDR/STR +type accesses return OKAY, meaning it worked, no problems. Exclusive +accesses (LDREX/STREX) return EXOKAY if it was a successful exclusive +access. But if you don't support exclusive accesses in your memory +controller then just return OKAY. Well, a return of OKAY for an STREX +is a fail as far as the exclusive access goes. And you will see in +very popular, open source, software the LDREX/STREX instruction pair +used for any ARMv6 or newer processor, uniprocessor or multi. Dumb +luck prevails because the L1 cache in the ARMv6 and newer handles the +LDREX/STREX pair and makes it all work. But if L1 is off, the access +goes out on the AXI bus where the vendor's memory controller hardware +lives. You are at the mercy of the hardware vendor, and a very popular +ARMv6 or newer vendor doesn't necessarily support exclusive access for +a uniprocessor system. + +Now when trying to remember and find all of these details, I figured it +would be trivial to test a new system to see if it has this problem. +Simple, right? Just two instructions, check the result, right? Wrong, +you need the mmu on. So I had to go revisit all of that as well and +build this example for myself. + +The good news is, from what I can tell, the processor used here does +appear to work. 
+ + + + diff --git a/extest/memmap b/extest/memmap new file mode 100644 index 0000000..1b42076 --- /dev/null +++ b/extest/memmap @@ -0,0 +1,12 @@ + +MEMORY +{ + ram : ORIGIN = 0x00000000, LENGTH = 0x10000 +} + +SECTIONS +{ + .text : { *(.text*) } > ram + .bss : { *(.bss*) } > ram +} + diff --git a/extest/notmain.c b/extest/notmain.c new file mode 100644 index 0000000..369c743 --- /dev/null +++ b/extest/notmain.c @@ -0,0 +1,158 @@ + +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- +void PUT32 ( unsigned int, unsigned int ); +unsigned int GET32 ( unsigned int ); +extern void uart_init ( void ); +extern void hexstring ( unsigned int d ); +extern void hexstrings ( unsigned int d ); +extern void start_l1cache ( void ); +extern void stop_l1cache ( void ); +extern void start_mmu ( unsigned int, unsigned int ); +extern unsigned int LDREX ( unsigned int, unsigned int ); +extern unsigned int STREX ( unsigned int, unsigned int, unsigned int ); +extern unsigned int EXTEST ( unsigned int, unsigned int, unsigned int ); + +//Need a top level entry for every 1MB section, 20 bits. +//memory base addresses 0x100000, 0x200000, 0x300000, etc. +//making a one to one virtual to physical map +//virtual 0x20201234 -> 0x20201234 physical +// +//bits 31:20 of the virtual address index into the top level table +//1<<((31-20)+1) 4096 entries. 0x1000 32 bit entries, 0x4000 bytes. +// +//Bits 31:10 of the top level table entry point at the coarse page table +//bits 19:12 of the address index into this table. +//1<<((19-12)+1) 256 entries, 0x100 entries 0x400 bytes per table +// +//Using a coarse entry +// +//the hardware looks in the first table and gets an entry. Bits in the +//entry determine what kind it is, coarse, section, super section, +//just using coarse here as it doesn't save any space using the others. +//You can put the coarse entry anywhere. 
Going to pack it in next to +//the top level table. going to limit the size of the table so only +//so many entries will be allowed +// +//Using small pages (4096 byte) +// +//bits 31:12 of the small page descriptor in the course table are the +//physical address in memory. bits 11:0 of the physical address come +//from the virtual address. + +#define MMUTABLEBASE 0x00100000 + +#define MMUTABLESIZE (0x8000) +#define MMUTABLEMASK ((MMUTABLESIZE-1)>>2) + +#define TOP_LEVEL_WORDS (1<<((31-20)+1)) +#define COARSE_TABLE_WORDS (1<<((19-12)+1)) +#define SMALL_TABLE_WORDS (1<<((11-0)+1)) + +unsigned int nextfree; + +//------------------------------------------------------------------- +unsigned int next_coarse_offset ( unsigned int x ) +{ + unsigned int mask; + + mask=(~0)<<(10-2); + mask=~mask; + while(x&mask) x++; //lazy brute force + return(x); +} +//------------------------------------------------------------------- +unsigned int add_one ( unsigned int add, unsigned int flags ) +{ + unsigned int ra; + unsigned int rb; + unsigned int rc; + + //bits 31:20 index into the top level table + ra=add>>20; + rc=MMUTABLEBASE+(ra<<2); + rb=GET32(rc); + if(rb) + { + //printf("Address %08X already allocated\n",add); + hexstring(add); + hexstring(rc); + hexstring(rb); + hexstring(0xBADADD); + return(1); + } + add=ra<<20; + + rb=next_coarse_offset(nextfree); + rc=rb+COARSE_TABLE_WORDS; + if(rc>=MMUTABLESIZE) + { + //printf("Not enough room\n"); + hexstring(0xBAD); + return(1); + } + nextfree=rc; + //use course page table pointer on top level table + PUT32(MMUTABLEBASE+(ra<<2),(MMUTABLEBASE+(rb<<2))|0x00000001); + //fill in the course page table. 
with small entries + for(ra=0;ra>rb)&0xF; + if(rc>9) rc+=0x37; else rc+=0x30; + uart_putc(rc); + if(rb==0) break; + } + uart_putc(0x20); +} +//------------------------------------------------------------------------ +void hexstring ( unsigned int d ) //calls hexstrings(d), then sends 0x0D 0x0A +{ + hexstrings(d); + uart_putc(0x0D); //carriage return + uart_putc(0x0A); //line feed +} +//------------------------------------------------------------------------ +void uart_init ( void ) //writes the AUX_* uart registers, switches gpio14/15 to alt5, then pulses GPPUDCLK0 for those two pins +{ + unsigned int ra; + + PUT32(AUX_ENABLES,1); + PUT32(AUX_MU_IER_REG,0); + PUT32(AUX_MU_CNTL_REG,0); //NOTE(review): presumably keeps rx/tx off while configuring - confirm against the datasheet + PUT32(AUX_MU_LCR_REG,3); + PUT32(AUX_MU_MCR_REG,0); + PUT32(AUX_MU_IER_REG,0); + PUT32(AUX_MU_IIR_REG,0xC6); + PUT32(AUX_MU_BAUD_REG,270); //NOTE(review): baud divisor; resulting rate depends on the core clock - confirm + + ra=GET32(GPFSEL1); //read-modify-write so the other pins' function bits are preserved + ra&=~(7<<12); //gpio14 + ra|=2<<12; //alt5 + ra&=~(7<<15); //gpio15 + ra|=2<<15; //alt5 + PUT32(GPFSEL1,ra); + + PUT32(GPPUD,0); + for(ra=0;ra<150;ra++) dummy(ra); //short delay: 150 calls to the empty dummy() + PUT32(GPPUDCLK0,(1<<14)|(1<<15)); //bits 14 and 15, the two pins configured above + for(ra=0;ra<150;ra++) dummy(ra); //same delay again before clearing the clock bits + PUT32(GPPUDCLK0,0); + + PUT32(AUX_MU_CNTL_REG,3); //written last; NOTE(review): presumably enables rx/tx - confirm +} +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- diff --git a/extest/vectors.s b/extest/vectors.s new file mode 100644 index 0000000..13052db --- /dev/null +++ b/extest/vectors.s @@ -0,0 +1,82 @@ + +;@.globl _start +;@_start: + b reset + + .globl reset +reset: + mov sp,#0x10000 ;@ stack pointer = 0x10000 + bl notmain +hang: b hang ;@ spin forever if notmain returns + + +.globl PUT32 +PUT32: + str r1,[r0] ;@ store r1 at the address in r0 + bx lr + +.globl GET32 +GET32: + ldr r0,[r0] ;@ load the word at the address in r0 into r0 + bx lr + +.globl dummy +dummy: + bx lr ;@ returns immediately; called from the delay loops in uart_init + + +.globl LDREX +LDREX: + ldrex r0,[r1] ;@ exclusive load from the address in r1; incoming r0 is overwritten + bx lr + +.globl STREX +STREX: + strex r0,r1,[r2] ;@ exclusive store of r1 to the address in r2; status result in r0 + bx lr + +.globl EXTEST +EXTEST: + ldrex r3,[r2] ;@ exclusive load from the address in r2; loaded value (r3) is not used + strex r0,r1,[r2] ;@ exclusive store of r1 to the same address; status result in r0 + bx lr + + +.globl start_l1cache +start_l1cache: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches + mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb + mrc p15, 0, r0, c1, c0, 0 + orr r0,r0,#0x1000 ;@ instruction (set bit 12) + orr r0,r0,#0x0004 ;@ data (set bit 2) + mcr p15, 0, r0, c1, c0, 0 + bx lr + +.globl stop_l1cache +stop_l1cache: + mrc 
p15, 0, r0, c1, c0, 0 + bic r0,r0,#0x1000 ;@ instruction (clear bit 12) + bic r0,r0,#0x0004 ;@ data (clear bit 2) + mcr p15, 0, r0, c1, c0, 0 + bx lr + + +.globl start_mmu +start_mmu: + mov r2,#0 + mcr p15,0,r2,c7,c7,0 ;@ invalidate caches + mcr p15,0,r2,c8,c7,0 ;@ invalidate tlb + + mvn r2,#0 + mcr p15,0,r2,c3,c0,0 ;@ domain access control, all ones (mvn of 0) + + mcr p15,0,r0,c2,c0,0 ;@ translation table base 0, value passed in r0 + mcr p15,0,r0,c2,c0,1 ;@ translation table base 1, same value + + mrc p15,0,r2,c1,c0,0 + orr r2,r2,r1 ;@ OR in the control register bits passed in r1 + mcr p15,0,r2,c1,c0,0 + + bx lr +