more work on the mmu example finding status information, etc.

2014-11-16 14:56:55 -05:00
parent 8f9900103f
commit 257a54fa26
3 changed files with 86 additions and 81 deletions
--- a/mmu/Makefile
+++ b/mmu/Makefile
@@ -3,9 +3,7 @@ ARMGNU ?= arm-none-eabi

 COPS = -Wall -O2 -nostdlib -nostartfiles -ffreestanding 

-gcc : notmain.hex
-
-all : gcc clang
+all : notmain.hex

 clean :
 	rm -f *.o
@@ -38,33 +36,3 @@ notmain.hex : memmap novectors.o periph.o notmain.o



-
-
-
-LOPS = -Wall -m32 -emit-llvm
-LLCOPS0 = -march=arm 
-LLCOPS1 = -march=arm -mcpu=arm1176jzf-s
-LLCOPS = $(LLCOPS1)
-COPS = -Wall  -O2 -nostdlib -nostartfiles -ffreestanding
-OOPS = -std-compile-opts
-
-clang : notmain.bin
-
-notmain.bc : notmain.c
-	clang $(LOPS) -c notmain.c -o notmain.bc
-
-periph.bc : periph.c
-	clang $(LOPS) -c periph.c -o periph.bc
-
-notmain.clang.elf : loader novectors.o notmain.bc periph.bc
-	llvm-link periph.bc notmain.bc -o notmain.nopt.bc
-	opt $(OOPS) notmain.nopt.bc -o notmain.opt.bc
-	llc $(LLCOPS) notmain.opt.bc -o notmain.clang.s
-	$(ARMGNU)-as notmain.clang.s -o notmain.clang.o
-	$(ARMGNU)-ld -o notmain.clang.elf -T loader novectors.o notmain.clang.o
-	$(ARMGNU)-objdump -D notmain.clang.elf > notmain.clang.list
-
-notmain.bin : notmain.clang.elf
-	$(ARMGNU)-objcopy notmain.clang.elf notmain.clang.bin -O binary
-
-
--- a/mmu/README
+++ b/mmu/README
@@ -2,10 +2,7 @@
 See the top level README file for more information on documentation
 and how to run these programs.

-This example demonstrates MMU basics.  This uses sections rather than
-pages, my extest example uses pages, which was the wrong way to go with
-that, overly complicated for a simple example.  Will fix that some
-day, for now this serves as a simple mmu example.
+This example demonstrates MMU basics.

 So what an MMU does or at least what an MMU does for us is it translates
 virtual addresses into physical addresses as well as checking access
@@ -30,7 +27,7 @@ is the address 0x00008000 where we assume our program is loaded before
 the GPU lets the ARM start.

 Basically ignore the man behind the curtain, you generally dont deal
-with this, the arm is usually the main processor and the memory system
+with this, the ARM is usually the main processor and the memory system
 is designed around it rather than what we have in this chip.

 So physical addresses are the addresses that are used on the ARM's
@@ -132,11 +129,7 @@ of replacement bits left over in a 32 bit word are limited.  But if
 we were to have a second table, then between the first and second
 tables we have 64 bits so when we have a bunch of bits to replace
 meaning we have a smaller block of memory being virtualized somewhere
-else, we will need the secondary table.  My extest example uses
-pages simply because at the time I wrote that the first example
-I found usable out there was page based.  Now I know that section
-based is much simpler so long as you can tolerate virtualizing whole
-1MByte sections.
+else, we will need the secondary table.  

 So you may be thinking that we have a chicken and egg problem, but we
 dont.  We want to access something at some address, that act causes
@@ -169,7 +162,7 @@ secondary translation tables live.  This is important SBZ means should
 be zero, the lower 14 bits assuming X is zero, must be zero so we
 must choose an address that has the lower 14 bits zero.  I have chosen
 0x00004000 which just barely makes that requirement.  I assume
-that my program is loaded into the arm address 0x8000, I will need
+that my program is loaded into the ARM address 0x8000, I will need
 to have some exception handlers at 0x0000, but 0x4000 to 0x8000 is
 not being used (I have my stack elsewhere).

@@ -212,7 +205,7 @@ MMU is not only there to remap memory space, but it is also there to
 allow for control over access permissions and to allow control over
 caching.  Separate controls for each page or section.  So working
 backward we want to have our uart which is in the section 0x20200000
-be available to us after the mmu is enabled.  It really makes it so
+be available to us after the MMU is enabled.  It really makes it so
 much easier if we have the virtual match the physical for peripherals
 and actually this example starts off with virtual matching physical
 for all the sections we care about.  So we need 0x202.... to result
@@ -234,7 +227,7 @@ we are doing is polling and we dont evict that cached value then all
 we will ever see is the stale, cached, regsiter value, if that
 value did not show that tx buff was empty, then we will never see
 the indication when it changes.  So never make a peripherals space
-cacheable.  This is a good place to point out the purpose fo an mmu
+cacheable.  This is a good place to point out the purpose of an MMU
 again cache control.  Right now we can see that the MMU even with
 virtual = physical, allows us to turn on the data cache, but gives
 us control that we can mark perhipheral address spaces as not
@@ -326,12 +319,12 @@ no access for example.

 Since I usually use the MMU in bare metal to enable data caching on ram
 I set my domain controls to 0b11, no checking and I simply make all
-the mmu sections domain number 0.
+the MMU sections domain number 0.

 So we end up with this simple function that allows us to add first level
-descriptors in the mmu translation table.
+descriptors in the MMU translation table.

-unsigned int mmu_section ( unsigned int vadd, unsigned int padd, unsigned int flags )
+unsigned int MMU_section ( unsigned int vadd, unsigned int padd, unsigned int flags )
 {
    unsigned int ra;
    unsigned int rb;
@@ -345,7 +338,7 @@ unsigned int mmu_section ( unsigned int vadd, unsigned int padd, unsigned int fl
    return(0);
 }

-So what you have to do to turn on the mmu is to first figure out all
+So what you have to do to turn on the MMU is to first figure out all
 the memory you are going to access, and make sure you have entries
 for that.  Now if you do the math, 12 bits off the top are the
 first level index, that is 4096 things, times 4 bytes per that is 16KBytes
@@ -355,47 +348,47 @@ uncached access...Basically completely map the virtual to physical
 one to one.  I didnt do that, I was a little more concervative on the
 clock cycles, not that that really matters here...For this example I
 wanted to have the memory we are really using around 0x00000000 and
-then some entries I can play with to show you the mmu is working and
+then some entries I can play with to show you the MMU is working and
 then the entries for the peripherals I am using.

-    mmu_section(0x00000000,0x00000000,0x0000|8|4);
-    mmu_section(0x00100000,0x00100000,0x0000);
-    mmu_section(0x00200000,0x00200000,0x0000);
-    mmu_section(0x00300000,0x00300000,0x0000);
+    MMU_section(0x00000000,0x00000000,0x0000|8|4);
+    MMU_section(0x00100000,0x00100000,0x0000);
+    MMU_section(0x00200000,0x00200000,0x0000);
+    MMU_section(0x00300000,0x00300000,0x0000);
    //peripherals
-    mmu_section(0x20000000,0x20000000,0x0000); //NOT CACHED!
-    mmu_section(0x20200000,0x20200000,0x0000); //NOT CACHED!
+    MMU_section(0x20000000,0x20000000,0x0000); //NOT CACHED!
+    MMU_section(0x20200000,0x20200000,0x0000); //NOT CACHED!

 I didnt need to cache that first section, but did, will leave it up
 to you to do a read performance test of some sort to determine if the
 cache when enabled does make it faster.

 So once our tables are setup then we need to actually turn the
-mmu on.  Now I cant figure out where I got this from, and I have
+MMU on.  Now I cant figure out where I got this from, and I have
 modified it in this repo.  According to this manual it was with the
 ARMv6 that we got the DSB feature which says wait for either cache
-or mmu to finish something before continuing.  In particular when
+or MMU to finish something before continuing.  In particular when
 initializing a cache to start it up you want to clean out all the
 entries in a safe way you dont want to evict them and hose memory
 you want to invalidate everything, mark it such that the cache lines
-are empty/available.  not mentioned yet but the mmu has a mini cache
+are empty/available.  not mentioned yet but the MMU has a mini cache
 that it uses for things it has looked up, think about every access we
-do through the mmu, imagine if it had to do walk the descriptor tables
+do through the MMU, imagine if it had to do walk the descriptor tables
 every single read or write could require two more reads from the
 table.  So there is this TLB which caches up the last N number of
 descriptor table lookups.  Well like cache memory on power up, the
 tlb might be full of random bits as well, so we need to invalidate
 that too.  Then this dsb thing comes in, we do the dsb instruction
-to tell the processor to wait for the cache subsystem and mmu subsystem
+to tell the processor to wait for the cache subsystem and MMU subsystem
 to finish wiping their internal tables before we go forward and
 turn them on and try to use them.

 After we invalidate the cache and tlb, and you may be asking why are
-we messing with the cache?  Well the mmu gets us access to the data
-cache since we need the mmu to distinguish ram from peripherals before
-generically turning on the data cache.  Second in the arm the mmu
+we messing with the cache?  Well the MMU gets us access to the data
+cache since we need the MMU to distinguish ram from peripherals before
+generically turning on the data cache.  Second in the ARM the MMU
 enable bit and the cache enable bits are in the same register so it
-makes sense to just do cache enabling and mmu enabling in one function
+makes sense to just do cache enabling and MMU enabling in one function
 call.

 So after the DSB we set our domain control bits, now in this example
@@ -410,14 +403,14 @@ sure what the difference is, why there are two...
 Understand I have been runnign on ARMv6 systems without the DSB for
 some time and it just works, so maybe that is dumb luck...

-Now I can start the mmu.  This code relies on the caller to set
-the mmu enable and I and D cache enables.  This is because this
+Now I can start the MMU.  This code relies on the caller to set
+the MMU enable and I and D cache enables.  This is because this
 is derived from code where sometimes I turn things on or dont turn
 things on and wanted it generic.


-.globl start_mmu
-start_mmu:
+.globl start_mmu
+start_mmu:
    mov r2,#0
    mcr p15,0,r2,c7,c7,0 ;@ invalidate caches
    mcr p15,0,r2,c8,c7,0 ;@ invalidate tlb
@@ -436,7 +429,7 @@ start_mmu:

    bx lr

-I am going to mess with the translation tables after the mmu is started
+I am going to mess with the translation tables after the MMU is started
 so I assume we have to invalidate when a table entry changes so that
 just in case the old one is cached up in the tlb, we can force the
 read of the new one by invalidating all the tlbs.
@@ -451,7 +444,7 @@ invalidate_tlbs:

 So the program starts by putting a few things in memory spaced
 apart such that they will be in different sections when the
-mmu is turned on.  We write then read those back.
+MMU is turned on.  We write then read those back.


 DEADBEEF
@@ -460,7 +453,7 @@ DEADBEEF
 00245678
 00345678

-Now the mmu is turned on with these sections mapped with virtual =
+Now the MMU is turned on with these sections mapped with virtual =
 physical.

 00045678
@@ -482,16 +475,38 @@ comes from what virtual.
 And then the icing on the cake, one section is marked as domain 1
 instead of domain 0, domain 1 was set for 0b00 no access so when we
 touch that domain we should get an access violation.
-
-00045678
-00000010
+                                                                         
+00045678                                                                        
+00000010                                                                        

 How do I know what that means with that output.  Well from my blinker07
 example we touched on exceptions (interrupts).  I made a generic test
 fixture such that anything other than a reset prints something out
 and then hangs.   In no way shape or form is this a complete handler
 but what it does show is that it is the exception that is at address
-0x00000010 that gets hit which is data abort, so now you can read
-up on how to determine this data abort was from an mmu fault, what
-virtual address was being accessed, or whatever...
+0x00000010 that gets hit which is data abort.  Having figured out it was
+a data abort (pretty much expected), the handler then reads the fault
+status registers; being a data access we expect the data/combined one
+to show something and the instruction one to not.  Adding that
+instrumentation resulted in:
+                                                                            
+00045678                                                                        
+00000010                                                                        
+00000019                                                                        
+00000000                                                                        
+00008110                                                                        
+E5900000                                                                        
+00145678           
+
+Now I switched to the ARM1176JZF-S Technical Reference Manual for more
+detail and that shows the 0x1 in the upper nibble of 0x19 is domain 1,
+the domain we used for that access.  Then the 0x9 means Domain Section Fault.
+
+The lr during the abort leads us to the instruction (it sits at lr-8), which
+you would need to disassemble to figure out the address, or at least that
+is one way to do it; perhaps there is a status register for that.
+
+The instruction and the address match our expectations for this fault.
+
+

--- a/mmu/novectors.s
+++ b/mmu/novectors.s
@@ -47,8 +47,10 @@ handler_0C:
    b handler

 handler_10:
+    mov r7,r0 ;@ keep r0 as it was when the abort was taken, the next line overwrites it
     mov r0,#0x10
-    b handler
+    ;@b handler
+    b data_abort ;@ data aborts go to the dedicated reporting code instead of the generic handler

 handler_14:
    mov r0,#0x14
@@ -64,11 +66,31 @@ handler_1C:


 handler:
+    mov r4,lr ;@ save the exception lr, the bl below overwrites lr
     mov sp,#0x00004000
     bl hexstring
+    mov r0,r4 ;@ hand the saved lr to hexstring as the second value reported
+    bl hexstring
     b hang

-
+data_abort: ;@ entered from handler_10 with r0 = 0x10 and r7 = the r0 value at the time of the abort
+    mov r6,lr ;@ save the abort lr before any bl overwrites it
+    ldr r8,[r6,#-8] ;@ fetch the word at lr-8 (reported in the README as the aborting instruction)
+    mrc p15,0,r4,c5,c0,0 ;@ data/combined fault status into r4
+    mrc p15,0,r5,c5,c0,1 ;@ instruction fault status into r5
+    mov sp,#0x00004000 ;@ same stack address the generic handler uses
+    bl hexstring ;@ first value out: r0, the vector offset 0x10
+    mov r0,r4 ;@ then the data/combined fault status
+    bl hexstring
+    mov r0,r5 ;@ then the instruction fault status
+    bl hexstring
+    mov r0,r6 ;@ then the saved lr
+    bl hexstring
+    mov r0,r8 ;@ then the word loaded from lr-8
+    bl hexstring
+    mov r0,r7 ;@ and last the r0 value saved by handler_10
+    bl hexstring
+    b hang ;@ no return from the abort, just stop here

 .globl PUT32
 PUT32: