From 794354527de0e31ccecfd02a7e9867c4e7f0e0ae Mon Sep 17 00:00:00 2001
From: David Welch <dwelch@dwelch.com>
Date: Wed, 13 Jun 2012 02:34:58 -0400
Subject: [PATCH] adding float02, some fun with testfloat

---
 float02/Makefile    |  33 +++
 float02/README      |  13 +
 float02/float02.c   | 134 +++++++++
 float02/memmap      |  12 +
 float02/slowfloat.c | 664 ++++++++++++++++++++++++++++++++++++++++++++
 float02/uart.c      | 122 ++++++++
 float02/uart.h      |  11 +
 float02/vectors.s   | 110 ++++++++
 8 files changed, 1099 insertions(+)
 create mode 100644 float02/Makefile
 create mode 100644 float02/README
 create mode 100644 float02/float02.c
 create mode 100644 float02/memmap
 create mode 100644 float02/slowfloat.c
 create mode 100644 float02/uart.c
 create mode 100644 float02/uart.h
 create mode 100644 float02/vectors.s

diff --git a/float02/Makefile b/float02/Makefile
new file mode 100644
index 0000000..9b2ac12
--- /dev/null
+++ b/float02/Makefile
@@ -0,0 +1,33 @@
+
+# Toolchain prefix used to build the tool names below ($(ARMGNU)-as, -gcc, ...).
+# Assigned with ?= only, so a value supplied through the environment or the
+# make command line takes precedence over this default.
+ARMGNU ?= arm-none-eabi
+# AOPS is handed to the assembler and COPS to the C compiler in the rules below.
+AOPS = --warn --fatal-warnings -mcpu=arm1176jzf-s -march=armv6
+COPS = -Wall -O2 -nostdlib -nostartfiles -ffreestanding -mcpu=arm1176jzf-s -mtune=arm1176jzf-s -mhard-float
+
+all : float02.bin 
+# Objects are built one by one, then linked with memmap, listed and flattened.
+vectors.o : vectors.s
+	$(ARMGNU)-as $(AOPS) -mfpu=vfp vectors.s -o vectors.o
+# float02.c #includes slowfloat.c, hence the extra prerequisite.
+float02.o : float02.c slowfloat.c uart.h
+	$(ARMGNU)-gcc $(COPS) -c float02.c -o float02.o
+
+uart.o : uart.c uart.h
+	$(ARMGNU)-gcc $(COPS) -c uart.c -o uart.o
+# Link to an ELF, write a full disassembly listing, then extract the raw binary.
+float02.bin : memmap vectors.o uart.o float02.o
+	$(ARMGNU)-ld -o float02.elf -T memmap vectors.o uart.o float02.o
+	$(ARMGNU)-objdump -D float02.elf > float02.list
+	$(ARMGNU)-objcopy float02.elf float02.bin -O binary
+
+.PHONY : all clean
+clean:
+	rm -f *.bin
+	rm -f *.o
+	rm -f *.elf
+	rm -f *.list
+
+
diff --git a/float02/README b/float02/README
new file mode 100644
index 0000000..541c2f2
--- /dev/null
+++ b/float02/README
@@ -0,0 +1,13 @@
+
+See the README one level up about where to find the reference manual
+for the stm32f4 and schematics for the board.
+
+Just having some fun.  Curious to know how the hardware fpu compared
+to TestFloat.  Extracted only the add function which is easy/boring
+and it looks good.  A NaN is not a fixed bit pattern, so it is not surprising
+that they don't match.  I guess I could have said: if this is a NaN and that
+is a NaN then pass; instead I said: if TestFloat says NaN, continue.
+
+llvm likes to throw memcpy()'s into code with structures where gcc
+doesn't.  Very ugly memcpy() implementation, which probably hurts llvm's
+overall slowfloat performance.
diff --git a/float02/float02.c b/float02/float02.c
new file mode 100644
index 0000000..7c55c18
--- /dev/null
+++ b/float02/float02.c
@@ -0,0 +1,134 @@
+
+
+//-------------------------------------------------------------------
+//-------------------------------------------------------------------
+
+#include "slowfloat.c"
+#include "uart.h"
+
+unsigned int myfun ( float x );
+
+unsigned int m4add ( unsigned int, unsigned int );
+unsigned int m4ftoi ( unsigned int );
+unsigned int m4itof ( unsigned int );
+//-------------------------------------------------------------------
+int notmain ( void )
+{
+    unsigned int ra,rb,rc;
+    unsigned int fa,fb,fc;
+    unsigned int xc;
+    unsigned int beg,end;
+    unsigned int errors;
+    unsigned int cases;
+    //print a few sanity values, then check the slowfloat reference against the hardware ops
+    uart_init();
+    init_timer();
+    hexstring(0x12345678);
+    hexstring(myfun(-2));
+    hexstring(myfun(-4));
+    hexstring(myfun(-8));
+    hexstring(myfun(-5));
+    hexstring(m4itof(0xFFFFFFFE));
+    hexstring(m4itof(5));
+    hexstring(m4ftoi(0xC0000000));
+    hexstring(m4ftoi(0xC0800000));
+    hexstring(m4ftoi(0xC1000000));
+
+    //configure the reference model (the slow_float_* globals are defined in slowfloat.c)
+
+//enum {
+    //float_round_nearest_even = 0,
+    //float_round_down         = 1,
+    //float_round_up           = 2,
+    //float_round_to_zero      = 3
+//};
+    slow_float_rounding_mode = float_round_nearest_even;
+//enum {
+    //float_tininess_after_rounding  = 0,
+    //float_tininess_before_rounding = 1
+//};
+    slow_float_detect_tininess = float_tininess_before_rounding;
+    //first pass: add every ordered pair of cases32 entries, reference vs m4add
+    errors=0;
+    cases=0;
+    beg=timer_tick();
+    for(ra=0;ra<int32NumP1;ra++)
+    {
+        for(rb=0;rb<int32NumP1;rb++)
+        {
+            slow_float_exception_flags = 0;
+            fa=cases32[ra];
+            fb=cases32[rb];
+//if(((fa&0x7F800000)==0)&&(fa&0x007FFFFF))
+//{
+////    hexstrings(0xBAD00); hexstring(fa);
+//continue;
+//}
+//if(((fb&0x7F800000)==0)&&(fb&0x007FFFFF))
+//{
+////    hexstrings(0xBAD01); hexstring(fb);
+//continue;
+//}
+            fc=slow_float32_add(fa,fb);
+//if(((fc&0x7F800000)==0)&&(fc&0x007FFFFF))
+//{
+    //hexstrings(0xBAD02); hexstring(fc);
+//continue;
+//}
+if(fc==0xFFFFFFFF) continue; //is a nan thing
+//hexstrings(fa); hexstring(fb);
+            cases++;
+            xc=m4add(fa,fb);
+            if(fc!=xc)
+            {
+                hexstrings(slow_float_exception_flags);
+                hexstrings(fa);
+                hexstrings(fb);
+                hexstrings(fc);
+                hexstring(xc);
+                errors++;
+            }
+            if(errors>20) break;
+        }
+        if(errors>20) break; //give up on the whole pass once more than 20 mismatches were printed
+    }
+    end=timer_tick();
+    hexstring(end-beg); //timer counts spent in the pass
+    hexstring(cases); //comparisons actually made; the error count itself is not printed
+
+    //second pass: float to int32 conversion of every cases32 entry, reference vs m4ftoi
+    errors=0;
+    cases=0;
+    beg=timer_tick();
+    for(ra=0;ra<int32NumP1;ra++)
+    {
+        slow_float_exception_flags = 0;
+        fa=cases32[ra];
+        rb=slow_float32_to_int32(fa);
+
+if(slow_float_exception_flags&1) continue; //1 is float_flag_invalid: skip inputs the reference rejects
+
+        cases++;
+//hexstrings(fa); hexstring(rb);
+        rc=m4ftoi(fa);
+        if(rb!=rc)
+        {
+            hexstrings(slow_float_exception_flags);
+            hexstrings(fa);
+            hexstrings(rb);
+            hexstring(rc);
+            errors++;
+        }
+        if(errors>20) break;
+    }
+    end=timer_tick();
+    hexstring(end-beg);
+    hexstring(cases);
+
+
+
+
+    hexstring(0x12345678);
+    return(0);
+}
+//-------------------------------------------------------------------
diff --git a/float02/memmap b/float02/memmap
new file mode 100644
index 0000000..4226666
--- /dev/null
+++ b/float02/memmap
@@ -0,0 +1,12 @@
+/* One memory region, named ram, starting at 0x8000 and ending at 0x30000. */
+MEMORY
+{
+    ram : ORIGIN = 0x8000, LENGTH = 0x30000-0x8000
+}
+/* Only .text* and .bss* are placed explicitly; other input sections get the linker's default placement. */
+SECTIONS
+{
+    .text : { *(.text*) } > ram
+    .bss : { *(.bss*) } > ram
+}
+
diff --git a/float02/slowfloat.c b/float02/slowfloat.c
new file mode 100644
index 0000000..d925335
--- /dev/null
+++ b/float02/slowfloat.c
@@ -0,0 +1,664 @@
+
+
+/*
+===============================================================================
+
+This C source file is part of TestFloat, Release 2a, a package of programs
+for testing the correctness of floating-point arithmetic complying to the
+IEC/IEEE Standard for Floating-Point.
+
+Written by John R. Hauser.  More information is available through the Web
+page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these four paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+
+/*
+
+This is heavily modified: it keeps just enough of testfloat, plus a few
+lines from softfloat, to get a 32 bit add function.
+
+*/
+
+
+//#include "milieu.h"
+typedef char flag;
+typedef unsigned char uint8;
+typedef signed char int8;
+typedef int uint16;
+typedef int int16;
+typedef unsigned int uint32;
+typedef signed int int32;
+
+typedef unsigned char bits8;
+typedef signed char sbits8;
+typedef unsigned short int bits16;
+typedef signed short int sbits16;
+typedef unsigned int bits32;
+typedef signed int sbits32;
+
+enum {
+    FALSE = 0,
+    TRUE  = 1
+};
+
+//#include "softfloat.h"
+
+typedef unsigned int float32;
+extern signed char float_detect_tininess;
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+extern signed char float_rounding_mode;
+enum {
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+extern signed char float_exception_flags;
+enum {
+    float_flag_invalid   =  1,
+    float_flag_divbyzero =  4,
+    float_flag_overflow  =  8,
+    float_flag_underflow = 16,
+    float_flag_inexact   = 32
+};
+
+//#include "slowfloat.h"
+
+//64 bit quantity kept as two 32 bit words; a0 is the more significant word
+typedef struct {
+    bits32 a0, a1;
+} bits64X;
+//unpacked float: class flags, sign, exponent and a 64 bit significand
+typedef struct {
+    flag isNaN;
+    flag isInf;
+    flag isZero;
+    flag sign;
+    int16 exp;
+    bits64X sig;
+} floatX;
+
+//state of the reference model; float02.c assigns all three before calling into this code
+int8 slow_float_rounding_mode;
+int8 slow_float_exception_flags;
+int8 slow_float_detect_tininess;
+
+//canned values returned by floatXInvalid() and floatXAdd(); fields are isNaN,isInf,isZero,sign,exp,sig
+static const floatX floatXNaN = { TRUE, FALSE, FALSE, FALSE, 0, { 0, 0 } };
+static const floatX floatXPositiveZero =
+    { FALSE, FALSE, TRUE, FALSE, 0, { 0, 0 } };
+static const floatX floatXNegativeZero =
+    { FALSE, FALSE, TRUE, TRUE, 0, { 0, 0 } };
+
+
+float32 slow_float32_add( float32, float32 );
+
+//shift the 64 bit value a (a0 = high word) left by shiftCount bits
+static bits64X shortShift64Left( bits64X a, int8 shiftCount )
+{
+    int8 negShiftCount;
+    //(-shiftCount & 31) is 32-shiftCount for counts 1..31; a count of 0 would OR a1 into a0
+    negShiftCount = ( - shiftCount & 31 );
+    a.a0 = ( a.a0<<shiftCount ) | ( a.a1>>negShiftCount );
+    a.a1 <<= shiftCount;
+    return a;
+
+}
+//shift a right by shiftCount bits; if any 1 bits are shifted out, bit 0 of the result is set
+static bits64X shortShift64RightJamming( bits64X a, int8 shiftCount )
+{
+    int8 negShiftCount;
+    bits32 extra;
+
+    negShiftCount = ( - shiftCount & 31 );
+    extra = a.a1<<negShiftCount; //the low bits of a1 that are about to be shifted out
+    a.a1 = ( a.a0<<negShiftCount ) | ( a.a1>>shiftCount ) | ( extra != 0 );
+    a.a0 >>= shiftCount;
+    return a;
+
+}
+//two's complement negation of the 64 bit value a
+static bits64X neg64( bits64X a )
+{
+    //with a zero low word the +1 carries into the high word, so only a0 is negated
+    if ( a.a1 == 0 ) {
+        a.a0 = - a.a0;
+    }
+    else {
+        a.a1 = - a.a1;
+        a.a0 = ~ a.a0;
+    }
+    return a;
+
+}
+//64 bit sum of a and b
+static bits64X add64( bits64X a, bits64X b )
+{
+    //low words first; the low sum wrapped around exactly when it ends up below b.a1
+    a.a1 += b.a1;
+    a.a0 += b.a0 + ( a.a1 < b.a1 );
+    return a;
+
+}
+//nonzero when both words of a and b are equal
+static flag eq64( bits64X a, bits64X b )
+{
+
+    return ( a.a0 == b.a0 ) && ( a.a1 == b.a1 );
+
+}
+
+//static flag le64( bits64X a, bits64X b )
+//{
+
+    //return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 <= b.a1 ) );
+
+//}
+
+//static flag lt64( bits64X a, bits64X b )
+//{
+
+    //return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 < b.a1 ) );
+
+//}
+//round zx.sig to the bits held in a0 using slow_float_rounding_mode; a1 holds the bits rounded away
+static floatX roundFloatXTo24( flag isTiny, floatX zx )
+{
+    //a nonzero a1 means the result is inexact (and an underflow when the caller flags it as tiny)
+    if ( zx.sig.a1 ) {
+        slow_float_exception_flags |= float_flag_inexact;
+        if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
+        switch ( slow_float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( zx.sig.a1 < 0x80000000 ) goto noIncrement; //below one half
+            if ( ( zx.sig.a1 == 0x80000000 ) && ! ( zx.sig.a0 & 1 ) ) {
+                goto noIncrement; //exactly one half and a0 already even
+            }
+            break;
+         case float_round_to_zero:
+            goto noIncrement;
+         case float_round_down:
+            if ( ! zx.sign ) goto noIncrement;
+            break;
+         case float_round_up:
+            if ( zx.sign ) goto noIncrement;
+            break;
+        }
+        ++zx.sig.a0;
+        if ( zx.sig.a0 == 0x01000000 ) { //increment carried into bit 24: renormalize
+            zx.sig.a0 = 0x00800000;
+            ++zx.exp;
+        }
+    }
+ noIncrement:
+    zx.sig.a1 = 0;
+    return zx;
+
+}
+//round away the low 3 bits of zx.sig.a1 using slow_float_rounding_mode
+static floatX roundFloatXTo53( flag isTiny, floatX zx )
+{
+    int8 roundBits;
+    //strip the 3 low bits; nonzero ones make the result inexact
+    roundBits = zx.sig.a1 & 7;
+    zx.sig.a1 -= roundBits;
+    if ( roundBits ) {
+        slow_float_exception_flags |= float_flag_inexact;
+        if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
+        switch ( slow_float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( roundBits < 4 ) goto noIncrement; //below one half
+            if ( ( roundBits == 4 ) && ! ( zx.sig.a1 & 8 ) ) goto noIncrement; //half, already even
+            break;
+         case float_round_to_zero:
+            goto noIncrement;
+         case float_round_down:
+            if ( ! zx.sign ) goto noIncrement;
+            break;
+         case float_round_up:
+            if ( zx.sign ) goto noIncrement;
+            break;
+        }
+        zx.sig.a1 += 8;
+        zx.sig.a0 += ( zx.sig.a1 == 0 ); //carry from the low word into the high word
+        if ( zx.sig.a0 == 0x01000000 ) {
+            zx.sig.a0 = 0x00800000;
+            ++zx.exp;
+        }
+    }
+ noIncrement:
+    return zx;
+
+}
+
+//static floatX int32ToFloatX( int32 a )
+//{
+    //floatX ax;
+
+    //ax.isNaN = FALSE;
+    //ax.isInf = FALSE;
+    //ax.sign = ( a < 0 );
+    //ax.sig.a1 = ax.sign ? - a : a;
+    //ax.sig.a0 = 0;
+    //if ( a == 0 ) {
+        //ax.isZero = TRUE;
+        //return ax;
+    //}
+    //ax.isZero = FALSE;
+    //ax.sig = shortShift64Left( ax.sig, 23 );
+    //ax.exp = 32;
+    //while ( ax.sig.a0 < 0x00800000 ) {
+        //ax.sig = shortShift64Left( ax.sig, 1 );
+        //--ax.exp;
+    //}
+    //return ax;
+
+//}
+//convert ax to an int32, rounding with slow_float_rounding_mode; raises float_flag_invalid when it cannot
+static int32 floatXToInt32( floatX ax )
+{
+    int8 savedExceptionFlags;
+    int16 shiftCount;
+    int32 z;
+    //Inf and NaN are invalid; only -Inf yields 0x80000000, the rest yield 0x7FFFFFFF
+    if ( ax.isInf || ax.isNaN ) {
+        slow_float_exception_flags |= float_flag_invalid;
+        return ( ax.isInf & ax.sign ) ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ax.isZero ) return 0;
+    savedExceptionFlags = slow_float_exception_flags;
+    shiftCount = 52 - ax.exp;
+    if ( 56 < shiftCount ) { //everything would be shifted out: keep only a sticky bit
+        ax.sig.a1 = 1;
+        ax.sig.a0 = 0;
+    }
+    else {
+        while ( 0 < shiftCount ) {
+            ax.sig = shortShift64RightJamming( ax.sig, 1 );
+            --shiftCount;
+        }
+    }
+    ax = roundFloatXTo53( FALSE, ax );
+    ax.sig = shortShift64RightJamming( ax.sig, 3 );
+    z = ax.sig.a1;
+    if ( ax.sign ) z = - z;
+    if (    ( shiftCount < 0 )
+         || ax.sig.a0
+         || ( ( z != 0 ) && ( ( ax.sign ^ ( z < 0 ) ) != 0 ) )
+       ) { //out of range: drop any flags raised by the rounding above and report invalid only
+        slow_float_exception_flags = savedExceptionFlags | float_flag_invalid;
+        return ax.sign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    return z;
+
+}
+//unpack the float32 bit pattern a into a floatX (class flags, sign, exponent, significand in sig.a0)
+static floatX float32ToFloatX( float32 a )
+{
+    int16 expField;
+    floatX ax;
+    ax.exp = 0; //defined value for zero/Inf/NaN, whose branches below never assign exp
+    ax.isNaN = FALSE;
+    ax.isInf = FALSE;
+    ax.isZero = FALSE;
+    ax.sign = ( ( a & 0x80000000 ) != 0 );
+    expField = ( a>>23 ) & 0xFF;
+    ax.sig.a1 = 0;
+    ax.sig.a0 = a & 0x007FFFFF;
+    if ( expField == 0 ) {
+        if ( ax.sig.a0 == 0 ) {
+            ax.isZero = TRUE;
+        }
+        else { //subnormal: shift left until bit 23 is set, lowering the exponent to match
+            expField = 1 - 0x7F;
+            do {
+                ax.sig.a0 <<= 1;
+                --expField;
+            } while ( ax.sig.a0 < 0x00800000 );
+            ax.exp = expField;
+        }
+    }
+    else if ( expField == 0xFF ) {
+        if ( ax.sig.a0 == 0 ) {
+            ax.isInf = TRUE;
+        }
+        else {
+            ax.isNaN = TRUE;
+        }
+    }
+    else { //normal number: restore the implicit leading 1 and remove the exponent bias
+        ax.sig.a0 |= 0x00800000;
+        ax.exp = expField - 0x7F;
+    }
+    return ax;
+
+}
+//pack zx into a float32 bit pattern, rounding with slow_float_rounding_mode and raising exception flags
+static float32 floatXToFloat32( floatX zx )
+{
+    floatX savedZ;
+    flag isTiny;
+    int16 expField;
+    float32 z;
+
+    z=0;
+    //special classes map to fixed encodings; every NaN is returned as 0xFFFFFFFF
+    if ( zx.isZero ) return zx.sign ? 0x80000000 : 0;
+    if ( zx.isInf ) return zx.sign ? 0xFF800000 : 0x7F800000;
+    if ( zx.isNaN ) return 0xFFFFFFFF;
+    while ( 0x01000000 <= zx.sig.a0 ) { //normalize downwards until a0 fits below bit 24
+        zx.sig = shortShift64RightJamming( zx.sig, 1 );
+        ++zx.exp;
+    }
+    while ( zx.sig.a0 < 0x00800000 ) { //normalize upwards until bit 23 of a0 is set
+        zx.sig = shortShift64Left( zx.sig, 1 );
+        --zx.exp;
+    }
+    savedZ = zx;
+    isTiny =
+           ( slow_float_detect_tininess == float_tininess_before_rounding )
+        && ( zx.exp + 0x7F <= 0 );
+    zx = roundFloatXTo24( isTiny, zx );
+    expField = zx.exp + 0x7F;
+    if ( 0xFF <= expField ) { //overflow: infinity or the largest finite value, by mode and sign
+        slow_float_exception_flags |=
+            float_flag_overflow | float_flag_inexact;
+        if ( zx.sign ) {
+            switch ( slow_float_rounding_mode ) {
+             case float_round_nearest_even:
+             case float_round_down:
+                z = 0xFF800000;
+                break;
+             case float_round_to_zero:
+             case float_round_up:
+                z = 0xFF7FFFFF;
+                break;
+            }
+        }
+        else {
+            switch ( slow_float_rounding_mode ) {
+             case float_round_nearest_even:
+             case float_round_up:
+                z = 0x7F800000;
+                break;
+             case float_round_to_zero:
+             case float_round_down:
+                z = 0x7F7FFFFF;
+                break;
+            }
+        }
+        return z;
+    }
+    if ( expField <= 0 ) { //subnormal result: restart from the unrounded value and denormalize first
+        isTiny = TRUE;
+        zx = savedZ;
+        expField = zx.exp + 0x7F;
+        if ( expField < -27 ) { //too small for any bit to survive: keep only a sticky bit
+            zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
+            zx.sig.a0 = 0;
+        }
+        else {
+            while ( expField <= 0 ) {
+                zx.sig = shortShift64RightJamming( zx.sig, 1 );
+                ++expField;
+            }
+        }
+        zx = roundFloatXTo24( isTiny, zx );
+        expField = ( 0x00800000 <= zx.sig.a0 ) ? 1 : 0; //rounding may reach the smallest normal
+    }
+    z = expField;
+    z <<= 23;
+    if ( zx.sign ) z |= 0x80000000;
+    z |= zx.sig.a0 & 0x007FFFFF;
+    return z;
+
+}
+
+
+
+//raise float_flag_invalid and hand back the canned NaN
+static floatX floatXInvalid( void )
+{
+
+    slow_float_exception_flags |= float_flag_invalid;
+    return floatXNaN;
+
+}
+//unrounded sum of ax and bx; rounding and packing are left to floatXToFloat32()
+static floatX floatXAdd( floatX ax, floatX bx )
+{
+    int16 expDiff;
+    floatX zx;
+    //special cases first: NaN operands, infinities, zeros and exact cancellation
+    if ( ax.isNaN ) return ax;
+    if ( bx.isNaN ) return bx;
+    if ( ax.isInf && bx.isInf ) {
+        if ( ax.sign == bx.sign ) return ax;
+        return floatXInvalid(); //infinities of opposite sign
+    }
+    if ( ax.isInf ) return ax;
+    if ( bx.isInf ) return bx;
+    if ( ax.isZero && bx.isZero ) {
+        if ( ax.sign == bx.sign ) return ax;
+        goto completeCancellation;
+    }
+    if (    ( ax.sign != bx.sign )
+         && ( ax.exp == bx.exp )
+         && eq64( ax.sig, bx.sig )
+       ) {
+ completeCancellation:
+        return
+              ( slow_float_rounding_mode == float_round_down ) ?
+                  floatXNegativeZero
+            : floatXPositiveZero;
+    }
+    if ( ax.isZero ) return bx;
+    if ( bx.isZero ) return ax;
+    expDiff = ax.exp - bx.exp;
+    if ( expDiff < 0 ) { //bx has the larger exponent: align ax to it
+        zx = ax;
+        zx.exp = bx.exp;
+        if ( expDiff < -56 ) { //ax is too small to matter beyond a sticky bit
+            zx.sig.a1 = 1;
+            zx.sig.a0 = 0;
+        }
+        else {
+            while ( expDiff < 0 ) {
+                zx.sig = shortShift64RightJamming( zx.sig, 1 );
+                ++expDiff;
+            }
+        }
+        if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
+        zx.sign = bx.sign;
+        zx.sig = add64( zx.sig, bx.sig );
+    }
+    else { //ax has the larger or equal exponent: align bx to it
+        zx = bx;
+        zx.exp = ax.exp;
+        if ( 56 < expDiff ) {
+            zx.sig.a1 = 1;
+            zx.sig.a0 = 0;
+        }
+        else {
+            while ( 0 < expDiff ) {
+                zx.sig = shortShift64RightJamming( zx.sig, 1 );
+                --expDiff;
+            }
+        }
+        if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
+        zx.sign = ax.sign;
+        zx.sig = add64( zx.sig, ax.sig );
+    }
+    if ( zx.sig.a0 & 0x80000000 ) { //the difference came out negative: flip magnitude and sign
+        zx.sig = neg64( zx.sig );
+        zx.sign = ! zx.sign;
+    }
+    return zx;
+
+}
+
+//reference float32 add: unpack both operands, add, then round and repack
+float32 slow_float32_add( float32 a, float32 b )
+{
+
+    return
+        floatXToFloat32(
+            floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
+
+}
+//reference float32 to int32 conversion: unpack a, then convert with floatXToInt32()
+int32 slow_float32_to_int32 ( float32 a )
+{
+    return(floatXToInt32(float32ToFloatX(a)));
+}
+
+
+//#define int32NumP1 124
+#define int32NumP1 79
+
+static const uint32 cases32[ int32NumP1 ] =
+{
+    0x00000000,
+    //0x00000001,
+    //0x00000002,
+    //0x00000004,
+    //0x00000008,
+    //0x00000010,
+    //0x00000020,
+    //0x00000040,
+    //0x00000080,
+    //0x00000100,
+    //0x00000200,
+    //0x00000400,
+    //0x00000800,
+    //0x00001000,
+    //0x00002000,
+    //0x00004000,
+    //0x00008000,
+    //0x00010000,
+    //0x00020000,
+    //0x00040000,
+    //0x00080000,
+    //0x00100000,
+    //0x00200000,
+    //0x00400000,
+    0x00800000,
+    0x01000000,
+    0x02000000,
+    0x04000000,
+    0x08000000,
+    0x10000000,
+    0x20000000,
+    0x40000000,
+    0x80000000,
+    0xC0000000,
+    0xE0000000,
+    0xF0000000,
+    0xF8000000,
+    0xFC000000,
+    0xFE000000,
+    0xFF000000,
+    0xFF800000,
+    0xFFC00000,
+    0xFFE00000,
+    0xFFF00000,
+    0xFFF80000,
+    0xFFFC0000,
+    0xFFFE0000,
+    0xFFFF0000,
+    0xFFFF8000,
+    0xFFFFC000,
+    0xFFFFE000,
+    0xFFFFF000,
+    0xFFFFF800,
+    0xFFFFFC00,
+    0xFFFFFE00,
+    0xFFFFFF00,
+    0xFFFFFF80,
+    0xFFFFFFC0,
+    0xFFFFFFE0,
+    0xFFFFFFF0,
+    0xFFFFFFF8,
+    0xFFFFFFFC,
+    0xFFFFFFFE,
+    0xFFFFFFFF,
+    0xFFFFFFFD,
+    0xFFFFFFFB,
+    0xFFFFFFF7,
+    0xFFFFFFEF,
+    0xFFFFFFDF,
+    0xFFFFFFBF,
+    0xFFFFFF7F,
+    0xFFFFFEFF,
+    0xFFFFFDFF,
+    0xFFFFFBFF,
+    0xFFFFF7FF,
+    0xFFFFEFFF,
+    0xFFFFDFFF,
+    0xFFFFBFFF,
+    0xFFFF7FFF,
+    0xFFFEFFFF,
+    0xFFFDFFFF,
+    0xFFFBFFFF,
+    0xFFF7FFFF,
+    0xFFEFFFFF,
+    0xFFDFFFFF,
+    0xFFBFFFFF,
+    0xFF7FFFFF,
+    0xFEFFFFFF,
+    0xFDFFFFFF,
+    0xFBFFFFFF,
+    0xF7FFFFFF,
+    0xEFFFFFFF,
+    0xDFFFFFFF,
+    0xBFFFFFFF,
+    0x7FFFFFFF,
+    0x3FFFFFFF,
+    0x1FFFFFFF,
+    0x0FFFFFFF,
+    0x07FFFFFF,
+    0x03FFFFFF,
+    0x01FFFFFF,
+    0x00FFFFFF,
+    //0x007FFFFF,
+    //0x003FFFFF,
+    //0x001FFFFF,
+    //0x000FFFFF,
+    //0x0007FFFF,
+    //0x0003FFFF,
+    //0x0001FFFF,
+    //0x0000FFFF,
+    //0x00007FFF,
+    //0x00003FFF,
+    //0x00001FFF,
+    //0x00000FFF,
+    //0x000007FF,
+    //0x000003FF,
+    //0x000001FF,
+    //0x000000FF,
+    //0x0000007F,
+    //0x0000003F,
+    //0x0000001F,
+    //0x0000000F,
+    //0x00000007,
+    //0x00000003
+};
+
+
+
diff --git a/float02/uart.c b/float02/uart.c
new file mode 100644
index 0000000..c877121
--- /dev/null
+++ b/float02/uart.c
@@ -0,0 +1,122 @@
+
+//-------------------------------------------------------------------------
+//-------------------------------------------------------------------------
+//returns x converted with a plain C cast; note C leaves float to unsigned undefined when x is out of range
+unsigned int myfun ( float x )
+{
+    return((unsigned int)x);
+}
+
+
+extern void PUT32 ( unsigned int, unsigned int );
+extern unsigned int GET32 ( unsigned int );
+extern void dummy ( unsigned int );
+
+#define GPFSEL1 0x20200004
+#define GPSET0  0x2020001C
+#define GPCLR0  0x20200028
+#define GPPUD       0x20200094
+#define GPPUDCLK0   0x20200098
+
+#define AUX_ENABLES     0x20215004
+#define AUX_MU_IO_REG   0x20215040
+#define AUX_MU_IER_REG  0x20215044
+#define AUX_MU_IIR_REG  0x20215048
+#define AUX_MU_LCR_REG  0x2021504C
+#define AUX_MU_MCR_REG  0x20215050
+#define AUX_MU_LSR_REG  0x20215054
+#define AUX_MU_MSR_REG  0x20215058
+#define AUX_MU_SCRATCH  0x2021505C
+#define AUX_MU_CNTL_REG 0x20215060
+#define AUX_MU_STAT_REG 0x20215064
+#define AUX_MU_BAUD_REG 0x20215068
+
+#define ARM_TIMER_LOD 0x2000B400
+#define ARM_TIMER_VAL 0x2000B404
+#define ARM_TIMER_CTL 0x2000B408
+#define ARM_TIMER_DIV 0x2000B41C
+#define ARM_TIMER_CNT 0x2000B420
+
+//GPIO14  TXD0 and TXD1
+//GPIO15  RXD0 and RXD1
+//alt function 5 for uart1
+//alt function 0 for uart0
+
+//((250,000,000/115200)/8)-1 = 270
+//------------------------------------------------------------------------
+void uart_putc ( unsigned int c )
+{
+    while(1)
+    {
+        if(GET32(AUX_MU_LSR_REG)&0x20) break; //wait for LSR bit 5 (transmitter can accept a byte, per the BCM2835 manual)
+    }
+    PUT32(AUX_MU_IO_REG,c); //write the character to the uart data register
+}
+//------------------------------------------------------------------------
+void hexstrings ( unsigned int d )
+{
+    //prints d as eight uppercase hex digits, most significant first, followed by a space
+    unsigned int rb;
+    unsigned int rc;
+
+    rb=32;
+    while(1)
+    {
+        rb-=4;
+        rc=(d>>rb)&0xF;
+        if(rc>9) rc+=0x37; else rc+=0x30; //10..15 become 'A'..'F', 0..9 become '0'..'9'
+        uart_putc(rc);
+        if(rb==0) break;
+    }
+    uart_putc(0x20);
+}
+//------------------------------------------------------------------------
+void hexstring ( unsigned int d )
+{
+    hexstrings(d); //eight hex digits and a space
+    uart_putc(0x0D); //carriage return
+    uart_putc(0x0A); //line feed
+}
+//------------------------------------------------------------------------
+int uart_init ( void )
+{
+    unsigned int ra;
+    //register roles noted below follow the BCM2835 peripherals manual (AUX / mini uart block)
+    PUT32(AUX_ENABLES,1); //enable the mini uart
+    PUT32(AUX_MU_IER_REG,0);
+    PUT32(AUX_MU_CNTL_REG,0); //receiver and transmitter off while configuring
+    PUT32(AUX_MU_LCR_REG,3);
+    PUT32(AUX_MU_MCR_REG,0);
+    PUT32(AUX_MU_IER_REG,0);
+    PUT32(AUX_MU_IIR_REG,0xC6);
+    PUT32(AUX_MU_BAUD_REG,270); //divisor from the 115200 baud formula in the comment above uart_putc
+    //switch gpio14 and gpio15 to their alt5 function
+    ra=GET32(GPFSEL1);
+    ra&=~(7<<12); //gpio14
+    ra|=2<<12;    //alt5
+    ra&=~(7<<15); //gpio15
+    ra|=2<<15;    //alt5
+    PUT32(GPFSEL1,ra);
+    //pull up/down update sequence for gpio14 and gpio15; the dummy() loops act as delays
+    PUT32(GPPUD,0);
+    for(ra=0;ra<150;ra++) dummy(ra);
+    PUT32(GPPUDCLK0,(1<<14)|(1<<15));
+    for(ra=0;ra<150;ra++) dummy(ra);
+    PUT32(GPPUDCLK0,0);
+    //configuration done: turn the receiver and transmitter on
+    PUT32(AUX_MU_CNTL_REG,3);
+    return(0);
+}
+//-------------------------------------------------------------------------
+void init_timer ( void )
+{
+    PUT32(ARM_TIMER_CTL,0x00000000); //clear the whole control register first
+    PUT32(ARM_TIMER_CTL,0x00000200); //set only bit 9; presumably the free running counter enable - confirm in the manual
+}
+//-------------------------------------------------------------------------
+unsigned int timer_tick ( void )
+{
+    return(GET32(ARM_TIMER_CNT)); //current value of the ARM_TIMER_CNT register
+}
+//-------------------------------------------------------------------------
+//-------------------------------------------------------------------------
diff --git a/float02/uart.h b/float02/uart.h
new file mode 100644
index 0000000..e1cb489
--- /dev/null
+++ b/float02/uart.h
@@ -0,0 +1,11 @@
+
+//-------------------------------------------------------------------------
+//-------------------------------------------------------------------------
+void uart_putc ( unsigned int c );
+void hexstrings ( unsigned int d );
+void hexstring ( unsigned int d );
+int uart_init ( void );
+void init_timer ( void );
+unsigned int timer_tick ( void );
+//-------------------------------------------------------------------------
+//-------------------------------------------------------------------------
diff --git a/float02/vectors.s b/float02/vectors.s
new file mode 100644
index 0000000..450ab92
--- /dev/null
+++ b/float02/vectors.s
@@ -0,0 +1,110 @@
+;@ exception vector table: each entry loads pc from the matching address word below
+.globl _start
+_start:
+    ldr pc,reset_handler
+    ldr pc,undefined_handler
+    ldr pc,swi_handler
+    ldr pc,prefetch_handler
+    ldr pc,data_handler
+    ldr pc,unused_handler
+    ldr pc,irq_handler
+    ldr pc,fiq_handler
+reset_handler:      .word reset
+undefined_handler:  .word undef
+swi_handler:        .word hang
+prefetch_handler:   .word hang
+data_handler:       .word hang
+unused_handler:     .word hang
+irq_handler:        .word hang
+fiq_handler:        .word hang
+;@ reset entry: copy the vectors to address 0, enable the vfp, set the stack, call notmain
+reset:
+    mov r0,#0x8000
+    mov r1,#0x0000
+    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
+    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
+    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
+    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
+    ;@ 16 words (8 vectors + 8 handler addresses) now copied from 0x8000 to 0x0000
+
+    ;@ enable fpu
+    mrc p15, 0, r0, c1, c0, 2
+    orr r0,r0,#0x300000 ;@ single precision
+    orr r0,r0,#0xC00000 ;@ double precision
+    mcr p15, 0, r0, c1, c0, 2
+    mov r0,#0x40000000
+    fmxr fpexc,r0
+    ;@ the value written to fpexc has only bit 30 set (the vfp enable bit per the ARM docs)
+
+
+    mov sp,#0x00200000
+    bl notmain
+hang: b hang
+
+;@ undefined instruction handler: spins in place; the zero words look like padding - purpose not evident, confirm
+    .word 0
+    .word 0
+undef: b .
+    .word 0
+    .word 0
+
+;@ PUT32(address, value): store the word in r1 at the address in r0
+.globl PUT32
+PUT32:
+    str r1,[r0]
+    bx lr
+;@ GET32(address): return in r0 the word loaded from the address in r0
+.globl GET32
+GET32:
+    ldr r0,[r0]
+    bx lr
+;@ dummy(x): returns immediately; uart.c calls it inside its delay loops
+.globl dummy
+dummy:
+    bx lr
+
+;@ start_l1cache(): invalidate caches and tlb, then set control register bits 12 and 2; clobbers r0
+.globl start_l1cache
+start_l1cache:
+    mov r0, #0
+    mcr p15, 0, r0, c7, c7, 0 ;@ invalidate caches
+    mcr p15, 0, r0, c8, c7, 0 ;@ invalidate tlb
+    mrc p15, 0, r0, c1, c0, 0
+    orr r0,r0,#0x1000 ;@ instruction
+    orr r0,r0,#0x0004 ;@ data
+    mcr p15, 0, r0, c1, c0, 0
+    bx lr
+;@ stop_l1cache(): clear control register bits 12 and 2 again (no invalidate or clean is done); clobbers r0
+.globl stop_l1cache
+stop_l1cache:
+    mrc p15, 0, r0, c1, c0, 0
+    bic r0,r0,#0x1000 ;@ instruction
+    bic r0,r0,#0x0004 ;@ data
+    mcr p15, 0, r0, c1, c0, 0
+    bx lr
+;@ m4add(a,b): r0,r1 hold float32 bit patterns; returns the bits of a+b computed by the vfp; uses s0-s2
+.globl m4add
+m4add:
+    vmov s0,r0
+    vmov s1,r1
+    vadd.f32 s2,s0,s1
+    vmov r0,s2
+    bx lr
+;@ m4ftoi(a): float32 bits in r0 -> signed 32 bit int, rounded per the fpscr mode like slow_float32_to_int32
+.globl m4ftoi
+m4ftoi:
+    vmov s0,r0
+    vcvtr.s32.f32 s2,s0 ;@ plain vcvt always truncates; fpscr is never written in this file, assumed round to nearest
+    vmov r0,s2
+    bx lr
+
+;@ m4itof(a): treats r0 as an unsigned 32 bit int and returns its float32 bit pattern; uses s0,s2
+.globl m4itof
+m4itof:
+    vmov s0,r0
+    vcvt.f32.u32 s2,s0
+    vmov r0,s2
+    bx lr
+
+
+