Merge pull request #69 from alexfru/master

Floats in Smaller C!
2016-01-16 23:35:14 -08:00
parent fa094d1744 bfc3125556
commit 4e90456341
5 changed files with 2093 additions and 455 deletions
--- a/src/cmd/smlrc/Makefile
+++ b/src/cmd/smlrc/Makefile
@@ -32,5 +32,5 @@ install:        smlrc
 clean:
 		rm -f *.o smlrc smlrc.dis smlrc.elf
 ###
-smlrc.o: smlrc.c cgmips.c
+smlrc.o: smlrc.c fp.c cgmips.c
 		${CC} ${CFLAGS} -mips16 smlrc.c -c -o $@
--- a/src/cmd/smlrc/cgmips.c
+++ b/src/cmd/smlrc/cgmips.c
@@ -546,6 +546,11 @@ void GenFxnProlog(void)
    int i, cnt = CurFxnParamCntMax;
    if (cnt > 4)
      cnt = 4;
+    // TBD!!! for structure passing use the cumulative parameter size
+    // instead of the number of parameters. Currently this bug is masked
+    // by the subroutine that pushes structures on the stack (it copies
+    // all words except the first to the stack). But passing structures
+    // in registers from assembly code won't always work.
    for (i = 0; i < cnt; i++)
      GenPrintInstr2Operands(MipsInstrSW, 0,
                             MipsOpRegA0 + i, 0,
--- a/src/cmd/smlrc/fp.c
+++ b/src/cmd/smlrc/fp.c
@@ -0,0 +1,323 @@
+/*
+  Copyright (c) 2016, Alexey Frunze
+  2-clause BSD license.
+*/
+
+// This file implements the basic arithmetic functions on floats and
+// expects that the host representation of floats coinsides with the
+// target representation of floats and is IEEE 754 single precision.
+// It is also expected that the host implementation does not raise
+// any signals nor results in undefined behavior when handling
+// special cases involving infinities, NaNs and denormal numbers.
+
+// Make sure float and int are the same size, so they can be
+// copied one into another byte by byte with memcpy().
+extern char StAtIcAsSeRt[(sizeof(float) == sizeof(unsigned)) ? 1 : -1];
+
+// There can be up to 112 significant digits in a 32-bit single precision
+// floating point number (in the maximum positive denormal number).
+// We'll ignore any following digits, but convert these 112 with proper
+// rounding to even, which is enough for all floats representable exactly,
+// which should be enough for most practical purposes.
+#define MAX_CONST_DIGITS 112 // must be big enough for decimal/octal ints as well
+unsigned char ConstDigits[MAX_CONST_DIGITS];
+#define FP_MANT_BITS  23    // bits in mantissa, excluding the implicit 1
+#define FP_EXP_BITS   8     // bits in exponent
+#define FP_MIN_EXP    (-46) // from min denormal, ~1.40e-45
+                            // (-46 is chosen because we need to handle numbers
+                            // like 9e-46 which round up to a non-zero denormal
+                            // number ~1.40e-45)
+#define FP_MAX_EXP    38    // from max normal, ~3.40e+38
+#define FP_BUF_SIZE   (((((MAX_CONST_DIGITS-FP_MIN_EXP)*10+2)/3+FP_MANT_BITS+2)+7)/8)
+unsigned char ConstBinDigits[FP_BUF_SIZE];
+
+STATIC
+unsigned f2u(unsigned f)
+{
+  float ff;
+  memcpy(&ff, &f, sizeof ff);
+  if (ff != ff || ff <= -1.0f || ff >= (float)0x10000*0x10000/* 2**32 */)
+  {
+    warnFloat2Int();
+    ff = 0;
+  }
+  return ff;
+}
+
+STATIC
+unsigned f2i(unsigned f, int opsz)
+{
+  float ff;
+  int ovf = 0;
+  memcpy(&ff, &f, sizeof ff);
+  if (ff != ff)
+  {
+    ovf = 1;
+  }
+  else
+  {
+    switch (opsz)
+    {
+    case 1: // unsigned char
+      ovf = ff <= -1.0f || ff >= 256.0f;
+      break;
+    case 2: // unsigned short
+      ovf = ff <= -1.0f || ff >= 65536.0f;
+      break;
+    case -1: // signed char
+      ovf = ff <= -129.0f || ff >= 128.0f;
+      break;
+    case -2: // signed short
+      ovf = ff <= -32769.0f || ff >= 32768.0f;
+      break;
+    default: // signed int
+      ovf = ff < -(float)0x8000*0x10000/* -2**31 */ || ff >= (float)0x8000*0x10000/* 2**31 */;
+      break;
+    }
+  }
+  if (ovf)
+  {
+    warnFloat2Int();
+    ff = 0;
+  }
+  return (int)ff;
+}
+
+STATIC
+unsigned u2f(unsigned i)
+{
+  float f = i;
+  memcpy(&i, &f, sizeof i);
+  return i;
+}
+
+STATIC
+unsigned i2f(unsigned i)
+{
+  float f = (int)i;
+  memcpy(&i, &f, sizeof i);
+  return i;
+}
+
+STATIC
+unsigned fneg(unsigned f)
+{
+  float ff;
+  memcpy(&ff, &f, sizeof ff);
+  ff = -ff;
+  memcpy(&f, &ff, sizeof f);
+  return f;
+}
+
+STATIC
+unsigned fadd(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa + ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+unsigned fsub(unsigned fa, unsigned fb)
+{
+  return fadd(fa, fneg(fb));
+}
+
+STATIC
+unsigned fmul(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa * ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+unsigned fdiv(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa / ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+int fcmp(unsigned fa, unsigned fb, int nanRes)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  if (ffa != ffa || ffb != ffb)
+    return nanRes;
+  if (ffa < ffb)
+    return -1;
+  if (ffa > ffb)
+    return +1;
+  return 0;
+}
+
+STATIC
+void ChainMultiplyAdd(unsigned char* pChain,
+                      unsigned       ChainLen,
+                      unsigned char  Multiplier,
+                      unsigned char  Addend)
+{
+  unsigned carry = Addend;
+
+  while (ChainLen--)
+  {
+    carry += *pChain * Multiplier;
+    *pChain++ = carry & 0xFFu;
+    carry >>= 8;
+  }
+}
+
+STATIC
+void ChainDivide(unsigned char* pChain,
+                 unsigned       ChainLen,
+                 unsigned char  Divisor,
+                 unsigned char* pRemainder)
+{
+  unsigned remainder = 0;
+
+  while (ChainLen)
+  {
+    remainder += pChain[ChainLen - 1];
+    pChain[ChainLen - 1] = remainder / Divisor;
+    remainder = (remainder % Divisor) << 8;
+    ChainLen--;
+  }
+
+  if (pRemainder != NULL)
+    *pRemainder = remainder >> 8;
+}
+
+// Multiplies an integer (cnt decimal digits (0 to 9) from digits[]) by
+// 10**eexp, converts the product to a float and returns it as unsigned int.
+// This is an inefficient but straightforward algorithm with proper rounding.
+STATIC
+unsigned d2f(unsigned char* digits, int cnt, int eexp)
+{
+  unsigned numDecDigits;
+  unsigned denDecDigits;
+  unsigned numBinDigits;
+  unsigned numBytes;
+  int tmp;
+  unsigned char remainder;
+  int binExp = 0;
+  int inexact = 0;
+  int lastInexact = 0;
+  unsigned res;
+  
+  // 0?
+  if (cnt == 1 && *digits == 0)
+    return 0;
+  // less than the denormalized minimum?
+  if (eexp < FP_MIN_EXP - (cnt - 1))
+    return 0;
+
+  // greater than the normalized maximum?
+  if (eexp > FP_MAX_EXP - (cnt - 1))
+    return 0x7F800000; // +INF
+
+  numDecDigits = cnt + ((eexp >= 0) ? eexp : 0);
+  denDecDigits = 1 + ((eexp < 0) ? -eexp : 0);
+
+  // 10/3=3.3(3) > log2(10)~=3.32
+  if (eexp >= 0)
+  {
+    unsigned t1 = (numDecDigits * 10 + 2) / 3;
+    unsigned t2 = FP_MANT_BITS + 1;
+    numBinDigits = (t1 >= t2) ? t1 : t2;
+  }
+  else
+  {
+    unsigned t1 = (numDecDigits * 10 + 2) / 3;
+    unsigned t2 = (denDecDigits * 10 + 2) / 3 + FP_MANT_BITS + 1 + 1;
+    numBinDigits = (t1 >= t2) ? t1 : t2;
+  }
+
+  numBytes = (numBinDigits + 7) / 8;
+  if (numBytes > (unsigned)FP_BUF_SIZE)
+    errorInternal(200);
+  memset(ConstBinDigits, 0, numBytes);
+
+  // Convert the numerator to binary
+  for (tmp = 0; tmp < cnt; tmp++)
+    ChainMultiplyAdd(ConstBinDigits, numBytes, 10, ConstDigits[tmp]);
+  for (tmp = eexp; tmp > 0; tmp--)
+    ChainMultiplyAdd(ConstBinDigits, numBytes, 10, 0);
+
+  // If the denominator isn't 1, divide the numerator by the denominator
+  // getting at least FractionBitCnt+2 significant bits of quotient
+  if (eexp < 0)
+  {
+    binExp = -(int)(numBinDigits - (numDecDigits * 10 + 2) / 3);
+    for (tmp = binExp; tmp < 0; tmp++)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0);
+    for (tmp = eexp; tmp < 0; tmp++)
+      ChainDivide(ConstBinDigits, numBytes, 10, &remainder),
+      lastInexact = inexact, inexact |= !!remainder;
+  }
+
+  // Find the most significant bit and normalize the mantissa
+  // by shifting it left
+  for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
+  if (tmp >= 0)
+  {
+    tmp = tmp * 8 + 7;
+    while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
+    while (tmp < FP_MANT_BITS)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0), binExp--, tmp++;
+  }
+
+  // Find the most significant bit and normalize the mantissa
+  // by shifting it right
+  do
+  {
+    remainder = 0;
+    for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
+    if (tmp >= 0)
+    {
+      tmp = tmp * 8 + 7;
+      while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
+      while (tmp > FP_MANT_BITS)
+        ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
+        lastInexact = inexact, inexact |= !!remainder, binExp++, tmp--;
+      while (binExp < 2 - (1 << (FP_EXP_BITS - 1)) - FP_MANT_BITS)
+        ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
+        lastInexact = inexact, inexact |= !!remainder, binExp++;
+    }
+    // Round to nearest even
+    remainder &= (lastInexact | (ConstBinDigits[0] & 1));
+    if (remainder)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 1, 1);
+  } while (remainder);
+
+  // Collect the result's mantissa
+  res = 0;
+  while (tmp >= 0)
+  {
+    res <<= 8;
+    res |= ConstBinDigits[tmp / 8];
+    tmp -= 8;
+  }
+
+  // Collect the result's exponent
+  binExp += (1 << (FP_EXP_BITS - 1)) - 1 + FP_MANT_BITS;
+  if (!(res & (1u << FP_MANT_BITS))) binExp = 0; // subnormal or 0
+  res &= ~(1u << FP_MANT_BITS);
+  if (binExp >= (1 << FP_EXP_BITS) - 1)
+    binExp = (1 << FP_EXP_BITS) - 1, res = 0, inexact |= 1; // +INF
+  res |= (unsigned)binExp << FP_MANT_BITS;
+
+  return res;
+}
+
--- a/src/cmd/smlrc/lb.c
+++ b/src/cmd/smlrc/lb.c
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2013-2015, Alexey Frunze
+Copyright (c) 2013-2016, Alexey Frunze
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 extern int main(int argc, char** argv);
 void exit(int);

+#ifdef __SMALLER_C_32__
+extern void __x87init(void);
+#endif
+
 #ifdef _RETROBSD
 void __start__(int argc, char** argv)
 {
@@ -75,6 +79,7 @@ void __start__(int argc, char** argv)
 #ifdef _LINUX
 void __start__(int argc, char** argv)
 {
+  __x87init();
  exit(main(argc, argv));
 }
 #else
@@ -85,6 +90,9 @@ void __start__(void)
  int argc;
  char** argv;
  __setargs__(&argc, &argv);
+#ifdef __SMALLER_C_32__
+  __x87init();
+#endif
  exit(main(argc, argv));
 }
 #endif
@@ -371,6 +379,22 @@ void* memset(void* s, int c, unsigned n)
  return s;
 }

+#ifdef __SMALLER_C_32__
+int memcmp(void* s1, void* s2, unsigned n)
+{
+  if (n)
+  {
+    unsigned char *p1 = s1, *p2 = s2;
+    do
+    {
+      if (*p1++ != *p2++)
+        return (*--p1 - *--p2);
+    } while (--n);
+  }
+  return 0;
+}
+#endif
+
 int fputc(int c, FILE* stream);
 int putchar(int c);

@@ -1548,4 +1572,187 @@ int fsetpos(FILE* stream, fpos_t* pos)

  return 0;
 }
+
+#ifdef __SMALLER_C_32__
+#ifndef _RETROBSD
+
+#ifdef __HUGE__
+#define xbp "bp"
+#define xsp "sp"
+#else
+#define xbp "ebp"
+#define xsp "esp"
+#endif
+
+void __x87init(void)
+{
+  // Mask all exceptions, set rounding to nearest even and precision to 64 bits.
+  // TBD??? Actually check for x87???
+  asm("fninit");
+}
+
+float __floatsisf(int i)
+{
+  asm
+  (
+  "fild dword ["xbp"+8]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __floatunsisf(unsigned i)
+{
+  asm
+  (
+  "push dword 0\n"
+  "push dword ["xbp"+8]\n"
+  "fild qword ["xbp"-8]\n" // load 32-bit unsigned int as 64-bit signed int
+  "add  "xsp", 8\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+int __fixsfsi(float f)
+{
+  asm
+  (
+  "sub    "xsp", 4\n"
+  "fnstcw ["xbp"-2]\n" // save rounding
+  "mov    ax, ["xbp"-2]\n"
+  "mov    ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
+  "mov    ["xbp"-4], ax\n"
+  "fld    dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-4]\n" // rounding = truncation
+  "fistp  dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-2]\n" // restore rounding
+  "add    "xsp", 4\n"
+  "mov    eax, ["xbp"+8]"
+  );
+}
+
+unsigned __fixunssfsi(float f)
+{
+  asm
+  (
+  "sub    "xsp", 12\n"
+  "fnstcw ["xbp"-2]\n" // save rounding
+  "mov    ax, ["xbp"-2]\n"
+  "mov    ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
+  "mov    ["xbp"-4], ax\n"
+  "fld    dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-4]\n" // rounding = truncation
+  "fistp  qword ["xbp"-12]\n" // store 64-bit signed int
+  "fldcw  ["xbp"-2]\n" // restore rounding
+  "mov    eax, ["xbp"-12]\n" // take low 32 bits
+  "add    "xsp", 12"
+  );
+}
+
+float __addsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fadd dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __subsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fsub dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __negsf2(float f)
+{
+  asm
+  (
+  "mov eax, ["xbp"+8]\n"
+  "xor eax, 0x80000000"
+  );
+}
+
+float __mulsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fmul dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __divsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fdiv dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __lesf2(float a, float b)
+{
+  asm
+  (
+  "fld      dword ["xbp"+12]\n"
+  "fld      dword ["xbp"+8]\n"
+  "fucompp\n"
+  "fstsw    ax\n" // must use these moves since we don't have fucomip
+  "sahf\n"
+  "jnp      .ordered\n"
+  "mov      eax, +1\n" // return +1 if either a or b (or both) is a NaN
+  "jmp      .done\n"
+  ".ordered:\n"
+  "jnz      .unequal\n"
+  "xor      eax, eax\n" // return 0 if a == b
+  "jmp      .done\n"
+  ".unequal:\n"
+  "sbb      eax, eax\n"
+  "jc       .done\n"    // return -1 if a < b
+  "inc      eax\n"      // return +1 if a > b
+  ".done:"
+  );
+}
+
+float __gesf2(float a, float b)
+{
+  asm
+  (
+  "fld      dword ["xbp"+12]\n"
+  "fld      dword ["xbp"+8]\n"
+  "fucompp\n"
+  "fstsw    ax\n" // must use these moves since we don't have fucomip
+  "sahf\n"
+  "jnp      .ordered\n"
+  "mov      eax, -1\n" // return -1 if either a or b (or both) is a NaN
+  "jmp      .done\n"
+  ".ordered:\n"
+  "jnz      .unequal\n"
+  "xor      eax, eax\n" // return 0 if a == b
+  "jmp      .done\n"
+  ".unequal:\n"
+  "sbb      eax, eax\n"
+  "jc       .done\n"    // return -1 if a < b
+  "inc      eax\n"      // return +1 if a > b
+  ".done:"
+  );
+}
+
+#endif
+#endif
+
 #endif /*__APPLE__*/
+
--- a/src/cmd/smlrc/smlrc.c
+++ b/src/cmd/smlrc/smlrc.c