Floats in Smaller C!

double is an alias for float. IOW, only 32-bit single precision floats are supported. This isn't conformant, but OK for some embedded systems (e.g. RetroBSD) and simple compilers like this. Also, the following operators are not supported with floats at the moment: ++, --, +=, -=, *=, /=. But +, -, *, /, =, ||, &&, ?:, !, comparison, casts, if/while/for are OK.
2016-01-16 22:45:42 -08:00
parent fa094d1744
commit bfc3125556
5 changed files with 2093 additions and 455 deletions
--- a/src/cmd/smlrc/Makefile
+++ b/src/cmd/smlrc/Makefile
@@ -32,5 +32,5 @@ install:        smlrc
 clean:
 		rm -f *.o smlrc smlrc.dis smlrc.elf
 ###
-smlrc.o: smlrc.c cgmips.c
+smlrc.o: smlrc.c fp.c cgmips.c
 		${CC} ${CFLAGS} -mips16 smlrc.c -c -o $@
--- a/src/cmd/smlrc/cgmips.c
+++ b/src/cmd/smlrc/cgmips.c
@@ -546,6 +546,11 @@ void GenFxnProlog(void)
    int i, cnt = CurFxnParamCntMax;
    if (cnt > 4)
      cnt = 4;
+    // TBD!!! for structure passing use the cumulative parameter size
+    // instead of the number of parameters. Currently this bug is masked
+    // by the subroutine that pushes structures on the stack (it copies
+    // all words except the first to the stack). But passing structures
+    // in registers from assembly code won't always work.
    for (i = 0; i < cnt; i++)
      GenPrintInstr2Operands(MipsInstrSW, 0,
                             MipsOpRegA0 + i, 0,
--- a/src/cmd/smlrc/fp.c
+++ b/src/cmd/smlrc/fp.c
@@ -0,0 +1,323 @@
+/*
+  Copyright (c) 2016, Alexey Frunze
+  2-clause BSD license.
+*/
+
+// This file implements the basic arithmetic functions on floats and
+// expects that the host representation of floats coinsides with the
+// target representation of floats and is IEEE 754 single precision.
+// It is also expected that the host implementation does not raise
+// any signals nor results in undefined behavior when handling
+// special cases involving infinities, NaNs and denormal numbers.
+
+// Make sure float and int are the same size, so they can be
+// copied one into another byte by byte with memcpy().
+extern char StAtIcAsSeRt[(sizeof(float) == sizeof(unsigned)) ? 1 : -1];
+
+// There can be up to 112 significant digits in a 32-bit single precision
+// floating point number (in the maximum positive denormal number).
+// We'll ignore any following digits, but convert these 112 with proper
+// rounding to even, which is enough for all floats representable exactly,
+// which should be enough for most practical purposes.
+#define MAX_CONST_DIGITS 112 // must be big enough for decimal/octal ints as well
+unsigned char ConstDigits[MAX_CONST_DIGITS];
+#define FP_MANT_BITS  23    // bits in mantissa, excluding the implicit 1
+#define FP_EXP_BITS   8     // bits in exponent
+#define FP_MIN_EXP    (-46) // from min denormal, ~1.40e-45
+                            // (-46 is chosen because we need to handle numbers
+                            // like 9e-46 which round up to a non-zero denormal
+                            // number ~1.40e-45)
+#define FP_MAX_EXP    38    // from max normal, ~3.40e+38
+#define FP_BUF_SIZE   (((((MAX_CONST_DIGITS-FP_MIN_EXP)*10+2)/3+FP_MANT_BITS+2)+7)/8)
+unsigned char ConstBinDigits[FP_BUF_SIZE];
+
+STATIC
+unsigned f2u(unsigned f)
+{
+  float ff;
+  memcpy(&ff, &f, sizeof ff);
+  if (ff != ff || ff <= -1.0f || ff >= (float)0x10000*0x10000/* 2**32 */)
+  {
+    warnFloat2Int();
+    ff = 0;
+  }
+  return ff;
+}
+
+STATIC
+unsigned f2i(unsigned f, int opsz)
+{
+  float ff;
+  int ovf = 0;
+  memcpy(&ff, &f, sizeof ff);
+  if (ff != ff)
+  {
+    ovf = 1;
+  }
+  else
+  {
+    switch (opsz)
+    {
+    case 1: // unsigned char
+      ovf = ff <= -1.0f || ff >= 256.0f;
+      break;
+    case 2: // unsigned short
+      ovf = ff <= -1.0f || ff >= 65536.0f;
+      break;
+    case -1: // signed char
+      ovf = ff <= -129.0f || ff >= 128.0f;
+      break;
+    case -2: // signed short
+      ovf = ff <= -32769.0f || ff >= 32768.0f;
+      break;
+    default: // signed int
+      ovf = ff < -(float)0x8000*0x10000/* -2**31 */ || ff >= (float)0x8000*0x10000/* 2**31 */;
+      break;
+    }
+  }
+  if (ovf)
+  {
+    warnFloat2Int();
+    ff = 0;
+  }
+  return (int)ff;
+}
+
+STATIC
+unsigned u2f(unsigned i)
+{
+  float f = i;
+  memcpy(&i, &f, sizeof i);
+  return i;
+}
+
+STATIC
+unsigned i2f(unsigned i)
+{
+  float f = (int)i;
+  memcpy(&i, &f, sizeof i);
+  return i;
+}
+
+STATIC
+unsigned fneg(unsigned f)
+{
+  float ff;
+  memcpy(&ff, &f, sizeof ff);
+  ff = -ff;
+  memcpy(&f, &ff, sizeof f);
+  return f;
+}
+
+STATIC
+unsigned fadd(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa + ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+unsigned fsub(unsigned fa, unsigned fb)
+{
+  return fadd(fa, fneg(fb));
+}
+
+STATIC
+unsigned fmul(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa * ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+unsigned fdiv(unsigned fa, unsigned fb)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  ffa = ffa / ffb;
+  memcpy(&fa, &ffa, sizeof fa);
+  return fa;
+}
+
+STATIC
+int fcmp(unsigned fa, unsigned fb, int nanRes)
+{
+  float ffa, ffb;
+  memcpy(&ffa, &fa, sizeof ffa);
+  memcpy(&ffb, &fb, sizeof ffb);
+  if (ffa != ffa || ffb != ffb)
+    return nanRes;
+  if (ffa < ffb)
+    return -1;
+  if (ffa > ffb)
+    return +1;
+  return 0;
+}
+
+STATIC
+void ChainMultiplyAdd(unsigned char* pChain,
+                      unsigned       ChainLen,
+                      unsigned char  Multiplier,
+                      unsigned char  Addend)
+{
+  unsigned carry = Addend;
+
+  while (ChainLen--)
+  {
+    carry += *pChain * Multiplier;
+    *pChain++ = carry & 0xFFu;
+    carry >>= 8;
+  }
+}
+
+STATIC
+void ChainDivide(unsigned char* pChain,
+                 unsigned       ChainLen,
+                 unsigned char  Divisor,
+                 unsigned char* pRemainder)
+{
+  unsigned remainder = 0;
+
+  while (ChainLen)
+  {
+    remainder += pChain[ChainLen - 1];
+    pChain[ChainLen - 1] = remainder / Divisor;
+    remainder = (remainder % Divisor) << 8;
+    ChainLen--;
+  }
+
+  if (pRemainder != NULL)
+    *pRemainder = remainder >> 8;
+}
+
+// Multiplies an integer (cnt decimal digits (0 to 9) from digits[]) by
+// 10**eexp, converts the product to a float and returns it as unsigned int.
+// This is an inefficient but straightforward algorithm with proper rounding.
+STATIC
+unsigned d2f(unsigned char* digits, int cnt, int eexp)
+{
+  unsigned numDecDigits;
+  unsigned denDecDigits;
+  unsigned numBinDigits;
+  unsigned numBytes;
+  int tmp;
+  unsigned char remainder;
+  int binExp = 0;
+  int inexact = 0;
+  int lastInexact = 0;
+  unsigned res;
+  
+  // 0?
+  if (cnt == 1 && *digits == 0)
+    return 0;
+  // less than the denormalized minimum?
+  if (eexp < FP_MIN_EXP - (cnt - 1))
+    return 0;
+
+  // greater than the normalized maximum?
+  if (eexp > FP_MAX_EXP - (cnt - 1))
+    return 0x7F800000; // +INF
+
+  numDecDigits = cnt + ((eexp >= 0) ? eexp : 0);
+  denDecDigits = 1 + ((eexp < 0) ? -eexp : 0);
+
+  // 10/3=3.3(3) > log2(10)~=3.32
+  if (eexp >= 0)
+  {
+    unsigned t1 = (numDecDigits * 10 + 2) / 3;
+    unsigned t2 = FP_MANT_BITS + 1;
+    numBinDigits = (t1 >= t2) ? t1 : t2;
+  }
+  else
+  {
+    unsigned t1 = (numDecDigits * 10 + 2) / 3;
+    unsigned t2 = (denDecDigits * 10 + 2) / 3 + FP_MANT_BITS + 1 + 1;
+    numBinDigits = (t1 >= t2) ? t1 : t2;
+  }
+
+  numBytes = (numBinDigits + 7) / 8;
+  if (numBytes > (unsigned)FP_BUF_SIZE)
+    errorInternal(200);
+  memset(ConstBinDigits, 0, numBytes);
+
+  // Convert the numerator to binary
+  for (tmp = 0; tmp < cnt; tmp++)
+    ChainMultiplyAdd(ConstBinDigits, numBytes, 10, ConstDigits[tmp]);
+  for (tmp = eexp; tmp > 0; tmp--)
+    ChainMultiplyAdd(ConstBinDigits, numBytes, 10, 0);
+
+  // If the denominator isn't 1, divide the numerator by the denominator
+  // getting at least FractionBitCnt+2 significant bits of quotient
+  if (eexp < 0)
+  {
+    binExp = -(int)(numBinDigits - (numDecDigits * 10 + 2) / 3);
+    for (tmp = binExp; tmp < 0; tmp++)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0);
+    for (tmp = eexp; tmp < 0; tmp++)
+      ChainDivide(ConstBinDigits, numBytes, 10, &remainder),
+      lastInexact = inexact, inexact |= !!remainder;
+  }
+
+  // Find the most significant bit and normalize the mantissa
+  // by shifting it left
+  for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
+  if (tmp >= 0)
+  {
+    tmp = tmp * 8 + 7;
+    while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
+    while (tmp < FP_MANT_BITS)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 2, 0), binExp--, tmp++;
+  }
+
+  // Find the most significant bit and normalize the mantissa
+  // by shifting it right
+  do
+  {
+    remainder = 0;
+    for (tmp = numBytes - 1; tmp >= 0 && !ConstBinDigits[tmp]; tmp--);
+    if (tmp >= 0)
+    {
+      tmp = tmp * 8 + 7;
+      while (!(ConstBinDigits[tmp / 8] & (1 << tmp % 8))) tmp--;
+      while (tmp > FP_MANT_BITS)
+        ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
+        lastInexact = inexact, inexact |= !!remainder, binExp++, tmp--;
+      while (binExp < 2 - (1 << (FP_EXP_BITS - 1)) - FP_MANT_BITS)
+        ChainDivide(ConstBinDigits, numBytes, 2, &remainder),
+        lastInexact = inexact, inexact |= !!remainder, binExp++;
+    }
+    // Round to nearest even
+    remainder &= (lastInexact | (ConstBinDigits[0] & 1));
+    if (remainder)
+      ChainMultiplyAdd(ConstBinDigits, numBytes, 1, 1);
+  } while (remainder);
+
+  // Collect the result's mantissa
+  res = 0;
+  while (tmp >= 0)
+  {
+    res <<= 8;
+    res |= ConstBinDigits[tmp / 8];
+    tmp -= 8;
+  }
+
+  // Collect the result's exponent
+  binExp += (1 << (FP_EXP_BITS - 1)) - 1 + FP_MANT_BITS;
+  if (!(res & (1u << FP_MANT_BITS))) binExp = 0; // subnormal or 0
+  res &= ~(1u << FP_MANT_BITS);
+  if (binExp >= (1 << FP_EXP_BITS) - 1)
+    binExp = (1 << FP_EXP_BITS) - 1, res = 0, inexact |= 1; // +INF
+  res |= (unsigned)binExp << FP_MANT_BITS;
+
+  return res;
+}
+
--- a/src/cmd/smlrc/lb.c
+++ b/src/cmd/smlrc/lb.c
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2013-2015, Alexey Frunze
+Copyright (c) 2013-2016, Alexey Frunze
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 extern int main(int argc, char** argv);
 void exit(int);

+#ifdef __SMALLER_C_32__
+extern void __x87init(void);
+#endif
+
 #ifdef _RETROBSD
 void __start__(int argc, char** argv)
 {
@@ -75,6 +79,7 @@ void __start__(int argc, char** argv)
 #ifdef _LINUX
 void __start__(int argc, char** argv)
 {
+  __x87init();
  exit(main(argc, argv));
 }
 #else
@@ -85,6 +90,9 @@ void __start__(void)
  int argc;
  char** argv;
  __setargs__(&argc, &argv);
+#ifdef __SMALLER_C_32__
+  __x87init();
+#endif
  exit(main(argc, argv));
 }
 #endif
@@ -371,6 +379,22 @@ void* memset(void* s, int c, unsigned n)
  return s;
 }

+#ifdef __SMALLER_C_32__
+int memcmp(void* s1, void* s2, unsigned n)
+{
+  if (n)
+  {
+    unsigned char *p1 = s1, *p2 = s2;
+    do
+    {
+      if (*p1++ != *p2++)
+        return (*--p1 - *--p2);
+    } while (--n);
+  }
+  return 0;
+}
+#endif
+
 int fputc(int c, FILE* stream);
 int putchar(int c);

@@ -1548,4 +1572,187 @@ int fsetpos(FILE* stream, fpos_t* pos)

  return 0;
 }
+
+#ifdef __SMALLER_C_32__
+#ifndef _RETROBSD
+
+#ifdef __HUGE__
+#define xbp "bp"
+#define xsp "sp"
+#else
+#define xbp "ebp"
+#define xsp "esp"
+#endif
+
+void __x87init(void)
+{
+  // Mask all exceptions, set rounding to nearest even and precision to 64 bits.
+  // TBD??? Actually check for x87???
+  asm("fninit");
+}
+
+float __floatsisf(int i)
+{
+  asm
+  (
+  "fild dword ["xbp"+8]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __floatunsisf(unsigned i)
+{
+  asm
+  (
+  "push dword 0\n"
+  "push dword ["xbp"+8]\n"
+  "fild qword ["xbp"-8]\n" // load 32-bit unsigned int as 64-bit signed int
+  "add  "xsp", 8\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+int __fixsfsi(float f)
+{
+  asm
+  (
+  "sub    "xsp", 4\n"
+  "fnstcw ["xbp"-2]\n" // save rounding
+  "mov    ax, ["xbp"-2]\n"
+  "mov    ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
+  "mov    ["xbp"-4], ax\n"
+  "fld    dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-4]\n" // rounding = truncation
+  "fistp  dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-2]\n" // restore rounding
+  "add    "xsp", 4\n"
+  "mov    eax, ["xbp"+8]"
+  );
+}
+
+unsigned __fixunssfsi(float f)
+{
+  asm
+  (
+  "sub    "xsp", 12\n"
+  "fnstcw ["xbp"-2]\n" // save rounding
+  "mov    ax, ["xbp"-2]\n"
+  "mov    ah, 0x0c\n" // rounding towards 0 (AKA truncate) since we don't have fisttp
+  "mov    ["xbp"-4], ax\n"
+  "fld    dword ["xbp"+8]\n"
+  "fldcw  ["xbp"-4]\n" // rounding = truncation
+  "fistp  qword ["xbp"-12]\n" // store 64-bit signed int
+  "fldcw  ["xbp"-2]\n" // restore rounding
+  "mov    eax, ["xbp"-12]\n" // take low 32 bits
+  "add    "xsp", 12"
+  );
+}
+
+float __addsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fadd dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __subsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fsub dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __negsf2(float f)
+{
+  asm
+  (
+  "mov eax, ["xbp"+8]\n"
+  "xor eax, 0x80000000"
+  );
+}
+
+float __mulsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fmul dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __divsf3(float a, float b)
+{
+  asm
+  (
+  "fld  dword ["xbp"+8]\n"
+  "fdiv dword ["xbp"+12]\n"
+  "fstp dword ["xbp"+8]\n"
+  "mov  eax, ["xbp"+8]"
+  );
+}
+
+float __lesf2(float a, float b)
+{
+  asm
+  (
+  "fld      dword ["xbp"+12]\n"
+  "fld      dword ["xbp"+8]\n"
+  "fucompp\n"
+  "fstsw    ax\n" // must use these moves since we don't have fucomip
+  "sahf\n"
+  "jnp      .ordered\n"
+  "mov      eax, +1\n" // return +1 if either a or b (or both) is a NaN
+  "jmp      .done\n"
+  ".ordered:\n"
+  "jnz      .unequal\n"
+  "xor      eax, eax\n" // return 0 if a == b
+  "jmp      .done\n"
+  ".unequal:\n"
+  "sbb      eax, eax\n"
+  "jc       .done\n"    // return -1 if a < b
+  "inc      eax\n"      // return +1 if a > b
+  ".done:"
+  );
+}
+
+float __gesf2(float a, float b)
+{
+  asm
+  (
+  "fld      dword ["xbp"+12]\n"
+  "fld      dword ["xbp"+8]\n"
+  "fucompp\n"
+  "fstsw    ax\n" // must use these moves since we don't have fucomip
+  "sahf\n"
+  "jnp      .ordered\n"
+  "mov      eax, -1\n" // return -1 if either a or b (or both) is a NaN
+  "jmp      .done\n"
+  ".ordered:\n"
+  "jnz      .unequal\n"
+  "xor      eax, eax\n" // return 0 if a == b
+  "jmp      .done\n"
+  ".unequal:\n"
+  "sbb      eax, eax\n"
+  "jc       .done\n"    // return -1 if a < b
+  "inc      eax\n"      // return +1 if a > b
+  ".done:"
+  );
+}
+
+#endif
+#endif
+
 #endif /*__APPLE__*/
+
--- a/src/cmd/smlrc/smlrc.c
+++ b/src/cmd/smlrc/smlrc.c