Files
ldc/backend/cod1.c
Alexey Prokhin caad8cde58 Squashed 'dmd2/' content from commit 10017d5
git-subtree-dir: dmd2
git-subtree-split: 10017d50eaaff4ecdc37a0153b6c37ea0b004c81
2012-04-05 11:10:48 +04:00

3991 lines
135 KiB
C

// Copyright (C) 1984-1998 by Symantec
// Copyright (C) 2000-2012 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
/*
* This source file is made available for personal use
* only. The license is in /dmd/src/dmd/backendlicense.txt
* or /dm/src/dmd/backendlicense.txt
* For any other uses, please contact Digital Mars.
*/
#if !SPP
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#if __sun&&__SVR4
#include <alloca.h>
#endif
#include "cc.h"
#include "el.h"
#include "oper.h"
#include "code.h"
#include "global.h"
#include "type.h"
#include "xmm.h"
static char __file__[] = __FILE__; /* for tassert.h */
#include "tassert.h"
/* Generate the appropriate ESC instruction.
 * Builds an opcode value starting at 0xD8: the operand format MF is
 * shifted left by one and added, then b is added on top.
 * NOTE(review): b is presumably 0 or 1 (the low opcode bit) - confirm at
 * the call sites.
 */
#define ESC(MF,b) (0xD8 + ((MF) << 1) + (b))
// Operand format selectors, intended as the MF argument of ESC()
enum MF
{ // Values for MF
MFfloat = 0,
MFlong = 1,
MFdouble = 2,
MFword = 3
};
code * genf2(code *c,unsigned op,unsigned rm);
targ_size_t paramsize(elem *e,unsigned stackalign);
STATIC code * funccall (elem *,unsigned,unsigned,regm_t *,regm_t);
/* array to convert from index register to r/m field */
/* Both tables are indexed by register number in the order shown below. */
/* NOTE(review): -1 presumably marks a register that has no usable r/m */
/* encoding as an index register - confirm against the users of the tables. */
/* AX CX DX BX SP BP SI DI */
static const signed char regtorm32[8] = { 0, 1, 2, 3,-1, 5, 6, 7 };
signed char regtorm [8] = { -1,-1,-1, 7,-1, 6, 4, 5 };
/**************************
 * Test whether e can be folded into the scaled-index part of a
 * 32/64 bit addressing mode.
 * Leading OPcomma nodes are skipped (their right operand is examined).
 * The remaining node qualifies when it is a non-CSE OPshl whose shift
 * count is a constant no larger than 3.
 * Must not be called when generating 16 bit code.
 * Returns:
 *      0       not a scaled index addressing mode (this includes a
 *              shift count of 0)
 *      !=0     the shift count, i.e. the value for ss in the SIB byte
 */
int isscaledindex(elem *e)
{
    assert(!I16);

    // Only the right-most operand of a comma chain yields the value
    for (; e->Eoper == OPcomma; e = e->E2)
        ;

    if (e->Eoper != OPshl || e->Ecount)
        return 0;

    elem *shiftCount = e->E2;
    if (shiftCount->Eoper != OPconst)
        return 0;

    targ_uns scale = shiftCount->EV.Vuns;
    if (scale > 3)
        scale = 0;
    return scale;
}
/*********************************************
 * Generate code for an elem for which isscaledindex(e) returned a
 * non-zero result.
 * Input:
 *      e               comma chain ending in an OPshl node
 *      pidxregs        in: registers the index may be loaded into,
 *                      passed straight to scodelem() for the shift operand
 *      keepmsk         mask of registers passed to scodelem() as "keep"
 * Returns:
 *      code that evaluates the left operand of every leading comma and
 *      then the left operand of the shift; the shift itself is not
 *      generated (the caller encodes it as the SIB scale).
 */
code *cdisscaledindex(elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    code *cseq = NULL;

    // Left operands of commas are evaluated with an empty result mask
    for (; e->Eoper == OPcomma; )
    {
        regm_t noresult = 0;
        code *cleft = scodelem(e->E1,&noresult,keepmsk,TRUE);
        cseq = cat(cseq,cleft);
        freenode(e);
        e = e->E2;
    }

    assert(e->Eoper == OPshl);

    // Load index register with result of e->E1
    code *cindex = scodelem(e->E1,pidxregs,keepmsk,TRUE);
    cseq = cat(cseq,cindex);

    // The constant shift count and the shift node are consumed here
    freenode(e->E2);
    freenode(e);
    return cseq;
}
/***********************************
 * Table of constant multipliers that can be synthesized with one or two
 * scaled-index (LEA style) address computations instead of a multiply.
 * ssindex() searches this table by product and returns the matching
 * index; entry [0] is reserved so that 0 can mean "can't do it".
 */
static struct Ssindex
{
targ_uns product; // the multiplier this entry implements
char ss1; // SIB scale (shift count) used by the first stage
char ss2; // SIB scale (shift count) used by the second stage
char ssflags; // combination of the SSFL* bits below
#define SSFLnobp 1 // can't have EBP in relconst
#define SSFLnobase1 2 // no base register for first LEA
#define SSFLnobase 4 // no base register
#define SSFLlea 8 // can do it in one LEA
} ssindex_array[] =
{ {0, 0,0}, // [0] is a place holder
// single stage: reg + reg * 2^ss1
{3, 1,0,SSFLnobp | SSFLlea},
{5, 2,0,SSFLnobp | SSFLlea},
{9, 3,0,SSFLnobp | SSFLlea},
// two stages, second stage has no base: (reg + reg * 2^ss1) * 2^ss2
{6, 1,1,SSFLnobase},
{12,1,2,SSFLnobase},
{24,1,3,SSFLnobase},
{10,2,1,SSFLnobase},
{20,2,2,SSFLnobase},
{40,2,3,SSFLnobase},
{18,3,1,SSFLnobase},
{36,3,2,SSFLnobase},
{72,3,3,SSFLnobase},
// two stages, second stage uses the first result as base too:
// t = reg + reg * 2^ss1; t + t * 2^ss2
{15,2,1,SSFLnobp},
{25,2,2,SSFLnobp},
{27,3,1,SSFLnobp},
{45,3,2,SSFLnobp},
{81,3,3,SSFLnobp},
// two stages, neither has a base: (reg * 2^ss1) * 2^ss2
{16,3,1,SSFLnobase1 | SSFLnobase},
{32,3,2,SSFLnobase1 | SSFLnobase},
{64,3,3,SSFLnobase1 | SSFLnobase},
};
int ssindex(int op,targ_uns product)
{ int i;
if (op == OPshl)
product = 1 << product;
for (i = 1; i < arraysize(ssindex_array); i++)
{
if (ssindex_array[i].product == product)
return i;
}
return 0;
}
/***************************************
 * Build an EA of the form disp[base][index*scale].
 * Fills in the mod/rm byte, SIB byte and REX prefix bits of *c and stores
 * disp as a constant (FLconst) displacement. Any previous Irex value in *c
 * is overwritten.
 * Input:
 * c struct to fill in
 * base base register (-1 if none)
 * index index register (-1 if none)
 * scale scale factor - 1,2,4,8
 * disp displacement
 */
void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{ unsigned char rm;
unsigned char sib;
unsigned char rex = 0;
sib = 0;
if (!I16)
{ unsigned ss;
// SP is not accepted as an index register
assert(index != SP);
// convert the scale factor into the 2 bit SIB scale field
switch (scale)
{ case 1: ss = 0; break;
case 2: ss = 1; break;
case 4: ss = 2; break;
case 8: ss = 3; break;
default: assert(0);
}
if (base == -1)
{
if (index == -1)
// neither base nor index: displacement only
rm = modregrm(0,0,5);
else
{
// index only: SIB byte with base field 5 and mod 0
rm = modregrm(0,0,4);
sib = modregrm(ss,index & 7,5);
if (index & 8)
rex |= REX_X;
}
}
else if (index == -1)
{
if (base == SP)
{
// SP as base needs a SIB byte (index field 4 = no index)
rm = modregrm(2,0,4);
sib = modregrm(0,4,SP);
}
else
{ rm = modregrm(2,0,base & 7);
if (base & 8)
{ rex |= REX_B;
// R12 needs the same SIB form as SP
// NOTE(review): presumably because its low 3 bits are 4 - confirm
if (base == R12)
{
rm = modregrm(2,0,4);
sib = modregrm(0,4,4);
}
}
}
}
else
{
// both base and index present: full SIB form
rm = modregrm(2,0,4);
sib = modregrm(ss,index & 7,base & 7);
if (index & 8)
rex |= REX_X;
if (base & 8)
rex |= REX_B;
}
}
else
{
// 16 bit code: look the mod/rm byte up by [base + 1][index + 1];
// 0x09 marks a combination that cannot be encoded
// -1 AX CX DX BX SP BP SI DI
static unsigned char EA16rm[9][9] =
{
{ 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, }, // -1
{ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // AX
{ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // CX
{ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // DX
{ 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, }, // BX
{ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // SP
{ 0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, }, // BP
{ 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, }, // SI
{ 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, } // DI
};
// no scaling is available in 16 bit addressing
assert(scale == 1);
rm = EA16rm[base + 1][index + 1];
assert(rm != 9);
}
c->Irm = rm;
c->Isib = sib;
c->Irex = rex;
c->IFL1 = FLconst;
c->IEV1.Vuns = disp;
}
/*********************************************
 * Build REX, modregrm and sib bytes, packed into one unsigned:
 *      bits 0..7       mod/reg/rm byte
 *      bits 8..15      SIB byte (only set when rm selects SP and mod != 3)
 *      bits 16..       REX_R / REX_B bits for registers numbered 8 and up
 * For 16 bit code only the plain mod/reg/rm byte is returned.
 */
unsigned buildModregrm(int mod, int reg, int rm)
{
    if (I16)
        return modregrm(mod, reg, rm);

    const int regLow = reg & 7;
    const int rmLow = rm & 7;
    unsigned packed;

    if (mod != 3 && rmLow == SP)
        // memory operand based on SP: r/m field 4 plus a SIB byte
        packed = modregrm(mod,regLow,4) | (modregrm(0,4,SP) << 8);
    else
        packed = modregrm(mod,regLow,rmLow);

    if (reg & 8)
        packed |= REX_R << 16;
    if (rm & 8)
        packed |= REX_B << 16;
    return packed;
}
/****************************************
 * Generate code for eecontext.
 * Generates code for eecontext.EEelem, preceded by a stack adjustment and
 * a PUSH of SI, runs the address assignment / peephole / jump fixup passes
 * on it, appends an INT 3 and stores the result in eecontext.EEcode.
 * eecontext.EEin is incremented for the duration of the call.
 */
void genEEcode()
{ regm_t retregs;
code *c;
eecontext.EEin++;
regcon.immed.mval = 0;
retregs = 0; //regmask(eecontext.EEelem->Ety);
assert(EEoffset >= REGSIZE);
// The SUB takes EEoffset - REGSIZE; the following PUSH supplies the
// remaining REGSIZE, so the total adjustment is EEoffset
c = genc2(NULL,0x81,modregrm(3,5,SP),EEoffset - REGSIZE); // SUB ESP,EEoffset
gen1(c,0x50 + SI); // PUSH ESI
genadjesp(c,EEoffset);
c = gencodelem(c,eecontext.EEelem,&retregs, FALSE);
assignaddrc(c);
pinholeopt(c,NULL);
jmpaddr(c);
eecontext.EEcode = gen1(c,0xCC); // INT 3
eecontext.EEin--;
}
/********************************************
 * Gen a save/restore sequence for mask of registers.
 * Input:
 *      regm            mask of registers to save; bits outside
 *                      mBP | mES | ALLREGS | XMMREGS | mST0 | mST01
 *                      are ignored
 *      *csave          existing save sequence; new saves are added to it
 *      *crestore       existing restore sequence; each restore for a
 *                      PUSHed register is placed in front of it
 * Output:
 *      *csave, *crestore       updated sequences
 */
void gensaverestore2(regm_t regm,code **csave,code **crestore)
{
    code *saveSeq = *csave;
    code *restoreSeq = *crestore;

    //printf("gensaverestore2(%s)\n", regm_str(regm));
    regm_t remaining = regm & (mBP | mES | ALLREGS | XMMREGS | mST0 | mST01);

    // Walk the mask from register 0 upwards until no bits are left
    for (int r = 0; remaining; remaining >>= 1, r++)
    {
        if ((remaining & 1) == 0)
            continue;

        if (r == ES)
        {
            saveSeq = gen1(saveSeq, 0x06);                      // PUSH ES
            restoreSeq = cat(gen1(CNIL, 0x07),restoreSeq);      // POP ES
        }
        else if (r == ST0 || r == ST01)
        {
            gensaverestore87(1 << r, &saveSeq, &restoreSeq);
        }
        else if (r >= XMM0)
        {
            unsigned slot;
            saveSeq = regsave.save(saveSeq, r, &slot);
            restoreSeq = regsave.restore(restoreSeq, r, slot);
        }
        else
        {
            const unsigned low3 = r & 7;
            saveSeq = gen1(saveSeq,0x50 + low3);        // PUSH r
            code *cpop = gen1(NULL, 0x58 + low3);       // POP r
            if (r & 8)
            {
                // registers 8 and up need REX_B on both instructions
                code_orrex(saveSeq, REX_B);
                code_orrex(cpop, REX_B);
            }
            restoreSeq = cat(cpop,restoreSeq);
        }
    }

    *csave = saveSeq;
    *crestore = restoreSeq;
}
/********************************************
 * Same as gensaverestore2(), but starts from empty save and restore
 * sequences: whatever *csave and *crestore held on entry is discarded.
 */
void gensaverestore(regm_t regm,code **csave,code **crestore)
{
    *crestore = NULL;
    *csave = NULL;
    gensaverestore2(regm, csave, crestore);
}
/****************************************
 * Clean parameters off stack.
 * Nothing is generated unless numpara is non-zero and either
 * cgstate.stackclean is set or STACKALIGN is 16.
 * Input:
 * c code to append to
 * numpara amount to adjust stack pointer
 * keepmsk mask of registers to not destroy
 * Returns:
 * c with the stack adjustment appended; stackpush is reduced by numpara
 */
code *genstackclean(code *c,unsigned numpara,regm_t keepmsk)
{
//dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean);
if (numpara && (cgstate.stackclean || STACKALIGN == 16))
{
#if 0 // won't work if operand of scodelem
if (numpara == stackpush && // if this is all those pushed
needframe && // and there will be a BP
!config.windows &&
!(regcon.mvar & fregsaved) // and no registers will be pushed
)
c = genregs(c,0x89,BP,SP); // MOV SP,BP
else
#endif
{ regm_t scratchm = 0;
// When optimizing for space and exactly one register-sized slot is to
// be removed, a POP into a scratch register is used instead of an ADD.
// Candidates are registers that are already used, are not in keepmsk
// and do not hold register variables.
if (numpara == REGSIZE && config.flags4 & CFG4space)
{
scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar;
}
if (scratchm)
{ unsigned r;
c = cat(c,allocreg(&scratchm,&r,TYint));
c = gen1(c,0x58 + r); // POP r
}
else
{ c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara
if (I64)
code_orrex(c, REX_W);
}
}
// keep the compile-time stack bookkeeping in step with the generated code
stackpush -= numpara;
c = genadjesp(c,-numpara);
}
return c;
}
/*********************************
 * Generate code for a logical expression.
 * The generated code evaluates e and jumps to targ when the truth value of
 * e matches (jcond & 1); otherwise control falls through.
 * Input:
 * e elem
 * jcond
 * (jcond & 1) if TRUE then goto jump address if e
 * if FALSE then goto jump address if !e
 * (jcond & 2) don't call save87(); when this bit is clear,
 * cse_flush(1) is appended after e is evaluated
 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond
 * targ either code or block pointer to destination
 * Returns:
 * generated code, including code for any leading comma operands of e
 */
code *logexp(elem *e,int jcond,unsigned fltarg,code *targ)
{ code *c,*ce,*cnop;
regm_t retregs;
unsigned op;
//printf("logexp(e = %p, jcond = %d)\n", e, jcond);
int no87 = (jcond & 2) == 0;
_chkstack();
code *cc = docommas(&e); // scan down commas
// stackclean is raised for the duration of this call; it is lowered again at Lret
cgstate.stackclean++;
if (EOP(e) && !e->Ecount) /* if operator and not common sub */
{ con_t regconsave;
switch (e->Eoper)
{ case OPoror:
if (jcond & 1)
// jump if (E1 || E2): either operand being true takes the jump
{ c = logexp(e->E1,jcond,fltarg,targ);
regconsave = regcon;
ce = logexp(e->E2,jcond,fltarg,targ);
}
else
// jump if !(E1 || E2): a true E1 skips over E2 to the local label cnop
{ cnop = gennop(CNIL);
c = logexp(e->E1,jcond | 1,FLcode,cnop);
regconsave = regcon;
ce = logexp(e->E2,jcond,fltarg,targ);
ce = cat(ce,cnop);
}
cnop = CNIL;
goto L1;
case OPandand:
if (jcond & 1)
// jump if (E1 && E2): a false E1 skips over E2 to the local label cnop
{ cnop = gennop(CNIL); /* a dummy target address */
c = logexp(e->E1,jcond & ~1,FLcode,cnop);
regconsave = regcon;
ce = logexp(e->E2,jcond,fltarg,targ);
}
else
// jump if !(E1 && E2): either operand being false takes the jump
{ c = logexp(e->E1,jcond,fltarg,targ);
regconsave = regcon;
ce = logexp(e->E2,jcond,fltarg,targ);
cnop = CNIL;
}
// E2 is only conditionally executed, so merge the register state
// saved after E1 with the state after E2
L1: andregcon(&regconsave);
freenode(e);
c = cat4(cc,c,ce,cnop);
goto Lret;
case OPnot:
// invert the sense of the jump, then treat like the conversions below
jcond ^= 1;
// falls through
case OPbool:
case OPs8_16:
case OPu8_16:
case OPs16_32:
case OPu16_32:
case OPs32_64:
case OPu32_64:
case OPu32_d:
case OPd_ld:
// these operators are skipped here: the operand is tested directly
c = logexp(e->E1,jcond,fltarg,targ);
freenode(e);
goto Lretc;
case OPcond:
{
code *cnop2 = gennop(CNIL); // addresses of start of leaves
cnop = gennop(CNIL);
c = logexp(e->E1,FALSE,FLcode,cnop2); /* eval condition */
con_t regconold = regcon;
ce = logexp(e->E2->E1,jcond,fltarg,targ);
ce = genjmp(ce,JMP,FLcode,(block *) cnop); /* skip second leaf */
// both leaves start from the register state that followed the condition
regconsave = regcon;
regcon = regconold;
code_next(cnop2) = logexp(e->E2->E2,jcond,fltarg,targ);
andregcon(&regconold);
andregcon(&regconsave);
freenode(e->E2);
freenode(e);
c = cat6(cc,c,NULL,ce,cnop2,cnop);
goto Lret;
}
}
}
/* Special code for signed long compare.
 * Not necessary for I64 until we do cents.
 */
if (OTrel2(e->Eoper) && /* if < <= >= > */
!e->Ecount &&
( (I16 && tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) ||
(I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong))
)
{
c = longcmp(e,jcond,fltarg,targ);
goto Lretc;
}
// General case: evaluate e into the flags and emit a conditional jump
retregs = mPSW; /* return result in flags */
op = jmpopcode(e); /* get jump opcode */
if (!(jcond & 1))
op ^= 0x101; // toggle jump condition(s)
c = codelem(e,&retregs,TRUE); /* evaluate elem */
if (no87)
c = cat(c,cse_flush(no87)); // flush CSE's to memory
genjmp(c,op,fltarg,(block *) targ); /* generate jmp instruction */
Lretc:
// prepend the code for the comma operands
c = cat(cc,c);
Lret:
cgstate.stackclean--;
return c;
}
/******************************
 * Routine to aid in setting things up for gen().
 * Look for common subexpression.
 * Can handle indirection operators, but not if they're common subs.
 * If e is a common subexpression whose value is currently held in a
 * register, that register is used as the operand; otherwise getlvalue()
 * is called to build a memory addressing mode for e.
 * Input:
 * e -> elem where we get some of the data from
 * cs -> partially filled code to add
 * op = opcode
 * reg = reg field of (mod reg r/m)
 * offset = data to be added to Voffset field
 * keepmsk = mask of registers we must not destroy
 * desmsk = mask of registers destroyed by executing the instruction
 * Returns:
 * pointer to code generated
 */
code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset,
regm_t keepmsk,regm_t desmsk)
{
code *c,*cg,*cd;
#ifdef DEBUG
if (debugw)
printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%lld keepmsk=x%x desmsk=x%x\n",
e,cs,op,reg,(unsigned long long)offset,keepmsk,desmsk);
#endif
assert(e);
cs->Iflags = 0;
cs->Irex = 0;
cs->Iop = op;
tym_t tym = e->Ety;
int sz = tysize(tym);
/* Determine if location we want to get is in a register. If so, */
/* substitute the register for the EA. */
/* Note that operators don't go through this. CSE'd operators are */
/* picked up by comsub(). */
if (e->Ecount && /* if cse */
e->Ecount != e->Ecomsub && /* and cse was generated */
op != 0x8D && op != 0xC4 && /* and not an LEA or LES */
(op != 0xFF || reg != 3) && /* and not CALLF MEM16 */
(op & 0xFFF8) != 0xD8) // and not 8087 opcode
{
assert(!EOP(e)); /* can't handle this */
regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
if (sz > REGSIZE) // value is in 2 or 4 registers
{
// offset selects which register-sized piece of the value is wanted
if (I16 && sz == 8) // value is in 4 registers
{ static regm_t rmask[4] = { mDX,mCX,mBX,mAX };
rm &= rmask[offset >> 1];
}
else if (offset)
rm &= mMSW; /* only high words */
else
rm &= mLSW; /* only low words */
}
// scan the candidate registers for one that is recorded as holding e
for (unsigned i = 0; rm; i++)
{ if (mask[i] & rm)
{ if (regcon.cse.value[i] == e && // if register has elem
/* watch out for a CWD destroying DX */
!(i == DX && op == 0xF7 && desmsk & mDX))
{
/* if ES, then it can only be a load */
if (i == ES)
{ if (op != 0x8B)
goto L1; /* not a load */
cs->Iop = 0x8C; /* MOV reg,ES */
cs->Irm = modregrm(3,0,reg & 7);
if (reg & 8)
code_orrex(cs, REX_B);
}
else // XXX reg,i
{
cs->Irm = modregrm(3,reg & 7,i & 7);
if (reg & 8)
cs->Irex |= REX_R;
if (i & 8)
cs->Irex |= REX_B;
if (sz == 1 && I64 && (i >= 4 || reg >= 4))
cs->Irex |= REX;
if (I64 && (sz == 8 || sz == 16))
cs->Irex |= REX_W;
}
c = CNIL;
goto L2;
}
rm &= ~mask[i];
}
}
}
L1:
// e is not available in a register: build a memory operand for it
c = getlvalue(cs,e,keepmsk);
if (offset == REGSIZE)
getlvalue_msw(cs);
else
cs->IEVoffset1 += offset;
if (I64)
{ if (reg >= 4 && sz == 1) // if byte register
// Can only address those 8 bit registers if a REX byte is present
cs->Irex |= REX;
if ((op & 0xFFFFFFF8) == 0xD8)
cs->Irex &= ~REX_W; // not needed for x87 ops
}
code_newreg(cs, reg); // OR in reg field
if (!I16)
{
// these instructions must not carry the operand size prefix that
// getlvalue() may have requested
if (reg == 6 && op == 0xFF || /* don't PUSH a word */
op == 0x0FB7 || op == 0x0FBF || /* MOVZX/MOVSX */
(op & 0xFFF8) == 0xD8 || /* 8087 instructions */
op == 0x8D) /* LEA */
{
cs->Iflags &= ~CFopsize;
if (reg == 6 && op == 0xFF) // if PUSH
cs->Irex &= ~REX_W; // REX is ignored for PUSH anyway
}
}
else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
cs->Iflags |= CFwait;
L2:
cg = getregs(desmsk); /* save any regs we destroy */
/* KLUDGE! fix up DX for divide instructions */
cd = CNIL;
if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */
{ if (reg == 7) /* if IDIV */
{ cd = gen1(cd,0x99); // CWD
if (I64 && sz == 8)
code_orrex(cd, REX_W);
}
else if (reg == 6) // if DIV
{ cd = genregs(cd,0x33,DX,DX); // XOR DX,DX
if (I64 && sz == 8)
code_orrex(cd, REX_W);
}
}
// Eliminate MOV reg,reg
if ((cs->Iop & ~3) == 0x88 &&
(cs->Irm & 0xC7) == modregrm(3,0,reg & 7))
{
// rebuild the full register number of the r/m operand before comparing
unsigned r = cs->Irm & 7;
if (cs->Irex & REX_B)
r |= 8;
if (r == reg)
cs->Iop = NOP;
}
// order: address computation, register saves, DX fixup, the instruction itself
return cat4(c,cg,cd,gen(NULL,cs));
}
/**************************
 * Get addressing mode of the form disp[reg] for the register in idxregs.
 * Returns:
 *      16 bit code: the mod/rm byte for [BX], [DI] or [SI] (checked in
 *      that order); asserts if idxregs holds none of them.
 *      otherwise: mod/rm byte in bits 0..7, SIB byte in bits 8..15 and
 *      REX bits from bit 16 up, in the layout setaddrmode() unpacks.
 */
unsigned getaddrmode(regm_t idxregs)
{
    if (I16)
    {
        if (idxregs & mBX)
            return modregrm(2,0,7);     /* [BX] */
        if (idxregs & mDI)
            return modregrm(2,0,5);     /* [DI] */
        if (idxregs & mSI)
            return modregrm(2,0,4);     /* [SI] */
        assert(0);
        return 1;
    }

    const unsigned reg = findreg(idxregs & (ALLREGS | mBP));
    if (reg != R12)
        return modregrmx(2,0,reg);

    // R12 as base is encoded through a SIB byte
    return (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
}
/**************************
 * Store the addressing mode computed by getaddrmode(idxregs) into c:
 * the mod/rm byte goes to Irm, the SIB byte to Isib, and REX_B in Irex
 * is replaced by the REX bits of the new mode.
 */
void setaddrmode(code *c, regm_t idxregs)
{
    const unsigned packed = getaddrmode(idxregs);

    c->Irm = packed & 0xFF;
    c->Isib = packed >> 8;
    c->Irex = (c->Irex & ~REX_B) | (packed >> 16);
}
/**********************************************
 * Adjust the EA in c, which addresses the least significant word of a
 * two-register-sized value, so that it addresses the most significant word.
 * For a register variable (FLreg) the r/m field and REX_B are switched to
 * the symbol's Sregmsw register; otherwise the offset is advanced by
 * REGSIZE.
 */
void getlvalue_msw(code *c)
{
    if (c->IFL1 != FLreg)
    {
        c->IEVoffset1 += REGSIZE;
        return;
    }

    const unsigned hireg = c->IEVsym1->Sregmsw;
    c->Irm = (c->Irm & ~7) | (hireg & 7);
    if (hireg & 8)
        c->Irex |= REX_B;
    else
        c->Irex &= ~REX_B;
}
/**********************************************
 * Inverse of getlvalue_msw(): adjust the EA in c so that it addresses the
 * least significant word again.
 * For a register variable (FLreg) the r/m field and REX_B are switched to
 * the symbol's Sreglsw register; otherwise the offset is reduced by
 * REGSIZE.
 */
void getlvalue_lsw(code *c)
{
    if (c->IFL1 != FLreg)
    {
        c->IEVoffset1 -= REGSIZE;
        return;
    }

    const unsigned loreg = c->IEVsym1->Sreglsw;
    c->Irm = (c->Irm & ~7) | (loreg & 7);
    if (loreg & 8)
        c->Irex |= REX_B;
    else
        c->Irex &= ~REX_B;
}
/******************
 * Compute addressing mode.
 * Generate & return sequence of code (if any).
 * Return in cs the info on it.
 * The fields of *pcs that are set are Irm, Isib, Irex, Iflags, IFL1 and
 * the IEV*1 operand fields. pcs->Iop is saved and restored around the
 * internally generated instructions; no opcode is chosen here.
 * Input:
 * pcs -> where to store data about addressing mode
 * e -> the lvalue elem
 * keepmsk mask of registers we must not destroy or use
 * if (keepmsk & RMstore), this will be only a store operation
 * into the lvalue
 * if (keepmsk & RMload), this will be a read operation only
 * Returns:
 * code needed to load the registers used by the addressing mode
 */
code *getlvalue(code *pcs,elem *e,regm_t keepmsk)
{ regm_t idxregs;
unsigned fl,f,opsave;
code *c;
elem *e1;
elem *e11;
elem *e12;
bool e1isadd,e1free;
unsigned reg;
tym_t e1ty;
symbol *s;
//printf("getlvalue(e = %p)\n",e);
//elem_print(e);
assert(e);
elem_debug(e);
// Variables and relconsts are classified by their symbol's Sfl;
// everything else is handled as an operator (FLoper)
if (e->Eoper == OPvar || e->Eoper == OPrelconst)
{ s = e->EV.sp.Vsym;
fl = s->Sfl;
if (tyfloating(s->ty()))
obj_fltused();
}
else
fl = FLoper;
pcs->IFL1 = fl;
pcs->Iflags = CFoff; /* only want offsets */
pcs->Irex = 0;
pcs->IEVoffset1 = 0;
tym_t ty = e->Ety;
unsigned sz = tysize(ty);
if (tyfloating(ty))
obj_fltused();
// operand size prefixes that follow from the size of the lvalue
if (I64 && (sz == 8 || sz == 16))
pcs->Irex |= REX_W;
if (!I16 && sz == SHORTSIZE)
pcs->Iflags |= CFopsize;
if (ty & mTYvolatile)
pcs->Iflags |= CFvolatile;
c = CNIL;
switch (fl)
{
#if 0 && TARGET_LINUX
case FLgot:
case FLgotoff:
gotref = 1;
pcs->IEVsym1 = s;
pcs->IEVoffset1 = e->EV.sp.Voffset;
if (e->Eoper == OPvar && fl == FLgot)
{
code *c1;
unsigned saveop = pcs->Iop;
idxregs = allregs & ~keepmsk; // get a scratch register
c = allocreg(&idxregs,&reg,TYptr);
pcs->Irm = modregrm(2,reg,BX); // BX has GOT
pcs->Isib = 0;
//pcs->Iflags |= CFvolatile;
pcs->Iop = 0x8B;
c = gen(c,pcs); // MOV reg,disp[EBX]
pcs->Irm = modregrm(0,0,reg);
pcs->IEVoffset1 = 0;
pcs->Iop = saveop;
}
else
{
pcs->Irm = modregrm(2,0,BX); // disp[EBX] is addr
pcs->Isib = 0;
}
break;
#endif
case FLoper:
#ifdef DEBUG
if (debugw) printf("getlvalue(e = %p, km = x%x)\n",e,keepmsk);
#endif
// e1 is the address expression; e1free tells whether e1 is a separate
// node from e that has to be freed once it has been consumed
switch (e->Eoper)
{
case OPadd: // this way when we want to do LEA
e1 = e;
e1free = FALSE;
e1isadd = TRUE;
break;
case OPind:
case OPpostinc: // when doing (*p++ = ...)
case OPpostdec: // when doing (*p-- = ...)
case OPbt:
case OPbtc:
case OPbtr:
case OPbts:
e1 = e->E1;
e1free = TRUE;
e1isadd = e1->Eoper == OPadd;
break;
default:
#ifdef DEBUG
elem_print(e);
#endif
assert(0);
}
e1ty = tybasic(e1->Ety);
// e11/e12 are only valid when e1isadd is set
if (e1isadd)
{ e12 = e1->E2;
e11 = e1->E1;
}
/* First see if we can replace *(e+&v) with
 * MOV idxreg,e
 * EA = [ES:] &v+idxreg
 */
f = FLconst;
if (e1isadd &&
((e12->Eoper == OPrelconst
#if TARGET_SEGMENTED
&& (f = el_fl(e12)) != FLfardata
#endif
) ||
(e12->Eoper == OPconst && !I16 && !e1->Ecount && (!I64 || el_signx32(e12)))) &&
!(I64 && config.flags3 & CFG3pic) &&
e1->Ecount == e1->Ecomsub &&
#if TARGET_SEGMENTED
(!e1->Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) &&
#endif
tysize(e11->Ety) == REGSIZE
)
{ unsigned char t; /* component of r/m field */
int ss;
int ssi;
#if !TARGET_SEGMENTED
if (e12->Eoper == OPrelconst)
f = el_fl(e12);
#endif
/*assert(datafl[f]);*/ /* what if addr of func? */
if (!I16)
{ /* Any register can be an index register */
regm_t idxregs = allregs & ~keepmsk;
assert(idxregs);
/* See if e1->E1 can be a scaled index */
ss = isscaledindex(e11);
if (ss)
{
/* Load index register with result of e11->E1 */
c = cdisscaledindex(e11,&idxregs,keepmsk);
reg = findreg(idxregs);
{
// mod 2 is used when the displacement is a stack location
t = stackfl[f] ? 2 : 0;
pcs->Irm = modregrm(t,0,4);
pcs->Isib = modregrm(ss,reg & 7,5);
if (reg & 8)
pcs->Irex |= REX_X;
}
}
// multiply or shift by a constant found in ssindex_array[]
else if ((e11->Eoper == OPmul || e11->Eoper == OPshl) &&
!e11->Ecount &&
e11->E2->Eoper == OPconst &&
(ssi = ssindex(e11->Eoper,e11->E2->EV.Vuns)) != 0
)
{
regm_t scratchm;
#if 0 && TARGET_LINUX
assert(f != FLgot && f != FLgotoff);
#endif
char ssflags = ssindex_array[ssi].ssflags;
if (ssflags & SSFLnobp && stackfl[f])
goto L6;
// Load index register with result of e11->E1
c = scodelem(e11->E1,&idxregs,keepmsk,TRUE);
reg = findreg(idxregs);
int ss1 = ssindex_array[ssi].ss1;
if (ssflags & SSFLlea)
{
// single stage: the EA itself is reg + reg * 2^ss1
assert(!stackfl[f]);
pcs->Irm = modregrm(2,0,4);
pcs->Isib = modregrm(ss1,reg & 7,reg & 7);
if (reg & 8)
pcs->Irex |= REX_X | REX_B;
}
else
{ int rbase;
unsigned r;
// two stages: the first stage result goes into scratch register r
scratchm = ALLREGS & ~keepmsk;
c = cat(c,allocreg(&scratchm,&r,TYint));
if (ssflags & SSFLnobase1)
{ t = 0;
rbase = 5;
}
else
{ t = 0;
rbase = reg;
// BP and R13 are not used as the base of the first stage here;
// an IMUL by (2^ss1 + 1) computes the same value instead
if (rbase == BP || rbase == R13)
{ static unsigned imm32[4] = {1+1,2+1,4+1,8+1};
// IMUL r,BP,imm32
c = genc2(c,0x69,modregxrmx(3,r,rbase),imm32[ss1]);
goto L7;
}
}
// first stage: LEA r,[rbase + reg * 2^ss1]
c = gen2sib(c,0x8D,modregxrm(t,r,4),modregrm(ss1,reg & 7,rbase & 7));
if (reg & 8)
code_orrex(c, REX_X);
if (rbase & 8)
code_orrex(c, REX_B);
if (I64)
code_orrex(c, REX_W);
if (ssflags & SSFLnobase1)
{ code_last(c)->IFL1 = FLconst;
code_last(c)->IEV1.Vuns = 0;
}
L7:
// second stage is folded into the EA that is returned in pcs
if (ssflags & SSFLnobase)
{ t = stackfl[f] ? 2 : 0;
rbase = 5;
}
else
{ t = 2;
rbase = r;
assert(rbase != BP);
}
pcs->Irm = modregrm(t,0,4);
pcs->Isib = modregrm(ssindex_array[ssi].ss2,r & 7,rbase & 7);
if (r & 8)
pcs->Irex |= REX_X;
if (rbase & 8)
pcs->Irex |= REX_B;
}
freenode(e11->E2);
freenode(e11);
}
else
{
L6:
/* Load index register with result of e11 */
c = scodelem(e11,&idxregs,keepmsk,TRUE);
setaddrmode(pcs, idxregs);
#if 0 && TARGET_LINUX
if (e12->EV.sp.Vsym->Sfl == FLgot || e12->EV.sp.Vsym->Sfl == FLgotoff)
{
gotref = 1;
#if 1
reg = findreg(idxregs & (ALLREGS | mBP));
pcs->Irm = modregrm(2,0,4);
pcs->Isib = modregrm(0,reg,BX);
#else
pcs->Isib = modregrm(0,pcs->Irm,BX);
pcs->Irm = modregrm(2,0,4);
#endif
}
else
#endif
if (stackfl[f]) /* if we need [EBP] too */
// turn disp[reg] into disp[EBP][reg]: the register moves from the
// r/m field to the SIB index field, so REX_B becomes REX_X
{ unsigned idx = pcs->Irm & 7;
if (pcs->Irex & REX_B)
pcs->Irex = (pcs->Irex & ~REX_B) | REX_X;
pcs->Isib = modregrm(0,idx,BP);
pcs->Irm = modregrm(2,0,4);
}
}
}
else
{
idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */
assert(idxregs);
#if 0 && TARGET_LINUX
assert(f != FLgot && f != FLgotoff);
#endif
if (stackfl[f]) /* if stack data type */
{ idxregs &= mSI | mDI; /* BX can't index off stack */
if (!idxregs) goto L1; /* index regs aren't avail */
t = 6; /* [BP+SI+disp] */
}
else
t = 0; /* [SI + disp] */
c = scodelem(e11,&idxregs,keepmsk,TRUE); /* load idx reg */
pcs->Irm = getaddrmode(idxregs) ^ t;
}
// record what kind of storage is referenced
if (f == FLpara)
refparam = TRUE;
else if (f == FLauto || f == FLtmp || f == FLbprel || f == FLfltreg)
reflocal = TRUE;
#if TARGET_SEGMENTED
else if (f == FLcsdata || tybasic(e12->Ety) == TYcptr)
pcs->Iflags |= CFcs;
#endif
else
assert(f != FLreg);
pcs->IFL1 = f;
if (f != FLconst)
pcs->IEVsym1 = e12->EV.sp.Vsym;
pcs->IEVoffset1 = e12->EV.sp.Voffset; /* += ??? */
/* If e1 is a CSE, we must generate an addressing mode */
/* but also leave EA in registers so others can use it */
if (e1->Ecount)
{ unsigned flagsave;
idxregs = IDXREGS & ~keepmsk;
c = cat(c,allocreg(&idxregs,&reg,TYoffset));
#if TARGET_SEGMENTED
/* If desired result is a far pointer, we'll have */
/* to load another register with the segment of v */
if (e1ty == TYfptr)
{
unsigned msreg;
idxregs |= mMSW & ALLREGS & ~keepmsk;
c = cat(c,allocreg(&idxregs,&msreg,TYfptr));
msreg = findregmsw(idxregs);
/* MOV msreg,segreg */
c = genregs(c,0x8C,segfl[f],msreg);
}
#endif
// temporarily turn pcs into an LEA, then restore opcode and flags
opsave = pcs->Iop;
flagsave = pcs->Iflags;
pcs->Iop = 0x8D;
code_newreg(pcs, reg);
if (!I16)
pcs->Iflags &= ~CFopsize;
if (I64)
pcs->Irex |= REX_W;
c = gen(c,pcs); /* LEA idxreg,EA */
cssave(e1,idxregs,TRUE);
if (!I16)
pcs->Iflags = flagsave;
if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack
pcs->Iflags |= CFss; // add SS: override
pcs->Iop = opsave;
// the EA is now simply 0[idxreg]
pcs->IFL1 = FLoffset;
pcs->IEV1.Vuns = 0;
setaddrmode(pcs, idxregs);
}
freenode(e12);
if (e1free)
freenode(e1);
goto Lptr;
}
L1:
/* The rest of the cases could be a far pointer */
idxregs = (I16 ? IDXREGS : allregs) & ~keepmsk; // only these can be index regs
assert(idxregs);
// for a store-only access of suitable size, registers holding register
// variables are also allowed as index registers
if (!I16 &&
(sz == REGSIZE || (I64 && sz == 4)) &&
keepmsk & RMstore)
idxregs |= regcon.mvar;
#if TARGET_SEGMENTED
switch (e1ty)
{ case TYfptr: /* if far pointer */
case TYhptr:
idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too
assert(idxregs & mES);
pcs->Iflags |= CFes; /* ES segment override */
break;
case TYsptr: /* if pointer to stack */
if (config.wflags & WFssneds) // if SS != DS
pcs->Iflags |= CFss; /* then need SS: override */
break;
case TYcptr: /* if pointer to code */
pcs->Iflags |= CFcs; /* then need CS: override */
break;
}
#endif
pcs->IFL1 = FLoffset;
pcs->IEV1.Vuns = 0;
/* see if we can replace *(e+c) with
 * MOV idxreg,e
 * [MOV ES,segment]
 * EA = [ES:]c[idxreg]
 */
if (e1isadd && e12->Eoper == OPconst &&
(!I64 || el_signx32(e12)) &&
(tysize(e12->Ety) == REGSIZE || (I64 && tysize(e12->Ety) == 4)) &&
(!e1->Ecount || !e1free)
)
{ int ss;
// the constant becomes the displacement
pcs->IEV1.Vuns = e12->EV.Vuns;
freenode(e12);
if (e1free) freenode(e1);
// (a + b) + const: use a and b as base and index via L4
if (!I16 && e11->Eoper == OPadd && !e11->Ecount &&
tysize(e11->Ety) == REGSIZE)
{
e12 = e11->E2;
e11 = e11->E1;
e1 = e1->E1;
e1free = TRUE;
goto L4;
}
if (!I16 && (ss = isscaledindex(e11)) != 0)
{ // (v * scale) + const
c = cdisscaledindex(e11,&idxregs,keepmsk);
reg = findreg(idxregs);
pcs->Irm = modregrm(0,0,4);
pcs->Isib = modregrm(ss,reg & 7,5);
if (reg & 8)
pcs->Irex |= REX_X;
}
else
{
c = scodelem(e11,&idxregs,keepmsk,TRUE); // load index reg
setaddrmode(pcs, idxregs);
}
goto Lptr;
}
/* Look for *(v1 + v2)
 * EA = [v1][v2]
 */
if (!I16 && e1isadd && (!e1->Ecount || !e1free) &&
(tysize[e1ty] == REGSIZE || (I64 && tysize[e1ty] == 4)))
{ code *c2;
regm_t idxregs2;
unsigned base,index;
int ss;
L4:
// In every variant below idxregs ends up holding the base register
// and idxregs2 the index register; ss is the SIB scale (0 if unscaled)
// Look for *(v1 + v2 << scale)
ss = isscaledindex(e12);
if (ss)
{
c = scodelem(e11,&idxregs,keepmsk,TRUE);
idxregs2 = allregs & ~(idxregs | keepmsk);
c2 = cdisscaledindex(e12,&idxregs2,keepmsk | idxregs);
}
// Look for *(v1 << scale + v2)
else if ((ss = isscaledindex(e11)) != 0)
{
idxregs2 = idxregs;
c = cdisscaledindex(e11,&idxregs2,keepmsk);
idxregs = allregs & ~(idxregs2 | keepmsk);
c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE);
}
// Look for *(((v1 << scale) + c1) + v2)
else if (e11->Eoper == OPadd && !e11->Ecount &&
e11->E2->Eoper == OPconst &&
(ss = isscaledindex(e11->E1)) != 0
)
{
pcs->IEV1.Vuns = e11->E2->EV.Vuns;
idxregs2 = idxregs;
c = cdisscaledindex(e11->E1,&idxregs2,keepmsk);
idxregs = allregs & ~(idxregs2 | keepmsk);
c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE);
freenode(e11->E2);
freenode(e11);
}
else
{
c = scodelem(e11,&idxregs,keepmsk,TRUE);
idxregs2 = allregs & ~(idxregs | keepmsk);
c2 = scodelem(e12,&idxregs2,keepmsk | idxregs,TRUE);
}
c = cat(c,c2);
base = findreg(idxregs);
index = findreg(idxregs2);
pcs->Irm = modregrm(2,0,4);
pcs->Isib = modregrm(ss,index & 7,base & 7);
if (index & 8)
pcs->Irex |= REX_X;
if (base & 8)
pcs->Irex |= REX_B;
if (e1free) freenode(e1);
goto Lptr;
}
/* give up and replace *e1 with
 * MOV idxreg,e
 * EA = 0[idxreg]
 * pinholeopt() will usually correct the 0, we need it in case
 * we have a pointer to a long and need an offset to the second
 * word.
 */
assert(e1free);
c = scodelem(e1,&idxregs,keepmsk,TRUE); /* load index register */
setaddrmode(pcs, idxregs);
Lptr:
if (config.flags3 & CFG3ptrchk)
cod3_ptrchk(&c,pcs,keepmsk); // validate pointer code
break;
case FLdatseg:
assert(0);
#if 0
pcs->Irm = modregrm(0,0,BPRM);
pcs->IEVpointer1 = e->EVpointer;
break;
#endif
// (FLdatseg is not expected; with the code above disabled, control
// would continue into FLfltreg if the assert were compiled out)
case FLfltreg:
reflocal = TRUE;
pcs->Irm = modregrm(2,0,BPRM);
pcs->IEV1.Vint = 0;
break;
case FLreg:
goto L2;
case FLpara:
refparam = TRUE;
pcs->Irm = modregrm(2,0,BPRM);
goto L2;
case FLauto:
// A fastpar parameter that is still in its incoming register can be
// read straight from that register; any other kind of access marks it
// as no longer being in the register
if (s->Sclass == SCfastpar && regcon.params & mask[s->Spreg])
{
if (keepmsk & RMload)
{
if (sz == REGSIZE) // could this be (sz <= REGSIZE) ?
{
pcs->Irm = modregrm(3,0,s->Spreg & 7);
if (s->Spreg & 8)
pcs->Irex |= REX_B;
regcon.used |= mask[s->Spreg];
break;
}
}
else
regcon.params &= ~mask[s->Spreg];
}
// falls through
case FLtmp:
case FLbprel:
reflocal = TRUE;
pcs->Irm = modregrm(2,0,BPRM);
goto L2;
case FLextern:
// special treatment for the symbol named "_tls_array"
if (s->Sident[0] == '_' && memcmp(s->Sident + 1,"tls_array",10) == 0)
{
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
// Rewrite as GS:[0000], or FS:[0000] for 64 bit
if (I64)
{
pcs->Irm = modregrm(0, 0, 4);
pcs->Isib = modregrm(0, 4, 5); // don't use [RIP] addressing
pcs->IFL1 = FLconst;
pcs->IEV1.Vuns = 0;
pcs->Iflags = CFfs;
pcs->Irex |= REX_W;
}
else
{
pcs->Irm = modregrm(0, 0, BPRM);
pcs->IFL1 = FLconst;
pcs->IEV1.Vuns = 0;
pcs->Iflags = CFgs;
}
break;
#else
pcs->Iflags |= CFfs; // add FS: override
#endif
}
#if TARGET_SEGMENTED
if (s->ty() & mTYcs && LARGECODE)
goto Lfardata;
#endif
goto L3;
case FLdata:
case FLudata:
#if TARGET_SEGMENTED
case FLcsdata:
#endif
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
case FLgot:
case FLgotoff:
case FLtlsdata:
#endif
L3:
// statically addressed data: displacement-only mod/rm
pcs->Irm = modregrm(0,0,BPRM);
L2:
// common tail for all symbol-based EAs
if (fl == FLreg)
{
#ifdef DEBUG
if (!(s->Sregm & regcon.mvar)) symbol_print(s);
#endif
assert(s->Sregm & regcon.mvar);
/* Attempting to paint a float as an integer or an integer as a float
 * will cause serious problems since the EA is loaded separatedly from
 * the opcode. The only way to deal with this is to prevent enregistering
 * such variables.
 */
if (tyxmmreg(ty) && !(s->Sregm & XMMREGS) ||
!tyxmmreg(ty) && (s->Sregm & XMMREGS))
cgreg_unregister(s->Sregm);
if (
s->Sclass == SCregpar ||
s->Sclass == SCparameter)
{ refparam = TRUE;
reflocal = TRUE; // kludge to set up prolog
}
pcs->Irm = modregrm(3,0,s->Sreglsw & 7);
if (s->Sreglsw & 8)
pcs->Irex |= REX_B;
// a byte access at offset 1 selects the high byte register (AH..BH)
if (e->EV.sp.Voffset == 1 && sz == 1)
{ assert(s->Sregm & BYTEREGS);
assert(s->Sreglsw < 4);
pcs->Irm |= 4; // use 2nd byte of register
}
else
{ assert(!e->EV.sp.Voffset);
if (I64 && sz == 1 && s->Sreglsw >= 4)
pcs->Irex |= REX;
}
}
#if TARGET_SEGMENTED
else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE))
{
pcs->Iflags |= CFcs | CFoff;
}
#endif
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (I64 && config.flags3 & CFG3pic &&
(fl == FLtlsdata || s->ty() & mTYthread))
{
pcs->Iflags |= CFopsize;
pcs->Irex = 0x48;
}
#endif
pcs->IEVsym1 = s;
pcs->IEVoffset1 = e->EV.sp.Voffset;
// update the symbol's register allocation hints based on this access
if (sz == 1)
{ /* Don't use SI or DI for this variable */
s->Sflags |= GTbyte;
if (e->EV.sp.Voffset > 1)
s->Sflags &= ~GTregcand;
}
else if (e->EV.sp.Voffset)
s->Sflags &= ~GTregcand;
if (!(keepmsk & RMstore)) // if not store only
{ s->Sflags |= SFLread; // assume we are doing a read
}
break;
case FLpseudo:
#if MARS
assert(0);
#else
{
unsigned u = s->Sreglsw;
c = getregs(pseudomask[u]);
pcs->Irm = modregrm(3,0,pseudoreg[u] & 7);
break;
}
#endif
#if TARGET_SEGMENTED
case FLfardata:
#endif
case FLfunc: /* reading from code seg */
if (config.exe & EX_flat)
goto L3;
Lfardata:
{
// load the segment of the symbol into ES through a scratch register
regm_t regm = ALLREGS & ~keepmsk; // need scratch register
code *c1 = allocreg(&regm,&reg,TYint);
/* MOV mreg,seg of symbol */
c = gencs(CNIL,0xB8 + reg,0,FLextern,s);
c->Iflags = CFseg;
c = gen2(c,0x8E,modregrmx(3,0,reg)); /* MOV ES,reg */
c = cat3(c1,getregs(mES),c);
pcs->Iflags |= CFes | CFoff; /* ES segment override */
goto L3;
}
case FLstack:
// addressed relative to SP through a SIB byte
assert(!I16);
pcs->Irm = modregrm(2,0,4);
pcs->Isib = modregrm(0,4,SP);
pcs->IEVsym1 = s;
pcs->IEVoffset1 = e->EV.sp.Voffset;
break;
default:
#ifdef DEBUG
WRFL((enum FL)fl);
symbol_print(s);
#endif
assert(0);
}
return c;
}
/*****************************
 * Given an opcode and EA in cs, generate code
 * for each floating register in turn.
 * The instruction in *pcs is emitted once per register that makes up the
 * value, with the reg field and the offset adjusted each time. On return
 * pcs->IEVoffset1 has its original value again.
 * Not available for 64 bit code.
 * Input:
 *      tym     either TYdouble or TYfloat
 */
code *fltregs(code *pcs,tym_t tym)
{
    assert(!I64);
    const bool isFloat = (tybasic(tym) == TYfloat);
    code *cseq;

    if (I32)
    {
        // float lives in AX, double in DX:AX
        cseq = getregs(isFloat ? mAX : mAX | mDX);
        if (!isFloat)
        {
            // high half first
            pcs->IEVoffset1 += REGSIZE;
            NEWREG(pcs->Irm,DX);
            cseq = gen(cseq,pcs);
            pcs->IEVoffset1 -= REGSIZE;
        }
        NEWREG(pcs->Irm,AX);
        cseq = gen(cseq,pcs);
        return cseq;
    }

    if (isFloat)
    {
        // 16 bit float: DX at offset 2, then AX at offset 0
        cseq = getregs(FLOATREGS_16);
        pcs->IEVoffset1 += 2;
        NEWREG(pcs->Irm,DX);
        cseq = gen(cseq,pcs);
        pcs->IEVoffset1 -= 2;
        NEWREG(pcs->Irm,AX);
        gen(cseq,pcs);
    }
    else
    {
        // 16 bit double: AX, BX, CX, DX at offsets 6, 4, 2, 0
        cseq = getregs(DOUBLEREGS_16);
        pcs->IEVoffset1 += 6;
        NEWREG(pcs->Irm,AX);
        cseq = gen(cseq,pcs);
        pcs->IEVoffset1 -= 2;
        NEWREG(pcs->Irm,BX);
        gen(cseq,pcs);
        pcs->IEVoffset1 -= 2;
        NEWREG(pcs->Irm,CX);
        gen(cseq,pcs);
        pcs->IEVoffset1 -= 2;   /* note that exit is with Voffset unaltered */
        NEWREG(pcs->Irm,DX);
        gen(cseq,pcs);
    }
    return cseq;
}
/*****************************
 * Given a result in registers, generate code that tests it for TRUE or
 * FALSE (non-zero or zero), leaving the outcome in the flags.
 * Will fail if TYfptr and the reg is ES!
 * Input:
 *      regm            mask of the register(s) holding the value
 *      tym             type of the value; only tybasic(tym) is used
 *      saveflag        if TRUE, preserve the contents of the registers
 *                      (a scratch register is allocated where the test
 *                      would otherwise modify them)
 * Returns:
 *      the generated code list
 */

code *tstresult(regm_t regm,tym_t tym,unsigned saveflag)
{
    unsigned scrreg;                    /* scratch register             */
    regm_t scrregm;

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    code *ce = CNIL;
    unsigned reg = findreg(regm);
    unsigned sz = tysize[tym];

    /* Byte sized value: TEST does not modify the register */
    if (sz == 1)
    {   assert(regm & BYTEREGS);
        ce = genregs(ce,0x84,reg,reg);          // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(ce, REX);
        return ce;
    }

    /* Value in XMM register(s): compare against a zeroed XMM register */
    if (regm & XMMREGS)
    {
        unsigned xreg;
        regm_t xregs = XMMREGS & ~regm;
        ce = allocreg(&xregs, &xreg, TYdouble);
        unsigned op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        ce = gen2(ce,op | 0x0F57,modregrm(3,xreg-XMM0,xreg-XMM0));     // XORPS xreg,xreg
        gen2(ce,op | 0x0F2E,modregrm(3,xreg-XMM0,reg-XMM0));           // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   /* Complex: the second register is only compared when the
             * first compare did not already give "not equal"/"unordered"
             */
            code *cnop = gennop(CNIL);
            genjmp(ce,JNE,FLcode,(block *) cnop);       // JNE     L1
            genjmp(ce,JP, FLcode,(block *) cnop);       // JP      L1
            reg = findreg(regm & ~mask[reg]);
            gen2(ce,op | 0x0F2E,modregrm(3,xreg-XMM0,reg-XMM0));       // UCOMISS xreg,reg
            ce = cat(ce, cnop);
        }
        return ce;
    }

    /* Value fits in a single register */
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {   if (saveflag)
                {
                    scrregm = allregs & ~regm;          /* possible scratch regs */
                    ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */
                    ce = genmovreg(ce,scrreg,reg);      /* MOV scrreg,msreg */
                    reg = scrreg;
                }
                ce = cat(ce,getregs(mask[reg]));
                return gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1
            }
            ce = gentstreg(ce,reg);                     // TEST reg,reg
            if (sz == SHORTSIZE)
                ce->Iflags |= CFopsize;                 /* 16 bit operands */
            else if (sz == 8)
                code_orrex(ce, REX_W);
        }
        else
            ce = gentstreg(ce,reg);                     // TEST reg,reg
        return ce;
    }

    /* Value spans several registers: OR the pieces together */
    if (saveflag || tyfv(tym))
    {
        /* Accumulate in a scratch register so the value is not destroyed */
        scrregm = ALLREGS & ~regm;                      /* possible scratch regs */
        ce = allocreg(&scrregm,&scrreg,TYoffset);       /* allocate scratch reg */
        if (I32 || sz == REGSIZE * 2)
        {   code *c;

            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                {   c = genregs(CNIL,0x0FB7,scrreg,reg); // MOVZX scrreg,msreg
                    ce = cat(ce,c);
                }
                else
                {   ce = genmovreg(ce,scrreg,reg);      /* MOV scrreg,msreg */
                    if (tym == TYdouble || tym == TYdouble_alias)
                        gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */
                }
            }
            else
            {
                ce = genmovreg(ce,scrreg,reg);          /* MOV scrreg,msreg */
                if (tym == TYfloat)
                    gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */
            }
            reg = findreglsw(regm);
            genorreg(ce,scrreg,reg);                    /* OR scrreg,lsreg */
        }
        else if (sz == 8)
        {   /* !I32 */
            ce = genmovreg(ce,scrreg,AX);               /* MOV scrreg,AX */
            if (tym == TYdouble || tym == TYdouble_alias)
                gen2(ce,0xD1,modregrm(3,4,scrreg));     // SHL scrreg,1
            genorreg(ce,scrreg,BX);                     /* OR scrreg,BX */
            genorreg(ce,scrreg,CX);                     /* OR scrreg,CX */
            genorreg(ce,scrreg,DX);                     /* OR scrreg,DX */
        }
        else
            assert(0);
    }
    else
    {
        /* The most significant register may be trashed */
        if (I32 || sz == REGSIZE * 2)
        {
            /* can't test ES:LSW for 0 */
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            ce = getregs(mask[reg]);            /* we're going to trash reg */
            if (tyfloating(tym) && sz == 2 * intsize)
                ce = gen2(ce,0xD1,modregrm(3,4,reg));   // SHL reg,1
            ce = genorreg(ce,reg,findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(ce, REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            ce = getregs(mAX);                          // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                ce = gen2(ce,0xD1,modregrm(3,4,AX));    // SHL AX,1
            /* ce may still be empty here (getregs() generated nothing and
             * no SHL was needed), so the list head returned by genorreg()
             * must be kept or the OR instructions would be dropped.
             */
            ce = genorreg(ce,AX,BX);                    // OR AX,BX
            ce = genorreg(ce,AX,CX);                    // OR AX,CX
            ce = genorreg(ce,AX,DX);                    // OR AX,DX
        }
        else
            assert(0);
    }
    code_orflag(ce,CFpsw);
    return ce;
}
/******************************
* Given the result of an expression is in retregs,
* generate necessary code to return result in *pretregs.
* Input:
*      e               expression whose value was computed; its type (e->Ety)
*                      selects how the value is moved
*      retregs         registers (or mSTACK) that currently hold the value
*      pretregs        the registers and/or mPSW the caller wants. Set to
*                      retregs when the value is already acceptably placed,
*                      otherwise handed to allocreg() or fixresult87()
* Returns:
*      generated code, CNIL if *pretregs is 0
*/
code *fixresult(elem *e,regm_t retregs,regm_t *pretregs)
{ code *c,*ce;
unsigned reg,rreg;
regm_t forccs,forregs;
tym_t tym;
int sz;
//printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
if (*pretregs == 0) return CNIL; /* if don't want result */
assert(e && retregs); /* need something to work with */
// Split the request into "result wanted in flags" and "result wanted in registers"
forccs = *pretregs & mPSW;
forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
tym = tybasic(e->Ety);
#if TARGET_SEGMENTED
if (tym == TYstruct)
// Hack to support cdstreq()
tym = (forregs & mMSW) ? TYfptr : TYnptr;
#else
if (tym == TYstruct)
{
// Hack to support cdstreq()
assert(!(forregs & mMSW));
tym = TYnptr;
}
#endif
c = CNIL;
sz = tysize[tym];
if (sz == 1)
{
assert(retregs & BYTEREGS);
// Note: this declaration shadows the function-level 'reg'
unsigned reg = findreg(retregs);
// Byte at offset 1 of a register variable, i.e. the high byte register
if (e->Eoper == OPvar &&
e->EV.sp.Voffset == 1 &&
e->EV.sp.Vsym->Sfl == FLreg)
{
assert(reg < 4);
if (forccs)
c = gen2(c,0x84,modregrm(3,reg | 4,reg | 4)); // TEST regH,regH
// Flags are dealt with; clearing forccs skips the tstresult() call at the end
forccs = 0;
}
}
if ((retregs & forregs) == retregs) /* if already in right registers */
*pretregs = retregs;
else if (forregs) /* if return the result in registers */
{
// Results wanted on the 8087 stack are handled separately
if (forregs & (mST01 | mST0))
return fixresult87(e,retregs,pretregs);
ce = CNIL;
unsigned opsflag = FALSE;
// 16 bit code, 8 byte value: only moves between AX,BX,CX,DX and the stack are supported
if (I16 && sz == 8)
{ if (forregs & mSTACK)
{ assert(retregs == DOUBLEREGS_16);
/* Push floating regs */
c = CNIL;
ce = gen1(ce,0x50 + AX);
gen1(ce,0x50 + BX);
gen1(ce,0x50 + CX);
gen1(ce,0x50 + DX);
stackpush += DOUBLESIZE;
}
else if (retregs & mSTACK)
{ assert(forregs == DOUBLEREGS_16);
/* Pop floating regs */
c = getregs(forregs);
ce = gen1(ce,0x58 + DX);
gen1(ce,0x58 + CX);
gen1(ce,0x58 + BX);
gen1(ce,0x58 + AX);
stackpush -= DOUBLESIZE;
retregs = DOUBLEREGS_16; /* for tstresult() below */
}
else
#ifdef DEBUG
printf("retregs = x%x, forregs = x%x\n",retregs,forregs),
#endif
assert(0);
if (EOP(e))
opsflag = TRUE;
}
else
{
c = allocreg(pretregs,&rreg,tym); /* allocate return regs */
// Source is an XMM register: go through the floatreg memory temporary
if (retregs & XMMREGS)
{
reg = findreg(retregs & XMMREGS);
// MOVSD floatreg, XMM?
ce = genfltreg(ce,xmmstore(tym),reg - XMM0,0);
if (mask[rreg] & XMMREGS)
// MOVSD XMM?, floatreg
ce = genfltreg(ce,xmmload(tym),rreg - XMM0,0);
else
{
// MOV rreg,floatreg
ce = genfltreg(ce,0x8B,rreg,0);
if (sz == 8)
{
if (I32)
{
// 32 bit: second load at offset 4 into the most significant register
rreg = findregmsw(*pretregs);
ce = genfltreg(ce,0x8B,rreg,4);
}
else
code_orrex(ce,REX_W);
}
}
}
// Destination is an XMM register: store the general register(s), then load the XMM register
else if (forregs & XMMREGS)
{
reg = findreg(retregs & (mBP | ALLREGS));
// MOV floatreg,reg
ce = genfltreg(ce,0x89,reg,0);
if (sz == 8)
{
if (I32)
{
reg = findregmsw(retregs);
ce = genfltreg(ce,0x89,reg,4);
}
else
code_orrex(ce,REX_W);
}
// MOVSS/MOVSD XMMreg,floatreg
ce = genfltreg(ce,xmmload(tym),rreg - XMM0,0);
}
// Register pair to register pair
else if (sz > REGSIZE)
{
unsigned msreg = findregmsw(retregs);
unsigned lsreg = findreglsw(retregs);
unsigned msrreg = findregmsw(*pretregs);
unsigned lsrreg = findreglsw(*pretregs);
ce = genmovreg(ce,msrreg,msreg); /* MOV msrreg,msreg */
ce = genmovreg(ce,lsrreg,lsreg); /* MOV lsrreg,lsreg */
}
// Single general register to single general register
else
{
assert(!(retregs & XMMREGS));
assert(!(forregs & XMMREGS));
reg = findreg(retregs & (mBP | ALLREGS));
ce = genmovreg(ce,rreg,reg); /* MOV rreg,reg */
}
}
c = cat(c,ce);
cssave(e,retregs | *pretregs,opsflag);
forregs = 0; /* don't care about result in reg */
/* cuz we have real result in rreg */
retregs = *pretregs & ~mPSW;
}
// forregs doubles as the saveflag argument: non-zero means keep the registers intact
if (forccs) /* if return result in flags */
c = cat(c,tstresult(retregs,tym,forregs));
return c;
}
/********************************
* Generate code sequence to call C runtime library support routine.
* clib = CLIBxxxx
* keepmask = mask of registers not to destroy. Currently can
* handle only 1. Should use a temporary rather than
* push/pop for speed.
* Input:
*      e               expression the call is generated for (passed on to fixresult())
*      clib            index of the routine in the lib[] and info[] tables
*      pretregs        where the caller wants the result (passed on to fixresult())
*      keepmask        registers to save and restore around the call
* Returns:
*      generated code: save, call (or inline expansion), restore, fix up result
*/
int clib_inited = 0; // != 0 if initialized
code *callclib(elem *e,unsigned clib,regm_t *pretregs,regm_t keepmask)
{
//printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
//elem_print(e);
// Symbol table of the runtime routines, indexed by CLIBxxxx. Which set of
// names is compiled in depends on the target operating system.
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
static symbol lib[] =
{
/* Convert destroyed regs into saved regs */
#define Z(desregs) (~(desregs) & (mBP| mES | ALLREGS))
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
#define N(name) "_" name
#else
#define N(name) name
#endif
/* Shorthand to map onto SYMBOLY() */
#define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),N(name),0)
Y(0,"_LCMP__"), // CLIBlcmp
Y(mAX|mCX|mDX,"_LMUL__"), // CLIBlmul
#if 1
Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBldiv
Y(mAX|mBX|mCX|mDX,"_LDIV__"), // CLIBlmod
Y(mAX|mBX|mCX|mDX,"_ULDIV__"), // CLIBuldiv
Y(mAX|mBX|mCX|mDX,"_ULDIV__"), // CLIBulmod
#else
Y(ALLREGS,"_LDIV__"), // CLIBldiv
Y(ALLREGS,"_LDIV__"), // CLIBlmod
Y(ALLREGS,"_ULDIV__"), // CLIBuldiv
Y(ALLREGS,"_ULDIV__"), // CLIBulmod
#endif
#if 0
Y(DOUBLEREGS_16,"_DNEG"),
Y(mAX|mBX|mCX|mDX,"_DMUL"), // CLIBdmul
Y(mAX|mBX|mCX|mDX,"_DDIV"), // CLIBddiv
Y(0,"_DTST0"), // CLIBdtst0
Y(0,"_DTST0EXC"), // CLIBdtst0exc
Y(0,"_DCMP"), // CLIBdcmp
Y(0,"_DCMPEXC"), // CLIBdcmpexc
Y(mAX|mBX|mCX|mDX,"_DADD"), // CLIBdadd
Y(mAX|mBX|mCX|mDX,"_DSUB"), // CLIBdsub
Y(mAX|mBX|mCX|mDX,"_FMUL"), // CLIBfmul
Y(mAX|mBX|mCX|mDX,"_FDIV"), // CLIBfdiv
Y(0,"_FTST0"), // CLIBftst0
Y(0,"_FTST0EXC"), // CLIBftst0exc
Y(0,"_FCMP"), // CLIBfcmp
Y(0,"_FCMPEXC"), // CLIBfcmpexc
Y(FLOATREGS_32,"_FNEG"), // CLIBfneg
Y(mAX|mBX|mCX|mDX,"_FADD"), // CLIBfadd
Y(mAX|mBX|mCX|mDX,"_FSUB"), // CLIBfsub
#endif
Y(DOUBLEREGS_32,"_DBLLNG"), // CLIBdbllng
Y(DOUBLEREGS_32,"_LNGDBL"), // CLIBlngdbl
Y(DOUBLEREGS_32,"_DBLINT"), // CLIBdblint
Y(DOUBLEREGS_32,"_INTDBL"), // CLIBintdbl
Y(DOUBLEREGS_32,"_DBLUNS"), // CLIBdbluns
Y(DOUBLEREGS_32,"_UNSDBL"), // CLIBunsdbl
Y(mAX|mST0,"_DBLULNG"), // CLIBdblulng
#if 0
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _ULNGDBL@ ulngdbl
#endif
Y(DOUBLEREGS_32,"_DBLFLT"), // CLIBdblflt
Y(DOUBLEREGS_32,"_FLTDBL"), // CLIBfltdbl
Y(DOUBLEREGS_32,"_DBLLLNG"), // CLIBdblllng
Y(DOUBLEREGS_32,"_LLNGDBL"), // CLIBllngdbl
Y(DOUBLEREGS_32,"_DBLULLNG"), // CLIBdblullng
Y(DOUBLEREGS_32,"_ULLNGDBL"), // CLIBullngdbl
Y(0,"_DTST"), // CLIBdtst
Y(mES|mBX,"_HTOFPTR"), // CLIBvptrfptr
Y(mES|mBX,"_HCTOFPTR"), // CLIBcvptrfptr
Y(0,"_87TOPSW"), // CLIB87topsw
Y(mST0,"_FLTTO87"), // CLIBfltto87
Y(mST0,"_DBLTO87"), // CLIBdblto87
Y(mST0|mAX,"_DBLINT87"), // CLIBdblint87
Y(mST0|mAX|mDX,"_DBLLNG87"), // CLIBdbllng87
Y(0,"_FTST"), // CLIBftst
Y(0,"_FCOMPP"), // CLIBfcompp
Y(0,"_FTEST"), // CLIBftest
Y(0,"_FTEST0"), // CLIBftest0
Y(mST0|mAX|mBX|mCX|mDX,"_FDIVP"), // CLIBfdiv87
Y(mST0|mST01,"Cmul"), // CLIBcmul
Y(mAX|mCX|mDX|mST0|mST01,"Cdiv"), // CLIBcdiv
Y(mAX|mST0|mST01,"Ccmp"), // CLIBccmp
Y(mST0,"_U64_LDBL"), // CLIBu64_ldbl
#if ELFOBJ || MACHOBJ
Y(mST0|mAX|mDX,"_LDBLULLNG"), // CLIBld_u64
#else
Y(mST0|mAX|mDX,"__LDBLULLNG"), // CLIBld_u64
#endif
};
#else
static symbol lib[CLIBMAX] =
{
/* Convert destroyed regs into saved regs */
#define Z(desregs) (~(desregs) & (mBP| mES | ALLREGS))
/* Shorthand to map onto SYMBOLY() */
#define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),name,0)
Y(0,"_LCMP@"),
Y(mAX|mCX|mDX,"_LMUL@"),
Y(ALLREGS,"_LDIV@"),
Y(ALLREGS,"_LDIV@"),
Y(ALLREGS,"_ULDIV@"),
Y(ALLREGS,"_ULDIV@"),
Y(mAX|mBX|mCX|mDX,"_DMUL@"),
Y(mAX|mBX|mCX|mDX,"_DDIV@"),
Y(0,"_DTST0@"),
Y(0,"_DTST0EXC@"),
Y(0,"_DCMP@"),
Y(0,"_DCMPEXC@"),
/* _DNEG@ only really destroys EDX, but then EAX would hold */
/* 2 values, and we can't handle that. */
/* _DNEG@ only really destroys AX, but then BX,CX,DX would hold */
/* 2 values, and we can't handle that. */
Y(DOUBLEREGS_16,"_DNEG@"),
Y(mAX|mBX|mCX|mDX,"_DADD@"),
Y(mAX|mBX|mCX|mDX,"_DSUB@"),
Y(mAX|mBX|mCX|mDX,"_FMUL@"),
Y(mAX|mBX|mCX|mDX,"_FDIV@"),
Y(0,"_FTST0@"),
Y(0,"_FTST0EXC@"),
Y(0,"_FCMP@"),
Y(0,"_FCMPEXC@"),
Y(FLOATREGS_16,"_FNEG@"),
Y(mAX|mBX|mCX|mDX,"_FADD@"),
Y(mAX|mBX|mCX|mDX,"_FSUB@"),
Y(DOUBLEREGS_16,"_DBLLNG@"),
Y(DOUBLEREGS_16,"_LNGDBL@"),
Y(DOUBLEREGS_16,"_DBLINT@"),
Y(DOUBLEREGS_16,"_INTDBL@"),
Y(DOUBLEREGS_16,"_DBLUNS@"),
Y(DOUBLEREGS_16,"_UNSDBL@"),
Y(DOUBLEREGS_16,"_DBLULNG@"),
Y(DOUBLEREGS_16,"_ULNGDBL@"),
Y(DOUBLEREGS_16,"_DBLFLT@"),
Y(ALLREGS,"_FLTDBL@"),
Y(DOUBLEREGS_16,"_DBLLLNG@"),
Y(DOUBLEREGS_16,"_LLNGDBL@"),
#if 0
Y(DOUBLEREGS_16,"__DBLULLNG"),
#else
Y(DOUBLEREGS_16,"_DBLULLNG@"),
#endif
Y(DOUBLEREGS_16,"_ULLNGDBL@"),
Y(0,"_DTST@"),
Y(mES|mBX,"_HTOFPTR@"), // CLIBvptrfptr
Y(mES|mBX,"_HCTOFPTR@"), // CLIBcvptrfptr
Y(0,"_87TOPSW@"), // CLIB87topsw
Y(mST0,"_FLTTO87@"), // CLIBfltto87
Y(mST0,"_DBLTO87@"), // CLIBdblto87
Y(mST0|mAX,"_DBLINT87@"), // CLIBdblint87
Y(mST0|mAX|mDX,"_DBLLNG87@"), // CLIBdbllng87
Y(0,"_FTST@"),
Y(0,"_FCOMPP@"), // CLIBfcompp
Y(0,"_FTEST@"), // CLIBftest
Y(0,"_FTEST0@"), // CLIBftest0
Y(mST0|mAX|mBX|mCX|mDX,"_FDIVP"), // CLIBfdiv87
// NOTE: desregs is wrong for 16 bit code, mBX should be included
Y(mST0|mST01,"_Cmul"), // CLIBcmul
Y(mAX|mCX|mDX|mST0|mST01,"_Cdiv"), // CLIBcdiv
Y(mAX|mST0|mST01,"_Ccmp"), // CLIBccmp
Y(mST0,"_U64_LDBL"), // CLIBu64_ldbl
Y(mST0|mAX|mDX,"__LDBLULLNG"), // CLIBld_u64
};
#endif
// Calling information for each routine, indexed by CLIBxxxx in parallel
// with lib[] (both tables are asserted below to hold CLIBMAX entries).
// Several entries spell the flags field as a literal 2, which is the value of INFfloat.
static struct
{
regm_t retregs16; /* registers that 16 bit result is returned in */
regm_t retregs32; /* registers that 32 bit result is returned in */
char pop; /* # of bytes popped off of stack upon return */
char flags;
#define INF32 1 // if 32 bit only
#define INFfloat 2 // if this is floating point
#define INFwkdone 4 // if weak extern is already done
#define INF64 8 // if 64 bit only
char push87; // # of pushes onto the 8087 stack
char pop87; // # of pops off of the 8087 stack
} info[CLIBMAX] =
{
{0,0,0,0}, /* _LCMP@ lcmp */
{mDX|mAX,mDX|mAX,0,0}, // _LMUL@ lmul
{mDX|mAX,mDX|mAX,0,0}, // _LDIV@ ldiv
{mCX|mBX,mCX|mBX,0,0}, /* _LDIV@ lmod */
{mDX|mAX,mDX|mAX,0,0}, /* _ULDIV@ uldiv */
{mCX|mBX,mCX|mBX,0,0}, /* _ULDIV@ ulmod */
#if TARGET_WINDOS
{DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1}, // _DMUL@ dmul
{DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1}, // _DDIV@ ddiv
{0,0,0,2}, // _DTST0@
{0,0,0,2}, // _DTST0EXC@
{0,0,8,INFfloat,1,1}, // _DCMP@ dcmp
{0,0,8,INFfloat,1,1}, // _DCMPEXC@ dcmp
{DOUBLEREGS_16,DOUBLEREGS_32,0,2}, // _DNEG@ dneg
{DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1}, // _DADD@ dadd
{DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1}, // _DSUB@ dsub
{FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1}, // _FMUL@ fmul
{FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1}, // _FDIV@ fdiv
{0,0,0,2}, // _FTST0@
{0,0,0,2}, // _FTST0EXC@
{0,0,0,INFfloat,1,1}, // _FCMP@ fcmp
{0,0,0,INFfloat,1,1}, // _FCMPEXC@ fcmp
{FLOATREGS_16,FLOATREGS_32,0,2}, // _FNEG@ fneg
{FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1}, // _FADD@ fadd
{FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1}, // _FSUB@ fsub
#endif
{mDX|mAX,mAX,0,INFfloat,1,1}, // _DBLLNG@ dbllng
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _LNGDBL@ lngdbl
{mAX,mAX,0,INFfloat,1,1}, // _DBLINT@ dblint
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _INTDBL@ intdbl
{mAX,mAX,0,INFfloat,1,1}, // _DBLUNS@ dbluns
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
{mDX|mAX,mAX,0,INF32|INFfloat,0,1}, // _DBLULNG@ dblulng
#else
{mDX|mAX,mAX,0,INFfloat,1,1}, // _DBLULNG@ dblulng
#endif
#if TARGET_WINDOS
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _ULNGDBL@ ulngdbl
#endif
{FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1}, // _DBLFLT@ dblflt
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _FLTDBL@ fltdbl
{DOUBLEREGS_16,mDX|mAX,0,INFfloat,1,1}, // _DBLLLNG@
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _LLNGDBL@
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
{DOUBLEREGS_16,mDX|mAX,0,INFfloat,2,2}, // _DBLULLNG@
#else
{DOUBLEREGS_16,mDX|mAX,0,INFfloat,1,1}, // _DBLULLNG@
#endif
{DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _ULLNGDBL@
{0,0,0,2}, // _DTST@ dtst
{mES|mBX,mES|mBX,0,0}, // _HTOFPTR@ vptrfptr
{mES|mBX,mES|mBX,0,0}, // _HCTOFPTR@ cvptrfptr
{0,0,0,2}, // _87TOPSW@ 87topsw
{mST0,mST0,0,INFfloat,1,0}, // _FLTTO87@ fltto87
{mST0,mST0,0,INFfloat,1,0}, // _DBLTO87@ dblto87
{mAX,mAX,0,2}, // _DBLINT87@ dblint87
{mDX|mAX,mAX,0,2}, // _DBLLNG87@ dbllng87
{0,0,0,2}, // _FTST@
{mPSW,mPSW,0,INFfloat,0,2}, // _FCOMPP@
{mPSW,mPSW,0,2}, // _FTEST@
{mPSW,mPSW,0,2}, // _FTEST0@
{mST0,mST0,0,INFfloat,1,1}, // _FDIV@
{mST01,mST01,0,INF32|INFfloat,3,5}, // _Cmul
{mST01,mST01,0,INF32|INFfloat,0,2}, // _Cdiv
{mPSW, mPSW, 0,INF32|INFfloat,0,4}, // _Ccmp
{mST0,mST0,0,INF32|INF64|INFfloat,2,1}, // _U64_LDBL
{0,mDX|mAX,0,INF32|INF64|INFfloat,1,2}, // __LDBLULLNG
};
// One-time setup: finish initializing the symbols and, for non-16-bit
// targets, replace the saved-register masks with their 32 bit versions
if (!clib_inited) /* if not initialized */
{
assert(sizeof(lib) / sizeof(lib[0]) == CLIBMAX);
assert(sizeof(info) / sizeof(info[0]) == CLIBMAX);
for (int i = 0; i < CLIBMAX; i++)
{ lib[i].Stype = tsclib;
#if MARS
lib[i].Sxtrnnum = 0;
lib[i].Stypidx = 0;
#endif
}
if (!I16)
{ /* Adjust table for 386 */
lib[CLIBdbllng].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBlngdbl].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBdblint].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBintdbl].Sregsaved = Z(DOUBLEREGS_32);
#if TARGET_WINDOS
lib[CLIBfneg].Sregsaved = Z(FLOATREGS_32);
lib[CLIBdneg].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBdbluns].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBunsdbl].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBdblulng].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBulngdbl].Sregsaved = Z(DOUBLEREGS_32);
#endif
lib[CLIBdblflt].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBfltdbl].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBdblllng].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBllngdbl].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBdblullng].Sregsaved = Z(DOUBLEREGS_32);
lib[CLIBullngdbl].Sregsaved = Z(DOUBLEREGS_32);
if (I64)
{
info[CLIBullngdbl].retregs32 = mAX;
info[CLIBdblullng].retregs32 = mAX;
}
}
clib_inited++;
}
#undef Z
assert(clib < CLIBMAX);
symbol *s = &lib[clib];
// Routines flagged as 32 or 64 bit only must not be requested in 16 bit code
if (I16)
assert(!(info[clib].flags & (INF32 | INF64)));
code *cpop = CNIL;
code *c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed
// Only registers the routine does not itself preserve need saving
keepmask &= ~s->Sregsaved;
int npushed = numbitsset(keepmask);
// Save code is appended to c, the matching restore code is built up in cpop
gensaverestore2(keepmask, &c, &cpop);
#if 0
while (keepmask)
{ unsigned keepreg;
if (keepmask & (mBP|ALLREGS))
{ keepreg = findreg(keepmask & (mBP|ALLREGS));
c = gen1(c,0x50 + keepreg); /* PUSH keepreg */
cpop = cat(gen1(CNIL,0x58 + keepreg),cpop); // POP keepreg
keepmask &= ~mask[keepreg];
npushed++;
}
if (keepmask & mES)
{ c = gen1(c,0x06); /* PUSH ES */
cpop = cat(gen1(CNIL,0x07),cpop); /* POP ES */
keepmask &= ~mES;
npushed++;
}
}
#endif
// Account for the routine's effect on the 8087 stack as given in info[]
c = cat(c, save87regs(info[clib].push87));
for (int i = 0; i < info[clib].push87; i++)
c = cat(c, push87());
for (int i = 0; i < info[clib].pop87; i++)
pop87();
// Long multiply on a 386 or later when not generating 32 bit code:
// emit the instruction bytes below inline instead of calling the routine
if (config.target_cpu >= TARGET_80386 && clib == CLIBlmul && !I32)
{ static char lmul[] = {
0x66,0xc1,0xe1,0x10, // shl ECX,16
0x8b,0xcb, // mov CX,BX ;ECX = CX,BX
0x66,0xc1,0xe0,0x10, // shl EAX,16
0x66,0x0f,0xac,0xd0,0x10, // shrd EAX,EDX,16 ;EAX = DX,AX
0x66,0xf7,0xe1, // mul ECX
0x66,0x0f,0xa4,0xc2,0x10, // shld EDX,EAX,16 ;DX,AX = EAX
};
c = genasm(c,lmul,sizeof(lmul));
}
else
{ makeitextern(s);
int nalign = 0;
if (STACKALIGN == 16)
{ // Align the stack (assume no args on stack)
int npush = npushed * REGSIZE + stackpush;
if (npush & (STACKALIGN - 1))
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
if (I64)
code_orrex(c, REX_W);
}
}
c = gencs(c,(LARGECODE) ? 0x9A : 0xE8,0,FLfunc,s); // CALL s
if (nalign)
{ c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
if (I64)
code_orrex(c, REX_W);
}
calledafunc = 1;
// INFwkdone makes sure the weak extern is emitted only once per routine
if (I16 && // bug in Optlink for weak references
config.flags3 & CFG3wkfloat &&
(info[clib].flags & (INFfloat | INFwkdone)) == INFfloat)
{ info[clib].flags |= INFwkdone;
makeitextern(rtlsym[RTLSYM_INTONLY]);
obj_wkext(s,rtlsym[RTLSYM_INTONLY]);
}
}
// In 16 bit code the routine pops 'pop' bytes of arguments itself
if (I16)
stackpush -= info[clib].pop;
regm_t retregs = I16 ? info[clib].retregs16 : info[clib].retregs32;
return cat(cat(c,cpop),fixresult(e,retregs,pretregs));
}
/*************************************************
 * Flatten a tree of OPparam nodes into an array of Parameters.
 * Leaves are stored in E1-before-E2 order; only the .e member of each
 * entry is filled in here.
 * Input:
 *      e               parameter tree, or a single parameter expression
 *      parameters      array receiving one entry per leaf
 *      pi              in/out: index of the next free entry in parameters[]
 * Each OPparam node is handed to freenode() after both of its operands
 * have been visited.
 */

struct Parameter
{
    elem *e;            // the parameter expression
    int reg;            // register assigned by cdfunc(), -1 if passed on the stack
    unsigned numalign;  // stack adjustment cdfunc() applies after pushing it
};

void fillParameters(elem *e, Parameter *parameters, int *pi)
{
    if (e->Eoper != OPparam)
    {
        // Leaf: record it in the next free slot
        Parameter *slot = &parameters[*pi];
        slot->e = e;
        *pi += 1;
        return;
    }

    // Interior node: visit both operands, then release the node itself
    fillParameters(e->E1, parameters, pi);
    fillParameters(e->E2, parameters, pi);
    freenode(e);
}
/*******************************
* Generate code sequence for function call.
* Evaluates the parameters (pushing them and/or loading them into
* registers, depending on the target and the function type), inserts
* stack alignment padding where STACKALIGN is 16, then hands over to
* funccall() to emit the call itself.
* Input:
*      e               the call expression: e->E1 is the function, and when
*                      the operator is binary e->E2 holds the parameters
*      pretregs        where the result is wanted (passed on to funccall())
* Returns:
*      generated code
*/
code *cdfunc(elem *e,regm_t *pretregs)
{ unsigned numpara = 0;
unsigned stackpushsave;
unsigned preg;
regm_t keepmsk;
unsigned numalign = 0;
code *c;
//printf("cdfunc()\n"); elem_print(e);
assert(e);
stackpushsave = stackpush; /* so we can compute # of parameters */
cgstate.stackclean++;
c = CNIL;
keepmsk = 0;
if (OTbinary(e->Eoper)) // if parameters
{
if (I16)
{
// 16 bit: everything is pushed, using a stack alignment of 2
c = cat(c, params(e->E2,2)); // push parameters
}
else if (I32)
{
unsigned stackalign = REGSIZE;
tym_t tyf = tybasic(e->E1->Ety);
// First compute numpara, the total bytes pushed on the stack
switch (tyf)
{
#if TARGET_SEGMENTED
case TYf16func:
stackalign = 2;
goto Ldefault;
#endif
case TYmfunc:
case TYjfunc:
// last parameter goes into register
elem *ep;
for (ep = e->E2; ep->Eoper == OPparam; ep = ep->E2)
{
numpara += paramsize(ep->E1,stackalign);
}
unsigned sz;
// For TYjfunc the last parameter is pushed after all if its type
// does not qualify for the register
if (tyf == TYjfunc &&
// This must match type_jparam()
!(tyjparam(ep->Ety) ||
((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) &&
(sz = type_size(ep->ET)) <= intsize && sz != 3 && sz)
)
)
{
numpara += paramsize(ep,stackalign);
}
break;
default:
Ldefault:
numpara += paramsize(e->E2,stackalign);
break;
}
assert((numpara & (REGSIZE - 1)) == 0);
assert((stackpush & (REGSIZE - 1)) == 0);
/* Special handling for call to __tls_get_addr, we must save registers
* before evaluating the parameter, so that the parameter load and call
* are adjacent.
*/
if (e->E2->Eoper != OPparam && e->E1->Eoper == OPvar)
{ symbol *s = e->E1->EV.sp.Vsym;
if (s == tls_get_addr_sym)
c = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
}
/* Adjust start of the stack so after all args are pushed,
* the stack will be aligned.
*/
if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1))
{
numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
c = genc2(c,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign
if (I64)
code_orrex(c, REX_W);
c = genadjesp(c, numalign);
// Padding is added to both counters so it is not counted as parameter bytes
stackpush += numalign;
stackpushsave += numalign;
}
// Second pass: actually generate the code for the parameters
switch (tyf)
{
#if TARGET_SEGMENTED
case TYf16func:
stackalign = 2;
goto Ldefault2;
#endif
case TYmfunc: // last parameter goes into ECX
preg = CX;
goto L1;
case TYjfunc: // last parameter goes into EAX
preg = AX;
goto L1;
L1:
{ elem *ep;
elem *en;
// Push every parameter except the last; en holds the next node
// because ep is released inside the loop
for (ep = e->E2; ep->Eoper == OPparam; ep = en)
{
c = cat(c,params(ep->E1,stackalign));
en = ep->E2;
freenode(ep);
}
unsigned sz;
if (tyf == TYjfunc &&
// This must match type_jparam()
!(tyjparam(ep->Ety) ||
((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) &&
(sz = type_size(ep->ET)) <= intsize && sz != 3 && sz)
)
)
{
c = cat(c,params(ep,stackalign));
goto Lret;
}
// preg is the register to put the parameter ep in
keepmsk = mask[preg]; // don't change preg when evaluating func address
regm_t retregs = keepmsk;
if (ep->Eoper == OPstrthis)
{ code *c2;
code *c1 = getregs(retregs);
// LEA preg,np[ESP]
unsigned np = stackpush - ep->EV.Vuns; // stack delta to parameter
c2 = genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | modregrm(2,preg,4),FLconst,np);
if (I64)
code_orrex(c2, REX_W);
c = cat3(c,c1,c2);
}
else
{ code *cp = codelem(ep,&retregs,FALSE);
c = cat(c,cp);
}
goto Lret;
}
default:
Ldefault2:
c = cat(c, params(e->E2,stackalign)); // push parameters
break;
}
}
else
{ assert(I64);
// Easier to deal with parameters as an array: parameters[0..np]
int np = el_nparams(e->E2);
Parameter *parameters = (Parameter *)alloca(np * sizeof(Parameter));
{ int n = 0;
fillParameters(e->E2, parameters, &n);
assert(n == np);
}
/* Special handling for call to __tls_get_addr, we must save registers
* before evaluating the parameter, so that the parameter load and call
* are adjacent.
*/
if (np == 1 && e->E1->Eoper == OPvar)
{ symbol *s = e->E1->EV.sp.Vsym;
if (s == tls_get_addr_sym)
c = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
}
unsigned stackalign = REGSIZE;
// Figure out which parameters go in registers
// Compute numpara, the total bytes pushed on the stack
int r = 0;
int xmmcnt = XMM0;
// Registers are handed out walking the array from its last entry to its first
for (int i = np; --i >= 0;)
{
static const unsigned char argregs[6] = { DI,SI,DX,CX,R8,R9 };
elem *ep = parameters[i].e;
tym_t ty = ep->Ety;
if (r < sizeof(argregs)/sizeof(argregs[0])) // if more arg regs
{ unsigned sz;
if (
// This must match type_jparam()
ty64reg(ty) ||
((tybasic(ty) == TYstruct || tybasic(ty) == TYarray) &&
((sz = type_size(ep->ET)) == 1 || sz == 2 || sz == 4 || sz == 8))
)
{
parameters[i].reg = argregs[r];
r++;
continue; // goes in register, not stack
}
}
if (xmmcnt <= XMM7)
{
if (tyxmmreg(ty))
{
parameters[i].reg = xmmcnt;
xmmcnt++;
continue; // goes in register, not stack
}
}
// Parameter i goes on the stack
parameters[i].reg = -1; // -1 means no register
unsigned alignsize = el_alignsize(ep);
parameters[i].numalign = 0;
// Over-aligned parameter: round numpara up and remember the padding
if (alignsize > stackalign)
{ unsigned newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
parameters[i].numalign = newnumpara - numpara;
numpara = newnumpara;
}
numpara += paramsize(ep,stackalign);
}
assert((numpara & (REGSIZE - 1)) == 0);
assert((stackpush & (REGSIZE - 1)) == 0);
/* Should consider reordering the order of evaluation of the parameters
* so that args that go into registers are evaluated after args that get
* pushed. We can reorder args that are constants or relconst's.
*/
/* Adjust start of the stack so after all args are pushed,
* the stack will be aligned.
*/
if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1))
{
numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
c = genc2(c,0x81,(REX_W << 16) | modregrm(3,5,SP),numalign); // SUB RSP,numalign
c = genadjesp(c, numalign);
stackpush += numalign;
stackpushsave += numalign;
}
// NOTE(review): regsaved is initialized here but not referenced again in this function
int regsaved[XMM7 + 1];
memset(regsaved, -1, sizeof(regsaved));
code *crest = NULL;
regm_t saved = 0;
/* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
* float and double parameters go into XMM0..XMM7
* For variadic functions, count of XMM registers used goes in AL
*/
for (int i = 0; i < np; i++)
{
elem *ep = parameters[i].e;
// Note: shadows the function-level 'preg'
int preg = parameters[i].reg;
if (preg == -1)
{
/* Push parameter on stack, but keep track of registers used
* in the process. If they interfere with keepmsk, we'll have
* to save/restore them.
*/
code *csave = NULL;
regm_t overlap = msavereg & keepmsk;
msavereg |= keepmsk;
code *cp = params(ep,stackalign);
// Bits of keepmsk no longer set in msavereg after params() are the ones to save
regm_t tosave = keepmsk & ~msavereg;
// Put msavereg back: drop the keepmsk bits except those that were set before
msavereg &= ~keepmsk | overlap;
// tosave is the mask to save and restore
for (int j = 0; tosave; j++)
{ regm_t mi = mask[j];
assert(j <= XMM7);
if (mi & tosave)
{
unsigned idx;
csave = regsave.save(csave, j, &idx);
crest = regsave.restore(crest, j, idx);
saved |= mi;
keepmsk &= ~mi; // don't need to keep these for rest of params
tosave &= ~mi;
}
}
// The save code is placed ahead of the parameter code
c = cat4(c, csave, cp, NULL);
// Alignment for parameter comes after it got pushed
// Note: shadows the function-level 'numalign'
unsigned numalign = parameters[i].numalign;
if (numalign)
{
c = genc2(c,0x81,(REX_W << 16) | modregrm(3,5,SP),numalign); // SUB RSP,numalign
c = genadjesp(c, numalign);
stackpush += numalign;
}
}
else
{
// Goes in register preg, not stack
regm_t retregs = mask[preg];
if (ep->Eoper == OPstrthis)
{
code *c1 = getregs(retregs);
// LEA preg,np[RSP]
// Note: shadows the parameter count 'np' of the enclosing block
unsigned np = stackpush - ep->EV.Vuns; // stack delta to parameter
code *c2 = genc1(CNIL,0x8D,(REX_W << 16) |
(modregrm(0,4,SP) << 8) |
modregxrm(2,preg,4), FLconst,np);
c = cat3(c,c1,c2);
}
else
{ code *cp = scodelem(ep,&retregs,keepmsk,FALSE);
c = cat(c,cp);
}
keepmsk |= retregs; // don't change preg when evaluating func address
}
}
// Restore any register parameters we saved
c = cat4(c, getregs(saved), crest, NULL);
keepmsk |= saved;
// Variadic functions store the number of XMM registers used in AL
if (e->Eflags & EFLAGS_variadic)
{ code *c1 = getregs(mAX);
c1 = movregconst(c1,AX,xmmcnt - XMM0,1);
c = cat(c, c1);
keepmsk |= mAX;
}
}
}
else
{
// No parameters at all
/* Adjust start of the stack so
* the stack will be aligned.
*/
if (STACKALIGN == 16 && (stackpush) & (STACKALIGN - 1))
{
numalign = STACKALIGN - ((stackpush) & (STACKALIGN - 1));
c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign
if (I64)
code_orrex(c, REX_W);
c = genadjesp(c, numalign);
stackpush += numalign;
stackpushsave += numalign;
}
// Variadic functions store the number of XMM registers used in AL
if (I64 && e->Eflags & EFLAGS_variadic)
{ code *c1 = getregs(mAX);
c1 = movregconst(c1,AX,0,1);
c = cat(c, c1);
keepmsk |= mAX;
}
}
Lret:
cgstate.stackclean--;
// 16 bit code derives numpara from the stack growth; otherwise the value
// computed above must agree with what was actually pushed
if (I16)
numpara = stackpush - stackpushsave;
else
{
if (numpara != stackpush - stackpushsave)
printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
assert(numpara == stackpush - stackpushsave);
}
return cat(c,funccall(e,numpara,numalign,pretregs,keepmsk));
}
/***********************************
 * Generate code for an OPstrthis-style element: load into a register the
 * address of a location on the stack, expressed as an offset from the
 * stack pointer.
 * Input:
 *      e               element; e->EV.Vuns is subtracted from the current
 *                      stackpush to obtain the offset
 *      pretregs        where the result is wanted
 * Returns:
 *      generated code
 */

code *cdstrthis(elem *e,regm_t *pretregs)
{
    assert(tysize(e->Ety) == REGSIZE);

    // Pick the destination register out of the requested set and claim it
    unsigned dstreg = findreg(*pretregs & allregs);
    code *cclaim = getregs(mask[dstreg]);

    // LEA dstreg,delta[ESP]
    unsigned delta = stackpush - e->EV.Vuns;    // stack delta to parameter
    code *clea = genc1(CNIL,
                       0x8D,
                       (modregrm(0,4,SP) << 8) | modregxrm(2,dstreg,4),
                       FLconst,
                       delta);
    if (I64)
    {
        code_orrex(clea, REX_W);
    }

    code *cfix = fixresult(e,mask[dstreg],pretregs);
    return cat3(cclaim,clea,cfix);
}
/******************************
* Call function. All parameters are pushed onto the stack, numpara gives
* the size of them all.
*/
STATIC code * funccall(elem *e,unsigned numpara,unsigned numalign,regm_t *pretregs,regm_t keepmsk)
{
elem *e1;
code *c,*ce,cs;
tym_t tym1;
char farfunc;
regm_t retregs;
symbol *s;
//printf("funccall(e = %p, *pretregs = x%x, numpara = %d, numalign = %d)\n",e,*pretregs,numpara,numalign);
calledafunc = 1;
/* Determine if we need frame for function prolog/epilog */
#if TARGET_WINDOS
if (config.memmodel == Vmodel)
{
if (tyfarfunc(funcsym_p->ty()))
needframe = TRUE;
}
#endif
e1 = e->E1;
tym1 = tybasic(e1->Ety);
farfunc = tyfarfunc(tym1) || tym1 == TYifunc;
c = NULL;
if (e1->Eoper == OPvar)
{ /* Call function directly */
code *c1;
#ifdef DEBUG
if (!tyfunc(tym1)) WRTYxx(tym1);
#endif
assert(tyfunc(tym1));
s = e1->EV.sp.Vsym;
if (s->Sflags & SFLexit)
c = NULL;
else if (s != tls_get_addr_sym)
c = save87(); // assume 8087 regs are all trashed
if (s->Sflags & SFLexit)
// Function doesn't return, so don't worry about registers
// it may use
c1 = NULL;
else if (!tyfunc(s->ty()) || !(config.flags4 & CFG4optimized))
// so we can replace func at runtime
c1 = getregs(~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
else
c1 = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
if (strcmp(s->Sident,"alloca") == 0)
{
#if 1
s = rtlsym[RTLSYM_ALLOCA];
makeitextern(s);
c1 = cat(c1,getregs(mCX));
c1 = genc(c1,0x8D,modregrm(2,CX,BPRM),FLallocatmp,0,0,0); // LEA CX,&localsize[BP]
if (I64)
code_orrex(c1, REX_W);
usedalloca = 2; // new way
#else
usedalloca = 1; // old way
#endif
}
if (sytab[s->Sclass] & SCSS) // if function is on stack (!)
{
retregs = allregs & ~keepmsk;
s->Sflags &= ~GTregcand;
s->Sflags |= SFLread;
ce = cat(c1,cdrelconst(e1,&retregs));
#if TARGET_SEGMENTED
if (farfunc)
goto LF1;
else
#endif
goto LF2;
}
else
{ int fl;
fl = FLfunc;
if (!tyfunc(s->ty()))
fl = el_fl(e1);
if (tym1 == TYifunc)
c1 = gen1(c1,0x9C); // PUSHF
ce = CNIL;
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (s != tls_get_addr_sym)
{
//printf("call %s\n", s->Sident);
ce = load_localgot();
}
#endif
ce = gencs(ce,farfunc ? 0x9A : 0xE8,0,fl,s); // CALL extern
ce->Iflags |= farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff);
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
if (s == tls_get_addr_sym)
{
if (I32)
{
/* Append a NOP so GNU linker has patch room
*/
ce = gen1(ce, 0x90); // NOP
code_orflag(ce, CFvolatile); // don't schedule it
}
else
{ /* Prepend 66 66 48 so GNU linker has patch room
*/
assert(I64);
ce->Irex = REX | REX_W;
ce = cat(gen1(CNIL, 0x66), ce);
ce = cat(gen1(CNIL, 0x66), ce);
}
}
#endif
}
ce = cat(c1,ce);
}
else
{ /* Call function via pointer */
elem *e11;
tym_t e11ty;
#ifdef DEBUG
if (e1->Eoper != OPind
) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); }
#endif
c = save87(); // assume 8087 regs are all trashed
assert(e1->Eoper == OPind);
e11 = e1->E1;
e11ty = tybasic(e11->Ety);
#if TARGET_SEGMENTED
assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
#else
assert(!I16 || (e11ty == TYnptr));
#endif
/* if we can't use loadea() */
if ((EOP(e11) || e11->Eoper == OPconst) &&
(e11->Eoper != OPind || e11->Ecount))
{
unsigned reg;
retregs = allregs & ~keepmsk;
cgstate.stackclean++;
ce = scodelem(e11,&retregs,keepmsk,TRUE);
cgstate.stackclean--;
/* Kill registers destroyed by an arbitrary function call */
ce = cat(ce,getregs((mBP | ALLREGS | mES | XMMREGS) & ~fregsaved));
#if TARGET_SEGMENTED
if (e11ty == TYfptr)
{ unsigned lsreg;
LF1:
reg = findregmsw(retregs);
lsreg = findreglsw(retregs);
floatreg = TRUE; /* use float register */
reflocal = TRUE;
ce = genc1(ce,0x89, /* MOV floatreg+2,reg */
modregrm(2,reg,BPRM),FLfltreg,REGSIZE);
genc1(ce,0x89, /* MOV floatreg,lsreg */
modregrm(2,lsreg,BPRM),FLfltreg,0);
if (tym1 == TYifunc)
gen1(ce,0x9C); // PUSHF
genc1(ce,0xFF, /* CALL [floatreg] */
modregrm(2,3,BPRM),FLfltreg,0);
}
else
#endif
{
LF2:
reg = findreg(retregs);
ce = gen2(ce,0xFF,modregrmx(3,2,reg)); /* CALL reg */
if (I64)
code_orrex(ce, REX_W);
}
}
else
{
if (tym1 == TYifunc)
c = gen1(c,0x9C); // PUSHF
// CALL [function]
cs.Iflags = 0;
cgstate.stackclean++;
ce = loadea(e11,&cs,0xFF,farfunc ? 3 : 2,0,keepmsk,(mBP|ALLREGS|mES|XMMREGS) & ~fregsaved);
cgstate.stackclean--;
freenode(e11);
}
s = NULL;
}
c = cat(c,ce);
freenode(e1);
/* See if we will need the frame pointer.
Calculate it here so we can possibly use BP to fix the stack.
*/
#if 0
if (!needframe)
{ SYMIDX si;
/* If there is a register available for this basic block */
if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
;
else
{
for (si = 0; si < globsym.top; si++)
{ symbol *s = globsym.tab[si];
if (s->Sflags & GTregcand && type_size(s->Stype) != 0)
{
if (config.flags4 & CFG4optimized)
{ /* If symbol is live in this basic block and */
/* isn't already in a register */
if (s->Srange && vec_testbit(dfoidx,s->Srange) &&
s->Sfl != FLreg)
{ /* Then symbol must be allocated on stack */
needframe = TRUE;
break;
}
}
else
{ if (mfuncreg == 0) /* if no registers left */
{ needframe = TRUE;
break;
}
}
}
}
}
}
#endif
retregs = regmask(e->Ety, tym1);
// If stack needs cleanup
if (OTbinary(e->Eoper) &&
!typfunc(tym1) &&
!(s && s->Sflags & SFLexit))
{
if (tym1 == TYhfunc)
{ // Hidden parameter is popped off by the callee
c = genadjesp(c, -REGSIZE);
stackpush -= REGSIZE;
if (numpara + numalign > REGSIZE)
c = genstackclean(c, numpara + numalign - REGSIZE, retregs);
}
else
c = genstackclean(c,numpara + numalign,retregs);
}
else
{
c = genadjesp(c,-numpara);
stackpush -= numpara;
if (numalign)
c = genstackclean(c,numalign,retregs);
}
/* Special handling for functions which return a floating point
value in the top of the 8087 stack.
*/
if (retregs & mST0)
{
c = genadjfpu(c, 1);
if (*pretregs) // if we want the result
{ //assert(stackused == 0);
push87(); // one item on 8087 stack
return cat(c,fixresult87(e,retregs,pretregs));
}
else
/* Pop unused result off 8087 stack */
c = gen2(c,0xDD,modregrm(3,3,0)); /* FPOP */
}
else if (retregs & mST01)
{
c = genadjfpu(c, 2);
if (*pretregs) // if we want the result
{ assert(stackused == 0);
push87();
push87(); // two items on 8087 stack
return cat(c,fixresult_complex87(e,retregs,pretregs));
}
else
{
// Pop unused result off 8087 stack
c = gen2(c,0xDD,modregrm(3,3,0)); // FPOP
c = gen2(c,0xDD,modregrm(3,3,0)); // FPOP
}
}
return cat(c,fixresult(e,retregs,pretregs));
}
/***************************
 * Compute the number of bytes the parameter list rooted at e occupies
 * once every parameter has been rounded up with align(stackalign, ...).
 *
 * OPparam nodes form the list: each E1 subtree is sized recursively,
 * while the E2 chain is followed in place. The node that terminates the
 * chain is sized by its basic type (scalar or struct); any other type
 * trips the assert.
 */
targ_size_t paramsize(elem *e,unsigned stackalign)
{
    targ_size_t total = 0;

    // Sum the E1 side of every OPparam node, stepping along E2
    for (; e->Eoper == OPparam; e = e->E2)
        total += paramsize(e->E1,stackalign);

    // e is now the last (non-OPparam) element of the chain
    tym_t ty = tybasic(e->Ety);
    targ_size_t rawsize;                // size before alignment
    if (tyscalar(ty))
        rawsize = size(ty);
    else if (ty == TYstruct)
        rawsize = type_size(e->ET);
    else
    {
#ifdef DEBUG
        WRTYxx(ty);
#endif
        assert(0);
    }

    // round up to the stack alignment and add to the running total
    return total + align(stackalign,rawsize);
}
/***************************
 * Generate code to push parameter list.
 * stackpush is incremented by stackalign for each PUSH.
 *
 * Params:
 *      e          = parameter expression; OPparam nodes chain several
 *                   parameters, with the E1 subtrees handled by recursive
 *                   calls and the E2 chain walked in place
 *      stackalign = alignment each pushed parameter is rounded up to
 *
 * Returns:
 *      the code generated by the recursive calls followed by the code
 *      generated for the final parameter (cat(cp,c)).
 *
 * The switch on e->Eoper handles the forms that can be pushed directly
 * (struct parameters, memory operands, relocatable constants, constants).
 * A case that 'break's out of the switch falls into the generic path at
 * the bottom, which evaluates e into registers (or the x87/XMM units)
 * and pushes or stores the result.
 *
 * Labels:
 *      L1  - push a memory operand with PUSH EA via loadea()
 *      L2  - common exit for the direct forms: frees e, appends ce to c
 *      ret - final exit: returns cat(cp,c)
 */
code *params(elem *e,unsigned stackalign)
{   code *c,*ce,cs;
    code *cp;
    unsigned reg;
    targ_size_t szb;                    // size before alignment
    targ_size_t sz;                     // size after alignment
    tym_t tym;
    regm_t retregs;
    elem *e1;
    elem *e2;
    symbol *s;
    int fl;

    //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
    cp = NULL;
    stackchanged = 1;
    assert(e);
    while (e->Eoper == OPparam)         /* if more params */
    {
        e2 = e->E2;
        cp = cat(cp,params(e->E1,stackalign));  // push them backwards
        freenode(e);
        e = e2;
    }
    //printf("params()\n"); elem_print(e);

    tym = tybasic(e->Ety);
    if (tyfloating(tym))
        obj_fltused();

    // REX_W pre-shifted for OR-ing into the rm argument of 64 bit instructions
    int grex = I64 ? REX_W << 16 : 0;

    /* sz = number of bytes pushed */
    if (tyscalar(tym))
        szb = size(tym);
    else if (tym == TYstruct)
        szb = type_size(e->ET);
    else
    {
#ifdef DEBUG
        WRTYxx(tym);
#endif
        assert(0);
    }
    sz = align(stackalign,szb);         /* align on word stack boundary */
    assert((sz & (stackalign - 1)) == 0);       /* ensure that alignment worked */
    assert((sz & (REGSIZE - 1)) == 0);

    c = CNIL;
    cs.Iflags = 0;
    cs.Irex = 0;
    switch (e->Eoper)
    {
#if SCPP
        case OPstrctor:
        {
            e1 = e->E1;
            c = docommas(&e1);          /* skip over any comma expressions */

            c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sizeof(struct)
            stackpush += sz;
            genadjesp(c,sz);

            // Find OPstrthis and set it to stackpush
            exp2_setstrthis(e1,NULL,stackpush,NULL);

            retregs = 0;
            ce = codelem(e1,&retregs,TRUE);
            goto L2;
        }
        case OPstrthis:
            // This is the parameter for the 'this' pointer corresponding to
            // OPstrctor. We push a pointer to an object that was already
            // allocated on the stack by OPstrctor.
        {   unsigned np;

            retregs = allregs;
            c = allocreg(&retregs,&reg,TYoffset);
            c = genregs(c,0x89,SP,reg);         // MOV reg,SP
            if (I64)
                code_orrex(c, REX_W);
            np = stackpush - e->EV.Vuns;        // stack delta to parameter
            c = genc2(c,0x81,grex | modregrmx(3,0,reg),np); // ADD reg,np
            if (sz > REGSIZE)
            {   c = gen1(c,0x16);               // PUSH SS
                stackpush += REGSIZE;
            }
            c = gen1(c,0x50 + (reg & 7));       // PUSH reg
            if (reg & 8)
                code_orrex(c, REX_B);
            stackpush += REGSIZE;
            genadjesp(c,sz);
            ce = CNIL;
            goto L2;
        }
#endif
        case OPstrpar:
        {   code *cc,*c1,*c2,*c3;
            unsigned rm;
            unsigned seg;               // segment override prefix flags
            bool doneoff;
            unsigned pushsize = REGSIZE;
            unsigned op16 = 0;
            unsigned npushes;

            e1 = e->E1;
            if (sz == 0)
            {
                ce = docommas(&e1);     /* skip over any commas */
                goto L2;
            }
            // Small struct held in a variable: push it like an ordinary OPvar
            if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1->Eoper == OPvar)
            {   freenode(e);
                e = e1;
                goto L1;
            }
            cc = docommas(&e1);         /* skip over any commas */
            seg = 0;                    /* assume no seg override */
            retregs = sz ? IDXREGS : 0;
            doneoff = FALSE;
            if (!I16 && sz & 2)         // if odd number of words to push
            {   pushsize = 2;
                op16 = 1;
            }
            else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
            {   pushsize = 4;           // push DWORDs at a time
                op16 = 1;
            }
            npushes = sz / pushsize;
            switch (e1->Eoper)
            {   case OPind:
#if TARGET_SEGMENTED
                    if (sz)
                    {   switch (tybasic(e1->E1->Ety))
                        {
                            case TYfptr:
                            case TYhptr:
                                seg = CFes;
                                retregs |= mES;
                                break;
                            case TYsptr:
                                if (config.wflags & WFssneds)
                                    seg = CFss;
                                break;
                            case TYcptr:
                                seg = CFcs;
                                break;
                        }
                    }
#endif
                    c1 = codelem(e1->E1,&retregs,FALSE);
                    freenode(e1);
                    break;
                case OPvar:
                    /* Symbol is no longer a candidate for a register */
                    e1->EV.sp.Vsym->Sflags &= ~GTregcand;

                    if (!e1->Ecount && npushes > 4)
                    {   /* Kludge to point at last word in struct. */
                        /* Don't screw up CSEs. */
                        e1->EV.sp.Voffset += sz - pushsize;
                        doneoff = TRUE;
                    }
                    //if (LARGEDATA) /* if default isn't DS */
                    {   static unsigned segtocf[4] = { CFes,CFcs,CFss,0 };
                        unsigned s;
                        int fl;

                        fl = el_fl(e1);
#if TARGET_SEGMENTED
                        if (fl == FLfardata)
                        {   seg = CFes;
                            retregs |= mES;
                        }
                        else
#endif
                        {
                            s = segfl[fl];
                            assert(s < 4);
                            seg = segtocf[s];
                            if (seg == CFss && !(config.wflags & WFssneds))
                                seg = 0;
                        }
                    }
#if TARGET_SEGMENTED
                    if (e1->Ety & mTYfar)
                    {   seg = CFes;
                        retregs |= mES;
                    }
#endif
                    c1 = cdrelconst(e1,&retregs);
                    /* Reverse the effect of the previous add */
                    if (doneoff)
                        e1->EV.sp.Voffset -= sz - pushsize;
                    freenode(e1);
                    break;
                case OPstreq:
                //case OPcond:
                    if (!(config.exe & EX_flat))
                    {   seg = CFes;
                        retregs |= mES;
                    }
                    c1 = codelem(e1,&retregs,FALSE);
                    break;
                default:
#ifdef DEBUG
                    elem_print(e1);
#endif
                    assert(0);
            }
            reg = findreglsw(retregs);
            rm = I16 ? regtorm[reg] : regtorm32[reg];
            if (op16)
                seg |= CFopsize;        // operand size
            if (npushes <= 4)
            {
                // Few enough pushes: emit them inline, highest offset first
                assert(!doneoff);
                for (c2 = CNIL; npushes > 1; npushes--)
                {   c2 = genc1(c2,0xFF,buildModregrm(2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg]
                    code_orflag(c2,seg);
                    genadjesp(c2,pushsize);
                }
                c3 = gen2(CNIL,0xFF,buildModregrm(0,6,rm));     // PUSH [reg]
                c3->Iflags |= seg;
                genadjesp(c3,pushsize);
                ce = cat4(cc,c1,c2,c3);
            }
            else if (sz)
            {   int size;

                // Many pushes: count in CX and push in a LOOP
                c2 = getregs_imm(mCX | retregs);
                                                /* MOV CX,sz/2 */
                c2 = movregconst(c2,CX,npushes,0);
                if (!doneoff)
                {   /* This disgusting thing should be done when */
                    /* reg is loaded. Too lazy to fix it now. */
                                                /* ADD reg,sz-2 */
                    c2 = genc2(c2,0x81,grex | modregrmx(3,0,reg),sz-pushsize);
                }
                c3 = getregs(mCX);              // the LOOP decrements it
                c3 = gen2(c3,0xFF,buildModregrm(0,6,rm));       // PUSH [reg]
                c3->Iflags |= seg | CFtarg2;
                genc2(c3,0x81,grex | buildModregrm(3,5,reg),pushsize);  // SUB reg,2
                // size is only consumed by the commented-out LOOP form below
                size = ((seg & CFSEG) ? -8 : -7) - op16;
                if (code_next(c3)->Iop != 0x81)
                    size++;
                //genc2(c3,0xE2,0,size); // LOOP .-7 or .-8
                genjmp(c3,0xE2,FLcode,(block *)c3);             // LOOP c3
                regimmed_set(CX,0);
                genadjesp(c3,sz);
                ce = cat4(cc,c1,c2,c3);
            }
            else
                ce = cat(cc,c1);
            stackpush += sz;
            goto L2;
        }
        case OPind:
            if (!e->Ecount)                     /* if *e1 */
            {   if (sz <= REGSIZE)
                {   // Watch out for single byte quantities being up
                    // against the end of a segment or in memory-mapped I/O
                    if (!(config.exe & EX_flat) && szb == 1)
                        break;
                    goto L1;            // can handle it with loadea()
                }

                // Avoid PUSH MEM on the Pentium when optimizing for speed
                if (config.flags4 & CFG4speed &&
                    (config.target_cpu >= TARGET_80486 &&
                     config.target_cpu <= TARGET_PentiumMMX) &&
                    sz <= 2 * REGSIZE &&
                    !tyfloating(tym))
                    break;

                if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
                    break;
                if (I32)
                {
                    assert(sz == REGSIZE * 2);
                    ce = loadea(e,&cs,0xFF,6,REGSIZE,0,0); /* PUSH EA+4 */
                    ce = genadjesp(ce,REGSIZE);
                }
                else
                {
                    if (sz == DOUBLESIZE)
                    {   ce = loadea(e,&cs,0xFF,6,DOUBLESIZE - REGSIZE,0,0); /* PUSH EA+6 */
                        cs.IEVoffset1 -= REGSIZE;
                        gen(ce,&cs);                    /* PUSH EA+4 */
                        ce = genadjesp(ce,REGSIZE);
                        getlvalue_lsw(&cs);
                        gen(ce,&cs);                    /* PUSH EA+2 */
                    }
                    else /* TYlong */
                        ce = loadea(e,&cs,0xFF,6,REGSIZE,0,0); /* PUSH EA+2 */
                    ce = genadjesp(ce,REGSIZE);
                }
                stackpush += sz;
                getlvalue_lsw(&cs);
                gen(ce,&cs);                            /* PUSH EA */
                ce = genadjesp(ce,REGSIZE);
                goto L2;
            }
            break;
#if TARGET_SEGMENTED
        case OPnp_fp:
            if (!e->Ecount)                     /* if (far *)e1 */
            {
                int segreg;
                tym_t tym1;

                e1 = e->E1;
                tym1 = tybasic(e1->Ety);
                /* BUG: what about pointers to functions? */
                switch (tym1)
                {
                    case TYnptr: segreg = 3<<3; break;
                    case TYcptr: segreg = 1<<3; break;
                    default:     segreg = 2<<3; break;
                }
                if (I32 && stackalign == 2)
                    c = gen1(c,0x66);           /* push a word */
                c = gen1(c,0x06 + segreg);      /* PUSH SEGREG */
                if (I32 && stackalign == 2)
                    code_orflag(c,CFopsize);    // push a word
                c = genadjesp(c,stackalign);
                stackpush += stackalign;
                ce = params(e1,stackalign);
                goto L2;
            }
            break;
#endif
        case OPrelconst:
#if TARGET_SEGMENTED
            /* Determine if we can just push the segment register */
            /* Test size of type rather than TYfptr because of (long)(&v) */
            s = e->EV.sp.Vsym;
            //if (sytab[s->Sclass] & SCSS && !I32) // if variable is on stack
            // needframe = TRUE; // then we need stack frame
            if (tysize[tym] == tysize[TYfptr] &&
                (fl = s->Sfl) != FLfardata &&
                /* not a function that CS might not be the segment of */
                (!((fl == FLfunc || s->ty() & mTYcs) &&
                  (s->Sclass == SCcomdat || s->Sclass == SCextern || s->Sclass == SCinline || config.wflags & WFthunk)) ||
                 (fl == FLfunc && config.exe == EX_DOSX)
                )
               )
            {
                stackpush += sz;
                c = gen1(c,0x06 +               /* PUSH SEGREG */
                        (((fl == FLfunc || s->ty() & mTYcs) ? 1 : segfl[fl]) << 3));
                c = genadjesp(c,REGSIZE);

                if (config.target_cpu >= TARGET_80286 && !e->Ecount)
                {   ce = getoffset(e,STACK);
                    goto L2;
                }
                else
                {   c = cat(c,offsetinreg(e,&retregs));
                    unsigned reg = findreg(retregs);
                    c = genpush(c,reg);         // PUSH reg
                    genadjesp(c,REGSIZE);
                }
                goto ret;
            }
            if (config.target_cpu >= TARGET_80286 && !e->Ecount)
            {
                stackpush += sz;
                if (tysize[tym] == tysize[TYfptr])
                {
                    /* PUSH SEG e */
                    code *c1 = gencs(CNIL,0x68,0,FLextern,s);
                    c1->Iflags = CFseg;
                    genadjesp(c1,REGSIZE);
                    c = cat(c,c1);
                }
                ce = getoffset(e,STACK);
                goto L2;
            }
#endif
            break;                              /* else must evaluate expression */
        case OPvar:
        L1:
            if (0 && I32 && sz == 2)            // branch is disabled by the leading 0
            {   /* 32 bit code, but pushing 16 bit values anyway */
                ce = loadea(e,&cs,0xFF,6,0,0,0);        /* PUSH EA */
                // BUG: 0x66 fails with scheduler
                ce = cat(gen1(CNIL,0x66),ce);           /* 16 bit override */
                stackpush += sz;
                genadjesp(ce,sz);
            }
            else if (config.flags4 & CFG4speed &&
                     (config.target_cpu >= TARGET_80486 &&
                      config.target_cpu <= TARGET_PentiumMMX) &&
                     sz <= 2 * REGSIZE &&
                     !tyfloating(tym))
            {   // Avoid PUSH MEM on the Pentium when optimizing for speed
                break;
            }
            else
            {   int regsize = REGSIZE;
                unsigned flag = 0;

                if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
                    !e->Ecount)
                {   regsize = 4;
                    flag |= CFopsize;
                }
                ce = loadea(e,&cs,0xFF,6,sz - regsize,RMload,0); // PUSH EA+sz-2
                code_orflag(ce,flag);
                ce = genadjesp(ce,REGSIZE);
                stackpush += sz;
                // Push the remaining pieces from high offset down to offset 0
                while ((targ_int)(sz -= regsize) > 0)
                {   ce = cat(ce,loadea(e,&cs,0xFF,6,sz - regsize,RMload,0));
                    code_orflag(ce,flag);
                    ce = genadjesp(ce,REGSIZE);
                }
            }
        L2:
            freenode(e);
            c = cat(c,ce);
            goto ret;
        case OPconst:
        {
            char pushi = 0;
            unsigned flag = 0;
            int regsize = REGSIZE;
            targ_int value;

            if (tycomplex(tym))
                break;

            if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
                // Can't push 64 bit non-zero args directly
                break;

            if (I32 && szb == 10)       // special case for long double constants
            {
                assert(sz == 12);
                // Pushed as three dwords: the top 16 bits first, then the
                // two 32 bit halves of the low 8 bytes, high half first.
                value = ((unsigned short *)&e->EV.Vldouble)[4];
                stackpush += sz;
                ce = genadjesp(NULL,sz);
                for (int i = 2; i >= 0; i--)
                {
                    if (reghasvalue(allregs, value, &reg))
                        ce = gen1(ce,0x50 + reg);       // PUSH reg
                    else
                        ce = genc2(ce,0x68,0,value);    // PUSH value
                    // Fetch the next lower dword. Skipped after the final
                    // push, where index i - 1 would be -1 (out of bounds).
                    if (i > 0)
                        value = ((unsigned *)&e->EV.Vldouble)[i - 1];
                }
                goto L2;
            }

            assert(I64 || sz <= LNGDBLSIZE);
            int i = sz;
            if (!I16 && i == 2)
                flag = CFopsize;

            if (config.target_cpu >= TARGET_80286)
                // && (e->Ecount == 0 || e->Ecount != e->Ecomsub))
            {   pushi = 1;
                if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
                {   regsize = 4;
                    flag = CFopsize;
                }
            }
            else if (i == REGSIZE)
                break;

            stackpush += sz;
            ce = genadjesp(NULL,sz);
            // Three views of the same constant storage, indexed by regsize
            targ_uns *pi = (targ_uns *) &e->EV.Vdouble;
            targ_ushort *ps = (targ_ushort *) pi;
            targ_ullong *pl = (targ_ullong *)pi;
            i /= regsize;
            do
            {
                if (i)                  /* be careful not to go negative */
                    i--;
                targ_size_t value = (regsize == 4) ? pi[i] : ps[i];
                if (regsize == 8)
                    value = pl[i];
                if (pushi)
                {
                    if (I64 && regsize == 8 && value != (int)value)
                    {   ce = regwithvalue(ce,allregs,value,&reg,64);
                        goto Preg;      // cannot push imm64 unless it is sign extended 32 bit value
                    }
                    if (regsize == REGSIZE && reghasvalue(allregs,value,&reg))
                        goto Preg;
                    ce = genc2(ce,(szb == 1) ? 0x6A : 0x68,0,value); // PUSH value
                }
                else
                {
                    ce = regwithvalue(ce,allregs,value,&reg,0);
                Preg:
                    ce = genpush(ce,reg);       // PUSH reg
                }
                code_orflag(ce,flag);           /* operand size */
            } while (i);
            goto L2;
        }
        default:
            break;
    }

    /* Generic path: evaluate e, then push or store the result */
    retregs = tybyte(tym) ? BYTEREGS : allregs;
    if (tyvector(tym))
    {
        retregs = XMMREGS;
        c = cat(c,codelem(e,&retregs,FALSE));
        stackpush += sz;
        c = genadjesp(c,sz);
        c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz
        unsigned op = xmmstore(tym);
        unsigned r = findreg(retregs);
        c = gen2sib(c,op,modregxrm(0,r - XMM0,4),modregrm(0,4,SP)); // MOV [ESP],r
        goto ret;
    }
    else if (tyfloating(tym))
    {   if (config.inline8087)
        {   code *c1,*c2;
            unsigned op;
            unsigned r;

            retregs = tycomplex(tym) ? mST01 : mST0;
            c = cat(c,codelem(e,&retregs,FALSE));
            stackpush += sz;
            c = genadjesp(c,sz);
            c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;
                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;
                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;
                default:
                    assert(0);
            }
            if (!I16)
            {
                c1 = NULL;
                c2 = NULL;
                if (tycomplex(tym))
                {
                    // FSTP sz/2[ESP]
                    c2 = genc1(CNIL,op,(modregrm(0,4,SP) << 8) | modregxrm(2,r,4),FLconst,sz/2);
                    pop87();
                }
                pop87();
                c2 = gen2sib(c2,op,modregrm(0,r,4),modregrm(0,4,SP)); // FSTP [ESP]
            }
            else
            {
                retregs = IDXREGS;              /* get an index reg */
                c1 = allocreg(&retregs,&reg,TYoffset);
                c1 = genregs(c1,0x89,SP,reg);   /* MOV reg,SP */
                pop87();
                c2 = gen2(CNIL,op,modregrm(0,r,regtorm[reg])); // FSTP [reg]
            }
            if (LARGEDATA)
                c2->Iflags |= CFss;             /* want to store into stack */
            genfwait(c2);                       // FWAIT
            c = cat3(c,c1,c2);
            goto ret;
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
            retregs = mSTACK;
    }
#if LONGLONG
    else if (I16 && sz == 8)            // if long long
        retregs = mSTACK;
#endif
    c = cat(c,scodelem(e,&retregs,0,TRUE));
    if (retregs != mSTACK)              /* if stackpush not already inc'd */
        stackpush += sz;
    if (sz <= REGSIZE)
    {
        c = genpush(c,findreg(retregs));        // PUSH reg
        genadjesp(c,REGSIZE);
    }
    else if (sz == REGSIZE * 2)
    {   c = genpush(c,findregmsw(retregs));     // PUSH msreg
        genpush(c,findreglsw(retregs));         // PUSH lsreg
        genadjesp(c,sz);
    }

ret:
    return cat(cp,c);
}
/*******************************
 * Get offset portion of e, and store it in an index
 * register. Return mask of index register in *pretregs.
 *
 * When e is a common subexpression (Ecount set and different from
 * Ecomsub), the registers recorded in regcon.cse are searched first for
 * one already holding e. Otherwise a register is allocated from mLSW and
 * loaded through getoffset(). Either way the result is recorded with
 * cssave() and e is freed.
 */
code *offsetinreg( elem *e, regm_t *pretregs)
{
    const regm_t wanted = mLSW;         /* want only offset */
    code *c;
    unsigned reg;
    int found = 0;                      // set once a CSE register is reused

    if (e->Ecount && e->Ecount != e->Ecomsub)
    {
        /* possible regs */
        regm_t candidates = wanted & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar;
        unsigned r = 0;

        // Examine candidate registers in ascending order, removing each
        // one from the set once it has been looked at.
        while (candidates)
        {
            if ((mask[r] & candidates) && regcon.cse.value[r] == e)
            {
                reg = r;
                *pretregs = mask[r];
                c = getregs(*pretregs);
                found = 1;
                break;
            }
            candidates &= ~mask[r];
            r++;
        }
    }

    if (!found)
    {
        // No register already holds e: allocate one and load the offset
        *pretregs = wanted;
        c = allocreg(pretregs,&reg,TYoffset);
        c = cat(c,getoffset(e,reg));
    }

    cssave(e,*pretregs,FALSE);
    freenode(e);
    return c;
}
/******************************
 * Generate code to load data into registers.
 *
 * Params:
 *      e        = element to load (a constant, or a memory/register operand
 *                 handled through loadea())
 *      pretregs = on entry, the mask of acceptable destinations; 0 means
 *                 nothing is wanted and no code is generated. It is updated
 *                 through fixresult() before returning.
 *
 * Returns:
 *      the generated code (CNIL when *pretregs is 0).
 *
 * Outline:
 *      - structs are delegated to cdrelconst(), x87 results to load87() /
 *        cload87()
 *      - *pretregs == mPSW: only the flags are wanted, so the value is
 *        compared or OR-ed together rather than kept
 *      - OPconst: the constant is materialized with movregconst() (or via
 *        the float temporary for XMM destinations)
 *      - otherwise: the operand is loaded piecewise with loadea()
 */
code *loaddata(elem *e,regm_t *pretregs)
{   unsigned reg,nreg,op,sreg;
    tym_t tym;
    int sz;
    code *c,*ce,cs;
    regm_t flags,forregs,regm;

#ifdef DEBUG
    if (debugw)
        printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
    //elem_print(e);
#endif
    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return CNIL;
    tym = tybasic(e->Ety);
    if (tym == TYstruct)
        return cdrelconst(e,pretregs);
    if (tyfloating(tym))
    {   obj_fltused();
        if (config.inline8087)
        {   if (*pretregs & mST0)
                return load87(e,0,pretregs,NULL,-1);
            else if (tycomplex(tym))
                return cload87(e, pretregs);
        }
    }
    sz = tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        /* Only the flags are wanted */
        regm = allregs;
        if (e->Eoper == OPconst)
        {       /* TRUE: OR SP,SP (SP is never 0) */
                /* FALSE: CMP SP,SP (always equal) */
            c = genregs(CNIL,(boolres(e)) ? 0x09 : 0x39,SP,SP);
            if (I64)
                code_orrex(c, REX_W);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {   c = allocreg(&regm,&reg,TYoffset);      /* get a register */
                ce = loadea(e,&cs,0x8B,reg,0,0,0);      // MOV reg,data
                c = cat(c,ce);
                ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */
                c = cat(c,ce);
            }
#if TARGET_OSX
            else if (e->Eoper == OPvar && movOnly(e))
            {   c = allocreg(&regm,&reg,TYoffset);      /* get a register */
                ce = loadea(e,&cs,0x8B,reg,0,0,0);      // MOV reg,data
                c = cat(c,ce);
                ce = fixresult(e,regm,pretregs);
                c = cat(c,ce);
            }
#endif
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                c = loadea(e,&cs,op,7,0,0,0);           /* CMP EA,0 */

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                if ((c->Iop & ~1) == 0x38 &&
                    (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0)
                   )
                {   c->Iop = (c->Iop & 1) | 0x84;
                    code_newreg(c, c->Irm & 7);
                    if (c->Irex & REX_B)
                        //c->Irex = (c->Irex & ~REX_B) | REX_R;
                        c->Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            c = allocreg(&regm,&reg,TYoffset);          /* get a register */
            if (I32)                                    // it's a 48 bit pointer
                ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */
            else
            {   ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */
                if (tym == TYfloat || tym == TYifloat)  // dump sign bit
                    gen2(ce,0xD1,modregrm(3,4,reg));    /* SHL reg,1 */
            }
            c = cat(c,ce);
            ce = loadea(e,&cs,0x0B,reg,0,regm,0);       /* OR reg,data */
            c = cat(c,ce);
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            c = allocreg(&regm,&reg,TYoffset);          /* get a register */
            int i = sz - REGSIZE;
            ce = loadea(e,&cs,0x8B,reg,i,0,0);          /* MOV reg,data+6 */
            if (tyfloating(tym))                        // TYdouble or TYdouble_alias
                gen2(ce,0xD1,modregrm(3,4,reg));        // SHL reg,1
            c = cat(c,ce);

            while ((i -= REGSIZE) >= 0)
            {
                code *c1 = loadea(e,&cs,0x0B,reg,i,regm,0); // OR reg,data+i
                if (i == 0)
                    c1->Iflags |= CFpsw;                // need the flags on last OR
                c = cat(c,c1);
            }
        }
        else if (sz == tysize[TYldouble])               // TYldouble
            return load87(e,0,pretregs,NULL,-1);
        else
        {
#ifdef DEBUG
            elem_print(e);
#endif
            assert(0);
        }
        return c;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;           /* save original */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e->Eoper == OPconst)
    {
        targ_size_t value = e->EV.Vint;
        if (sz == 8)
            value = e->EV.Vullong;

        // Prefer a register that already holds this value
        if (sz == REGSIZE && reghasvalue(forregs,value,&reg))
            forregs = mask[reg];

        regm_t save = regcon.immed.mval;
        c = allocreg(&forregs,&reg,tym);        /* allocate registers */
        regcon.immed.mval = save;               // KLUDGE!
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask[reg] & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (reg >= XMM0)
            {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                 * in the data segment, because they are x87 opcodes.
                 * Not so efficient. We should at least do a PXOR for 0.
                 */
                unsigned r;
                targ_size_t value = e->EV.Vuns;
                if (sz == 8)
                    value = e->EV.Vullong;
                ce = regwithvalue(CNIL,ALLREGS,value,&r,flags);
                flags = 0;                              // flags are already set
                ce = genfltreg(ce,0x89,r,0);            // MOV floatreg,r
                if (sz == 8)
                    code_orrex(ce, REX_W);
                assert(sz == 4 || sz == 8);             // float or double
                unsigned op = xmmload(tym);
                ce = genfltreg(ce,op,reg - XMM0,0);     // MOVSS/MOVSD XMMreg,floatreg
            }
            else
            {   ce = movregconst(CNIL,reg,value,flags);
                flags = 0;                              // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw,lsw;
            regm_t mswflags;

            msw = I32 ? e->EV.Vfp.Vseg
                      : (e->EV.Vulong >> 16);
            lsw = e->EV.Vfp.Voff;
            mswflags = 0;
            if (forregs & mES)
            {
                ce = movregconst(CNIL,reg,msw,0);       // MOV reg,segment
                genregs(ce,0x8E,0,reg);                 // MOV ES,reg
                msw = lsw;                              // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                ce = movregconst(CNIL,sreg,lsw,0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            ce = movregconst(ce,reg,msw,mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = (targ_long *) &e->EV.Vdouble;
                if (reg >= XMM0)
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    unsigned r;
                    regm_t rm = ALLREGS;
                    ce = allocreg(&rm,&r,TYint);        // allocate scratch register
                    ce = movregconst(ce,r,p[0],0);
                    ce = genfltreg(ce,0x89,r,0);        // MOV floatreg,r
                    ce = movregconst(ce,r,p[1],0);
                    ce = genfltreg(ce,0x89,r,4);        // MOV floatreg+4,r

                    unsigned op = xmmload(tym);
                    ce = genfltreg(ce,op,reg - XMM0,0); // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    ce = movregconst(CNIL,findreglsw(forregs),p[0],0);
                    ce = movregconst(ce,findregmsw(forregs),p[1],0);
                }
            }
            else
            {   targ_short *p = (targ_short *) &e->EV.Vdouble;

                assert(reg == AX);
                ce = movregconst(CNIL,AX,p[3],0);       /* MOV AX,p[3] */
                ce = movregconst(ce,DX,p[0],0);
                ce = movregconst(ce,CX,p[1],0);
                ce = movregconst(ce,BX,p[2],0);
            }
        }
        else if (I64 && sz == 16)
        {
            ce = movregconst(CNIL,findreglsw(forregs),e->EV.Vcent.lsw,0);
            ce = movregconst(ce,findregmsw(forregs),e->EV.Vcent.msw,0);
        }
        else
            assert(0);
        c = cat(c,ce);
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params && e->EV.sp.Vsym->Sclass == SCfastpar &&
            regcon.params & mask[e->EV.sp.Vsym->Spreg] &&
            !(e->Eoper == OPvar && e->EV.sp.Voffset > 0) && // Must be at the base of that variable
            sz <= REGSIZE)                                  // make sure no 'paint' to a larger size happened
        {
            reg = e->EV.sp.Vsym->Spreg;
            forregs = mask[reg];
            mfuncreg &= ~forregs;
            regcon.used |= forregs;
            return fixresult(e,forregs,pretregs);
        }

        c = allocreg(&forregs,&reg,tym);        /* allocate registers */

        if (sz == 1)
        {   regm_t nregm;

#ifdef DEBUG
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                printf("forregs = x%x\n",forregs);
            }
#endif
            int op = 0x8A;                              // byte MOV
#if TARGET_OSX
            if (movOnly(e))
                op = 0x8B;
#endif
            assert(forregs & BYTEREGS);
            if (!I16)
                c = cat(c,loadea(e,&cs,op,reg,0,0,0));  // MOV regL,data
            else
            {   nregm = tyuns(tym) ? BYTEREGS : mAX;
                if (*pretregs & nregm)
                    nreg = reg;                         /* already allocated */
                else
                    c = cat(c,allocreg(&nregm,&nreg,tym));
                ce = loadea(e,&cs,op,nreg,0,0,0);       /* MOV nregL,data */
                c = cat(c,ce);
                if (reg != nreg)
                {   genmovreg(c,reg,nreg);              /* MOV reg,nreg */
                    cssave(e,mask[nreg],FALSE);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //printf("test2 %s\n", e->EV.sp.Vsym->Sident);
            //e->EV.sp.Vsym->Sflags &= ~GTregcand;

            op = xmmload(tym);
            if (e->Eoper == OPvar)
            {   symbol *s = e->EV.sp.Vsym;
                if (s->Sfl == FLreg && !(mask[s->Sreglsw] & XMMREGS))
                {   op = LODD;                          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            ce = loadea(e,&cs,op,reg,0,RMload,0);       // MOVSS/MOVSD reg,data
            c = cat(c,ce);
        }
        else if (sz <= REGSIZE)
        {
            ce = loadea(e,&cs,0x8B,reg,0,RMload,0);     // MOV reg,data
            c = cat(c,ce);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            ce = loadea(e,&cs,0xC4,reg,0,0,mES);        /* LES data */
            c = cat(c,ce);
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {   int i;

                assert(0);
                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                i = DOUBLESIZE - REGSIZE;
                do
                {   c = cat(c,loadea(e,&cs,0xFF,6,i,0,0)); /* PUSH EA+i */
                    c = genadjesp(c,REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return c;
            }

            reg = findregmsw(forregs);
            ce = loadea(e,&cs,0x8B,reg,REGSIZE,forregs,0); /* MOV reg,data+2 */
            if (I32 && sz == REGSIZE + 2)
                ce->Iflags |= CFopsize;                 /* seg is 16 bits */
            c = cat(c,ce);
            reg = findreglsw(forregs);
            ce = loadea(e,&cs,0x8B,reg,0,forregs,0);
            c = cat(c,ce);
        }
        else if (sz >= 8)
        {
            code *c1,*c2,*c3;

            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {   int i;

                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                i = sz - REGSIZE;
                do
                {   c = cat(c,loadea(e,&cs,0xFF,6,i,0,0)); /* PUSH EA+i */
                    c = genadjesp(c,REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return c;
            }
            else
            {
                // Load the four words into AX, BX, CX, DX. Each later load
                // passes the registers already filled as its keep mask so
                // that computing the address does not overwrite them.
                assert(reg == AX);
                ce = loadea(e,&cs,0x8B,AX,6,0,0);               /* MOV AX,data+6 */
                c1 = loadea(e,&cs,0x8B,BX,4,mAX,0);             /* MOV BX,data+4 */
                c2 = loadea(e,&cs,0x8B,CX,2,mAX|mBX,0);         /* MOV CX,data+2 */
                c3 = loadea(e,&cs,0x8B,DX,0,mAX|mBX|mCX,0);     /* MOV DX,data */
                c = cat6(c,ce,c1,c2,c3,CNIL);
            }
        }
        else
            assert(0);
    }
    /* Flags may already be set */
    *pretregs &= flags | ~mPSW;
    c = cat(c,fixresult(e,forregs,pretregs));
    return c;
}
#endif // SPP