// Copyright (C) 1984-1998 by Symantec // Copyright (C) 2000-2012 by Digital Mars // All Rights Reserved // http://www.digitalmars.com // Written by Walter Bright /* * This source file is made available for personal use * only. The license is in /dmd/src/dmd/backendlicense.txt * or /dm/src/dmd/backendlicense.txt * For any other uses, please contact Digital Mars. */ #if !SPP #include #include #include #include #if __sun&&__SVR4 || _MSC_VER #include #endif #include "cc.h" #include "el.h" #include "oper.h" #include "code.h" #include "global.h" #include "type.h" #include "xmm.h" static char __file__[] = __FILE__; /* for tassert.h */ #include "tassert.h" /* Generate the appropriate ESC instruction */ #define ESC(MF,b) (0xD8 + ((MF) << 1) + (b)) enum MF { // Values for MF MFfloat = 0, MFlong = 1, MFdouble = 2, MFword = 3 }; code * genf2(code *c,unsigned op,unsigned rm); targ_size_t paramsize(elem *e,unsigned stackalign); STATIC code * funccall (elem *,unsigned,unsigned,regm_t *,regm_t); /* array to convert from index register to r/m field */ /* AX CX DX BX SP BP SI DI */ static const signed char regtorm32[8] = { 0, 1, 2, 3,-1, 5, 6, 7 }; signed char regtorm [8] = { -1,-1,-1, 7,-1, 6, 4, 5 }; /************************** * Determine if e is a 32 bit scaled index addressing mode. * Returns: * 0 not a scaled index addressing mode * !=0 the value for ss in the SIB byte */ int isscaledindex(elem *e) { targ_uns ss; assert(!I16); while (e->Eoper == OPcomma) e = e->E2; if (!(e->Eoper == OPshl && !e->Ecount && e->E2->Eoper == OPconst && (ss = e->E2->EV.Vuns) <= 3 ) ) ss = 0; return ss; } /********************************************* * Generate code for which isscaledindex(e) returned a non-zero result. 
*/ code *cdisscaledindex(elem *e,regm_t *pidxregs,regm_t keepmsk) { code *c; regm_t r; // Load index register with result of e->E1 c = NULL; while (e->Eoper == OPcomma) { r = 0; c = cat(c,scodelem(e->E1,&r,keepmsk,TRUE)); freenode(e); e = e->E2; } assert(e->Eoper == OPshl); c = cat(c,scodelem(e->E1,pidxregs,keepmsk,TRUE)); freenode(e->E2); freenode(e); return c; } /*********************************** * Determine index if we can do two LEA instructions as a multiply. * Returns: * 0 can't do it */ static struct Ssindex { targ_uns product; char ss1; char ss2; char ssflags; #define SSFLnobp 1 // can't have EBP in relconst #define SSFLnobase1 2 // no base register for first LEA #define SSFLnobase 4 // no base register #define SSFLlea 8 // can do it in one LEA } ssindex_array[] = { {0, 0,0}, // [0] is a place holder {3, 1,0,SSFLnobp | SSFLlea}, {5, 2,0,SSFLnobp | SSFLlea}, {9, 3,0,SSFLnobp | SSFLlea}, {6, 1,1,SSFLnobase}, {12,1,2,SSFLnobase}, {24,1,3,SSFLnobase}, {10,2,1,SSFLnobase}, {20,2,2,SSFLnobase}, {40,2,3,SSFLnobase}, {18,3,1,SSFLnobase}, {36,3,2,SSFLnobase}, {72,3,3,SSFLnobase}, {15,2,1,SSFLnobp}, {25,2,2,SSFLnobp}, {27,3,1,SSFLnobp}, {45,3,2,SSFLnobp}, {81,3,3,SSFLnobp}, {16,3,1,SSFLnobase1 | SSFLnobase}, {32,3,2,SSFLnobase1 | SSFLnobase}, {64,3,3,SSFLnobase1 | SSFLnobase}, }; int ssindex(int op,targ_uns product) { int i; if (op == OPshl) product = 1 << product; for (i = 1; i < arraysize(ssindex_array); i++) { if (ssindex_array[i].product == product) return i; } return 0; } /*************************************** * Build an EA of the form disp[base][index*scale]. 
* Input: * c struct to fill in * base base register (-1 if none) * index index register (-1 if none) * scale scale factor - 1,2,4,8 * disp displacement */ void buildEA(code *c,int base,int index,int scale,targ_size_t disp) { unsigned char rm; unsigned char sib; unsigned char rex = 0; sib = 0; if (!I16) { unsigned ss; assert(index != SP); switch (scale) { case 1: ss = 0; break; case 2: ss = 1; break; case 4: ss = 2; break; case 8: ss = 3; break; default: assert(0); } if (base == -1) { if (index == -1) rm = modregrm(0,0,5); else { rm = modregrm(0,0,4); sib = modregrm(ss,index & 7,5); if (index & 8) rex |= REX_X; } } else if (index == -1) { if (base == SP) { rm = modregrm(2,0,4); sib = modregrm(0,4,SP); } else { rm = modregrm(2,0,base & 7); if (base & 8) { rex |= REX_B; if (base == R12) { rm = modregrm(2,0,4); sib = modregrm(0,4,4); } } } } else { rm = modregrm(2,0,4); sib = modregrm(ss,index & 7,base & 7); if (index & 8) rex |= REX_X; if (base & 8) rex |= REX_B; } } else { // -1 AX CX DX BX SP BP SI DI static unsigned char EA16rm[9][9] = { { 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, }, // -1 { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // AX { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // CX { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // DX { 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, }, // BX { 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, }, // SP { 0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, }, // BP { 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, }, // SI { 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, } // DI }; assert(scale == 1); rm = EA16rm[base + 1][index + 1]; assert(rm != 9); } c->Irm = rm; c->Isib = sib; c->Irex = rex; c->IFL1 = FLconst; c->IEV1.Vuns = disp; } /********************************************* * Build REX, modregrm and sib bytes */ unsigned buildModregrm(int mod, int reg, int rm) { unsigned m; if (I16) m = modregrm(mod, reg, rm); else { if ((rm & 7) == SP && mod != 3) m = (modregrm(0,4,SP) << 8) | modregrm(mod,reg 
& 7,4); else m = modregrm(mod,reg & 7,rm & 7); if (reg & 8) m |= REX_R << 16; if (rm & 8) m |= REX_B << 16; } return m; } /**************************************** * Generate code for eecontext */ void genEEcode() { regm_t retregs; code *c; eecontext.EEin++; regcon.immed.mval = 0; retregs = 0; //regmask(eecontext.EEelem->Ety); assert(EEoffset >= REGSIZE); c = genc2(NULL,0x81,modregrm(3,5,SP),EEoffset - REGSIZE); // SUB ESP,EEoffset gen1(c,0x50 + SI); // PUSH ESI genadjesp(c,EEoffset); c = gencodelem(c,eecontext.EEelem,&retregs, FALSE); assignaddrc(c); pinholeopt(c,NULL); jmpaddr(c); eecontext.EEcode = gen1(c,0xCC); // INT 3 eecontext.EEin--; } /******************************************** * Gen a save/restore sequence for mask of registers. */ void gensaverestore2(regm_t regm,code **csave,code **crestore) { code *cs1 = *csave; code *cs2 = *crestore; //printf("gensaverestore2(%s)\n", regm_str(regm)); regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01; for (int i = 0; regm; i++) { if (regm & 1) { if (i == ES) { cs1 = gen1(cs1, 0x06); // PUSH ES cs2 = cat(gen1(CNIL, 0x07),cs2); // POP ES } else if (i == ST0 || i == ST01) { gensaverestore87(1 << i, &cs1, &cs2); } else if (i >= XMM0) { unsigned idx; cs1 = regsave.save(cs1, i, &idx); cs2 = regsave.restore(cs2, i, idx); } else { cs1 = gen1(cs1,0x50 + (i & 7)); // PUSH i code *c = gen1(NULL, 0x58 + (i & 7)); // POP i if (i & 8) { code_orrex(cs1, REX_B); code_orrex(c, REX_B); } cs2 = cat(c,cs2); } } regm >>= 1; } *csave = cs1; *crestore = cs2; } void gensaverestore(regm_t regm,code **csave,code **crestore) { *csave = NULL; *crestore = NULL; gensaverestore2(regm, csave, crestore); } /**************************************** * Clean parameters off stack. 
* Input: * numpara amount to adjust stack pointer * keepmsk mask of registers to not destroy */ code *genstackclean(code *c,unsigned numpara,regm_t keepmsk) { //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); if (numpara && (cgstate.stackclean || STACKALIGN == 16)) { #if 0 // won't work if operand of scodelem if (numpara == stackpush && // if this is all those pushed needframe && // and there will be a BP !config.windows && !(regcon.mvar & fregsaved) // and no registers will be pushed ) c = genregs(c,0x89,BP,SP); // MOV SP,BP else #endif { regm_t scratchm = 0; if (numpara == REGSIZE && config.flags4 & CFG4space) { scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; } if (scratchm) { unsigned r; c = cat(c,allocreg(&scratchm,&r,TYint)); c = gen1(c,0x58 + r); // POP r } else { c = genc2(c,0x81,modregrm(3,0,SP),numpara); // ADD SP,numpara if (I64) code_orrex(c, REX_W); } } stackpush -= numpara; c = genadjesp(c,-numpara); } return c; } /********************************* * Generate code for a logical expression. 
* Input: * e elem * jcond * bit 1 if TRUE then goto jump address if e * if FALSE then goto jump address if !e * 2 don't call save87() * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond * targ either code or block pointer to destination */ code *logexp(elem *e,int jcond,unsigned fltarg,code *targ) { code *c,*ce,*cnop; regm_t retregs; unsigned op; //printf("logexp(e = %p, jcond = %d)\n", e, jcond); int no87 = (jcond & 2) == 0; _chkstack(); code *cc = docommas(&e); // scan down commas cgstate.stackclean++; if (EOP(e) && !e->Ecount) /* if operator and not common sub */ { con_t regconsave; switch (e->Eoper) { case OPoror: if (jcond & 1) { c = logexp(e->E1,jcond,fltarg,targ); regconsave = regcon; ce = logexp(e->E2,jcond,fltarg,targ); } else { cnop = gennop(CNIL); c = logexp(e->E1,jcond | 1,FLcode,cnop); regconsave = regcon; ce = logexp(e->E2,jcond,fltarg,targ); ce = cat(ce,cnop); } cnop = CNIL; goto L1; case OPandand: if (jcond & 1) { cnop = gennop(CNIL); /* a dummy target address */ c = logexp(e->E1,jcond & ~1,FLcode,cnop); regconsave = regcon; ce = logexp(e->E2,jcond,fltarg,targ); } else { c = logexp(e->E1,jcond,fltarg,targ); regconsave = regcon; ce = logexp(e->E2,jcond,fltarg,targ); cnop = CNIL; } L1: andregcon(®consave); freenode(e); c = cat4(cc,c,ce,cnop); goto Lret; case OPnot: jcond ^= 1; case OPbool: case OPs8_16: case OPu8_16: case OPs16_32: case OPu16_32: case OPs32_64: case OPu32_64: case OPu32_d: case OPd_ld: c = logexp(e->E1,jcond,fltarg,targ); freenode(e); goto Lretc; case OPcond: { code *cnop2 = gennop(CNIL); // addresses of start of leaves cnop = gennop(CNIL); c = logexp(e->E1,FALSE,FLcode,cnop2); /* eval condition */ con_t regconold = regcon; ce = logexp(e->E2->E1,jcond,fltarg,targ); ce = genjmp(ce,JMP,FLcode,(block *) cnop); /* skip second leaf */ regconsave = regcon; regcon = regconold; code_next(cnop2) = logexp(e->E2->E2,jcond,fltarg,targ); andregcon(®conold); andregcon(®consave); freenode(e->E2); freenode(e); c = 
cat6(cc,c,NULL,ce,cnop2,cnop); goto Lret; } } } /* Special code for signed long compare. * Not necessary for I64 until we do cents. */ if (OTrel2(e->Eoper) && /* if < <= >= > */ !e->Ecount && ( (I16 && tybasic(e->E1->Ety) == TYlong && tybasic(e->E2->Ety) == TYlong) || (I32 && tybasic(e->E1->Ety) == TYllong && tybasic(e->E2->Ety) == TYllong)) ) { c = longcmp(e,jcond,fltarg,targ); goto Lretc; } retregs = mPSW; /* return result in flags */ op = jmpopcode(e); /* get jump opcode */ if (!(jcond & 1)) op ^= 0x101; // toggle jump condition(s) c = codelem(e,&retregs,TRUE); /* evaluate elem */ if (no87) c = cat(c,cse_flush(no87)); // flush CSE's to memory genjmp(c,op,fltarg,(block *) targ); /* generate jmp instruction */ Lretc: c = cat(cc,c); Lret: cgstate.stackclean--; return c; } /****************************** * Routine to aid in setting things up for gen(). * Look for common subexpression. * Can handle indirection operators, but not if they're common subs. * Input: * e -> elem where we get some of the data from * cs -> partially filled code to add * op = opcode * reg = reg field of (mod reg r/m) * offset = data to be added to Voffset field * keepmsk = mask of registers we must not destroy * desmsk = mask of registers destroyed by executing the instruction * Returns: * pointer to code generated */ code *loadea(elem *e,code *cs,unsigned op,unsigned reg,targ_size_t offset, regm_t keepmsk,regm_t desmsk) { code *c,*cg,*cd; #ifdef DEBUG if (debugw) printf("loadea: e=%p cs=%p op=x%x reg=%d offset=%lld keepmsk=x%x desmsk=x%x\n", e,cs,op,reg,(unsigned long long)offset,keepmsk,desmsk); #endif assert(e); cs->Iflags = 0; cs->Irex = 0; cs->Iop = op; tym_t tym = e->Ety; int sz = tysize(tym); /* Determine if location we want to get is in a register. If so, */ /* substitute the register for the EA. */ /* Note that operators don't go through this. CSE'd operators are */ /* picked up by comsub(). 
*/ if (e->Ecount && /* if cse */ e->Ecount != e->Ecomsub && /* and cse was generated */ op != 0x8D && op != 0xC4 && /* and not an LEA or LES */ (op != 0xFF || reg != 3) && /* and not CALLF MEM16 */ (op & 0xFFF8) != 0xD8) // and not 8087 opcode { assert(!EOP(e)); /* can't handle this */ regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs if (sz > REGSIZE) // value is in 2 or 4 registers { if (I16 && sz == 8) // value is in 4 registers { static regm_t rmask[4] = { mDX,mCX,mBX,mAX }; rm &= rmask[offset >> 1]; } else if (offset) rm &= mMSW; /* only high words */ else rm &= mLSW; /* only low words */ } for (unsigned i = 0; rm; i++) { if (mask[i] & rm) { if (regcon.cse.value[i] == e && // if register has elem /* watch out for a CWD destroying DX */ !(i == DX && op == 0xF7 && desmsk & mDX)) { /* if ES, then it can only be a load */ if (i == ES) { if (op != 0x8B) goto L1; /* not a load */ cs->Iop = 0x8C; /* MOV reg,ES */ cs->Irm = modregrm(3,0,reg & 7); if (reg & 8) code_orrex(cs, REX_B); } else // XXX reg,i { cs->Irm = modregrm(3,reg & 7,i & 7); if (reg & 8) cs->Irex |= REX_R; if (i & 8) cs->Irex |= REX_B; if (sz == 1 && I64 && (i >= 4 || reg >= 4)) cs->Irex |= REX; if (I64 && (sz == 8 || sz == 16)) cs->Irex |= REX_W; } c = CNIL; goto L2; } rm &= ~mask[i]; } } } L1: c = getlvalue(cs,e,keepmsk); if (offset == REGSIZE) getlvalue_msw(cs); else cs->IEVoffset1 += offset; if (I64) { if (reg >= 4 && sz == 1) // if byte register // Can only address those 8 bit registers if a REX byte is present cs->Irex |= REX; if ((op & 0xFFFFFFF8) == 0xD8) cs->Irex &= ~REX_W; // not needed for x87 ops } code_newreg(cs, reg); // OR in reg field if (!I16) { if (reg == 6 && op == 0xFF || /* don't PUSH a word */ op == 0x0FB7 || op == 0x0FBF || /* MOVZX/MOVSX */ (op & 0xFFF8) == 0xD8 || /* 8087 instructions */ op == 0x8D) /* LEA */ { cs->Iflags &= ~CFopsize; if (reg == 6 && op == 0xFF) // if PUSH cs->Irex &= ~REX_W; // REX is ignored for PUSH anyway } } else if ((op & 
0xFFF8) == 0xD8 && ADDFWAIT()) cs->Iflags |= CFwait; L2: cg = getregs(desmsk); /* save any regs we destroy */ /* KLUDGE! fix up DX for divide instructions */ cd = CNIL; if (op == 0xF7 && desmsk == (mAX|mDX)) /* if we need to fix DX */ { if (reg == 7) /* if IDIV */ { cd = gen1(cd,0x99); // CWD if (I64 && sz == 8) code_orrex(cd, REX_W); } else if (reg == 6) // if DIV { cd = genregs(cd,0x33,DX,DX); // XOR DX,DX if (I64 && sz == 8) code_orrex(cd, REX_W); } } // Eliminate MOV reg,reg if ((cs->Iop & ~3) == 0x88 && (cs->Irm & 0xC7) == modregrm(3,0,reg & 7)) { unsigned r = cs->Irm & 7; if (cs->Irex & REX_B) r |= 8; if (r == reg) cs->Iop = NOP; } return cat4(c,cg,cd,gen(NULL,cs)); } /************************** * Get addressing mode. */ unsigned getaddrmode(regm_t idxregs) { unsigned mode; if (I16) { mode = (idxregs & mBX) ? modregrm(2,0,7) : /* [BX] */ (idxregs & mDI) ? modregrm(2,0,5): /* [DI] */ (idxregs & mSI) ? modregrm(2,0,4): /* [SI] */ (assert(0),1); } else { unsigned reg = findreg(idxregs & (ALLREGS | mBP)); if (reg == R12) mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4); else mode = modregrmx(2,0,reg); } return mode; } void setaddrmode(code *c, regm_t idxregs) { unsigned mode = getaddrmode(idxregs); c->Irm = mode & 0xFF; c->Isib = mode >> 8; c->Irex &= ~REX_B; c->Irex |= mode >> 16; } /********************************************** */ void getlvalue_msw(code *c) { if (c->IFL1 == FLreg) { unsigned regmsw = c->IEVsym1->Sregmsw; c->Irm = (c->Irm & ~7) | (regmsw & 7); if (regmsw & 8) c->Irex |= REX_B; else c->Irex &= ~REX_B; } else c->IEVoffset1 += REGSIZE; } /********************************************** */ void getlvalue_lsw(code *c) { if (c->IFL1 == FLreg) { unsigned reglsw = c->IEVsym1->Sreglsw; c->Irm = (c->Irm & ~7) | (reglsw & 7); if (reglsw & 8) c->Irex |= REX_B; else c->Irex &= ~REX_B; } else c->IEVoffset1 -= REGSIZE; } /****************** * Compute addressing mode. * Generate & return sequence of code (if any). * Return in cs the info on it. 
* Input: * pcs -> where to store data about addressing mode * e -> the lvalue elem * keepmsk mask of registers we must not destroy or use * if (keepmsk & RMstore), this will be only a store operation * into the lvalue * if (keepmsk & RMload), this will be a read operation only */ code *getlvalue(code *pcs,elem *e,regm_t keepmsk) { regm_t idxregs; unsigned fl,f,opsave; code *c; elem *e1; elem *e11; elem *e12; bool e1isadd,e1free; unsigned reg; tym_t e1ty; symbol *s; //printf("getlvalue(e = %p)\n",e); //elem_print(e); assert(e); elem_debug(e); if (e->Eoper == OPvar || e->Eoper == OPrelconst) { s = e->EV.sp.Vsym; fl = s->Sfl; if (tyfloating(s->ty())) obj_fltused(); } else fl = FLoper; pcs->IFL1 = fl; pcs->Iflags = CFoff; /* only want offsets */ pcs->Irex = 0; pcs->IEVoffset1 = 0; tym_t ty = e->Ety; unsigned sz = tysize(ty); if (tyfloating(ty)) obj_fltused(); if (I64 && (sz == 8 || sz == 16)) pcs->Irex |= REX_W; if (!I16 && sz == SHORTSIZE) pcs->Iflags |= CFopsize; if (ty & mTYvolatile) pcs->Iflags |= CFvolatile; c = CNIL; switch (fl) { #if 0 && TARGET_LINUX case FLgot: case FLgotoff: gotref = 1; pcs->IEVsym1 = s; pcs->IEVoffset1 = e->EV.sp.Voffset; if (e->Eoper == OPvar && fl == FLgot) { code *c1; unsigned saveop = pcs->Iop; idxregs = allregs & ~keepmsk; // get a scratch register c = allocreg(&idxregs,®,TYptr); pcs->Irm = modregrm(2,reg,BX); // BX has GOT pcs->Isib = 0; //pcs->Iflags |= CFvolatile; pcs->Iop = 0x8B; c = gen(c,pcs); // MOV reg,disp[EBX] pcs->Irm = modregrm(0,0,reg); pcs->IEVoffset1 = 0; pcs->Iop = saveop; } else { pcs->Irm = modregrm(2,0,BX); // disp[EBX] is addr pcs->Isib = 0; } break; #endif case FLoper: #ifdef DEBUG if (debugw) printf("getlvalue(e = %p, km = x%x)\n",e,keepmsk); #endif switch (e->Eoper) { case OPadd: // this way when we want to do LEA e1 = e; e1free = FALSE; e1isadd = TRUE; break; case OPind: case OPpostinc: // when doing (*p++ = ...) case OPpostdec: // when doing (*p-- = ...) 
case OPbt: case OPbtc: case OPbtr: case OPbts: e1 = e->E1; e1free = TRUE; e1isadd = e1->Eoper == OPadd; break; default: #ifdef DEBUG elem_print(e); #endif assert(0); } e1ty = tybasic(e1->Ety); if (e1isadd) { e12 = e1->E2; e11 = e1->E1; } /* First see if we can replace *(e+&v) with * MOV idxreg,e * EA = [ES:] &v+idxreg */ f = FLconst; if (e1isadd && ((e12->Eoper == OPrelconst #if TARGET_SEGMENTED && (f = el_fl(e12)) != FLfardata #endif ) || (e12->Eoper == OPconst && !I16 && !e1->Ecount && (!I64 || el_signx32(e12)))) && !(I64 && config.flags3 & CFG3pic) && e1->Ecount == e1->Ecomsub && #if TARGET_SEGMENTED (!e1->Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && #endif tysize(e11->Ety) == REGSIZE ) { unsigned char t; /* component of r/m field */ int ss; int ssi; #if !TARGET_SEGMENTED if (e12->Eoper == OPrelconst) f = el_fl(e12); #endif /*assert(datafl[f]);*/ /* what if addr of func? */ if (!I16) { /* Any register can be an index register */ regm_t idxregs = allregs & ~keepmsk; assert(idxregs); /* See if e1->E1 can be a scaled index */ ss = isscaledindex(e11); if (ss) { /* Load index register with result of e11->E1 */ c = cdisscaledindex(e11,&idxregs,keepmsk); reg = findreg(idxregs); { t = stackfl[f] ? 
2 : 0; pcs->Irm = modregrm(t,0,4); pcs->Isib = modregrm(ss,reg & 7,5); if (reg & 8) pcs->Irex |= REX_X; } } else if ((e11->Eoper == OPmul || e11->Eoper == OPshl) && !e11->Ecount && e11->E2->Eoper == OPconst && (ssi = ssindex(e11->Eoper,e11->E2->EV.Vuns)) != 0 ) { regm_t scratchm; #if 0 && TARGET_LINUX assert(f != FLgot && f != FLgotoff); #endif char ssflags = ssindex_array[ssi].ssflags; if (ssflags & SSFLnobp && stackfl[f]) goto L6; // Load index register with result of e11->E1 c = scodelem(e11->E1,&idxregs,keepmsk,TRUE); reg = findreg(idxregs); int ss1 = ssindex_array[ssi].ss1; if (ssflags & SSFLlea) { assert(!stackfl[f]); pcs->Irm = modregrm(2,0,4); pcs->Isib = modregrm(ss1,reg & 7,reg & 7); if (reg & 8) pcs->Irex |= REX_X | REX_B; } else { int rbase; unsigned r; scratchm = ALLREGS & ~keepmsk; c = cat(c,allocreg(&scratchm,&r,TYint)); if (ssflags & SSFLnobase1) { t = 0; rbase = 5; } else { t = 0; rbase = reg; if (rbase == BP || rbase == R13) { static unsigned imm32[4] = {1+1,2+1,4+1,8+1}; // IMUL r,BP,imm32 c = genc2(c,0x69,modregxrmx(3,r,rbase),imm32[ss1]); goto L7; } } c = gen2sib(c,0x8D,modregxrm(t,r,4),modregrm(ss1,reg & 7,rbase & 7)); if (reg & 8) code_orrex(c, REX_X); if (rbase & 8) code_orrex(c, REX_B); if (I64) code_orrex(c, REX_W); if (ssflags & SSFLnobase1) { code_last(c)->IFL1 = FLconst; code_last(c)->IEV1.Vuns = 0; } L7: if (ssflags & SSFLnobase) { t = stackfl[f] ? 
2 : 0; rbase = 5; } else { t = 2; rbase = r; assert(rbase != BP); } pcs->Irm = modregrm(t,0,4); pcs->Isib = modregrm(ssindex_array[ssi].ss2,r & 7,rbase & 7); if (r & 8) pcs->Irex |= REX_X; if (rbase & 8) pcs->Irex |= REX_B; } freenode(e11->E2); freenode(e11); } else { L6: /* Load index register with result of e11 */ c = scodelem(e11,&idxregs,keepmsk,TRUE); setaddrmode(pcs, idxregs); #if 0 && TARGET_LINUX if (e12->EV.sp.Vsym->Sfl == FLgot || e12->EV.sp.Vsym->Sfl == FLgotoff) { gotref = 1; #if 1 reg = findreg(idxregs & (ALLREGS | mBP)); pcs->Irm = modregrm(2,0,4); pcs->Isib = modregrm(0,reg,BX); #else pcs->Isib = modregrm(0,pcs->Irm,BX); pcs->Irm = modregrm(2,0,4); #endif } else #endif if (stackfl[f]) /* if we need [EBP] too */ { unsigned idx = pcs->Irm & 7; if (pcs->Irex & REX_B) pcs->Irex = (pcs->Irex & ~REX_B) | REX_X; pcs->Isib = modregrm(0,idx,BP); pcs->Irm = modregrm(2,0,4); } } } else { idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ assert(idxregs); #if 0 && TARGET_LINUX assert(f != FLgot && f != FLgotoff); #endif if (stackfl[f]) /* if stack data type */ { idxregs &= mSI | mDI; /* BX can't index off stack */ if (!idxregs) goto L1; /* index regs aren't avail */ t = 6; /* [BP+SI+disp] */ } else t = 0; /* [SI + disp] */ c = scodelem(e11,&idxregs,keepmsk,TRUE); /* load idx reg */ pcs->Irm = getaddrmode(idxregs) ^ t; } if (f == FLpara) refparam = TRUE; else if (f == FLauto || f == FLtmp || f == FLbprel || f == FLfltreg) reflocal = TRUE; #if TARGET_SEGMENTED else if (f == FLcsdata || tybasic(e12->Ety) == TYcptr) pcs->Iflags |= CFcs; #endif else assert(f != FLreg); pcs->IFL1 = f; if (f != FLconst) pcs->IEVsym1 = e12->EV.sp.Vsym; pcs->IEVoffset1 = e12->EV.sp.Voffset; /* += ??? 
*/ /* If e1 is a CSE, we must generate an addressing mode */ /* but also leave EA in registers so others can use it */ if (e1->Ecount) { unsigned flagsave; idxregs = IDXREGS & ~keepmsk; c = cat(c,allocreg(&idxregs,®,TYoffset)); #if TARGET_SEGMENTED /* If desired result is a far pointer, we'll have */ /* to load another register with the segment of v */ if (e1ty == TYfptr) { unsigned msreg; idxregs |= mMSW & ALLREGS & ~keepmsk; c = cat(c,allocreg(&idxregs,&msreg,TYfptr)); msreg = findregmsw(idxregs); /* MOV msreg,segreg */ c = genregs(c,0x8C,segfl[f],msreg); } #endif opsave = pcs->Iop; flagsave = pcs->Iflags; pcs->Iop = 0x8D; code_newreg(pcs, reg); if (!I16) pcs->Iflags &= ~CFopsize; if (I64) pcs->Irex |= REX_W; c = gen(c,pcs); /* LEA idxreg,EA */ cssave(e1,idxregs,TRUE); if (!I16) pcs->Iflags = flagsave; if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack pcs->Iflags |= CFss; // add SS: override pcs->Iop = opsave; pcs->IFL1 = FLoffset; pcs->IEV1.Vuns = 0; setaddrmode(pcs, idxregs); } freenode(e12); if (e1free) freenode(e1); goto Lptr; } L1: /* The rest of the cases could be a far pointer */ idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs assert(idxregs); if (!I16 && (sz == REGSIZE || (I64 && sz == 4)) && keepmsk & RMstore) idxregs |= regcon.mvar; #if TARGET_SEGMENTED switch (e1ty) { case TYfptr: /* if far pointer */ case TYhptr: idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too assert(idxregs & mES); pcs->Iflags |= CFes; /* ES segment override */ break; case TYsptr: /* if pointer to stack */ if (config.wflags & WFssneds) // if SS != DS pcs->Iflags |= CFss; /* then need SS: override */ break; case TYcptr: /* if pointer to code */ pcs->Iflags |= CFcs; /* then need CS: override */ break; } #endif pcs->IFL1 = FLoffset; pcs->IEV1.Vuns = 0; /* see if we can replace *(e+c) with * MOV idxreg,e * [MOV ES,segment] * EA = [ES:]c[idxreg] */ if (e1isadd && e12->Eoper == OPconst && (!I64 || el_signx32(e12)) && (tysize(e12->Ety) == REGSIZE || (I64 && tysize(e12->Ety) == 4)) && (!e1->Ecount || !e1free) ) { int ss; pcs->IEV1.Vuns = e12->EV.Vuns; freenode(e12); if (e1free) freenode(e1); if (!I16 && e11->Eoper == OPadd && !e11->Ecount && tysize(e11->Ety) == REGSIZE) { e12 = e11->E2; e11 = e11->E1; e1 = e1->E1; e1free = TRUE; goto L4; } if (!I16 && (ss = isscaledindex(e11)) != 0) { // (v * scale) + const c = cdisscaledindex(e11,&idxregs,keepmsk); reg = findreg(idxregs); pcs->Irm = modregrm(0,0,4); pcs->Isib = modregrm(ss,reg & 7,5); if (reg & 8) pcs->Irex |= REX_X; } else { c = scodelem(e11,&idxregs,keepmsk,TRUE); // load index reg setaddrmode(pcs, idxregs); } goto Lptr; } /* Look for *(v1 + v2) * EA = [v1][v2] */ if (!I16 && e1isadd && (!e1->Ecount || !e1free) && (tysize[e1ty] == REGSIZE || (I64 && tysize[e1ty] == 4))) { code *c2; regm_t idxregs2; unsigned base,index; int ss; L4: // Look for *(v1 + v2 << scale) ss = isscaledindex(e12); if (ss) { c = scodelem(e11,&idxregs,keepmsk,TRUE); idxregs2 = allregs & ~(idxregs | keepmsk); c2 = cdisscaledindex(e12,&idxregs2,keepmsk | idxregs); } // Look for *(v1 << scale + v2) else if ((ss = 
isscaledindex(e11)) != 0) { idxregs2 = idxregs; c = cdisscaledindex(e11,&idxregs2,keepmsk); idxregs = allregs & ~(idxregs2 | keepmsk); c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE); } // Look for *(((v1 << scale) + c1) + v2) else if (e11->Eoper == OPadd && !e11->Ecount && e11->E2->Eoper == OPconst && (ss = isscaledindex(e11->E1)) != 0 ) { pcs->IEV1.Vuns = e11->E2->EV.Vuns; idxregs2 = idxregs; c = cdisscaledindex(e11->E1,&idxregs2,keepmsk); idxregs = allregs & ~(idxregs2 | keepmsk); c2 = scodelem(e12,&idxregs,keepmsk | idxregs2,TRUE); freenode(e11->E2); freenode(e11); } else { c = scodelem(e11,&idxregs,keepmsk,TRUE); idxregs2 = allregs & ~(idxregs | keepmsk); c2 = scodelem(e12,&idxregs2,keepmsk | idxregs,TRUE); } c = cat(c,c2); base = findreg(idxregs); index = findreg(idxregs2); pcs->Irm = modregrm(2,0,4); pcs->Isib = modregrm(ss,index & 7,base & 7); if (index & 8) pcs->Irex |= REX_X; if (base & 8) pcs->Irex |= REX_B; if (e1free) freenode(e1); goto Lptr; } /* give up and replace *e1 with * MOV idxreg,e * EA = 0[idxreg] * pinholeopt() will usually correct the 0, we need it in case * we have a pointer to a long and need an offset to the second * word. */ assert(e1free); c = scodelem(e1,&idxregs,keepmsk,TRUE); /* load index register */ setaddrmode(pcs, idxregs); Lptr: if (config.flags3 & CFG3ptrchk) cod3_ptrchk(&c,pcs,keepmsk); // validate pointer code break; case FLdatseg: assert(0); #if 0 pcs->Irm = modregrm(0,0,BPRM); pcs->IEVpointer1 = e->EVpointer; break; #endif case FLfltreg: reflocal = TRUE; pcs->Irm = modregrm(2,0,BPRM); pcs->IEV1.Vint = 0; break; case FLreg: goto L2; case FLpara: refparam = TRUE; pcs->Irm = modregrm(2,0,BPRM); goto L2; case FLauto: if (s->Sclass == SCfastpar && regcon.params & mask[s->Spreg]) { if (keepmsk & RMload) { if (sz == REGSIZE) // could this be (sz <= REGSIZE) ? 
{ pcs->Irm = modregrm(3,0,s->Spreg & 7); if (s->Spreg & 8) pcs->Irex |= REX_B; regcon.used |= mask[s->Spreg]; break; } } else regcon.params &= ~mask[s->Spreg]; } case FLtmp: case FLbprel: reflocal = TRUE; pcs->Irm = modregrm(2,0,BPRM); goto L2; case FLextern: if (s->Sident[0] == '_' && memcmp(s->Sident + 1,"tls_array",10) == 0) { #if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS // Rewrite as GS:[0000], or FS:[0000] for 64 bit if (I64) { pcs->Irm = modregrm(0, 0, 4); pcs->Isib = modregrm(0, 4, 5); // don't use [RIP] addressing pcs->IFL1 = FLconst; pcs->IEV1.Vuns = 0; pcs->Iflags = CFfs; pcs->Irex |= REX_W; } else { pcs->Irm = modregrm(0, 0, BPRM); pcs->IFL1 = FLconst; pcs->IEV1.Vuns = 0; pcs->Iflags = CFgs; } break; #else pcs->Iflags |= CFfs; // add FS: override #endif } #if TARGET_SEGMENTED if (s->ty() & mTYcs && LARGECODE) goto Lfardata; #endif goto L3; case FLdata: case FLudata: #if TARGET_SEGMENTED case FLcsdata: #endif #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS case FLgot: case FLgotoff: case FLtlsdata: #endif L3: pcs->Irm = modregrm(0,0,BPRM); L2: if (fl == FLreg) { #ifdef DEBUG if (!(s->Sregm & regcon.mvar)) symbol_print(s); #endif assert(s->Sregm & regcon.mvar); /* Attempting to paint a float as an integer or an integer as a float * will cause serious problems since the EA is loaded separatedly from * the opcode. The only way to deal with this is to prevent enregistering * such variables. 
*/ if (tyxmmreg(ty) && !(s->Sregm & XMMREGS) || !tyxmmreg(ty) && (s->Sregm & XMMREGS)) cgreg_unregister(s->Sregm); if ( s->Sclass == SCregpar || s->Sclass == SCparameter) { refparam = TRUE; reflocal = TRUE; // kludge to set up prolog } pcs->Irm = modregrm(3,0,s->Sreglsw & 7); if (s->Sreglsw & 8) pcs->Irex |= REX_B; if (e->EV.sp.Voffset == 1 && sz == 1) { assert(s->Sregm & BYTEREGS); assert(s->Sreglsw < 4); pcs->Irm |= 4; // use 2nd byte of register } else { assert(!e->EV.sp.Voffset); if (I64 && sz == 1 && s->Sreglsw >= 4) pcs->Irex |= REX; } } #if TARGET_SEGMENTED else if (s->ty() & mTYcs && !(fl == FLextern && LARGECODE)) { pcs->Iflags |= CFcs | CFoff; } #endif #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS if (I64 && config.flags3 & CFG3pic && (fl == FLtlsdata || s->ty() & mTYthread)) { pcs->Iflags |= CFopsize; pcs->Irex = 0x48; } #endif pcs->IEVsym1 = s; pcs->IEVoffset1 = e->EV.sp.Voffset; if (sz == 1) { /* Don't use SI or DI for this variable */ s->Sflags |= GTbyte; if (e->EV.sp.Voffset > 1) s->Sflags &= ~GTregcand; } else if (e->EV.sp.Voffset) s->Sflags &= ~GTregcand; if (!(keepmsk & RMstore)) // if not store only { s->Sflags |= SFLread; // assume we are doing a read } break; case FLpseudo: #if MARS assert(0); #else { unsigned u = s->Sreglsw; c = getregs(pseudomask[u]); pcs->Irm = modregrm(3,0,pseudoreg[u] & 7); break; } #endif #if TARGET_SEGMENTED case FLfardata: #endif case FLfunc: /* reading from code seg */ if (config.exe & EX_flat) goto L3; Lfardata: { regm_t regm = ALLREGS & ~keepmsk; // need scratch register code *c1 = allocreg(®m,®,TYint); /* MOV mreg,seg of symbol */ c = gencs(CNIL,0xB8 + reg,0,FLextern,s); c->Iflags = CFseg; c = gen2(c,0x8E,modregrmx(3,0,reg)); /* MOV ES,reg */ c = cat3(c1,getregs(mES),c); pcs->Iflags |= CFes | CFoff; /* ES segment override */ goto L3; } case FLstack: assert(!I16); pcs->Irm = modregrm(2,0,4); pcs->Isib = modregrm(0,4,SP); pcs->IEVsym1 = s; pcs->IEVoffset1 = e->EV.sp.Voffset; 
break; default: #ifdef DEBUG WRFL((enum FL)fl); symbol_print(s); #endif assert(0); } return c; } /***************************** * Given an opcode and EA in cs, generate code * for each floating register in turn. * Input: * tym either TYdouble or TYfloat */ code *fltregs(code *pcs,tym_t tym) { code *c; assert(!I64); tym = tybasic(tym); if (I32) { c = getregs((tym == TYfloat) ? mAX : mAX | mDX); if (tym != TYfloat) { pcs->IEVoffset1 += REGSIZE; NEWREG(pcs->Irm,DX); c = gen(c,pcs); pcs->IEVoffset1 -= REGSIZE; } NEWREG(pcs->Irm,AX); c = gen(c,pcs); } else { c = getregs((tym == TYfloat) ? FLOATREGS_16 : DOUBLEREGS_16); pcs->IEVoffset1 += (tym == TYfloat) ? 2 : 6; if (tym == TYfloat) NEWREG(pcs->Irm,DX); else NEWREG(pcs->Irm,AX); c = gen(c,pcs); pcs->IEVoffset1 -= 2; if (tym == TYfloat) NEWREG(pcs->Irm,AX); else NEWREG(pcs->Irm,BX); gen(c,pcs); if (tym != TYfloat) { pcs->IEVoffset1 -= 2; NEWREG(pcs->Irm,CX); gen(c,pcs); pcs->IEVoffset1 -= 2; /* note that exit is with Voffset unaltered */ NEWREG(pcs->Irm,DX); gen(c,pcs); } } return c; } /***************************** * Given a result in registers, test it for TRUE or FALSE. * Will fail if TYfptr and the reg is ES! * If saveflag is TRUE, preserve the contents of the * registers. 
*/

// Parameters:
//      regm            mask of the register(s) currently holding the value;
//                      must intersect XMMREGS | mBP | ALLREGS
//      tym             type of the value (only tybasic(tym) is used)
//      saveflag        if non-zero, destructive steps are done on a scratch
//                      register instead of on the value's own register
// Returns:
//      code list that sets the flags according to the value.
//
// Cases, in the order they are tried below:
//   1. size 1:                 TEST regL,regL
//   2. value in an XMM reg:    compare against a zeroed scratch XMM register
//   3. size <= REGSIZE:        TEST reg,reg (TYfloat outside 16 bit code is
//                              instead shifted left by one)
//   4. multi-register values:  OR the pieces together, in a scratch register
//                              when saveflag is set or the type is a far/v
//                              pointer (tyfv), else into the MSW register.
//      CFpsw is or'ed onto the result of case 4 only.

code *tstresult(regm_t regm,tym_t tym,unsigned saveflag)
{
    unsigned scrreg;                      /* scratch register             */
    regm_t scrregm;

#ifdef DEBUG
    //if (!(regm & (mBP | ALLREGS)))
    //  printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
    //      regm_str(regm),tym,saveflag);
#endif
    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    code *ce = CNIL;
    unsigned reg = findreg(regm);
    unsigned sz = tysize[tym];
    if (sz == 1)
    {   assert(regm & BYTEREGS);
        ce = genregs(ce,0x84,reg,reg);        // TEST regL,regL
        // In 64 bit code a REX prefix is added for register numbers >= 4
        if (I64 && reg >= 4)
            code_orrex(ce, REX);
        return ce;
    }
    if (regm & XMMREGS)
    {
        // Pick a scratch XMM register that is not part of regm
        unsigned xreg;
        regm_t xregs = XMMREGS & ~regm;
        ce = allocreg(&xregs, &xreg, TYdouble);
        unsigned op = 0;
        // 0x66 prefix is added for the double flavoured types
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        ce = gen2(ce,op | 0x0F57,modregrm(3,xreg-XMM0,xreg-XMM0));      // XORPS  xreg,xreg
        gen2(ce,op | 0x0F2E,modregrm(3,xreg-XMM0,reg-XMM0));    // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   // Complex: if the first compare already decided (JNE/JP taken)
            // skip to cnop, otherwise compare the other register of regm too
            code *cnop = gennop(CNIL);
            genjmp(ce,JNE,FLcode,(block *) cnop); // JNE     L1
            genjmp(ce,JP, FLcode,(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask[reg]);
            gen2(ce,op | 0x0F2E,modregrm(3,xreg-XMM0,reg-XMM0));        // UCOMISS xreg,reg
            ce = cat(ce, cnop);
        }
        return ce;
    }
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {   // NOTE(review): the shift presumably discards the sign bit so
                // that -0.0 tests as zero -- confirm
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              /* possible scratch regs */
                    ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */
                    ce = genmovreg(ce,scrreg,reg);  /* MOV scrreg,msreg     */
                    reg = scrreg;
                }
                ce = cat(ce,getregs(mask[reg]));
                return gen2(ce,0xD1,modregrmx(3,4,reg)); // SHL reg,1
            }
            ce = gentstreg(ce,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                ce->Iflags |= CFopsize;             /* 16 bit operands      */
            else if (sz == 8)
                code_orrex(ce, REX_W);
        }
        else
            ce = gentstreg(ce,reg);                 // TEST reg,reg
        return ce;
    }
    if (saveflag || tyfv(tym))
    {
        // Non-destructive path: combine the pieces in a scratch register
        scrregm = ALLREGS & ~regm;              /* possible scratch regs */
        ce = allocreg(&scrregm,&scrreg,TYoffset); /* allocate scratch reg */
        if (I32 || sz == REGSIZE * 2)
        {   code *c;

            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                {   c = genregs(CNIL,0x0FB7,scrreg,reg); // MOVZX scrreg,msreg
                    ce = cat(ce,c);
                }
                else
                {   ce = genmovreg(ce,scrreg,reg);      /* MOV scrreg,msreg     */
                    if (tym == TYdouble || tym == TYdouble_alias)
                        gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1     */
                }
            }
            else
            {
                ce = genmovreg(ce,scrreg,reg);  /* MOV scrreg,msreg     */
                if (tym == TYfloat)
                    gen2(ce,0xD1,modregrm(3,4,scrreg)); /* SHL scrreg,1 */
            }
            reg = findreglsw(regm);
            genorreg(ce,scrreg,reg);                    /* OR scrreg,lsreg */
        }
        else if (sz == 8)
        {   /* !I32 */
            ce = genmovreg(ce,scrreg,AX);               /* MOV scrreg,AX */
            if (tym == TYdouble || tym == TYdouble_alias)
                gen2(ce,0xD1,modregrm(3,4,scrreg));     // SHL scrreg,1
            genorreg(ce,scrreg,BX);                     /* OR scrreg,BX */
            genorreg(ce,scrreg,CX);                     /* OR scrreg,CX */
            genorreg(ce,scrreg,DX);                     /* OR scrreg,DX */
        }
        else
            assert(0);
    }
    else
    {
        // Destructive path: the MSW register (or AX) is overwritten
        if (I32 || sz == REGSIZE * 2)
        {
            /* can't test ES:LSW for 0 */
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            ce = getregs(mask[reg]);            /* we're going to trash reg */
            if (tyfloating(tym) && sz == 2 * intsize)
                ce = gen2(ce,0xD1,modregrm(3,4,reg));   // SHL reg,1
            ce = genorreg(ce,reg,findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(ce, REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            ce = getregs(mAX);                  // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                ce = gen2(ce,0xD1,modregrm(3,4,AX));       // SHL AX,1
            // NOTE(review): the results of these genorreg() calls are not
            // assigned back to ce; this relies on ce being non-NULL here -- confirm
            genorreg(ce,AX,BX);                 // OR AX,BX
            genorreg(ce,AX,CX);                 // OR AX,CX
            genorreg(ce,AX,DX);                 // OR AX,DX
        }
        else
            assert(0);
    }
    code_orflag(ce,CFpsw);
    return ce;
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
*/

// Parameters:
//      e               expression whose value was computed (its Ety selects
//                      the type; also handed to cssave())
//      retregs         mask of where the value currently is; must be non-zero
//      pretregs        in: mask of where the caller wants the value (registers
//                      and/or mPSW); out: where the value actually ended up
// Returns:
//      code list doing the move and/or the flag test; CNIL if *pretregs is 0.

code *fixresult(elem *e,regm_t retregs,regm_t *pretregs)
{ code *c,*ce;
  unsigned reg,rreg;
  regm_t forccs,forregs;
  tym_t tym;
  int sz;

  //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
  if (*pretregs == 0) return CNIL;      /* if don't want result         */
  assert(e && retregs);                 /* need something to work with  */
  forccs = *pretregs & mPSW;            // non-zero if the flags are wanted
  forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
  tym = tybasic(e->Ety);
#if TARGET_SEGMENTED
  if (tym == TYstruct)
        // Hack to support cdstreq()
        tym = (forregs & mMSW) ? TYfptr : TYnptr;
#else
  if (tym == TYstruct)
  {
        // Hack to support cdstreq()
        assert(!(forregs & mMSW));
        tym = TYnptr;
  }
#endif
  c = CNIL;
  sz = tysize[tym];
  if (sz == 1)
  {
        assert(retregs & BYTEREGS);
        unsigned reg = findreg(retregs);
        // A byte at offset 1 of a register variable is tested through the
        // high byte register (reg | 4); the flag request is then consumed.
        if (e->Eoper == OPvar &&
            e->EV.sp.Voffset == 1 &&
            e->EV.sp.Vsym->Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                c = gen2(c,0x84,modregrm(3,reg | 4,reg | 4));   // TEST regH,regH
            forccs = 0;
        }
  }
  if ((retregs & forregs) == retregs)   /* if already in right registers */
        *pretregs = retregs;
  else if (forregs)             /* if return the result in registers    */
  {
        // x87 destinations are handled entirely by fixresult87()
        if (forregs & (mST01 | mST0))
            return fixresult87(e,retregs,pretregs);
        ce = CNIL;
        unsigned opsflag = FALSE;
        if (I16 && sz == 8)
        {   // 16 bit 8-byte values move between DOUBLEREGS_16 and the stack
            if (forregs & mSTACK)
            {   assert(retregs == DOUBLEREGS_16);
                /* Push floating regs   */
                c = CNIL;
                ce = gen1(ce,0x50 + AX);
                gen1(ce,0x50 + BX);
                gen1(ce,0x50 + CX);
                gen1(ce,0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {   assert(forregs == DOUBLEREGS_16);
                /* Pop floating regs    */
                c = getregs(forregs);
                ce = gen1(ce,0x58 + DX);
                gen1(ce,0x58 + CX);
                gen1(ce,0x58 + BX);
                gen1(ce,0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16; /* for tstresult() below       */
            }
            else
                // In DEBUG builds the printf and the assert form one
                // comma expression, hence the trailing ','.
#ifdef DEBUG
                printf("retregs = x%x, forregs = x%x\n",retregs,forregs),
#endif
                assert(0);
            if (EOP(e))
                opsflag = TRUE;
        }
        else
        {
            c = allocreg(pretregs,&rreg,tym); /* allocate return regs   */
            if (retregs & XMMREGS)
            {
                // Source is an XMM register: go through the floatreg temporary
                reg = findreg(retregs & XMMREGS);
                // MOVSD floatreg, XMM?
                ce = genfltreg(ce,xmmstore(tym),reg - XMM0,0);
                if (mask[rreg] & XMMREGS)
                    // MOVSD XMM?, floatreg
                    ce = genfltreg(ce,xmmload(tym),rreg - XMM0,0);
                else
                {
                    // MOV rreg,floatreg
                    ce = genfltreg(ce,0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            // second MOV loads the MSW register from offset 4
                            rreg = findregmsw(*pretregs);
                            ce = genfltreg(ce,0x8B,rreg,4);
                        }
                        else
                            code_orrex(ce,REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // Destination is an XMM register: store the integer
                // register(s) to floatreg, then load from there
                reg = findreg(retregs & (mBP | ALLREGS));
                // MOV floatreg,reg
                ce = genfltreg(ce,0x89,reg,0);
                if (sz == 8)
                {
                    if (I32)
                    {
                        reg = findregmsw(retregs);
                        ce = genfltreg(ce,0x89,reg,4);
                    }
                    else
                        code_orrex(ce,REX_W);
                }
                // MOVSS/MOVSD XMMreg,floatreg
                ce = genfltreg(ce,xmmload(tym),rreg - XMM0,0);
            }
            else if (sz > REGSIZE)
            {
                // Register pair to register pair
                unsigned msreg = findregmsw(retregs);
                unsigned lsreg = findreglsw(retregs);
                unsigned msrreg = findregmsw(*pretregs);
                unsigned lsrreg = findreglsw(*pretregs);

                ce = genmovreg(ce,msrreg,msreg); /* MOV msrreg,msreg    */
                ce = genmovreg(ce,lsrreg,lsreg); /* MOV lsrreg,lsreg    */
            }
            else
            {
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                ce = genmovreg(ce,rreg,reg);    /* MOV rreg,reg         */
            }
        }
        c = cat(c,ce);
        cssave(e,retregs | *pretregs,opsflag);
        forregs = 0;                    /* don't care about result in reg  */
                                        /* cuz we have real result in rreg */
        retregs = *pretregs & ~mPSW;
  }
  // forregs doubles as tstresult()'s saveflag argument: it is still non-zero
  // only when the value was already in the requested registers.
  if (forccs)                           /* if return result in flags    */
        c = cat(c,tstresult(retregs,tym,forregs));
  return c;
}


/********************************
 * Generate code sequence to call C runtime library support routine.
 *      clib = CLIBxxxx
 *      keepmask = mask of registers not to destroy. Currently can
 *              handle only 1. Should use a temporary rather than
 *              push/pop for speed.
*/

int clib_inited = 0;            // != 0 if initialized

// Parameters:
//      e               expression the call is generated for (handed to fixresult())
//      clib            index of the routine, must be < CLIBMAX
//      pretregs        where the caller wants the result (see fixresult())
//      keepmask        registers to preserve across the call
// Returns:
//      code list: register saves, the call (or the inline _LMUL sequence),
//      register restores, then the fixresult() code.
//
// Two static tables are indexed by clib and are asserted to hold exactly
// CLIBMAX entries on first use:
//      lib[]   the symbol of each routine; Sregsaved is built from the mask of
//              registers the routine destroys, via Z()
//      info[]  return registers, bytes popped, flags and 8087 stack traffic

code *callclib(elem *e,unsigned clib,regm_t *pretregs,regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
  static symbol lib[] =
  {
/* Convert destroyed regs into saved regs       */
#define Z(desregs)      (~(desregs) & (mBP| mES | ALLREGS))
// NOTE(review): this condition is identical to the enclosing #if, so the
// #else branch below can never be selected -- confirm that is intended.
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
#define N(name) "_" name
#else
#define N(name) name
#endif

/* Shorthand to map onto SYMBOLY()              */
#define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),N(name),0)

    Y(0,"_LCMP__"),                     // CLIBlcmp
    Y(mAX|mCX|mDX,"_LMUL__"),           // CLIBlmul
#if 1
    Y(mAX|mBX|mCX|mDX,"_LDIV__"),       // CLIBldiv
    Y(mAX|mBX|mCX|mDX,"_LDIV__"),       // CLIBlmod
    Y(mAX|mBX|mCX|mDX,"_ULDIV__"),      // CLIBuldiv
    Y(mAX|mBX|mCX|mDX,"_ULDIV__"),      // CLIBulmod
#else
    Y(ALLREGS,"_LDIV__"),               // CLIBldiv
    Y(ALLREGS,"_LDIV__"),               // CLIBlmod
    Y(ALLREGS,"_ULDIV__"),              // CLIBuldiv
    Y(ALLREGS,"_ULDIV__"),              // CLIBulmod
#endif
#if 0
    Y(DOUBLEREGS_16,"_DNEG"),
    Y(mAX|mBX|mCX|mDX,"_DMUL"),         // CLIBdmul
    Y(mAX|mBX|mCX|mDX,"_DDIV"),         // CLIBddiv
    Y(0,"_DTST0"),                      // CLIBdtst0
    Y(0,"_DTST0EXC"),                   // CLIBdtst0exc
    Y(0,"_DCMP"),                       // CLIBdcmp
    Y(0,"_DCMPEXC"),                    // CLIBdcmpexc

    Y(mAX|mBX|mCX|mDX,"_DADD"),         // CLIBdadd
    Y(mAX|mBX|mCX|mDX,"_DSUB"),         // CLIBdsub

    Y(mAX|mBX|mCX|mDX,"_FMUL"),         // CLIBfmul
    Y(mAX|mBX|mCX|mDX,"_FDIV"),         // CLIBfdiv
    Y(0,"_FTST0"),                      // CLIBftst0
    Y(0,"_FTST0EXC"),                   // CLIBftst0exc
    Y(0,"_FCMP"),                       // CLIBfcmp
    Y(0,"_FCMPEXC"),                    // CLIBfcmpexc
    Y(FLOATREGS_32,"_FNEG"),            // CLIBfneg
    Y(mAX|mBX|mCX|mDX,"_FADD"),         // CLIBfadd
    Y(mAX|mBX|mCX|mDX,"_FSUB"),         // CLIBfsub
#endif
    Y(DOUBLEREGS_32,"_DBLLNG"),         // CLIBdbllng
    Y(DOUBLEREGS_32,"_LNGDBL"),         // CLIBlngdbl
    Y(DOUBLEREGS_32,"_DBLINT"),         // CLIBdblint
    Y(DOUBLEREGS_32,"_INTDBL"),         // CLIBintdbl
    Y(DOUBLEREGS_32,"_DBLUNS"),         // CLIBdbluns
    Y(DOUBLEREGS_32,"_UNSDBL"),         // CLIBunsdbl
    Y(mAX|mST0,"_DBLULNG"),             // CLIBdblulng
#if 0
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _ULNGDBL@    ulngdbl
#endif
    Y(DOUBLEREGS_32,"_DBLFLT"),         // CLIBdblflt
    Y(DOUBLEREGS_32,"_FLTDBL"),         // CLIBfltdbl

    Y(DOUBLEREGS_32,"_DBLLLNG"),        // CLIBdblllng
    Y(DOUBLEREGS_32,"_LLNGDBL"),        // CLIBllngdbl
    Y(DOUBLEREGS_32,"_DBLULLNG"),       // CLIBdblullng
    Y(DOUBLEREGS_32,"_ULLNGDBL"),       // CLIBullngdbl

    Y(0,"_DTST"),                       // CLIBdtst
    Y(mES|mBX,"_HTOFPTR"),              // CLIBvptrfptr
    Y(mES|mBX,"_HCTOFPTR"),             // CLIBcvptrfptr
    Y(0,"_87TOPSW"),                    // CLIB87topsw
    Y(mST0,"_FLTTO87"),                 // CLIBfltto87
    Y(mST0,"_DBLTO87"),                 // CLIBdblto87
    Y(mST0|mAX,"_DBLINT87"),            // CLIBdblint87
    Y(mST0|mAX|mDX,"_DBLLNG87"),        // CLIBdbllng87
    Y(0,"_FTST"),                       // CLIBftst
    Y(0,"_FCOMPP"),                     // CLIBfcompp
    Y(0,"_FTEST"),                      // CLIBftest
    Y(0,"_FTEST0"),                     // CLIBftest0
    Y(mST0|mAX|mBX|mCX|mDX,"_FDIVP"),   // CLIBfdiv87

    Y(mST0|mST01,"Cmul"),               // CLIBcmul
    Y(mAX|mCX|mDX|mST0|mST01,"Cdiv"),   // CLIBcdiv
    Y(mAX|mST0|mST01,"Ccmp"),           // CLIBccmp

    Y(mST0,"_U64_LDBL"),                // CLIBu64_ldbl
#if ELFOBJ || MACHOBJ
    Y(mST0|mAX|mDX,"_LDBLULLNG"),       // CLIBld_u64
#else
    Y(mST0|mAX|mDX,"__LDBLULLNG"),      // CLIBld_u64
#endif
  };
#else
  static symbol lib[CLIBMAX] =
  {
/* Convert destroyed regs into saved regs       */
#define Z(desregs)      (~(desregs) & (mBP| mES | ALLREGS))

/* Shorthand to map onto SYMBOLY()              */
#define Y(desregs,name) SYMBOLY(FLfunc,Z(desregs),name,0)

    Y(0,"_LCMP@"),
    Y(mAX|mCX|mDX,"_LMUL@"),
    Y(ALLREGS,"_LDIV@"),
    Y(ALLREGS,"_LDIV@"),
    Y(ALLREGS,"_ULDIV@"),
    Y(ALLREGS,"_ULDIV@"),
    Y(mAX|mBX|mCX|mDX,"_DMUL@"),
    Y(mAX|mBX|mCX|mDX,"_DDIV@"),
    Y(0,"_DTST0@"),
    Y(0,"_DTST0EXC@"),
    Y(0,"_DCMP@"),
    Y(0,"_DCMPEXC@"),

    /* _DNEG@ only really destroys EDX, but then EAX would hold */
    /* 2 values, and we can't handle that.                      */

    /* _DNEG@ only really destroys AX, but then BX,CX,DX would hold     */
    /* 2 values, and we can't handle that.                              */

    Y(DOUBLEREGS_16,"_DNEG@"),
    Y(mAX|mBX|mCX|mDX,"_DADD@"),
    Y(mAX|mBX|mCX|mDX,"_DSUB@"),

    Y(mAX|mBX|mCX|mDX,"_FMUL@"),
    Y(mAX|mBX|mCX|mDX,"_FDIV@"),
    Y(0,"_FTST0@"),
    Y(0,"_FTST0EXC@"),
    Y(0,"_FCMP@"),
    Y(0,"_FCMPEXC@"),
    Y(FLOATREGS_16,"_FNEG@"),
    Y(mAX|mBX|mCX|mDX,"_FADD@"),
    Y(mAX|mBX|mCX|mDX,"_FSUB@"),
    Y(DOUBLEREGS_16,"_DBLLNG@"),
    Y(DOUBLEREGS_16,"_LNGDBL@"),
    Y(DOUBLEREGS_16,"_DBLINT@"),
    Y(DOUBLEREGS_16,"_INTDBL@"),
    Y(DOUBLEREGS_16,"_DBLUNS@"),
    Y(DOUBLEREGS_16,"_UNSDBL@"),
    Y(DOUBLEREGS_16,"_DBLULNG@"),
    Y(DOUBLEREGS_16,"_ULNGDBL@"),
    Y(DOUBLEREGS_16,"_DBLFLT@"),
    Y(ALLREGS,"_FLTDBL@"),

    Y(DOUBLEREGS_16,"_DBLLLNG@"),
    Y(DOUBLEREGS_16,"_LLNGDBL@"),
#if 0
    Y(DOUBLEREGS_16,"__DBLULLNG"),
#else
    Y(DOUBLEREGS_16,"_DBLULLNG@"),
#endif
    Y(DOUBLEREGS_16,"_ULLNGDBL@"),

    Y(0,"_DTST@"),
    Y(mES|mBX,"_HTOFPTR@"),             // CLIBvptrfptr
    Y(mES|mBX,"_HCTOFPTR@"),            // CLIBcvptrfptr
    Y(0,"_87TOPSW@"),                   // CLIB87topsw
    Y(mST0,"_FLTTO87@"),                // CLIBfltto87
    Y(mST0,"_DBLTO87@"),                // CLIBdblto87
    Y(mST0|mAX,"_DBLINT87@"),           // CLIBdblint87
    Y(mST0|mAX|mDX,"_DBLLNG87@"),       // CLIBdbllng87
    Y(0,"_FTST@"),
    Y(0,"_FCOMPP@"),                    // CLIBfcompp
    Y(0,"_FTEST@"),                     // CLIBftest
    Y(0,"_FTEST0@"),                    // CLIBftest0
    Y(mST0|mAX|mBX|mCX|mDX,"_FDIVP"),   // CLIBfdiv87

    // NOTE: desregs is wrong for 16 bit code, mBX should be included
    Y(mST0|mST01,"_Cmul"),              // CLIBcmul
    Y(mAX|mCX|mDX|mST0|mST01,"_Cdiv"),  // CLIBcdiv
    Y(mAX|mST0|mST01,"_Ccmp"),          // CLIBccmp

    Y(mST0,"_U64_LDBL"),                // CLIBu64_ldbl
    Y(mST0|mAX|mDX,"__LDBLULLNG"),      // CLIBld_u64
  };
#endif

  // Per-routine call information, in the same order as lib[]
  static struct
  {
    regm_t retregs16;   /* registers that 16 bit result is returned in  */
    regm_t retregs32;   /* registers that 32 bit result is returned in  */
    char pop;           /* # of bytes popped off of stack upon return   */
    char flags;
        #define INF32           1       // if 32 bit only
        #define INFfloat        2       // if this is floating point
        #define INFwkdone       4       // if weak extern is already done
        #define INF64           8       // if 64 bit only
    char push87;                        // # of pushes onto the 8087 stack
    char pop87;                         // # of pops off of the 8087 stack
  } info[CLIBMAX] =
  {
    {0,0,0,0},                          /* _LCMP@       lcmp    */
    {mDX|mAX,mDX|mAX,0,0},              // _LMUL@       lmul
    {mDX|mAX,mDX|mAX,0,0},              // _LDIV@       ldiv
    {mCX|mBX,mCX|mBX,0,0},              /* _LDIV@       lmod    */
    {mDX|mAX,mDX|mAX,0,0},              /* _ULDIV@      uldiv   */
    {mCX|mBX,mCX|mBX,0,0},              /* _ULDIV@      ulmod   */
#if TARGET_WINDOS
    {DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1},       // _DMUL@       dmul
    {DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1},       // _DDIV@       ddiv
    {0,0,0,2},                                          // _DTST0@
    {0,0,0,2},                                          // _DTST0EXC@
    {0,0,8,INFfloat,1,1},                               // _DCMP@       dcmp
    {0,0,8,INFfloat,1,1},                               // _DCMPEXC@    dcmp
    {DOUBLEREGS_16,DOUBLEREGS_32,0,2},                  // _DNEG@       dneg
    {DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1},       // _DADD@       dadd
    {DOUBLEREGS_16,DOUBLEREGS_32,8,INFfloat,1,1},       // _DSUB@       dsub

    {FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1},         // _FMUL@       fmul
    {FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1},         // _FDIV@       fdiv
    {0,0,0,2},                                          // _FTST0@
    {0,0,0,2},                                          // _FTST0EXC@
    {0,0,0,INFfloat,1,1},                               // _FCMP@       fcmp
    {0,0,0,INFfloat,1,1},                               // _FCMPEXC@    fcmp
    {FLOATREGS_16,FLOATREGS_32,0,2},                    // _FNEG@       fneg
    {FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1},         // _FADD@       fadd
    {FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1},         // _FSUB@       fsub
#endif

    {mDX|mAX,mAX,0,INFfloat,1,1},                       // _DBLLNG@     dbllng
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _LNGDBL@     lngdbl
    {mAX,mAX,0,INFfloat,1,1},                           // _DBLINT@     dblint
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _INTDBL@     intdbl
    {mAX,mAX,0,INFfloat,1,1},                           // _DBLUNS@     dbluns
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _UNSDBL@     unsdbl
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    {mDX|mAX,mAX,0,INF32|INFfloat,0,1},                 // _DBLULNG@    dblulng
#else
    {mDX|mAX,mAX,0,INFfloat,1,1},                       // _DBLULNG@    dblulng
#endif
#if TARGET_WINDOS
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _ULNGDBL@    ulngdbl
#endif
    {FLOATREGS_16,FLOATREGS_32,0,INFfloat,1,1},         // _DBLFLT@     dblflt
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _FLTDBL@     fltdbl

    {DOUBLEREGS_16,mDX|mAX,0,INFfloat,1,1},             // _DBLLLNG@
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _LLNGDBL@
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    {DOUBLEREGS_16,mDX|mAX,0,INFfloat,2,2},             // _DBLULLNG@
#else
    {DOUBLEREGS_16,mDX|mAX,0,INFfloat,1,1},             // _DBLULLNG@
#endif
    {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1},       // _ULLNGDBL@

    {0,0,0,2},                          // _DTST@       dtst
    {mES|mBX,mES|mBX,0,0},              // _HTOFPTR@    vptrfptr
    {mES|mBX,mES|mBX,0,0},              // _HCTOFPTR@   cvptrfptr
    {0,0,0,2},                          // _87TOPSW@    87topsw
    {mST0,mST0,0,INFfloat,1,0},         // _FLTTO87@    fltto87
    {mST0,mST0,0,INFfloat,1,0},         // _DBLTO87@    dblto87
    {mAX,mAX,0,2},                      // _DBLINT87@   dblint87
    {mDX|mAX,mAX,0,2},                  // _DBLLNG87@   dbllng87
    {0,0,0,2},                          // _FTST@
    {mPSW,mPSW,0,INFfloat,0,2},         // _FCOMPP@
    {mPSW,mPSW,0,2},                    // _FTEST@
    {mPSW,mPSW,0,2},                    // _FTEST0@
    {mST0,mST0,0,INFfloat,1,1},         // _FDIV@

    {mST01,mST01,0,INF32|INFfloat,3,5}, // _Cmul
    {mST01,mST01,0,INF32|INFfloat,0,2}, // _Cdiv
    {mPSW, mPSW, 0,INF32|INFfloat,0,4}, // _Ccmp

    {mST0,mST0,0,INF32|INF64|INFfloat,2,1},     // _U64_LDBL
    {0,mDX|mAX,0,INF32|INF64|INFfloat,1,2},     // __LDBLULLNG
  };

  // One-time setup of the tables (guarded by the global clib_inited)
  if (!clib_inited)                             /* if not initialized   */
  {
        assert(sizeof(lib) / sizeof(lib[0]) == CLIBMAX);
        assert(sizeof(info) / sizeof(info[0]) == CLIBMAX);
        for (int i = 0; i < CLIBMAX; i++)
        {   lib[i].Stype = tsclib;
#if MARS
            lib[i].Sxtrnnum = 0;
            lib[i].Stypidx = 0;
#endif
        }

        if (!I16)
        {   /* Adjust table for 386     */
            lib[CLIBdbllng].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBlngdbl].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBdblint].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBintdbl].Sregsaved  = Z(DOUBLEREGS_32);
#if TARGET_WINDOS
            lib[CLIBfneg].Sregsaved    = Z(FLOATREGS_32);
            lib[CLIBdneg].Sregsaved    = Z(DOUBLEREGS_32);
            lib[CLIBdbluns].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBunsdbl].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBdblulng].Sregsaved = Z(DOUBLEREGS_32);
            lib[CLIBulngdbl].Sregsaved = Z(DOUBLEREGS_32);
#endif
            lib[CLIBdblflt].Sregsaved  = Z(DOUBLEREGS_32);
            lib[CLIBfltdbl].Sregsaved  = Z(DOUBLEREGS_32);

            lib[CLIBdblllng].Sregsaved = Z(DOUBLEREGS_32);
            lib[CLIBllngdbl].Sregsaved = Z(DOUBLEREGS_32);
            lib[CLIBdblullng].Sregsaved = Z(DOUBLEREGS_32);
            lib[CLIBullngdbl].Sregsaved = Z(DOUBLEREGS_32);

            if (I64)
            {
                // In 64 bit code these two return their result in AX only
                info[CLIBullngdbl].retregs32 = mAX;
                info[CLIBdblullng].retregs32 = mAX;
            }
        }
        clib_inited++;
  }
#undef Z

  assert(clib < CLIBMAX);
  symbol *s = &lib[clib];
  if (I16)
        assert(!(info[clib].flags & (INF32 | INF64)));
  code *cpop = CNIL;
  code *c = getregs((~s->Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed
  // Only registers the routine actually destroys need saving
  keepmask &= ~s->Sregsaved;
  int npushed = numbitsset(keepmask);
  gensaverestore2(keepmask, &c, &cpop);
#if 0
  while (keepmask)
  {     unsigned keepreg;

        if (keepmask & (mBP|ALLREGS))
        {       keepreg = findreg(keepmask & (mBP|ALLREGS));
                c = gen1(c,0x50 + keepreg);             /* PUSH keepreg */
                cpop = cat(gen1(CNIL,0x58 + keepreg),cpop);     // POP keepreg
                keepmask &= ~mask[keepreg];
                npushed++;
        }
        if (keepmask & mES)
        {       c = gen1(c,0x06);                       /* PUSH ES      */
                cpop = cat(gen1(CNIL,0x07),cpop);       /* POP ES       */
                keepmask &= ~mES;
                npushed++;
        }
  }
#endif

    // Account for the routine's 8087 stack traffic as listed in info[]
    c = cat(c, save87regs(info[clib].push87));
    for (int i = 0; i < info[clib].push87; i++)
        c = cat(c, push87());

    for (int i = 0; i < info[clib].pop87; i++)
        pop87();

  // For CLIBlmul on a 386 or later target, when not I32, the multiply is
  // emitted inline instead of calling the library routine.
  if (config.target_cpu >= TARGET_80386 && clib == CLIBlmul && !I32)
  {     static char lmul[] = {
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        };

        c = genasm(c,lmul,sizeof(lmul));
  }
  else
  {     makeitextern(s);
        int nalign = 0;
        if (STACKALIGN == 16)
        {   // Align the stack (assume no args on stack)
            int npush = npushed * REGSIZE + stackpush;
            if (npush & (STACKALIGN - 1))
            {   nalign = STACKALIGN - (npush & (STACKALIGN - 1));
                c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
                if (I64)
                    code_orrex(c, REX_W);
            }
        }
        c = gencs(c,(LARGECODE) ? 0x9A : 0xE8,0,FLfunc,s);      // CALL s
        if (nalign)
        {   c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
            if (I64)
                code_orrex(c, REX_W);
        }
        calledafunc = 1;

        // Done at most once per routine; INFwkdone records that it happened
        if (I16 &&                                   // bug in Optlink for weak references
            config.flags3 & CFG3wkfloat &&
            (info[clib].flags & (INFfloat | INFwkdone)) == INFfloat )
        {   info[clib].flags |= INFwkdone;
            makeitextern(rtlsym[RTLSYM_INTONLY]);
            obj_wkext(s,rtlsym[RTLSYM_INTONLY]);
        }
    }
    if (I16)
        stackpush -= info[clib].pop;
    regm_t retregs = I16 ? info[clib].retregs16 : info[clib].retregs32;
    return cat(cat(c,cpop),fixresult(e,retregs,pretregs));
}


/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
 *
 * Parameter:
 *      e         the parameter expression
 *      reg       register the parameter is passed in, as assigned by the
 *                caller (cdfunc() stores -1 for "on the stack")
 *      numalign  alignment bytes associated with the parameter (set by cdfunc())
 *
 * fillParameters() walks an OPparam tree, E1 before E2, and stores every
 * non-OPparam node into parameters[*pi], incrementing *pi each time.  Only
 * the .e field is written.  The OPparam nodes themselves are freed.
 * The caller must supply an array large enough for all leaves.
 */
struct Parameter { elem *e; int reg; unsigned numalign; };

void fillParameters(elem *e, Parameter *parameters, int *pi)
{
    if (e->Eoper == OPparam)
    {
        fillParameters(e->E1, parameters, pi);
        fillParameters(e->E2, parameters, pi);
        freenode(e);
    }
    else
    {
        parameters[*pi].e = e;
        (*pi)++;
    }
}


/*******************************
 * Generate code sequence for function call.
*/

// Parameters:
//      e               the call expression; e->E1 is the function, and when
//                      e is a binary operator e->E2 holds the argument(s)
//      pretregs        where the caller wants the result (passed to funccall())
// Returns:
//      code list that evaluates the arguments and then performs the call
//      via funccall().
//
// Argument handling differs per target:
//   I16   everything is pushed by params() with a stack alignment of 2
//   I32   everything is pushed, except that TYmfunc/TYjfunc may pass the
//         last parameter in ECX/EAX
//   I64   parameters are assigned to integer and XMM registers first, the
//         remainder is pushed
// numpara counts the bytes pushed for parameters and numalign the padding
// inserted up front so the stack is 16-byte aligned when STACKALIGN == 16.

code *cdfunc(elem *e,regm_t *pretregs)
{   unsigned numpara = 0;
    unsigned stackpushsave;
    unsigned preg;
    regm_t keepmsk;
    unsigned numalign = 0;
    code *c;

    //printf("cdfunc()\n"); elem_print(e);
    assert(e);
    stackpushsave = stackpush;            /* so we can compute # of parameters */
    cgstate.stackclean++;
    c = CNIL;
    keepmsk = 0;
    if (OTbinary(e->Eoper))               // if parameters
    {
        if (I16)
        {
            c = cat(c, params(e->E2,2));   // push parameters
        }
        else if (I32)
        {
            unsigned stackalign = REGSIZE;
            tym_t tyf = tybasic(e->E1->Ety);

            // First compute numpara, the total bytes pushed on the stack
            switch (tyf)
            {
#if TARGET_SEGMENTED
                case TYf16func:
                    stackalign = 2;
                    goto Ldefault;
#endif
                case TYmfunc:
                case TYjfunc:
                    // last parameter goes into register
                    elem *ep;
                    for (ep = e->E2; ep->Eoper == OPparam; ep = ep->E2)
                    {
                        numpara += paramsize(ep->E1,stackalign);
                    }
                    // ep is now the last parameter; it is counted only if it
                    // does not qualify for being passed in a register
                    unsigned sz;
                    if (tyf == TYjfunc &&
                        // This must match type_jparam()
                        !(tyjparam(ep->Ety) ||
                          ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) &&
                           (sz = type_size(ep->ET)) <= intsize && sz != 3 && sz)
                         )
                        )
                    {
                        numpara += paramsize(ep,stackalign);
                    }
                    break;
                default:
                Ldefault:
                    numpara += paramsize(e->E2,stackalign);
                    break;
            }
            assert((numpara & (REGSIZE - 1)) == 0);
            assert((stackpush  & (REGSIZE - 1)) == 0);

            /* Special handling for call to __tls_get_addr, we must save registers
             * before evaluating the parameter, so that the parameter load and call
             * are adjacent.
             */
            if (e->E2->Eoper != OPparam && e->E1->Eoper == OPvar)
            {   symbol *s = e->E1->EV.sp.Vsym;
                if (s == tls_get_addr_sym)
                    c = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
            }

            /* Adjust start of the stack so after all args are pushed,
             * the stack will be aligned.
             */
            if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1))
            {
                numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
                c = genc2(c,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign
                if (I64)
                    code_orrex(c, REX_W);
                c = genadjesp(c, numalign);
                stackpush += numalign;
                // stackpushsave is moved too so the padding is not counted
                // as parameter bytes in the check after Lret
                stackpushsave += numalign;
            }

            // Second pass: actually generate the pushes / register load
            switch (tyf)
            {
#if TARGET_SEGMENTED
                case TYf16func:
                    stackalign = 2;
                    goto Ldefault2;
#endif
                case TYmfunc:   // last parameter goes into ECX
                    preg = CX;
                    goto L1;
                case TYjfunc:   // last parameter goes into EAX
                    preg = AX;
                    goto L1;

                L1:
                {   elem *ep;
                    elem *en;
                    for (ep = e->E2; ep->Eoper == OPparam; ep = en)
                    {
                        c = cat(c,params(ep->E1,stackalign));
                        en = ep->E2;    // fetch before the node is freed
                        freenode(ep);
                    }
                    unsigned sz;
                    if (tyf == TYjfunc &&
                        // This must match type_jparam()
                        !(tyjparam(ep->Ety) ||
                          ((tybasic(ep->Ety) == TYstruct || tybasic(ep->Ety) == TYarray) &&
                           (sz = type_size(ep->ET)) <= intsize && sz != 3 && sz)
                         )
                        )
                    {
                        c = cat(c,params(ep,stackalign));
                        goto Lret;
                    }
                    // preg is the register to put the parameter ep in
                    keepmsk = mask[preg];       // don't change preg when evaluating func address
                    regm_t retregs = keepmsk;
                    if (ep->Eoper == OPstrthis)
                    {   code *c2;

                        code *c1 = getregs(retregs);
                        // LEA preg,np[ESP]
                        unsigned np = stackpush - ep->EV.Vuns;   // stack delta to parameter
                        c2 = genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | modregrm(2,preg,4),FLconst,np);
                        if (I64)
                            code_orrex(c2, REX_W);
                        c = cat3(c,c1,c2);
                    }
                    else
                    {   code *cp = codelem(ep,&retregs,FALSE);
                        c = cat(c,cp);
                    }
                    goto Lret;
                }
                default:
                Ldefault2:
                    c = cat(c, params(e->E2,stackalign));   // push parameters
                    break;
            }
        }
        else
        {   assert(I64);

            // Easier to deal with parameters as an array: parameters[0..np]
            int np = el_nparams(e->E2);
            Parameter *parameters = (Parameter *)alloca(np * sizeof(Parameter));

            { int n = 0;
              fillParameters(e->E2, parameters, &n);
              assert(n == np);
            }

            /* Special handling for call to __tls_get_addr, we must save registers
             * before evaluating the parameter, so that the parameter load and call
             * are adjacent.
             */
            if (np == 1 && e->E1->Eoper == OPvar)
            {   symbol *s = e->E1->EV.sp.Vsym;
                if (s == tls_get_addr_sym)
                    c = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
            }

            unsigned stackalign = REGSIZE;

            // Figure out which parameters go in registers
            // Compute numpara, the total bytes pushed on the stack
            int r = 0;                  // number of integer arg registers used so far
            int xmmcnt = XMM0;          // next free XMM register
            // Registers are handed out walking the array from the last
            // element down to the first
            for (int i = np; --i >= 0;)
            {
                static const unsigned char argregs[6] = { DI,SI,DX,CX,R8,R9 };
                elem *ep = parameters[i].e;
                tym_t ty = ep->Ety;
                if (r < sizeof(argregs)/sizeof(argregs[0]))     // if more arg regs
                {   unsigned sz;
                    if (
                        // This must match type_jparam()
                        ty64reg(ty) ||
                        ((tybasic(ty) == TYstruct || tybasic(ty) == TYarray) &&
                         ((sz = type_size(ep->ET)) == 1 || sz == 2 || sz == 4 || sz == 8))
                        )
                    {
                        parameters[i].reg = argregs[r];
                        r++;
                        continue;       // goes in register, not stack
                    }
                }
                if (xmmcnt <= XMM7)
                {
                    if (tyxmmreg(ty))
                    {
                        parameters[i].reg = xmmcnt;
                        xmmcnt++;
                        continue;       // goes in register, not stack
                    }
                }

                // Parameter i goes on the stack
                parameters[i].reg = -1;         // -1 means no register
                unsigned alignsize = el_alignsize(ep);
                parameters[i].numalign = 0;
                if (alignsize > stackalign)
                {   // round numpara up to the parameter's alignment and
                    // remember how much padding that took
                    unsigned newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
                    parameters[i].numalign = newnumpara - numpara;
                    numpara = newnumpara;
                }
                numpara += paramsize(ep,stackalign);
            }

            assert((numpara & (REGSIZE - 1)) == 0);
            assert((stackpush  & (REGSIZE - 1)) == 0);

            /* Should consider reordering the order of evaluation of the parameters
             * so that args that go into registers are evaluated after args that get
             * pushed. We can reorder args that are constants or relconst's.
             */

            /* Adjust start of the stack so after all args are pushed,
             * the stack will be aligned.
             */
            if (STACKALIGN == 16 && (numpara + stackpush) & (STACKALIGN - 1))
            {
                numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
                c = genc2(c,0x81,(REX_W << 16) | modregrm(3,5,SP),numalign); // SUB RSP,numalign
                c = genadjesp(c, numalign);
                stackpush += numalign;
                stackpushsave += numalign;
            }

            // NOTE(review): regsaved is initialized here but never read in
            // the code visible in this function -- looks like a leftover.
            int regsaved[XMM7 + 1];
            memset(regsaved, -1, sizeof(regsaved));
            code *crest = NULL;         // code to restore registers saved below
            regm_t saved = 0;           // mask of registers saved below

            /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
             * float and double parameters go into XMM0..XMM7
             * For variadic functions, count of XMM registers used goes in AL
             */
            for (int i = 0; i < np; i++)
            {
                elem *ep = parameters[i].e;
                int preg = parameters[i].reg;
                if (preg == -1)
                {
                    /* Push parameter on stack, but keep track of registers used
                     * in the process. If they interfere with keepmsk, we'll have
                     * to save/restore them.
                     */
                    code *csave = NULL;
                    regm_t overlap = msavereg & keepmsk;
                    msavereg |= keepmsk;
                    code *cp = params(ep,stackalign);
                    regm_t tosave = keepmsk & ~msavereg;
                    msavereg &= ~keepmsk | overlap;

                    // tosave is the mask to save and restore
                    for (int j = 0; tosave; j++)
                    {   regm_t mi = mask[j];
                        assert(j <= XMM7);
                        if (mi & tosave)
                        {
                            unsigned idx;
                            csave = regsave.save(csave, j, &idx);
                            crest = regsave.restore(crest, j, idx);
                            saved |= mi;
                            keepmsk &= ~mi;             // don't need to keep these for rest of params
                            tosave &= ~mi;
                        }
                    }

                    c = cat4(c, csave, cp, NULL);

                    // Alignment for parameter comes after it got pushed
                    unsigned numalign = parameters[i].numalign;
                    if (numalign)
                    {
                        c = genc2(c,0x81,(REX_W << 16) | modregrm(3,5,SP),numalign); // SUB RSP,numalign
                        c = genadjesp(c, numalign);
                        stackpush += numalign;
                    }
                }
                else
                {
                    // Goes in register preg, not stack
                    regm_t retregs = mask[preg];
                    if (ep->Eoper == OPstrthis)
                    {
                        code *c1 = getregs(retregs);
                        // LEA preg,np[RSP]
                        unsigned np = stackpush - ep->EV.Vuns;   // stack delta to parameter
                        code *c2 = genc1(CNIL,0x8D,(REX_W << 16) |
                                             (modregrm(0,4,SP) << 8) |
                                              modregxrm(2,preg,4), FLconst,np);
                        c = cat3(c,c1,c2);
                    }
                    else
                    {   code *cp = scodelem(ep,&retregs,keepmsk,FALSE);
                        c = cat(c,cp);
                    }
                    keepmsk |= retregs;      // don't change preg when evaluating func address
                }
            }

            // Restore any register parameters we saved
            c = cat4(c, getregs(saved), crest, NULL);
            keepmsk |= saved;

            // Variadic functions store the number of XMM registers used in AL
            if (e->Eflags & EFLAGS_variadic)
            {   code *c1 = getregs(mAX);
                c1 = movregconst(c1,AX,xmmcnt - XMM0,1);
                c = cat(c, c1);
                keepmsk |= mAX;
            }
        }
    }
    else
    {
        // No parameters at all

        /* Adjust start of the stack so
         * the stack will be aligned.
         */
        if (STACKALIGN == 16 && (stackpush) & (STACKALIGN - 1))
        {
            numalign = STACKALIGN - ((stackpush) & (STACKALIGN - 1));
            c = genc2(NULL,0x81,modregrm(3,5,SP),numalign); // SUB ESP,numalign
            if (I64)
                code_orrex(c, REX_W);
            c = genadjesp(c, numalign);
            stackpush += numalign;
            stackpushsave += numalign;
        }

        // Variadic functions store the number of XMM registers used in AL
        if (I64 && e->Eflags & EFLAGS_variadic)
        {   code *c1 = getregs(mAX);
            c1 = movregconst(c1,AX,0,1);
            c = cat(c, c1);
            keepmsk |= mAX;
        }
    }
Lret:
    cgstate.stackclean--;
    if (I16)
        numpara = stackpush - stackpushsave;
    else
    {
        // Consistency check: the precomputed size must equal what was pushed
        if (numpara != stackpush - stackpushsave)
            printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
        assert(numpara == stackpush - stackpushsave);
    }
    return cat(c,funccall(e,numpara,numalign,pretregs,keepmsk));
}

/***********************************
 * Generate code for e by loading a stack address into a register.
 * The register is taken from *pretregs & allregs and is loaded with
 * np[ESP], where np = stackpush - e->EV.Vuns.
 * The size of e's type must be REGSIZE.
 * Returns:
 *      getregs code, the LEA, then the fixresult() code
 */

code *cdstrthis(elem *e,regm_t *pretregs)
{
    code *c1;
    code *c2;

    assert(tysize(e->Ety) == REGSIZE);
    unsigned reg = findreg(*pretregs & allregs);
    c1 = getregs(mask[reg]);
    // LEA reg,np[ESP]
    unsigned np = stackpush - e->EV.Vuns;        // stack delta to parameter
    c2 = genc1(CNIL,0x8D,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np);
    if (I64)
        code_orrex(c2, REX_W);
    return cat3(c1,c2,fixresult(e,mask[reg],pretregs));
}

/******************************
 * Call function. All parameters are pushed onto the stack, numpara gives
 * the size of them all.
*/

// Parameters:
//      e               the call expression; e->E1 is the function (an OPvar
//                      for a direct call, otherwise an OPind of the pointer)
//      numpara         number of bytes of parameters on the stack
//      numalign        number of alignment bytes inserted before the parameters
//      pretregs        where the caller wants the result
//      keepmsk         registers that must not be disturbed while the
//                      function address is being computed
// Returns:
//      code list: the call itself, stack cleanup, then the code moving the
//      return value to *pretregs.
// Frees e->E1 (and the pointer expression on the loadea() path).

STATIC code * funccall(elem *e,unsigned numpara,unsigned numalign,regm_t *pretregs,regm_t keepmsk)
{
    elem *e1;
    code *c,*ce,cs;
    tym_t tym1;
    char farfunc;
    regm_t retregs;
    symbol *s;

    //printf("funccall(e = %p, *pretregs = x%x, numpara = %d, numalign = %d)\n",e,*pretregs,numpara,numalign);
    calledafunc = 1;
    /* Determine if we need frame for function prolog/epilog    */
#if TARGET_WINDOS
    if (config.memmodel == Vmodel)
    {
        if (tyfarfunc(funcsym_p->ty()))
            needframe = TRUE;
    }
#endif
    e1 = e->E1;
    tym1 = tybasic(e1->Ety);
    farfunc = tyfarfunc(tym1) || tym1 == TYifunc;
    c = NULL;
    if (e1->Eoper == OPvar)
    {   /* Call function directly       */
        code *c1;

#ifdef DEBUG
        if (!tyfunc(tym1)) WRTYxx(tym1);
#endif
        assert(tyfunc(tym1));
        s = e1->EV.sp.Vsym;
        if (s->Sflags & SFLexit)
            c = NULL;
        else if (s != tls_get_addr_sym)
            c = save87();               // assume 8087 regs are all trashed
        if (s->Sflags & SFLexit)
            // Function doesn't return, so don't worry about registers
            // it may use
            c1 = NULL;
        else if (!tyfunc(s->ty()) || !(config.flags4 & CFG4optimized))
            // so we can replace func at runtime
            c1 = getregs(~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
        else
            c1 = getregs(~s->Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
        // A call to a function named "alloca" is redirected to the runtime
        // library symbol, with CX loaded via FLallocatmp first
        if (strcmp(s->Sident,"alloca") == 0)
        {
#if 1
            s = rtlsym[RTLSYM_ALLOCA];
            makeitextern(s);
            c1 = cat(c1,getregs(mCX));
            c1 = genc(c1,0x8D,modregrm(2,CX,BPRM),FLallocatmp,0,0,0);  // LEA CX,&localsize[BP]
            if (I64)
                code_orrex(c1, REX_W);
            usedalloca = 2;             // new way
#else
            usedalloca = 1;             // old way
#endif
        }
        if (sytab[s->Sclass] & SCSS)    // if function is on stack (!)
        {
            // Load the address into a register, then join the
            // call-through-register code in the other branch (LF1/LF2)
            retregs = allregs & ~keepmsk;
            s->Sflags &= ~GTregcand;
            s->Sflags |= SFLread;
            ce = cat(c1,cdrelconst(e1,&retregs));
#if TARGET_SEGMENTED
            if (farfunc)
                goto LF1;
            else
#endif
                goto LF2;
        }
        else
        {   int fl;

            fl = FLfunc;
            if (!tyfunc(s->ty()))
                fl = el_fl(e1);
            if (tym1 == TYifunc)
                c1 = gen1(c1,0x9C);                             // PUSHF
            ce = CNIL;
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
            if (s != tls_get_addr_sym)
            {
                //printf("call %s\n", s->Sident);
                ce = load_localgot();
            }
#endif
            ce = gencs(ce,farfunc ? 0x9A : 0xE8,0,fl,s);        // CALL extern
            // NOTE(review): if load_localgot() returned code, confirm that
            // ce here still refers to the CALL instruction just generated.
            ce->Iflags |= farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff);
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
            if (s == tls_get_addr_sym)
            {
                if (I32)
                {
                    /* Append a NOP so GNU linker has patch room
                     */
                    ce = gen1(ce, 0x90);            // NOP
                    code_orflag(ce, CFvolatile);    // don't schedule it
                }
                else
                {   /* Prepend 66 66 48 so GNU linker has patch room
                     */
                    assert(I64);
                    ce->Irex = REX | REX_W;
                    ce = cat(gen1(CNIL, 0x66), ce);
                    ce = cat(gen1(CNIL, 0x66), ce);
                }
            }
#endif
        }
        ce = cat(c1,ce);
  }
  else
  {     /* Call function via pointer    */
        elem *e11;
        tym_t e11ty;

#ifdef DEBUG
        if (e1->Eoper != OPind
                ) { WRFL((enum FL)el_fl(e1)); WROP(e1->Eoper); }
#endif
        c = save87();                   // assume 8087 regs are all trashed
        assert(e1->Eoper == OPind);
        e11 = e1->E1;
        e11ty = tybasic(e11->Ety);
#if TARGET_SEGMENTED
        assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
#else
        assert(!I16 || (e11ty == TYnptr));
#endif

        /* if we can't use loadea()     */
        if ((EOP(e11) || e11->Eoper == OPconst) &&
            (e11->Eoper != OPind || e11->Ecount))
        {
            unsigned reg;

            retregs = allregs & ~keepmsk;
            cgstate.stackclean++;
            ce = scodelem(e11,&retregs,keepmsk,TRUE);
            cgstate.stackclean--;
            /* Kill registers destroyed by an arbitrary function call */
            ce = cat(ce,getregs((mBP | ALLREGS | mES | XMMREGS) & ~fregsaved));
#if TARGET_SEGMENTED
            if (e11ty == TYfptr)
            {   unsigned lsreg;
             LF1:       // also entered by goto from the direct-call branch
                reg = findregmsw(retregs);
                lsreg = findreglsw(retregs);
                floatreg = TRUE;                /* use float register   */
                reflocal = TRUE;
                ce = genc1(ce,0x89,             /* MOV floatreg+2,reg   */
                        modregrm(2,reg,BPRM),FLfltreg,REGSIZE);
                genc1(ce,0x89,                  /* MOV floatreg,lsreg   */
                        modregrm(2,lsreg,BPRM),FLfltreg,0);
                if (tym1 == TYifunc)
                    gen1(ce,0x9C);              // PUSHF
                genc1(ce,0xFF,                  /* CALL [floatreg]      */
                        modregrm(2,3,BPRM),FLfltreg,0);
            }
            else
#endif
            {
             LF2:       // also entered by goto from the direct-call branch
                reg = findreg(retregs);
                ce = gen2(ce,0xFF,modregrmx(3,2,reg));   /* CALL reg     */
                if (I64)
                    code_orrex(ce, REX_W);
            }
        }
        else
        {
            if (tym1 == TYifunc)
                c = gen1(c,0x9C);               // PUSHF
                                                // CALL [function]
            cs.Iflags = 0;
            cgstate.stackclean++;
            ce = loadea(e11,&cs,0xFF,farfunc ? 3 : 2,0,keepmsk,(mBP|ALLREGS|mES|XMMREGS) & ~fregsaved);
            cgstate.stackclean--;
            freenode(e11);
        }
        s = NULL;       // no symbol is known for an indirect call
  }
  c = cat(c,ce);
  freenode(e1);

  /* See if we will need the frame pointer.
     Calculate it here so we can possibly use BP to fix the stack.
   */
#if 0
  if (!needframe)
  {     SYMIDX si;

        /* If there is a register available for this basic block        */
        if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
            ;
        else
        {
            for (si = 0; si < globsym.top; si++)
            {   symbol *s = globsym.tab[si];

                if (s->Sflags & GTregcand && type_size(s->Stype) != 0)
                {
                    if (config.flags4 & CFG4optimized)
                    {   /* If symbol is live in this basic block and    */
                        /* isn't already in a register                  */
                        if (s->Srange && vec_testbit(dfoidx,s->Srange) &&
                            s->Sfl != FLreg)
                        {   /* Then symbol must be allocated on stack */
                            needframe = TRUE;
                            break;
                        }
                    }
                    else
                    {   if (mfuncreg == 0)      /* if no registers left */
                        {   needframe = TRUE;
                            break;
                        }
                    }
                }
            }
        }
  }
#endif

    retregs = regmask(e->Ety, tym1);

    // If stack needs cleanup
    if (OTbinary(e->Eoper) &&
        !typfunc(tym1) &&
      !(s && s->Sflags & SFLexit))
    {
        if (tym1 == TYhfunc)
        {   // Hidden parameter is popped off by the callee
            c = genadjesp(c, -REGSIZE);
            stackpush -= REGSIZE;
            if (numpara + numalign > REGSIZE)
                c = genstackclean(c, numpara + numalign - REGSIZE, retregs);
        }
        else
            c = genstackclean(c,numpara + numalign,retregs);
    }
    else
    {
        // No cleanup code for the parameters: only the bookkeeping is
        // adjusted (presumably the callee removes them -- confirm); the
        // alignment padding still has to be removed here.
        c = genadjesp(c,-numpara);
        stackpush -= numpara;
        if (numalign)
            c = genstackclean(c,numalign,retregs);
    }

    /* Special handling for functions which return a floating point
       value in the top of the 8087 stack.
     */

    if (retregs & mST0)
    {
        c = genadjfpu(c, 1);
        if (*pretregs)                  // if we want the result
        {   //assert(stackused == 0);
            push87();                   // one item on 8087 stack
            return cat(c,fixresult87(e,retregs,pretregs));
        }
        else
            /* Pop unused result off 8087 stack */
            c = gen2(c,0xDD,modregrm(3,3,0));           /* FPOP         */
    }
    else if (retregs & mST01)
    {
        c = genadjfpu(c, 2);
        if (*pretregs)                  // if we want the result
        {   assert(stackused == 0);
            push87();
            push87();                   // two items on 8087 stack
            return cat(c,fixresult_complex87(e,retregs,pretregs));
        }
        else
        {
            // Pop unused result off 8087 stack
            c = gen2(c,0xDD,modregrm(3,3,0));           // FPOP
            c = gen2(c,0xDD,modregrm(3,3,0));           // FPOP
        }
    }

    return cat(c,fixresult(e,retregs,pretregs));
}

/***************************
 * Determine size of everything that will be pushed.
 * Params:
 *      e          parameter expression; an OPparam chain is walked, with
 *                 each E1 subtree handled recursively
 *      stackalign alignment passed to align() for each parameter's size
 * Returns:
 *      sum over all non-OPparam leaves of align(stackalign, size of leaf).
 *      A leaf must be a scalar or a TYstruct, anything else asserts.
 */

targ_size_t paramsize(elem *e,unsigned stackalign)
{
    targ_size_t psize = 0;
    targ_size_t szb;

    while (e->Eoper == OPparam)         /* if more params               */
    {
        elem *e2 = e->E2;
        psize += paramsize(e->E1,stackalign);   // push them backwards
        e = e2;
    }
    tym_t tym = tybasic(e->Ety);
    if (tyscalar(tym))
        szb = size(tym);
    else if (tym == TYstruct)
        szb = type_size(e->ET);
    else
    {
#ifdef DEBUG
        WRTYxx(tym);
#endif
        assert(0);
    }
    psize += align(stackalign,szb);     /* align on word stack boundary */
    return psize;
}

/***************************
 * Generate code to push parameter list.
 * stackpush is incremented by stackalign for each PUSH.
 */

code *params(elem *e,unsigned stackalign)
{ code *c,*ce,cs;
  code *cp;
  unsigned reg;
  targ_size_t szb;                      // size before alignment
  targ_size_t sz;                       // size after alignment
  tym_t tym;
  regm_t retregs;
  elem *e1;
  elem *e2;
  symbol *s;
  int fl;

  //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
  cp = NULL;
  stackchanged = 1;
  assert(e);
  while (e->Eoper == OPparam)           /* if more params               */
  {
        e2 = e->E2;
        cp = cat(cp,params(e->E1,stackalign));  // push them backwards
        freenode(e);
        e = e2;
  }
  //printf("params()\n"); elem_print(e);

  tym = tybasic(e->Ety);
  if (tyfloating(tym))
        obj_fltused();

  int grex = I64 ?
REX_W << 16 : 0; /* sz = number of bytes pushed */ if (tyscalar(tym)) szb = size(tym); else if (tym == TYstruct) szb = type_size(e->ET); else { #ifdef DEBUG WRTYxx(tym); #endif assert(0); } sz = align(stackalign,szb); /* align on word stack boundary */ assert((sz & (stackalign - 1)) == 0); /* ensure that alignment worked */ assert((sz & (REGSIZE - 1)) == 0); c = CNIL; cs.Iflags = 0; cs.Irex = 0; switch (e->Eoper) { #if SCPP case OPstrctor: { e1 = e->E1; c = docommas(&e1); /* skip over any comma expressions */ c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sizeof(struct) stackpush += sz; genadjesp(c,sz); // Find OPstrthis and set it to stackpush exp2_setstrthis(e1,NULL,stackpush,NULL); retregs = 0; ce = codelem(e1,&retregs,TRUE); goto L2; } case OPstrthis: // This is the parameter for the 'this' pointer corresponding to // OPstrctor. We push a pointer to an object that was already // allocated on the stack by OPstrctor. { unsigned np; retregs = allregs; c = allocreg(&retregs,®,TYoffset); c = genregs(c,0x89,SP,reg); // MOV reg,SP if (I64) code_orrex(c, REX_W); np = stackpush - e->EV.Vuns; // stack delta to parameter c = genc2(c,0x81,grex | modregrmx(3,0,reg),np); // ADD reg,np if (sz > REGSIZE) { c = gen1(c,0x16); // PUSH SS stackpush += REGSIZE; } c = gen1(c,0x50 + (reg & 7)); // PUSH reg if (reg & 8) code_orrex(c, REX_B); stackpush += REGSIZE; genadjesp(c,sz); ce = CNIL; goto L2; } #endif case OPstrpar: { code *cc,*c1,*c2,*c3; unsigned rm; unsigned seg; // segment override prefix flags bool doneoff; unsigned pushsize = REGSIZE; unsigned op16 = 0; unsigned npushes; e1 = e->E1; if (sz == 0) { ce = docommas(&e1); /* skip over any commas */ goto L2; } if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1->Eoper == OPvar) { freenode(e); e = e1; goto L1; } cc = docommas(&e1); /* skip over any commas */ seg = 0; /* assume no seg override */ retregs = sz ? 
IDXREGS : 0; doneoff = FALSE; if (!I16 && sz & 2) // if odd number of words to push { pushsize = 2; op16 = 1; } else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) { pushsize = 4; // push DWORDs at a time op16 = 1; } npushes = sz / pushsize; switch (e1->Eoper) { case OPind: #if TARGET_SEGMENTED if (sz) { switch (tybasic(e1->E1->Ety)) { case TYfptr: case TYhptr: seg = CFes; retregs |= mES; break; case TYsptr: if (config.wflags & WFssneds) seg = CFss; break; case TYcptr: seg = CFcs; break; } } #endif c1 = codelem(e1->E1,&retregs,FALSE); freenode(e1); break; case OPvar: /* Symbol is no longer a candidate for a register */ e1->EV.sp.Vsym->Sflags &= ~GTregcand; if (!e1->Ecount && npushes > 4) { /* Kludge to point at last word in struct. */ /* Don't screw up CSEs. */ e1->EV.sp.Voffset += sz - pushsize; doneoff = TRUE; } //if (LARGEDATA) /* if default isn't DS */ { static unsigned segtocf[4] = { CFes,CFcs,CFss,0 }; unsigned s; int fl; fl = el_fl(e1); #if TARGET_SEGMENTED if (fl == FLfardata) { seg = CFes; retregs |= mES; } else #endif { s = segfl[fl]; assert(s < 4); seg = segtocf[s]; if (seg == CFss && !(config.wflags & WFssneds)) seg = 0; } } #if TARGET_SEGMENTED if (e1->Ety & mTYfar) { seg = CFes; retregs |= mES; } #endif c1 = cdrelconst(e1,&retregs); /* Reverse the effect of the previous add */ if (doneoff) e1->EV.sp.Voffset -= sz - pushsize; freenode(e1); break; case OPstreq: //case OPcond: if (!(config.exe & EX_flat)) { seg = CFes; retregs |= mES; } c1 = codelem(e1,&retregs,FALSE); break; default: #ifdef DEBUG elem_print(e1); #endif assert(0); } reg = findreglsw(retregs); rm = I16 ? 
regtorm[reg] : regtorm32[reg]; if (op16) seg |= CFopsize; // operand size if (npushes <= 4) { assert(!doneoff); for (c2 = CNIL; npushes > 1; npushes--) { c2 = genc1(c2,0xFF,buildModregrm(2,6,rm),FLconst,pushsize * (npushes - 1)); // PUSH [reg] code_orflag(c2,seg); genadjesp(c2,pushsize); } c3 = gen2(CNIL,0xFF,buildModregrm(0,6,rm)); // PUSH [reg] c3->Iflags |= seg; genadjesp(c3,pushsize); ce = cat4(cc,c1,c2,c3); } else if (sz) { int size; c2 = getregs_imm(mCX | retregs); /* MOV CX,sz/2 */ c2 = movregconst(c2,CX,npushes,0); if (!doneoff) { /* This disgusting thing should be done when */ /* reg is loaded. Too lazy to fix it now. */ /* ADD reg,sz-2 */ c2 = genc2(c2,0x81,grex | modregrmx(3,0,reg),sz-pushsize); } c3 = getregs(mCX); // the LOOP decrements it c3 = gen2(c3,0xFF,buildModregrm(0,6,rm)); // PUSH [reg] c3->Iflags |= seg | CFtarg2; genc2(c3,0x81,grex | buildModregrm(3,5,reg),pushsize); // SUB reg,2 size = ((seg & CFSEG) ? -8 : -7) - op16; if (code_next(c3)->Iop != 0x81) size++; //genc2(c3,0xE2,0,size); // LOOP .-7 or .-8 genjmp(c3,0xE2,FLcode,(block *)c3); // LOOP c3 regimmed_set(CX,0); genadjesp(c3,sz); ce = cat4(cc,c1,c2,c3); } else ce = cat(cc,c1); stackpush += sz; goto L2; } case OPind: if (!e->Ecount) /* if *e1 */ { if (sz <= REGSIZE) { // Watch out for single byte quantities being up // against the end of a segment or in memory-mapped I/O if (!(config.exe & EX_flat) && szb == 1) break; goto L1; // can handle it with loadea() } // Avoid PUSH MEM on the Pentium when optimizing for speed if (config.flags4 & CFG4speed && (config.target_cpu >= TARGET_80486 && config.target_cpu <= TARGET_PentiumMMX) && sz <= 2 * REGSIZE && !tyfloating(tym)) break; if (tym == TYldouble || tym == TYildouble || tycomplex(tym)) break; if (I32) { assert(sz == REGSIZE * 2); ce = loadea(e,&cs,0xFF,6,REGSIZE,0,0); /* PUSH EA+4 */ ce = genadjesp(ce,REGSIZE); } else { if (sz == DOUBLESIZE) { ce = loadea(e,&cs,0xFF,6,DOUBLESIZE - REGSIZE,0,0); /* PUSH EA+6 */ cs.IEVoffset1 -= REGSIZE; 
gen(ce,&cs); /* PUSH EA+4 */ ce = genadjesp(ce,REGSIZE); getlvalue_lsw(&cs); gen(ce,&cs); /* PUSH EA+2 */ } else /* TYlong */ ce = loadea(e,&cs,0xFF,6,REGSIZE,0,0); /* PUSH EA+2 */ ce = genadjesp(ce,REGSIZE); } stackpush += sz; getlvalue_lsw(&cs); gen(ce,&cs); /* PUSH EA */ ce = genadjesp(ce,REGSIZE); goto L2; } break; #if TARGET_SEGMENTED case OPnp_fp: if (!e->Ecount) /* if (far *)e1 */ { int segreg; tym_t tym1; e1 = e->E1; tym1 = tybasic(e1->Ety); /* BUG: what about pointers to functions? */ switch (tym1) { case TYnptr: segreg = 3<<3; break; case TYcptr: segreg = 1<<3; break; default: segreg = 2<<3; break; } if (I32 && stackalign == 2) c = gen1(c,0x66); /* push a word */ c = gen1(c,0x06 + segreg); /* PUSH SEGREG */ if (I32 && stackalign == 2) code_orflag(c,CFopsize); // push a word c = genadjesp(c,stackalign); stackpush += stackalign; ce = params(e1,stackalign); goto L2; } break; #endif case OPrelconst: #if TARGET_SEGMENTED /* Determine if we can just push the segment register */ /* Test size of type rather than TYfptr because of (long)(&v) */ s = e->EV.sp.Vsym; //if (sytab[s->Sclass] & SCSS && !I32) // if variable is on stack // needframe = TRUE; // then we need stack frame if (tysize[tym] == tysize[TYfptr] && (fl = s->Sfl) != FLfardata && /* not a function that CS might not be the segment of */ (!((fl == FLfunc || s->ty() & mTYcs) && (s->Sclass == SCcomdat || s->Sclass == SCextern || s->Sclass == SCinline || config.wflags & WFthunk)) || (fl == FLfunc && config.exe == EX_DOSX) ) ) { stackpush += sz; c = gen1(c,0x06 + /* PUSH SEGREG */ (((fl == FLfunc || s->ty() & mTYcs) ? 
1 : segfl[fl]) << 3)); c = genadjesp(c,REGSIZE); if (config.target_cpu >= TARGET_80286 && !e->Ecount) { ce = getoffset(e,STACK); goto L2; } else { c = cat(c,offsetinreg(e,&retregs)); unsigned reg = findreg(retregs); c = genpush(c,reg); // PUSH reg genadjesp(c,REGSIZE); } goto ret; } if (config.target_cpu >= TARGET_80286 && !e->Ecount) { stackpush += sz; if (tysize[tym] == tysize[TYfptr]) { /* PUSH SEG e */ code *c1 = gencs(CNIL,0x68,0,FLextern,s); c1->Iflags = CFseg; genadjesp(c1,REGSIZE); c = cat(c,c1); } ce = getoffset(e,STACK); goto L2; } #endif break; /* else must evaluate expression */ case OPvar: L1: if (0 && I32 && sz == 2) { /* 32 bit code, but pushing 16 bit values anyway */ ce = loadea(e,&cs,0xFF,6,0,0,0); /* PUSH EA */ // BUG: 0x66 fails with scheduler ce = cat(gen1(CNIL,0x66),ce); /* 16 bit override */ stackpush += sz; genadjesp(ce,sz); } else if (config.flags4 & CFG4speed && (config.target_cpu >= TARGET_80486 && config.target_cpu <= TARGET_PentiumMMX) && sz <= 2 * REGSIZE && !tyfloating(tym)) { // Avoid PUSH MEM on the Pentium when optimizing for speed break; } else { int regsize = REGSIZE; unsigned flag = 0; if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && !e->Ecount) { regsize = 4; flag |= CFopsize; } ce = loadea(e,&cs,0xFF,6,sz - regsize,RMload,0); // PUSH EA+sz-2 code_orflag(ce,flag); ce = genadjesp(ce,REGSIZE); stackpush += sz; while ((targ_int)(sz -= regsize) > 0) { ce = cat(ce,loadea(e,&cs,0xFF,6,sz - regsize,RMload,0)); code_orflag(ce,flag); ce = genadjesp(ce,REGSIZE); } } L2: freenode(e); c = cat(c,ce); goto ret; case OPconst: { char pushi = 0; unsigned flag = 0; int regsize = REGSIZE; targ_int value; if (tycomplex(tym)) break; if (I64 && tyfloating(tym) && sz > 4 && boolres(e)) // Can't push 64 bit non-zero args directly break; if (I32 && szb == 10) // special case for long double constants { assert(sz == 12); value = ((unsigned short *)&e->EV.Vldouble)[4]; stackpush += sz; ce = genadjesp(NULL,sz); for (int i = 2; i >= 0; i--) { if 
(reghasvalue(allregs, value, ®)) ce = gen1(ce,0x50 + reg); // PUSH reg else ce = genc2(ce,0x68,0,value); // PUSH value value = ((unsigned *)&e->EV.Vldouble)[i - 1]; } goto L2; } assert(I64 || sz <= LNGDBLSIZE); int i = sz; if (!I16 && i == 2) flag = CFopsize; if (config.target_cpu >= TARGET_80286) // && (e->Ecount == 0 || e->Ecount != e->Ecomsub)) { pushi = 1; if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) { regsize = 4; flag = CFopsize; } } else if (i == REGSIZE) break; stackpush += sz; ce = genadjesp(NULL,sz); targ_uns *pi = (targ_uns *) &e->EV.Vdouble; targ_ushort *ps = (targ_ushort *) pi; targ_ullong *pl = (targ_ullong *)pi; i /= regsize; do { if (i) /* be careful not to go negative */ i--; targ_size_t value = (regsize == 4) ? pi[i] : ps[i]; if (regsize == 8) value = pl[i]; if (pushi) { if (I64 && regsize == 8 && value != (int)value) { ce = regwithvalue(ce,allregs,value,®,64); goto Preg; // cannot push imm64 unless it is sign extended 32 bit value } if (regsize == REGSIZE && reghasvalue(allregs,value,®)) goto Preg; ce = genc2(ce,(szb == 1) ? 0x6A : 0x68,0,value); // PUSH value } else { ce = regwithvalue(ce,allregs,value,®,0); Preg: ce = genpush(ce,reg); // PUSH reg } code_orflag(ce,flag); /* operand size */ } while (i); goto L2; } default: break; } retregs = tybyte(tym) ? BYTEREGS : allregs; if (tyvector(tym)) { retregs = XMMREGS; c = cat(c,codelem(e,&retregs,FALSE)); stackpush += sz; c = genadjesp(c,sz); c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz unsigned op = xmmstore(tym); unsigned r = findreg(retregs); c = gen2sib(c,op,modregxrm(0,r - XMM0,4),modregrm(0,4,SP)); // MOV [ESP],r goto ret; } else if (tyfloating(tym)) { if (config.inline8087) { code *c1,*c2; unsigned op; unsigned r; retregs = tycomplex(tym) ? 
mST01 : mST0; c = cat(c,codelem(e,&retregs,FALSE)); stackpush += sz; c = genadjesp(c,sz); c = genc2(c,0x81,grex | modregrm(3,5,SP),sz); // SUB SP,sz switch (tym) { case TYfloat: case TYifloat: case TYcfloat: op = 0xD9; r = 3; break; case TYdouble: case TYidouble: case TYdouble_alias: case TYcdouble: op = 0xDD; r = 3; break; case TYldouble: case TYildouble: case TYcldouble: op = 0xDB; r = 7; break; default: assert(0); } if (!I16) { c1 = NULL; c2 = NULL; if (tycomplex(tym)) { // FSTP sz/2[ESP] c2 = genc1(CNIL,op,(modregrm(0,4,SP) << 8) | modregxrm(2,r,4),FLconst,sz/2); pop87(); } pop87(); c2 = gen2sib(c2,op,modregrm(0,r,4),modregrm(0,4,SP)); // FSTP [ESP] } else { retregs = IDXREGS; /* get an index reg */ c1 = allocreg(&retregs,®,TYoffset); c1 = genregs(c1,0x89,SP,reg); /* MOV reg,SP */ pop87(); c2 = gen2(CNIL,op,modregrm(0,r,regtorm[reg])); // FSTP [reg] } if (LARGEDATA) c2->Iflags |= CFss; /* want to store into stack */ genfwait(c2); // FWAIT c = cat3(c,c1,c2); goto ret; } else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) retregs = mSTACK; } #if LONGLONG else if (I16 && sz == 8) // if long long retregs = mSTACK; #endif c = cat(c,scodelem(e,&retregs,0,TRUE)); if (retregs != mSTACK) /* if stackpush not already inc'd */ stackpush += sz; if (sz <= REGSIZE) { c = genpush(c,findreg(retregs)); // PUSH reg genadjesp(c,REGSIZE); } else if (sz == REGSIZE * 2) { c = genpush(c,findregmsw(retregs)); // PUSH msreg genpush(c,findreglsw(retregs)); // PUSH lsreg genadjesp(c,sz); } ret: return cat(cp,c); } /******************************* * Get offset portion of e, and store it in an index * register. Return mask of index register in *pretregs. 
*/ code *offsetinreg( elem *e, regm_t *pretregs) { regm_t retregs; code *c; unsigned reg; retregs = mLSW; /* want only offset */ if (e->Ecount && e->Ecount != e->Ecomsub) { unsigned i; regm_t rm; rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */ for (i = 0; rm; i++) { if (mask[i] & rm && regcon.cse.value[i] == e) { reg = i; *pretregs = mask[i]; c = getregs(*pretregs); goto L3; } rm &= ~mask[i]; } } *pretregs = retregs; c = allocreg(pretregs,®,TYoffset); c = cat(c,getoffset(e,reg)); L3: cssave(e,*pretregs,FALSE); freenode(e); return c; } /****************************** * Generate code to load data into registers. */ code *loaddata(elem *e,regm_t *pretregs) { unsigned reg,nreg,op,sreg; tym_t tym; int sz; code *c,*ce,cs; regm_t flags,forregs,regm; #ifdef DEBUG if (debugw) printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs)); //elem_print(e); #endif assert(e); elem_debug(e); if (*pretregs == 0) return CNIL; tym = tybasic(e->Ety); if (tym == TYstruct) return cdrelconst(e,pretregs); if (tyfloating(tym)) { obj_fltused(); if (config.inline8087) { if (*pretregs & mST0) return load87(e,0,pretregs,NULL,-1); else if (tycomplex(tym)) return cload87(e, pretregs); } } sz = tysize[tym]; cs.Iflags = 0; cs.Irex = 0; if (*pretregs == mPSW) { regm = allregs; if (e->Eoper == OPconst) { /* TRUE: OR SP,SP (SP is never 0) */ /* FALSE: CMP SP,SP (always equal) */ c = genregs(CNIL,(boolres(e)) ? 
0x09 : 0x39,SP,SP); if (I64) code_orrex(c, REX_W); } else if (sz <= REGSIZE) { if (!I16 && (tym == TYfloat || tym == TYifloat)) { c = allocreg(®m,®,TYoffset); /* get a register */ ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data c = cat(c,ce); ce = gen2(CNIL,0xD1,modregrmx(3,4,reg)); /* SHL reg,1 */ c = cat(c,ce); } #if TARGET_OSX else if (e->Eoper == OPvar && movOnly(e)) { c = allocreg(®m,®,TYoffset); /* get a register */ ce = loadea(e,&cs,0x8B,reg,0,0,0); // MOV reg,data c = cat(c,ce); ce = fixresult(e,regm,pretregs); c = cat(c,ce); } #endif else { cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; op = (sz == 1) ? 0x80 : 0x81; c = loadea(e,&cs,op,7,0,0,0); /* CMP EA,0 */ // Convert to TEST instruction if EA is a register // (to avoid register contention on Pentium) if ((c->Iop & ~1) == 0x38 && (c->Irm & modregrm(3,0,0)) == modregrm(3,0,0) ) { c->Iop = (c->Iop & 1) | 0x84; code_newreg(c, c->Irm & 7); if (c->Irex & REX_B) //c->Irex = (c->Irex & ~REX_B) | REX_R; c->Irex |= REX_R; } } } else if (sz < 8) { c = allocreg(®m,®,TYoffset); /* get a register */ if (I32) // it's a 48 bit pointer ce = loadea(e,&cs,0x0FB7,reg,REGSIZE,0,0); /* MOVZX reg,data+4 */ else { ce = loadea(e,&cs,0x8B,reg,REGSIZE,0,0); /* MOV reg,data+2 */ if (tym == TYfloat || tym == TYifloat) // dump sign bit gen2(ce,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */ } c = cat(c,ce); ce = loadea(e,&cs,0x0B,reg,0,regm,0); /* OR reg,data */ c = cat(c,ce); } else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym))) { c = allocreg(®m,®,TYoffset); /* get a register */ int i = sz - REGSIZE; ce = loadea(e,&cs,0x8B,reg,i,0,0); /* MOV reg,data+6 */ if (tyfloating(tym)) // TYdouble or TYdouble_alias gen2(ce,0xD1,modregrm(3,4,reg)); // SHL reg,1 c = cat(c,ce); while ((i -= REGSIZE) >= 0) { code *c1 = loadea(e,&cs,0x0B,reg,i,regm,0); // OR reg,data+i if (i == 0) c1->Iflags |= CFpsw; // need the flags on last OR c = cat(c,c1); } } else if (sz == tysize[TYldouble]) // TYldouble return load87(e,0,pretregs,NULL,-1); else { 
#ifdef DEBUG elem_print(e); #endif assert(0); } return c; } /* not for flags only */ flags = *pretregs & mPSW; /* save original */ forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS); if (*pretregs & mSTACK) forregs |= DOUBLEREGS; if (e->Eoper == OPconst) { targ_size_t value = e->EV.Vint; if (sz == 8) value = e->EV.Vullong; if (sz == REGSIZE && reghasvalue(forregs,value,®)) forregs = mask[reg]; regm_t save = regcon.immed.mval; c = allocreg(&forregs,®,tym); /* allocate registers */ regcon.immed.mval = save; // KLUDGE! if (sz <= REGSIZE) { if (sz == 1) flags |= 1; else if (!I16 && sz == SHORTSIZE && !(mask[reg] & regcon.mvar) && !(config.flags4 & CFG4speed) ) flags |= 2; if (sz == 8) flags |= 64; if (reg >= XMM0) { /* This comes about because 0, 1, pi, etc., constants don't get stored * in the data segment, because they are x87 opcodes. * Not so efficient. We should at least do a PXOR for 0. */ unsigned r; targ_size_t value = e->EV.Vuns; if (sz == 8) value = e->EV.Vullong; ce = regwithvalue(CNIL,ALLREGS,value,&r,flags); flags = 0; // flags are already set ce = genfltreg(ce,0x89,r,0); // MOV floatreg,r if (sz == 8) code_orrex(ce, REX_W); assert(sz == 4 || sz == 8); // float or double unsigned op = xmmload(tym); ce = genfltreg(ce,op,reg - XMM0,0); // MOVSS/MOVSD XMMreg,floatreg } else { ce = movregconst(CNIL,reg,value,flags); flags = 0; // flags are already set } } else if (sz < 8) // far pointers, longs for 16 bit targets { targ_int msw,lsw; regm_t mswflags; msw = I32 ? 
e->EV.Vfp.Vseg : (e->EV.Vulong >> 16); lsw = e->EV.Vfp.Voff; mswflags = 0; if (forregs & mES) { ce = movregconst(CNIL,reg,msw,0); // MOV reg,segment genregs(ce,0x8E,0,reg); // MOV ES,reg msw = lsw; // MOV reg,offset } else { sreg = findreglsw(forregs); ce = movregconst(CNIL,sreg,lsw,0); reg = findregmsw(forregs); /* Decide if we need to set flags when we load msw */ if (flags && (msw && msw|lsw || !(msw|lsw))) { mswflags = mPSW; flags = 0; } } ce = movregconst(ce,reg,msw,mswflags); } else if (sz == 8) { if (I32) { targ_long *p = (targ_long *) &e->EV.Vdouble; if (reg >= XMM0) { /* This comes about because 0, 1, pi, etc., constants don't get stored * in the data segment, because they are x87 opcodes. * Not so efficient. We should at least do a PXOR for 0. */ unsigned r; regm_t rm = ALLREGS; ce = allocreg(&rm,&r,TYint); // allocate scratch register ce = movregconst(ce,r,p[0],0); ce = genfltreg(ce,0x89,r,0); // MOV floatreg,r ce = movregconst(ce,r,p[1],0); ce = genfltreg(ce,0x89,r,4); // MOV floatreg+4,r unsigned op = xmmload(tym); ce = genfltreg(ce,op,reg - XMM0,0); // MOVSS/MOVSD XMMreg,floatreg } else { ce = movregconst(CNIL,findreglsw(forregs),p[0],0); ce = movregconst(ce,findregmsw(forregs),p[1],0); } } else { targ_short *p = (targ_short *) &e->EV.Vdouble; assert(reg == AX); ce = movregconst(CNIL,AX,p[3],0); /* MOV AX,p[3] */ ce = movregconst(ce,DX,p[0],0); ce = movregconst(ce,CX,p[1],0); ce = movregconst(ce,BX,p[2],0); } } else if (I64 && sz == 16) { ce = movregconst(CNIL,findreglsw(forregs),e->EV.Vcent.lsw,0); ce = movregconst(ce,findregmsw(forregs),e->EV.Vcent.msw,0); } else assert(0); c = cat(c,ce); } else { // See if we can use register that parameter was passed in if (regcon.params && e->EV.sp.Vsym->Sclass == SCfastpar && regcon.params & mask[e->EV.sp.Vsym->Spreg] && !(e->Eoper == OPvar && e->EV.sp.Voffset > 0) && // Must be at the base of that variable sz <= REGSIZE) // make sure no 'paint' to a larger size happened { reg = e->EV.sp.Vsym->Spreg; forregs = 
mask[reg]; mfuncreg &= ~forregs; regcon.used |= forregs; return fixresult(e,forregs,pretregs); } c = allocreg(&forregs,®,tym); /* allocate registers */ if (sz == 1) { regm_t nregm; #ifdef DEBUG if (!(forregs & BYTEREGS)) { elem_print(e); printf("forregs = x%x\n",forregs); } #endif int op = 0x8A; // byte MOV #if TARGET_OSX if (movOnly(e)) op = 0x8B; #endif assert(forregs & BYTEREGS); if (!I16) c = cat(c,loadea(e,&cs,op,reg,0,0,0)); // MOV regL,data else { nregm = tyuns(tym) ? BYTEREGS : mAX; if (*pretregs & nregm) nreg = reg; /* already allocated */ else c = cat(c,allocreg(&nregm,&nreg,tym)); ce = loadea(e,&cs,op,nreg,0,0,0); /* MOV nregL,data */ c = cat(c,ce); if (reg != nreg) { genmovreg(c,reg,nreg); /* MOV reg,nreg */ cssave(e,mask[nreg],FALSE); } } } else if (forregs & XMMREGS) { // Can't load from registers directly to XMM regs //printf("test2 %s\n", e->EV.sp.Vsym->Sident); //e->EV.sp.Vsym->Sflags &= ~GTregcand; op = xmmload(tym); if (e->Eoper == OPvar) { symbol *s = e->EV.sp.Vsym; if (s->Sfl == FLreg && !(mask[s->Sreglsw] & XMMREGS)) { op = LODD; // MOVD/MOVQ /* getlvalue() will unwind this and unregister s; could use a better solution */ } } ce = loadea(e,&cs,op,reg,0,RMload,0); // MOVSS/MOVSD reg,data c = cat(c,ce); } else if (sz <= REGSIZE) { ce = loadea(e,&cs,0x8B,reg,0,RMload,0); // MOV reg,data c = cat(c,ce); } else if (sz <= 2 * REGSIZE && forregs & mES) { ce = loadea(e,&cs,0xC4,reg,0,0,mES); /* LES data */ c = cat(c,ce); } else if (sz <= 2 * REGSIZE) { if (I32 && sz == 8 && (*pretregs & (mSTACK | mPSW)) == mSTACK) { int i; assert(0); /* Note that we allocreg(DOUBLEREGS) needlessly */ stackchanged = 1; i = DOUBLESIZE - REGSIZE; do { c = cat(c,loadea(e,&cs,0xFF,6,i,0,0)); /* PUSH EA+i */ c = genadjesp(c,REGSIZE); stackpush += REGSIZE; i -= REGSIZE; } while (i >= 0); return c; } reg = findregmsw(forregs); ce = loadea(e,&cs,0x8B,reg,REGSIZE,forregs,0); /* MOV reg,data+2 */ if (I32 && sz == REGSIZE + 2) ce->Iflags |= CFopsize; /* seg is 16 bits */ c = 
cat(c,ce); reg = findreglsw(forregs); ce = loadea(e,&cs,0x8B,reg,0,forregs,0); c = cat(c,ce); } else if (sz >= 8) { code *c1,*c2,*c3; assert(!I32); if ((*pretregs & (mSTACK | mPSW)) == mSTACK) { int i; /* Note that we allocreg(DOUBLEREGS) needlessly */ stackchanged = 1; i = sz - REGSIZE; do { c = cat(c,loadea(e,&cs,0xFF,6,i,0,0)); /* PUSH EA+i */ c = genadjesp(c,REGSIZE); stackpush += REGSIZE; i -= REGSIZE; } while (i >= 0); return c; } else { assert(reg == AX); ce = loadea(e,&cs,0x8B,AX,6,0,0); /* MOV AX,data+6 */ c1 = loadea(e,&cs,0x8B,BX,4,mAX,0); /* MOV BX,data+4 */ c2 = loadea(e,&cs,0x8B,CX,2,mAX|mBX,0); /* MOV CX,data+2 */ c3 = loadea(e,&cs,0x8B,DX,0,mAX|mCX|mCX,0); /* MOV DX,data */ c = cat6(c,ce,c1,c2,c3,CNIL); } } else assert(0); } /* Flags may already be set */ *pretregs &= flags | ~mPSW; c = cat(c,fixresult(e,forregs,pretregs)); return c; } #endif // SPP