mirror of
https://github.com/xomboverlord/ldc.git
synced 2026-04-18 17:59:04 +02:00
6459 lines
207 KiB
C
6459 lines
207 KiB
C
// Copyright (C) 1984-1998 by Symantec
|
||
// Copyright (C) 2000-2011 by Digital Mars
|
||
// All Rights Reserved
|
||
// http://www.digitalmars.com
|
||
// Written by Walter Bright
|
||
/*
|
||
* This source file is made available for personal use
|
||
* only. The license is in /dmd/src/dmd/backendlicense.txt
|
||
* or /dm/src/dmd/backendlicense.txt
|
||
* For any other uses, please contact Digital Mars.
|
||
*/
|
||
|
||
#if !SPP
|
||
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
#include <stdlib.h>
|
||
#include <time.h>
|
||
#include "cc.h"
|
||
#include "el.h"
|
||
#include "code.h"
|
||
#include "oper.h"
|
||
#include "global.h"
|
||
#include "type.h"
|
||
#include "tinfo.h"
|
||
#if SCPP
|
||
#include "exh.h"
|
||
#endif
|
||
|
||
#if HYDRATE
|
||
#include "parser.h"
|
||
#endif
|
||
|
||
static char __file__[] = __FILE__; /* for tassert.h */
|
||
#include "tassert.h"
|
||
|
||
extern targ_size_t retsize;
|
||
STATIC void pinholeopt_unittest();
|
||
STATIC void do8bit (enum FL,union evc *);
|
||
STATIC void do16bit (enum FL,union evc *,int);
|
||
STATIC void do32bit (enum FL,union evc *,int,targ_size_t = 0);
|
||
STATIC void do64bit (enum FL,union evc *,int);
|
||
|
||
static int hasframe; /* !=0 if this function has a stack frame */
|
||
static targ_size_t Foff; // BP offset of floating register
|
||
static targ_size_t CSoff; // offset of common sub expressions
|
||
static targ_size_t NDPoff; // offset of saved 8087 registers
|
||
int BPoff; // offset from BP
|
||
static int EBPtoESP; // add to EBP offset to get ESP offset
|
||
static int AAoff; // offset of alloca temporary
|
||
|
||
#if ELFOBJ || MACHOBJ
|
||
#define JMPSEG CDATA
|
||
#define JMPOFF CDoffset
|
||
#else
|
||
#define JMPSEG DATA
|
||
#define JMPOFF Doffset
|
||
#endif
|
||
|
||
/*************
 * Size/attribute table for the one byte opcodes.
 * Every entry combines flag bits with the instruction size in bytes;
 * an entry of 0 marks an illegal instruction.
 *      M           there is a modregrm field (EV1 is reserved for modregrm)
 *      T           there is a second operand (EV2)
 *      E           the second operand is only 8 bits
 *      A           a short version exists for the AX reg
 *      R           a short version exists for regs
 *      bits 2..0   size of instruction (excluding optional bytes)
 * W is defined as 0, so OR-ing it into an entry does not change the entry.
 */

#define M 0x80
#define T 0x40
#define E 0x20
#define A 0x10
#define R 0x08
#define W 0

static unsigned char inssize[256] =
{
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 00 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 08 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 10 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 18 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 20 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 28 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 30 */
    M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 38 */
    1,1,1,1,                1,1,1,1,                /* 40 */
    1,1,1,1,                1,1,1,1,                /* 48 */
    1,1,1,1,                1,1,1,1,                /* 50 */
    1,1,1,1,                1,1,1,1,                /* 58 */
    1,1,M|2,M|2,            1,1,1,1,                /* 60 */
    T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,               /* 68 */
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 70 */
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 78 */
    M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3,      M|2,M|2,M|2,M|A|R|2,    /* 80 */
    M|A|2,M|A|2,M|A|2,M|A|2,        M|2,M|2,M|2,M|R|2,      /* 88 */
    1,1,1,1,                1,1,1,1,                /* 90 */
    1,1,T|5,1,              1,1,1,1,                /* 98 */
#if 0 /* cod3_set32() patches this */
    T|5,T|5,T|5,T|5,        1,1,1,1,                /* A0 */
#else
    T|3,T|3,T|3,T|3,        1,1,1,1,                /* A0 */
#endif
    T|E|2,T|3,1,1,          1,1,1,1,                /* A8 */
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* B0 */
    T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,        /* B8 */
    M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4,      /* C0 */
    T|E|4,1,T|3,1,          1,T|E|2,1,1,            /* C8 */
    M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,        /* D0 */
    /* For the floating instructions, allow room for the FWAIT */
    M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,        /* D8 */
    T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* E0 */
    T|3,T|3,T|5,T|E|2,      1,1,1,1,                /* E8 */
    1,0,1,1,                1,1,M|A|2,M|A|2,        /* F0 */
    1,1,1,1,                1,1,M|2,M|R|2           /* F8 */
};
|
||
|
||
/* Plain byte counts (no flag bits) indexed by one byte opcode;
 * 0 marks an illegal instruction.
 */
static const unsigned char inssize32[256] =
{
    2,2,2,2,        2,5,1,1,        /* 00 */
    2,2,2,2,        2,5,1,1,        /* 08 */
    2,2,2,2,        2,5,1,1,        /* 10 */
    2,2,2,2,        2,5,1,1,        /* 18 */
    2,2,2,2,        2,5,1,1,        /* 20 */
    2,2,2,2,        2,5,1,1,        /* 28 */
    2,2,2,2,        2,5,1,1,        /* 30 */
    2,2,2,2,        2,5,1,1,        /* 38 */
    1,1,1,1,        1,1,1,1,        /* 40 */
    1,1,1,1,        1,1,1,1,        /* 48 */
    1,1,1,1,        1,1,1,1,        /* 50 */
    1,1,1,1,        1,1,1,1,        /* 58 */
    1,1,2,2,        1,1,1,1,        /* 60 */
    5,6,2,3,        1,1,1,1,        /* 68 */
    2,2,2,2,        2,2,2,2,        /* 70 */
    2,2,2,2,        2,2,2,2,        /* 78 */
    3,6,3,3,        2,2,2,2,        /* 80 */
    2,2,2,2,        2,2,2,2,        /* 88 */
    1,1,1,1,        1,1,1,1,        /* 90 */
    1,1,7,1,        1,1,1,1,        /* 98 */
    5,5,5,5,        1,1,1,1,        /* A0 */
    2,5,1,1,        1,1,1,1,        /* A8 */
    2,2,2,2,        2,2,2,2,        /* B0 */
    5,5,5,5,        5,5,5,5,        /* B8 */
    3,3,3,1,        2,2,3,6,        /* C0 */
    4,1,3,1,        1,2,1,1,        /* C8 */
    2,2,2,2,        2,2,0,1,        /* D0 */
    /* For the floating instructions, don't need room for the FWAIT */
    2,2,2,2,        2,2,2,2,        /* D8 */

    2,2,2,2,        2,2,2,2,        /* E0 */
    5,5,7,2,        1,1,1,1,        /* E8 */
    1,0,1,1,        1,1,2,2,        /* F0 */
    1,1,1,1,        1,1,2,2         /* F8 */
};
|
||
|
||
/* For 2 byte opcodes starting with 0x0F */
|
||
static unsigned char inssize2[256] =
|
||
{ M|3,M|3,M|3,M|3, 2,2,2,2, // 00
|
||
2,2,M|3,2, 2,2,2,M|T|E|4, // 08
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10
|
||
M|3,2,2,2, 2,2,2,2, // 18
|
||
M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28
|
||
2,2,2,2, 2,2,2,2, // 30
|
||
M|4,2,M|T|E|5,2, 2,2,2,2, // 38
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 40
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 48
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 50
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 58
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 60
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 68
|
||
M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
|
||
2,2,2,2, M|3,M|3,M|3,M|3, // 78
|
||
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
|
||
W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 90
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 98
|
||
2,2,2,M|3, M|T|E|4,M|3,2,2, // A0
|
||
2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8
|
||
M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0
|
||
M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8
|
||
M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
|
||
2,2,2,2, 2,2,2,2, // C8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0
|
||
M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8
|
||
};
|
||
|
||
/*************************************************
|
||
* Allocate register temporaries
|
||
*/
|
||
|
||
code *REGSAVE::save(code *c, int reg, unsigned *pidx)
|
||
{
|
||
unsigned i;
|
||
if (reg >= XMM0)
|
||
{
|
||
alignment = 16;
|
||
idx = (idx + 15) & ~15;
|
||
i = idx;
|
||
idx += 16;
|
||
// MOVD idx[RBP],xmm
|
||
c = genc1(c,0xF20F11,modregxrm(2, reg - XMM0, BPRM),FLregsave,(targ_uns) i);
|
||
}
|
||
else
|
||
{
|
||
if (!alignment)
|
||
alignment = REGSIZE;
|
||
i = idx;
|
||
idx += REGSIZE;
|
||
// MOV idx[RBP],reg
|
||
c = genc1(c,0x89,modregxrm(2, reg, BPRM),FLregsave,(targ_uns) i);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
reflocal = TRUE;
|
||
if (idx > top)
|
||
top = idx; // keep high water mark
|
||
*pidx = i;
|
||
return c;
|
||
}
|
||
|
||
/*************************************************
 * Generate the load of reg from the register save slot at offset idx.
 * Input:
 *      c       code generated so far; the load is appended to it
 *      reg     register to reload; values >= XMM0 are XMM registers
 *      idx     slot offset
 * Returns:
 *      c with the load instruction appended
 */
code *REGSAVE::restore(code *c, int reg, unsigned idx)
{
    if (reg < XMM0)
    {
        // MOV reg,idx[RBP]
        c = genc1(c,0x8B,modregxrm(2, reg, BPRM),FLregsave,(targ_uns) idx);
        if (I64)
            code_orrex(c, REX_W);
        return c;
    }

    // An XMM slot can only exist after save() raised the alignment to 16
    assert(alignment == 16);
    // MOVSD xmm,idx[RBP]  (opcode F2 0F 10)
    return genc1(c,0xF20F10,modregxrm(2, reg - XMM0, BPRM),FLregsave,(targ_uns) idx);
}
|
||
|
||
/************************************
|
||
* Size for vex encoded instruction.
|
||
*/
|
||
|
||
unsigned char vex_inssize(code *c)
|
||
{
|
||
assert(c->Iflags & CFvex);
|
||
unsigned char ins;
|
||
if (c->Iflags & CFvex3)
|
||
{
|
||
switch (c->Ivex.mmmm)
|
||
{
|
||
case 0: // no prefix
|
||
case 1: // 0F
|
||
ins = inssize2[c->Ivex.op] + 2;
|
||
break;
|
||
case 2: // 0F 38
|
||
ins = inssize2[0x38] + 1;
|
||
break;
|
||
case 3: // 0F 3A
|
||
ins = inssize2[0x3A] + 1;
|
||
break;
|
||
default:
|
||
assert(0);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
ins = inssize2[c->Ivex.op] + 1;
|
||
}
|
||
return ins;
|
||
}
|
||
|
||
/************************************
|
||
* Determine if there is a modregrm byte for code.
|
||
*/
|
||
|
||
int cod3_EA(code *c)
|
||
{ unsigned ins;
|
||
|
||
unsigned op1 = c->Iop & 0xFF;
|
||
if (op1 == ESCAPE)
|
||
ins = 0;
|
||
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(c->Iop >> 8) & 0xFF];
|
||
else if ((c->Iop & 0xFF00) == 0x0F00)
|
||
ins = inssize2[op1];
|
||
else
|
||
ins = inssize[op1];
|
||
return ins & M;
|
||
}
|
||
|
||
/********************************
|
||
* Fix global variables for 386.
|
||
*/
|
||
|
||
void cod3_set32()
|
||
{
|
||
inssize[0xA0] = T|5;
|
||
inssize[0xA1] = T|5;
|
||
inssize[0xA2] = T|5;
|
||
inssize[0xA3] = T|5;
|
||
BPRM = 5; /* [EBP] addressing mode */
|
||
fregsaved = mBP | mBX | mSI | mDI; // saved across function calls
|
||
FLOATREGS = FLOATREGS_32;
|
||
FLOATREGS2 = FLOATREGS2_32;
|
||
DOUBLEREGS = DOUBLEREGS_32;
|
||
if (config.flags3 & CFG3eseqds)
|
||
fregsaved |= mES;
|
||
|
||
for (unsigned i = 0x80; i < 0x90; i++)
|
||
inssize2[i] = W|T|6;
|
||
}
|
||
|
||
/********************************
|
||
* Fix global variables for I64.
|
||
*/
|
||
|
||
void cod3_set64()
|
||
{
|
||
inssize[0xA0] = T|5; // MOV AL,mem
|
||
inssize[0xA1] = T|5; // MOV RAX,mem
|
||
inssize[0xA2] = T|5; // MOV mem,AL
|
||
inssize[0xA3] = T|5; // MOV mem,RAX
|
||
BPRM = 5; // [RBP] addressing mode
|
||
|
||
fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls
|
||
FLOATREGS = FLOATREGS_64;
|
||
FLOATREGS2 = FLOATREGS2_64;
|
||
DOUBLEREGS = DOUBLEREGS_64;
|
||
STACKALIGN = 16;
|
||
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
|
||
BYTEREGS = ALLREGS;
|
||
#endif
|
||
|
||
for (unsigned i = 0x80; i < 0x90; i++)
|
||
inssize2[i] = W|T|6;
|
||
}
|
||
|
||
/*********************************
|
||
* Word or dword align start of function.
|
||
*/
|
||
|
||
void cod3_align()
|
||
{
|
||
static unsigned char nops[7] = { 0x90,0x90,0x90,0x90,0x90,0x90,0x90 };
|
||
unsigned nbytes;
|
||
#if OMFOBJ
|
||
if (config.flags4 & CFG4speed) // if optimized for speed
|
||
{
|
||
// Pick alignment based on CPU target
|
||
if (config.target_cpu == TARGET_80486 ||
|
||
config.target_cpu >= TARGET_PentiumPro)
|
||
{ // 486 does reads on 16 byte boundaries, so if we are near
|
||
// such a boundary, align us to it
|
||
|
||
nbytes = -Coffset & 15;
|
||
if (nbytes < 8)
|
||
{
|
||
Coffset += obj_bytes(cseg,Coffset,nbytes,nops); // XCHG AX,AX
|
||
}
|
||
}
|
||
}
|
||
#else
|
||
nbytes = -Coffset & 3;
|
||
//dbg_printf("cod3_align Coffset %x nbytes %d\n",Coffset,nbytes);
|
||
obj_bytes(cseg,Coffset,nbytes,nops);
|
||
#endif
|
||
}
|
||
|
||
/*****************************
|
||
* Given a type, return a mask of
|
||
* registers to hold that type.
|
||
* Input:
|
||
* tyf function type
|
||
*/
|
||
|
||
regm_t regmask(tym_t tym, tym_t tyf)
|
||
{
|
||
switch (tybasic(tym))
|
||
{
|
||
case TYvoid:
|
||
case TYstruct:
|
||
return 0;
|
||
case TYbool:
|
||
case TYwchar_t:
|
||
case TYchar16:
|
||
case TYchar:
|
||
case TYschar:
|
||
case TYuchar:
|
||
case TYshort:
|
||
case TYushort:
|
||
case TYint:
|
||
case TYuint:
|
||
#if JHANDLE
|
||
case TYjhandle:
|
||
#endif
|
||
case TYnullptr:
|
||
case TYnptr:
|
||
#if TARGET_SEGMENTED
|
||
case TYsptr:
|
||
case TYcptr:
|
||
#endif
|
||
return mAX;
|
||
|
||
case TYfloat:
|
||
case TYifloat:
|
||
if (I64)
|
||
return mXMM0;
|
||
if (config.exe & EX_flat)
|
||
return mST0;
|
||
case TYlong:
|
||
case TYulong:
|
||
case TYdchar:
|
||
if (!I16)
|
||
return mAX;
|
||
#if TARGET_SEGMENTED
|
||
case TYfptr:
|
||
case TYhptr:
|
||
#endif
|
||
return mDX | mAX;
|
||
|
||
case TYcent:
|
||
case TYucent:
|
||
assert(I64);
|
||
return mDX | mAX;
|
||
|
||
#if TARGET_SEGMENTED
|
||
case TYvptr:
|
||
return mDX | mBX;
|
||
#endif
|
||
|
||
case TYdouble:
|
||
case TYdouble_alias:
|
||
case TYidouble:
|
||
if (I64)
|
||
return mXMM0;
|
||
if (config.exe & EX_flat)
|
||
return mST0;
|
||
return DOUBLEREGS;
|
||
|
||
case TYllong:
|
||
case TYullong:
|
||
return I64 ? mAX : (I32 ? mDX | mAX : DOUBLEREGS);
|
||
|
||
case TYldouble:
|
||
case TYildouble:
|
||
return mST0;
|
||
|
||
case TYcfloat:
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (I32 && tybasic(tyf) == TYnfunc)
|
||
return mDX | mAX;
|
||
#endif
|
||
case TYcdouble:
|
||
if (I64)
|
||
return mXMM0 | mXMM1;
|
||
case TYcldouble:
|
||
return mST01;
|
||
|
||
// SIMD vector types
|
||
case TYfloat4:
|
||
case TYdouble2:
|
||
case TYschar16:
|
||
case TYuchar16:
|
||
case TYshort8:
|
||
case TYushort8:
|
||
case TYlong4:
|
||
case TYulong4:
|
||
case TYllong2:
|
||
case TYullong2:
|
||
if (!config.fpxmmregs)
|
||
{ printf("SIMD operations not supported on this platform\n");
|
||
exit(1);
|
||
}
|
||
return mXMM0;
|
||
|
||
default:
|
||
#if DEBUG
|
||
WRTYxx(tym);
|
||
#endif
|
||
assert(0);
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
/*******************************
|
||
* Generate block exit code
|
||
*/
|
||
void outblkexitcode(block *bl, code*& c, int& anyspill, const char* sflsave, symbol** retsym, const regm_t mfuncregsave)
|
||
{
|
||
elem *e = bl->Belem;
|
||
block *nextb;
|
||
block *bs1,*bs2;
|
||
regm_t retregs = 0;
|
||
bool jcond;
|
||
|
||
switch (bl->BC) /* block exit condition */
|
||
{
|
||
case BCiftrue:
|
||
jcond = TRUE;
|
||
bs1 = list_block(bl->Bsucc);
|
||
bs2 = list_block(list_next(bl->Bsucc));
|
||
if (bs1 == bl->Bnext)
|
||
{ // Swap bs1 and bs2
|
||
block *btmp;
|
||
|
||
jcond ^= 1;
|
||
btmp = bs1;
|
||
bs1 = bs2;
|
||
bs2 = btmp;
|
||
}
|
||
c = cat(c,logexp(e,jcond,FLblock,(code *) bs1));
|
||
nextb = bs2;
|
||
bl->Bcode = NULL;
|
||
L2:
|
||
if (nextb != bl->Bnext)
|
||
{ if (configv.addlinenumbers && bl->Bsrcpos.Slinnum &&
|
||
!(funcsym_p->ty() & mTYnaked))
|
||
cgen_linnum(&c,bl->Bsrcpos);
|
||
assert(!(bl->Bflags & BFLepilog));
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,nextb));
|
||
}
|
||
bl->Bcode = cat(bl->Bcode,c);
|
||
break;
|
||
case BCjmptab:
|
||
case BCifthen:
|
||
case BCswitch:
|
||
assert(!(bl->Bflags & BFLepilog));
|
||
doswitch(bl); /* hide messy details */
|
||
bl->Bcode = cat(c,bl->Bcode);
|
||
break;
|
||
#if MARS
|
||
case BCjcatch:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in catch blocks.
|
||
c = cat(c,getregs((I32 | I64) ? allregs : (ALLREGS | mES)));
|
||
#if 0 && TARGET_LINUX
|
||
if (config.flags3 & CFG3pic && !(allregs & mBX))
|
||
{
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
goto case_goto;
|
||
#endif
|
||
#if SCPP
|
||
case BCcatch:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in catch blocks.
|
||
c = cat(c,getregs(allregs | mES));
|
||
#if 0 && TARGET_LINUX
|
||
if (config.flags3 & CFG3pic && !(allregs & mBX))
|
||
{
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
goto case_goto;
|
||
|
||
case BCtry:
|
||
usednteh |= EHtry;
|
||
if (config.flags2 & CFG2seh)
|
||
usednteh |= NTEHtry;
|
||
goto case_goto;
|
||
#endif
|
||
case BCgoto:
|
||
nextb = list_block(bl->Bsucc);
|
||
if ((funcsym_p->Sfunc->Fflags3 & Fnteh ||
|
||
(MARS /*&& config.flags2 & CFG2seh*/)) &&
|
||
bl->Btry != nextb->Btry &&
|
||
nextb->BC != BC_finally)
|
||
{ int toindex;
|
||
int fromindex;
|
||
|
||
bl->Bcode = NULL;
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
toindex = nextb->Btry ? nextb->Btry->Bscope_index : -1;
|
||
assert(bl->Btry);
|
||
fromindex = bl->Btry->Bscope_index;
|
||
#if MARS
|
||
if (toindex + 1 == fromindex)
|
||
{ // Simply call __finally
|
||
if (bl->Btry &&
|
||
list_block(list_next(bl->Btry->Bsucc))->BC == BCjcatch)
|
||
{
|
||
goto L2;
|
||
}
|
||
}
|
||
#endif
|
||
if (config.flags2 & CFG2seh)
|
||
c = cat(c,nteh_unwind(0,toindex));
|
||
#if MARS && (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS)
|
||
else if (toindex + 1 <= fromindex)
|
||
{
|
||
//c = cat(c, linux_unwind(0, toindex));
|
||
block *bt;
|
||
|
||
//printf("B%d: fromindex = %d, toindex = %d\n", bl->Bdfoidx, fromindex, toindex);
|
||
bt = bl;
|
||
while ((bt = bt->Btry) != NULL && bt->Bscope_index != toindex)
|
||
{ block *bf;
|
||
|
||
//printf("\tbt->Bscope_index = %d, bt->Blast_index = %d\n", bt->Bscope_index, bt->Blast_index);
|
||
bf = list_block(list_next(bt->Bsucc));
|
||
// Only look at try-finally blocks
|
||
if (bf->BC == BCjcatch)
|
||
continue;
|
||
|
||
if (bf == nextb)
|
||
continue;
|
||
//printf("\tbf = B%d, nextb = B%d\n", bf->Bdfoidx, nextb->Bdfoidx);
|
||
if (nextb->BC == BCgoto &&
|
||
!nextb->Belem &&
|
||
bf == list_block(nextb->Bsucc))
|
||
continue;
|
||
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
int nalign = 0;
|
||
|
||
gensaverestore(retregs,&cs,&cr);
|
||
if (STACKALIGN == 16)
|
||
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
|
||
if (npush & (STACKALIGN - 1))
|
||
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
|
||
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
}
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(long)list_block(bf->Bsucc));
|
||
if (nalign)
|
||
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
c = cat3(c,cs,cr);
|
||
}
|
||
}
|
||
#endif
|
||
goto L2;
|
||
}
|
||
case_goto:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
if (anyspill)
|
||
{ // Add in the epilog code
|
||
code *cstore = NULL;
|
||
code *cload = NULL;
|
||
|
||
for (int i = 0; i < anyspill; i++)
|
||
{ symbol *s = globsym.tab[i];
|
||
|
||
if (s->Sflags & SFLspill &&
|
||
vec_testbit(dfoidx,s->Srange))
|
||
{
|
||
s->Sfl = sflsave[i]; // undo block register assignments
|
||
cgreg_spillreg_epilog(bl,s,&cstore,&cload);
|
||
}
|
||
}
|
||
c = cat3(c,cstore,cload);
|
||
}
|
||
|
||
L3:
|
||
bl->Bcode = NULL;
|
||
nextb = list_block(bl->Bsucc);
|
||
goto L2;
|
||
|
||
case BC_try:
|
||
if (config.flags2 & CFG2seh)
|
||
{ usednteh |= NTEH_try;
|
||
nteh_usevars();
|
||
}
|
||
else
|
||
usednteh |= EHtry;
|
||
goto case_goto;
|
||
|
||
case BC_finally:
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in finally blocks.
|
||
assert(!getregs(allregs));
|
||
assert(!e);
|
||
assert(!bl->Bcode);
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{
|
||
int nalign = 0;
|
||
if (STACKALIGN == 16)
|
||
{ nalign = STACKALIGN - REGSIZE;
|
||
c = genc2(c,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
// CALL bl->Bsucc
|
||
c = genc(c,0xE8,0,0,0,FLblock,(long)list_block(bl->Bsucc));
|
||
if (nalign)
|
||
{ c = genc2(c,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
// JMP list_next(bl->Bsucc)
|
||
nextb = list_block(list_next(bl->Bsucc));
|
||
goto L2;
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
// Generate a PUSH of the address of the successor to the
|
||
// corresponding BC_ret
|
||
//assert(list_block(list_next(bl->Bsucc))->BC == BC_ret);
|
||
// PUSH &succ
|
||
c = genc(c,0x68,0,0,0,FLblock,(long)list_block(list_next(bl->Bsucc)));
|
||
nextb = list_block(bl->Bsucc);
|
||
goto L2;
|
||
}
|
||
|
||
case BC_ret:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
bl->Bcode = gen1(c,0xC3); // RET
|
||
break;
|
||
|
||
#if NTEXCEPTIONS
|
||
case BC_except:
|
||
assert(!e);
|
||
usednteh |= NTEH_except;
|
||
c = cat(c,nteh_setsp(0x8B));
|
||
getregs(allregs);
|
||
goto L3;
|
||
|
||
case BC_filter:
|
||
c = cat(c,nteh_filter(bl));
|
||
// Mark all registers as destroyed. This will prevent
|
||
// register assignments to variables used in filter blocks.
|
||
getregs(allregs);
|
||
retregs = regmask(e->Ety, TYnfunc);
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
bl->Bcode = gen1(c,0xC3); // RET
|
||
break;
|
||
#endif
|
||
|
||
case BCretexp:
|
||
retregs = regmask(e->Ety, funcsym_p->ty());
|
||
|
||
// For the final load into the return regs, don't set regcon.used,
|
||
// so that the optimizer can potentially use retregs for register
|
||
// variable assignments.
|
||
|
||
if (config.flags4 & CFG4optimized)
|
||
{ regm_t usedsave;
|
||
|
||
c = cat(c,docommas(&e));
|
||
usedsave = regcon.used;
|
||
if (EOP(e))
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
else
|
||
{
|
||
if (e->Eoper == OPconst)
|
||
regcon.mvar = 0;
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
regcon.used = usedsave;
|
||
if (e->Eoper == OPvar)
|
||
{ symbol *s = e->EV.sp.Vsym;
|
||
|
||
if (s->Sfl == FLreg && s->Sregm != mAX)
|
||
*retsym = s;
|
||
}
|
||
}
|
||
}
|
||
else
|
||
{
|
||
case BCret:
|
||
case BCexit:
|
||
c = gencodelem(c,e,&retregs,TRUE);
|
||
}
|
||
bl->Bcode = c;
|
||
if (retregs == mST0)
|
||
{ assert(stackused == 1);
|
||
pop87(); // account for return value
|
||
}
|
||
else if (retregs == mST01)
|
||
{ assert(stackused == 2);
|
||
pop87();
|
||
pop87(); // account for return value
|
||
}
|
||
if (bl->BC == BCexit && config.flags4 & CFG4optimized)
|
||
mfuncreg = mfuncregsave;
|
||
if (MARS || usednteh & NTEH_try)
|
||
{ block *bt;
|
||
|
||
bt = bl;
|
||
while ((bt = bt->Btry) != NULL)
|
||
{ block *bf;
|
||
|
||
bf = list_block(list_next(bt->Bsucc));
|
||
#if MARS
|
||
// Only look at try-finally blocks
|
||
if (bf->BC == BCjcatch)
|
||
{
|
||
continue;
|
||
}
|
||
#endif
|
||
if (config.flags2 & CFG2seh)
|
||
{
|
||
if (bt->Bscope_index == 0)
|
||
{
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
|
||
c = cat(c,nteh_gensindex(-1));
|
||
gensaverestore(retregs,&cs,&cr);
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(long)list_block(bf->Bsucc));
|
||
bl->Bcode = cat3(c,cs,cr);
|
||
}
|
||
else
|
||
bl->Bcode = cat(c,nteh_unwind(retregs,~0));
|
||
break;
|
||
}
|
||
else
|
||
{
|
||
// call __finally
|
||
code *cs;
|
||
code *cr;
|
||
int nalign = 0;
|
||
|
||
gensaverestore(retregs,&cs,&cr);
|
||
if (STACKALIGN == 16)
|
||
{ int npush = (numbitsset(retregs) + 1) * REGSIZE;
|
||
if (npush & (STACKALIGN - 1))
|
||
{ nalign = STACKALIGN - (npush & (STACKALIGN - 1));
|
||
cs = genc2(cs,0x81,modregrm(3,5,SP),nalign); // SUB ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
}
|
||
// CALL bf->Bsucc
|
||
cs = genc(cs,0xE8,0,0,0,FLblock,(long)list_block(bf->Bsucc));
|
||
if (nalign)
|
||
{ cs = genc2(cs,0x81,modregrm(3,0,SP),nalign); // ADD ESP,nalign
|
||
if (I64)
|
||
code_orrex(cs, REX_W);
|
||
}
|
||
bl->Bcode = c = cat3(c,cs,cr);
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
|
||
#if SCPP || MARS
|
||
case BCasm:
|
||
assert(!e);
|
||
// Mark destroyed registers
|
||
assert(!c);
|
||
c = cat(c,getregs(iasm_regs(bl)));
|
||
if (bl->Bsucc)
|
||
{ nextb = list_block(bl->Bsucc);
|
||
if (!bl->Bnext)
|
||
goto L2;
|
||
if (nextb != bl->Bnext &&
|
||
bl->Bnext &&
|
||
!(bl->Bnext->BC == BCgoto &&
|
||
!bl->Bnext->Belem &&
|
||
nextb == list_block(bl->Bnext->Bsucc)))
|
||
{ code *cl;
|
||
|
||
// See if already have JMP at end of block
|
||
cl = code_last(bl->Bcode);
|
||
if (!cl || cl->Iop != JMP)
|
||
goto L2; // add JMP at end of block
|
||
}
|
||
}
|
||
break;
|
||
#endif
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("bl->BC = %d\n",bl->BC);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
}
|
||
|
||
/*******************************
|
||
* Generate code for blocks ending in a switch statement.
|
||
* Take BCswitch and decide on
|
||
* BCifthen use if - then code
|
||
* BCjmptab index into jump table
|
||
* BCswitch search table for match
|
||
*/
|
||
|
||
void doswitch(block *b)
|
||
{ code *cc,*c,*ce;
|
||
regm_t retregs;
|
||
unsigned ncases,n,reg,reg2,rm;
|
||
targ_llong vmax,vmin,val;
|
||
targ_llong *p;
|
||
list_t bl;
|
||
elem *e;
|
||
|
||
tym_t tys;
|
||
int sz;
|
||
unsigned char dword;
|
||
unsigned char mswsame;
|
||
#if LONGLONG
|
||
targ_ulong msw;
|
||
#else
|
||
unsigned msw;
|
||
#endif
|
||
|
||
e = b->Belem;
|
||
elem_debug(e);
|
||
cc = docommas(&e);
|
||
cgstate.stackclean++;
|
||
tys = tybasic(e->Ety);
|
||
sz = tysize[tys];
|
||
dword = (sz == 2 * REGSIZE);
|
||
mswsame = 1; // assume all msw's are the same
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
assert(p);
|
||
ncases = *p++; /* number of cases */
|
||
|
||
vmax = MINLL; // smallest possible llong
|
||
vmin = MAXLL; // largest possible llong
|
||
for (n = 0; n < ncases; n++) // find max and min case values
|
||
{ val = *p++;
|
||
if (val > vmax) vmax = val;
|
||
if (val < vmin) vmin = val;
|
||
if (REGSIZE == 2)
|
||
{
|
||
unsigned short ms = (val >> 16) & 0xFFFF;
|
||
if (n == 0)
|
||
msw = ms;
|
||
else if (msw != ms)
|
||
mswsame = 0;
|
||
}
|
||
else // REGSIZE == 4
|
||
{
|
||
targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
|
||
if (n == 0)
|
||
msw = ms;
|
||
else if (msw != ms)
|
||
mswsame = 0;
|
||
}
|
||
}
|
||
p -= ncases;
|
||
//dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
|
||
|
||
if (I64)
|
||
{ // For now, just generate basic if-then sequence to get us running
|
||
retregs = ALLREGS;
|
||
b->BC = BCifthen;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
assert(!dword); // 128 bit switches not supported
|
||
reg = findreg(retregs); // reg that result is in
|
||
bl = b->Bsucc;
|
||
for (n = 0; n < ncases; n++)
|
||
{ code *cx;
|
||
val = *p;
|
||
if (sz == 4)
|
||
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,val
|
||
else if (sz == 8)
|
||
{
|
||
if (val == (int)val) // if val is a 64 bit value sign-extended from 32 bits
|
||
{
|
||
cx = genc2(CNIL,0x81,modregrmx(3,7,reg),val); // CMP reg,value32
|
||
cx->Irex |= REX_W; // 64 bit operand
|
||
}
|
||
else
|
||
{ unsigned sreg;
|
||
// MOV sreg,value64
|
||
cx = regwithvalue(CNIL, ALLREGS & ~mask[reg], val, &sreg, 64);
|
||
cx = genregs(cx,0x3B,reg,sreg); // CMP reg,sreg
|
||
code_orrex(cx, REX_W);
|
||
}
|
||
}
|
||
else
|
||
assert(0);
|
||
bl = list_next(bl);
|
||
genjmp(cx,JE,FLblock,list_block(bl)); // JE caseaddr
|
||
c = cat(c,cx);
|
||
p++;
|
||
}
|
||
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
|
||
ce = NULL;
|
||
}
|
||
// Need to do research on MACHOBJ to see about better methods
|
||
else if (MACHOBJ || ncases <= 3)
|
||
{ // generate if-then sequence
|
||
retregs = ALLREGS;
|
||
L1:
|
||
b->BC = BCifthen;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
if (dword)
|
||
{ reg = findreglsw(retregs);
|
||
reg2 = findregmsw(retregs);
|
||
}
|
||
else
|
||
reg = findreg(retregs); /* reg that result is in */
|
||
bl = b->Bsucc;
|
||
if (dword && mswsame)
|
||
{ /* CMP reg2,MSW */
|
||
c = genc2(c,0x81,modregrm(3,7,reg2),msw);
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
for (n = 0; n < ncases; n++)
|
||
{ code *cnext = CNIL;
|
||
/* CMP reg,casevalue */
|
||
c = cat(c,ce = genc2(CNIL,0x81,modregrm(3,7,reg),(targ_int)*p));
|
||
if (dword && !mswsame)
|
||
{
|
||
cnext = gennop(CNIL);
|
||
genjmp(ce,JNE,FLcode,(block *) cnext);
|
||
genc2(ce,0x81,modregrm(3,7,reg2),MSREG(*p));
|
||
}
|
||
bl = list_next(bl);
|
||
/* JE caseaddr */
|
||
genjmp(ce,JE,FLblock,list_block(bl));
|
||
c = cat(c,cnext);
|
||
p++;
|
||
}
|
||
if (list_block(b->Bsucc) != b->Bnext) /* if default is not next block */
|
||
c = cat(c,genjmp(CNIL,JMP,FLblock,list_block(b->Bsucc)));
|
||
ce = NULL;
|
||
}
|
||
#if TARGET_WINDOS // try and find relocation to support this
|
||
else if ((targ_ullong)(vmax - vmin) <= ncases * 2) // then use jump table
|
||
{ int modify;
|
||
|
||
b->BC = BCjmptab;
|
||
retregs = IDXREGS;
|
||
if (dword)
|
||
retregs |= mMSW;
|
||
modify = (vmin || !I32);
|
||
c = scodelem(e,&retregs,0,!modify);
|
||
reg = findreg(retregs & IDXREGS); /* reg that result is in */
|
||
if (dword)
|
||
reg2 = findregmsw(retregs);
|
||
if (modify)
|
||
{
|
||
assert(!(retregs & regcon.mvar));
|
||
c = cat(c,getregs(retregs));
|
||
}
|
||
if (vmin) /* if there is a minimum */
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,5,reg),vmin); /* SUB reg,vmin */
|
||
if (dword)
|
||
{ genc2(c,0x81,modregrm(3,3,reg2),MSREG(vmin)); // SBB reg2,vmin
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
}
|
||
else if (dword)
|
||
{ c = gentstreg(c,reg2); // TEST reg2,reg2
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
if (vmax - vmin != REGMASK) /* if there is a maximum */
|
||
{ /* CMP reg,vmax-vmin */
|
||
c = genc2(c,0x81,modregrm(3,7,reg),vmax-vmin);
|
||
genjmp(c,JA,FLblock,list_block(b->Bsucc)); /* JA default */
|
||
}
|
||
if (!I32)
|
||
c = gen2(c,0xD1,modregrm(3,4,reg)); /* SHL reg,1 */
|
||
if (I32)
|
||
{
|
||
ce = genc1(CNIL,0xFF,modregrm(0,4,4),FLswitch,0); /* JMP [CS:]disp[idxreg*4] */
|
||
ce->Isib = modregrm(2,reg,5);
|
||
}
|
||
else
|
||
{ rm = getaddrmode(retregs) | modregrm(0,4,0);
|
||
ce = genc1(CNIL,0xFF,rm,FLswitch,0); /* JMP [CS:]disp[idxreg] */
|
||
}
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ce->Iflags |= flags; // segment override
|
||
ce->IEV1.Vswitch = b;
|
||
b->Btablesize = (int) (vmax - vmin + 1) * tysize[TYnptr];
|
||
}
|
||
#endif
|
||
else /* else use switch table (BCswitch) */
|
||
{ targ_size_t disp;
|
||
int mod;
|
||
code *esw;
|
||
code *ct;
|
||
|
||
retregs = mAX; /* SCASW requires AX */
|
||
if (dword)
|
||
retregs |= mDX;
|
||
else if (ncases <= 6 || config.flags4 & CFG4speed)
|
||
goto L1;
|
||
c = scodelem(e,&retregs,0,TRUE);
|
||
if (dword && mswsame)
|
||
{ /* CMP DX,MSW */
|
||
c = genc2(c,0x81,modregrm(3,7,DX),msw);
|
||
genjmp(c,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
}
|
||
ce = getregs(mCX|mDI);
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{ // Add in GOT
|
||
code *cx;
|
||
code *cgot;
|
||
|
||
ce = cat(ce, getregs(mDX));
|
||
cx = genc2(NULL,CALL,0,0); // CALL L1
|
||
gen1(cx, 0x58 + DI); // L1: POP EDI
|
||
|
||
// ADD EDI,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,modregrm(3,0,DI),FLextern,gotsym);
|
||
cgot->Iflags = CFoff;
|
||
cgot->IEVoffset2 = 3;
|
||
|
||
makeitextern(gotsym);
|
||
|
||
genmovreg(cgot, DX, DI); // MOV EDX, EDI
|
||
// ADD EDI,offset of switch table
|
||
esw = gencs(CNIL,0x81,modregrm(3,0,DI),FLswitch,NULL);
|
||
esw->IEV2.Vswitch = b;
|
||
esw = cat3(cx, cgot, esw);
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
// MOV DI,offset of switch table
|
||
esw = gencs(CNIL,0xC7,modregrm(3,0,DI),FLswitch,NULL);
|
||
esw->IEV2.Vswitch = b;
|
||
}
|
||
ce = cat(ce,esw);
|
||
movregconst(ce,CX,ncases,0); /* MOV CX,ncases */
|
||
|
||
/* The switch table will be accessed through ES:DI.
|
||
* Therefore, load ES with proper segment value.
|
||
*/
|
||
if (config.flags3 & CFG3eseqds)
|
||
{ assert(!(config.flags & CFGromable));
|
||
ce = cat(ce,getregs(mCX)); // allocate CX
|
||
}
|
||
else
|
||
{
|
||
ce = cat(ce,getregs(mES|mCX)); // allocate ES and CX
|
||
gen1(ce,(config.flags & CFGromable) ? 0x0E : 0x1E); // PUSH CS/DS
|
||
gen1(ce,0x07); // POP ES
|
||
}
|
||
|
||
disp = (ncases - 1) * intsize; /* displacement to jump table */
|
||
if (dword && !mswsame)
|
||
{ code *cloop;
|
||
|
||
/* Build the following:
|
||
L1: SCASW
|
||
JNE L2
|
||
CMP DX,[CS:]disp[DI]
|
||
L2: LOOPNE L1
|
||
*/
|
||
|
||
mod = (disp > 127) ? 2 : 1; /* displacement size */
|
||
cloop = genc2(CNIL,0xE0,0,-7 - mod -
|
||
((config.flags & CFGromable) ? 1 : 0)); /* LOOPNE scasw */
|
||
ce = gen1(ce,0xAF); /* SCASW */
|
||
code_orflag(ce,CFtarg2); // target of jump
|
||
genjmp(ce,JNE,FLcode,(block *) cloop); /* JNE loop */
|
||
/* CMP DX,[CS:]disp[DI] */
|
||
ct = genc1(CNIL,0x39,modregrm(mod,DX,5),FLconst,disp);
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ct->Iflags |= flags; // possible seg override
|
||
ce = cat3(ce,ct,cloop);
|
||
disp += ncases * intsize; /* skip over msw table */
|
||
}
|
||
else
|
||
{
|
||
ce = gen1(ce,0xF2); /* REPNE */
|
||
gen1(ce,0xAF); /* SCASW */
|
||
}
|
||
genjmp(ce,JNE,FLblock,list_block(b->Bsucc)); /* JNE default */
|
||
mod = (disp > 127) ? 2 : 1; /* 1 or 2 byte displacement */
|
||
if (config.flags & CFGromable)
|
||
gen1(ce,SEGCS); /* table is in code segment */
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic)
|
||
{ // ADD EDX,(ncases-1)*2[EDI]
|
||
ct = genc1(CNIL,0x03,modregrm(mod,DX,7),FLconst,disp);
|
||
// JMP EDX
|
||
gen2(ct,0xFF,modregrm(3,4,DX));
|
||
}
|
||
else
|
||
#endif
|
||
{ // JMP (ncases-1)*2[DI]
|
||
ct = genc1(CNIL,0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
|
||
int flags = (config.flags & CFGromable) ? CFcs : 0; // table is in code seg
|
||
ct->Iflags |= flags;
|
||
}
|
||
ce = cat(ce,ct);
|
||
b->Btablesize = disp + intsize + ncases * tysize[TYnptr];
|
||
}
|
||
b->Bcode = cat3(cc,c,ce);
|
||
//assert(b->Bcode);
|
||
cgstate.stackclean--;
|
||
}
|
||
|
||
/******************************
|
||
* Output data block for a jump table (BCjmptab).
|
||
* The 'holes' in the table get filled with the
|
||
* default label.
|
||
*/
|
||
|
||
void outjmptab(block *b)
|
||
{
|
||
unsigned ncases,n;
|
||
targ_llong u,vmin,vmax,val,*p;
|
||
targ_size_t alignbytes,def,targ,*poffset;
|
||
int jmpseg;
|
||
|
||
poffset = (config.flags & CFGromable) ? &Coffset : &JMPOFF;
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
ncases = *p++; /* number of cases */
|
||
vmax = MINLL; // smallest possible llong
|
||
vmin = MAXLL; // largest possible llong
|
||
for (n = 0; n < ncases; n++) /* find min case value */
|
||
{ val = p[n];
|
||
if (val > vmax) vmax = val;
|
||
if (val < vmin) vmin = val;
|
||
}
|
||
jmpseg = (config.flags & CFGromable) ? cseg : JMPSEG;
|
||
|
||
/* Any alignment bytes necessary */
|
||
alignbytes = align(0,*poffset) - *poffset;
|
||
obj_lidata(jmpseg,*poffset,alignbytes);
|
||
|
||
def = list_block(b->Bsucc)->Boffset; /* default address */
|
||
assert(vmin <= vmax);
|
||
for (u = vmin; ; u++)
|
||
{ targ = def; /* default */
|
||
for (n = 0; n < ncases; n++)
|
||
{ if (p[n] == u)
|
||
{ targ = list_block(list_nth(b->Bsucc,n + 1))->Boffset;
|
||
break;
|
||
}
|
||
}
|
||
reftocodseg(jmpseg,*poffset,targ);
|
||
*poffset += tysize[TYnptr];
|
||
if (u == vmax) /* for case that (vmax == ~0) */
|
||
break;
|
||
}
|
||
}
|
||
|
||
/******************************
|
||
* Output data block for a switch table.
|
||
* Two consecutive tables, the first is the case value table, the
|
||
* second is the address table.
|
||
*/
|
||
|
||
void outswitab(block *b)
|
||
{ unsigned ncases,n;
|
||
targ_llong *p;
|
||
targ_size_t val;
|
||
targ_size_t alignbytes,*poffset;
|
||
int seg; /* target segment for table */
|
||
list_t bl;
|
||
unsigned sz;
|
||
targ_size_t offset;
|
||
|
||
//printf("outswitab()\n");
|
||
p = b->BS.Bswitch; /* pointer to case data */
|
||
ncases = *p++; /* number of cases */
|
||
|
||
if (config.flags & CFGromable)
|
||
{ poffset = &Coffset;
|
||
assert(cseg == CODE);
|
||
seg = cseg;
|
||
}
|
||
else
|
||
{
|
||
poffset = &JMPOFF;
|
||
seg = JMPSEG;
|
||
}
|
||
offset = *poffset;
|
||
alignbytes = align(0,*poffset) - *poffset;
|
||
obj_lidata(seg,*poffset,alignbytes); /* any alignment bytes necessary */
|
||
assert(*poffset == offset + alignbytes);
|
||
|
||
sz = intsize;
|
||
for (n = 0; n < ncases; n++) /* send out value table */
|
||
{
|
||
//printf("\tcase %d, offset = x%x\n", n, *poffset);
|
||
#if OMFOBJ
|
||
*poffset +=
|
||
#endif
|
||
obj_bytes(seg,*poffset,sz,p);
|
||
p++;
|
||
}
|
||
offset += alignbytes + sz * ncases;
|
||
assert(*poffset == offset);
|
||
|
||
if (b->Btablesize == ncases * (REGSIZE * 2 + tysize[TYnptr]))
|
||
{
|
||
/* Send out MSW table */
|
||
p -= ncases;
|
||
for (n = 0; n < ncases; n++)
|
||
{ val = MSREG(*p);
|
||
p++;
|
||
#if OMFOBJ
|
||
*poffset +=
|
||
#endif
|
||
obj_bytes(seg,*poffset,REGSIZE,&val);
|
||
}
|
||
offset += REGSIZE * ncases;
|
||
assert(*poffset == offset);
|
||
}
|
||
|
||
bl = b->Bsucc;
|
||
for (n = 0; n < ncases; n++) /* send out address table */
|
||
{ bl = list_next(bl);
|
||
reftocodseg(seg,*poffset,list_block(bl)->Boffset);
|
||
*poffset += tysize[TYnptr];
|
||
}
|
||
assert(*poffset == offset + ncases * tysize[TYnptr]);
|
||
}
|
||
|
||
/*****************************
|
||
* Return a jump opcode relevant to the elem for a JMP TRUE.
|
||
*/
|
||
|
||
int jmpopcode(elem *e)
|
||
{ tym_t tym;
|
||
int zero,i,jp,op;
|
||
static const char jops[][2][6] =
|
||
{ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */
|
||
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JS ,JNS,JE ,JNE} }, /* signed */
|
||
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JE ,JNE,JB ,JAE,JE ,JNE} }, /* unsigned */
|
||
#if 0
|
||
{ {JLE,JG ,JL ,JGE,JE ,JNE},{JLE,JG ,JL ,JGE,JE ,JNE} }, /* real */
|
||
{ {JBE,JA ,JB ,JAE,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 */
|
||
{ {JA ,JBE,JAE,JB ,JE ,JNE},{JBE,JA ,JB ,JAE,JE ,JNE} }, /* 8087 R */
|
||
#endif
|
||
};
|
||
|
||
#define XP (JP << 8)
|
||
#define XNP (JNP << 8)
|
||
static const unsigned jfops[1][26] =
|
||
/* le gt lt ge eqeq ne unord lg leg ule ul uge */
|
||
{
|
||
{ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE,
|
||
|
||
/* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */
|
||
XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE }, /* 8087 */
|
||
};
|
||
|
||
assert(e);
|
||
while (e->Eoper == OPcomma ||
|
||
/* The !EOP(e->E1) is to line up with the case in cdeq() where */
|
||
/* we decide if mPSW is passed on when evaluating E2 or not. */
|
||
(e->Eoper == OPeq && !EOP(e->E1)))
|
||
e = e->E2; /* right operand determines it */
|
||
|
||
op = e->Eoper;
|
||
if (e->Ecount != e->Ecomsub) // comsubs just get Z bit set
|
||
return JNE;
|
||
if (!OTrel(op)) // not relational operator
|
||
{
|
||
tym_t tymx = tybasic(e->Ety);
|
||
if (tyfloating(tymx) && config.inline8087 &&
|
||
(tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
|
||
tymx == TYcdouble || tymx == TYcfloat ||
|
||
op == OPind))
|
||
{
|
||
return XP|JNE;
|
||
}
|
||
return (op >= OPbt && op <= OPbts) ? JC : JNE;
|
||
}
|
||
|
||
if (e->E2->Eoper == OPconst)
|
||
zero = !boolres(e->E2);
|
||
else
|
||
zero = 0;
|
||
|
||
tym = e->E1->Ety;
|
||
if (tyfloating(tym))
|
||
#if 1
|
||
{ i = 0;
|
||
if (config.inline8087)
|
||
{ i = 1;
|
||
|
||
#if 1
|
||
#define NOSAHF (I64 || config.fpxmmregs)
|
||
if (rel_exception(op) || config.flags4 & CFG4fastfloat)
|
||
{
|
||
if (zero)
|
||
{
|
||
if (NOSAHF)
|
||
op = swaprel(op);
|
||
}
|
||
else if (NOSAHF)
|
||
op = swaprel(op);
|
||
else if (cmporder87(e->E2))
|
||
op = swaprel(op);
|
||
else
|
||
;
|
||
}
|
||
else
|
||
{
|
||
if (zero && config.target_cpu < TARGET_80386)
|
||
;
|
||
else
|
||
op = swaprel(op);
|
||
}
|
||
#else
|
||
if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
|
||
op = swaprel(op);
|
||
else if (!zero &&
|
||
(cmporder87(e->E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
|
||
/* compare is reversed */
|
||
op = swaprel(op);
|
||
#endif
|
||
}
|
||
jp = jfops[0][op - OPle];
|
||
goto L1;
|
||
}
|
||
#else
|
||
i = (config.inline8087) ? (3 + cmporder87(e->E2)) : 2;
|
||
#endif
|
||
else if (tyuns(tym) || tyuns(e->E2->Ety))
|
||
i = 1;
|
||
else if (tyintegral(tym) || typtr(tym))
|
||
i = 0;
|
||
else
|
||
{
|
||
#if DEBUG
|
||
elem_print(e);
|
||
WRTYxx(tym);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
|
||
jp = jops[i][zero][op - OPle]; /* table starts with OPle */
|
||
L1:
|
||
#if DEBUG
|
||
if ((jp & 0xF0) != 0x70)
|
||
WROP(op),
|
||
printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
|
||
#endif
|
||
assert((jp & 0xF0) == 0x70);
|
||
return jp;
|
||
}
|
||
|
||
/**********************************
|
||
* Append code to *pc which validates pointer described by
|
||
* addressing mode in *pcs. Modify addressing mode in *pcs.
|
||
* Input:
|
||
* keepmsk mask of registers we must not destroy or use
|
||
* if (keepmsk & RMstore), this will be only a store operation
|
||
* into the lvalue
|
||
*/
|
||
|
||
void cod3_ptrchk(code **pc,code *pcs,regm_t keepmsk)
|
||
{ code *c;
|
||
code *cs2;
|
||
unsigned char rm,sib;
|
||
unsigned reg;
|
||
unsigned flagsave;
|
||
unsigned opsave;
|
||
regm_t idxregs;
|
||
regm_t tosave;
|
||
regm_t used;
|
||
int i;
|
||
|
||
assert(!I64);
|
||
if (!I16 && pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
|
||
return; // not designed to deal with 48 bit far pointers
|
||
|
||
c = *pc;
|
||
|
||
rm = pcs->Irm;
|
||
assert(!(rm & 0x40)); // no disp8 or reg addressing modes
|
||
|
||
// If the addressing mode is already a register
|
||
reg = rm & 7;
|
||
if (I16)
|
||
{ static const unsigned char imode[8] = { BP,BP,BP,BP,SI,DI,BP,BX };
|
||
|
||
reg = imode[reg]; // convert [SI] to SI, etc.
|
||
}
|
||
idxregs = mask[reg];
|
||
if ((rm & 0x80 && (pcs->IFL1 != FLoffset || pcs->IEV1.Vuns)) ||
|
||
!(idxregs & ALLREGS)
|
||
)
|
||
{
|
||
// Load the offset into a register, so we can push the address
|
||
idxregs = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
|
||
assert(idxregs);
|
||
c = cat(c,allocreg(&idxregs,®,TYoffset));
|
||
|
||
opsave = pcs->Iop;
|
||
flagsave = pcs->Iflags;
|
||
pcs->Iop = 0x8D;
|
||
pcs->Irm |= modregrm(0,reg,0);
|
||
pcs->Iflags &= ~(CFopsize | CFss | CFes | CFcs); // no prefix bytes needed
|
||
c = gen(c,pcs); // LEA reg,EA
|
||
|
||
pcs->Iflags = flagsave;
|
||
pcs->Iop = opsave;
|
||
}
|
||
|
||
// registers destroyed by the function call
|
||
//used = (mBP | ALLREGS | mES) & ~fregsaved;
|
||
used = 0; // much less code generated this way
|
||
|
||
cs2 = CNIL;
|
||
tosave = used & (keepmsk | idxregs);
|
||
for (i = 0; tosave; i++)
|
||
{ regm_t mi = mask[i];
|
||
|
||
assert(i < REGMAX);
|
||
if (mi & tosave) /* i = register to save */
|
||
{
|
||
int push,pop;
|
||
|
||
stackchanged = 1;
|
||
if (i == ES)
|
||
{ push = 0x06;
|
||
pop = 0x07;
|
||
}
|
||
else
|
||
{ push = 0x50 + i;
|
||
pop = push | 8;
|
||
}
|
||
c = gen1(c,push); // PUSH i
|
||
cs2 = cat(gen1(CNIL,pop),cs2); // POP i
|
||
tosave &= ~mi;
|
||
}
|
||
}
|
||
|
||
// For 16 bit models, push a far pointer
|
||
if (I16)
|
||
{ int segreg;
|
||
|
||
switch (pcs->Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
|
||
{ case CFes: segreg = 0x06; break;
|
||
case CFss: segreg = 0x16; break;
|
||
case CFcs: segreg = 0x0E; break;
|
||
case 0: segreg = 0x1E; break; // DS
|
||
default:
|
||
assert(0);
|
||
}
|
||
|
||
// See if we should default to SS:
|
||
// (Happens when BP is part of the addressing mode)
|
||
if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
|
||
rm & 2 && (rm & 7) != 7)
|
||
{ segreg = 0x16;
|
||
if (config.wflags & WFssneds)
|
||
pcs->Iflags |= CFss; // because BP won't be there anymore
|
||
}
|
||
c = gen1(c,segreg); // PUSH segreg
|
||
}
|
||
|
||
c = gen1(c,0x50 + reg); // PUSH reg
|
||
|
||
// Rewrite the addressing mode in *pcs so it is just 0[reg]
|
||
setaddrmode(pcs, idxregs);
|
||
pcs->IFL1 = FLoffset;
|
||
pcs->IEV1.Vuns = 0;
|
||
|
||
// Call the validation function
|
||
{
|
||
makeitextern(rtlsym[RTLSYM_PTRCHK]);
|
||
|
||
used &= ~(keepmsk | idxregs); // regs destroyed by this exercise
|
||
c = cat(c,getregs(used));
|
||
// CALL __ptrchk
|
||
gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_PTRCHK]);
|
||
}
|
||
|
||
*pc = cat(c,cs2);
|
||
}
|
||
|
||
|
||
|
||
/***********************************
|
||
* Determine if BP can be used as a general purpose register.
|
||
* Note parallels between this routine and prolog().
|
||
* Returns:
|
||
* 0 can't be used, needed for frame
|
||
* mBP can be used
|
||
*/
|
||
|
||
regm_t cod3_useBP()
|
||
{
|
||
tym_t tym;
|
||
tym_t tyf;
|
||
|
||
// Note that DOSX memory model cannot use EBP as a general purpose
|
||
// register, as SS != DS.
|
||
if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
|
||
goto Lcant;
|
||
|
||
if (anyiasm)
|
||
goto Lcant;
|
||
|
||
tyf = funcsym_p->ty();
|
||
if (tyf & mTYnaked) // if no prolog/epilog for function
|
||
goto Lcant;
|
||
|
||
if (funcsym_p->Sfunc->Fflags3 & Ffakeeh)
|
||
{
|
||
goto Lcant; // need consistent stack frame
|
||
}
|
||
|
||
tym = tybasic(tyf);
|
||
if (tym == TYifunc)
|
||
goto Lcant;
|
||
|
||
stackoffsets(0);
|
||
localsize = Aoffset; // an estimate only
|
||
// if (localsize)
|
||
{
|
||
if (!(config.flags4 & CFG4speed) ||
|
||
config.target_cpu < TARGET_Pentium ||
|
||
tyfarfunc(tym) ||
|
||
config.flags & CFGstack ||
|
||
localsize >= 0x100 || // arbitrary value < 0x1000
|
||
(usednteh & ~NTEHjmonitor) ||
|
||
usedalloca
|
||
)
|
||
goto Lcant;
|
||
}
|
||
Lcan:
|
||
return mBP;
|
||
|
||
Lcant:
|
||
return 0;
|
||
}
|
||
|
||
/***************************************
|
||
* Gen code for OPframeptr
|
||
*/
|
||
|
||
code *cdframeptr(elem *e, regm_t *pretregs)
|
||
{
|
||
unsigned reg;
|
||
code cs;
|
||
|
||
regm_t retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
code *cg = allocreg(&retregs, ®, TYint);
|
||
|
||
cs.Iop = ESCAPE | ESCframeptr;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
cs.Irm = reg;
|
||
cg = gen(cg,&cs);
|
||
|
||
return cat(cg,fixresult(e,retregs,pretregs));
|
||
}
|
||
|
||
/***************************************
|
||
* Gen code for load of _GLOBAL_OFFSET_TABLE_.
|
||
* This value gets cached in the local variable 'localgot'.
|
||
*/
|
||
|
||
code *cdgot(elem *e, regm_t *pretregs)
|
||
{
|
||
#if TARGET_OSX
|
||
regm_t retregs;
|
||
unsigned reg;
|
||
code *c;
|
||
|
||
retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
c = allocreg(&retregs, ®, TYnptr);
|
||
|
||
c = genc(c,CALL,0,0,0,FLgot,0); // CALL L1
|
||
gen1(c, 0x58 + reg); // L1: POP reg
|
||
|
||
return cat(c,fixresult(e,retregs,pretregs));
|
||
#elif TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
regm_t retregs;
|
||
unsigned reg;
|
||
code *c;
|
||
code *cgot;
|
||
|
||
retregs = *pretregs & allregs;
|
||
if (!retregs)
|
||
retregs = allregs;
|
||
c = allocreg(&retregs, ®, TYnptr);
|
||
|
||
c = genc2(c,CALL,0,0); // CALL L1
|
||
gen1(c, 0x58 + reg); // L1: POP reg
|
||
|
||
// ADD reg,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,modregrm(3,0,reg),FLextern,gotsym);
|
||
/* Because the 2:3 offset from L1: is hardcoded,
|
||
* this sequence of instructions must not
|
||
* have any instructions in between,
|
||
* so set CFvolatile to prevent the scheduler from rearranging it.
|
||
*/
|
||
cgot->Iflags = CFoff | CFvolatile;
|
||
cgot->IEVoffset2 = (reg == AX) ? 2 : 3;
|
||
|
||
makeitextern(gotsym);
|
||
return cat3(c,cgot,fixresult(e,retregs,pretregs));
|
||
#else
|
||
assert(0);
|
||
return NULL;
|
||
#endif
|
||
}
|
||
|
||
/**************************************************
|
||
* Load contents of localgot into EBX.
|
||
*/
|
||
|
||
code *load_localgot()
|
||
{
|
||
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
if (config.flags3 & CFG3pic && I32)
|
||
{
|
||
if (localgot)
|
||
{
|
||
localgot->Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator
|
||
elem *e = el_var(localgot);
|
||
regm_t retregs = mBX;
|
||
code *c = codelem(e,&retregs,FALSE);
|
||
el_free(e);
|
||
return c;
|
||
}
|
||
else
|
||
{
|
||
elem *e = el_long(TYnptr, 0);
|
||
e->Eoper = OPgot;
|
||
regm_t retregs = mBX;
|
||
code *c = codelem(e,&retregs,FALSE);
|
||
el_free(e);
|
||
return c;
|
||
}
|
||
}
|
||
#endif
|
||
return NULL;
|
||
}
|
||
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
/*****************************
 * Store name at p as a length-prefixed string.
 *
 * Names of up to 255 characters are stored as one length byte followed
 * by the characters.  Longer names are stored as the bytes 0xFF, 0, a
 * 16 bit length in host byte order, and then the characters.  No
 * terminating 0 is written.
 *
 * Params:
 *      p       destination; needs room for strlen(name) + ONS_OHD bytes
 *      name    0-terminated string to store
 * Returns:
 *      # of bytes stored
 */

#define ONS_OHD 4               // max # of extra bytes added by obj_namestring()

STATIC int obj_namestring(char *p,const char *name)
{
    unsigned len = strlen(name);

    if (len > 255)
    {
        /* p is an arbitrary char buffer, so store the 16 bit length
         * with memcpy rather than through a cast to short*, which
         * could be a misaligned access.  The bytes written at p[2..3]
         * are the same.
         */
        unsigned short len16 = (unsigned short)len;

        p[0] = (char)0xFF;
        p[1] = 0;
        memcpy(p + 2,&len16,sizeof(len16));
        memcpy(p + 4,name,len);
        len += ONS_OHD;
    }
    else
    {
        p[0] = len;
        memcpy(p + 1,name,len);
        len++;
    }
    return len;
}
|
||
#endif
|
||
|
||
/**************************
 * Append a two-operand register,register instruction to c.
 * The ModR/M byte uses mod 3 with dstreg in the reg field and srcreg
 * in the r/m field.
 * Returns:
 *      whatever gen2() returns for the extended list
 */
code *genregs(code *c,unsigned op,unsigned dstreg,unsigned srcreg)
{
    return gen2(c,op,modregxrmx(3,dstreg,srcreg));
}
|
||
|
||
/**************************
 * Append 'TEST t,t' to c and flag it with CFpsw.
 * Returns:
 *      the extended code list
 */
code *gentstreg(code *c,unsigned t)
{
    code *result = gen2(c,0x85,modregxrmx(3,t,t));      // TEST t,t

    code_orflag(result,CFpsw);
    return result;
}
|
||
|
||
/**************************
 * Append 'PUSH reg' to c.
 * The low three bits of reg select the one byte opcode 0x50..0x57;
 * bit 3 of reg requests the REX_B prefix bit.
 * Returns:
 *      the extended code list
 */
code *genpush(code *c, unsigned reg)
{
    const unsigned opcode = 0x50 + (reg & 7);

    c = gen1(c, opcode);
    if (reg & 8)
        code_orrex(c, REX_B);
    return c;
}
|
||
|
||
/**************************
 * Append 'POP reg' to c.
 * The low three bits of reg select the one byte opcode 0x58..0x5F;
 * bit 3 of reg requests the REX_B prefix bit.
 * Returns:
 *      the extended code list
 */
code *genpop(code *c, unsigned reg)
{
    const unsigned opcode = 0x58 + (reg & 7);

    c = gen1(c, opcode);
    if (reg & 8)
        code_orrex(c, REX_B);
    return c;
}
|
||
|
||
/**************************
|
||
* Generate a MOV to save a register to a stack slot
|
||
*/
|
||
code *gensavereg(unsigned& reg, targ_uns slot)
|
||
{
|
||
// MOV i[BP],reg
|
||
unsigned op = 0x89; // normal mov
|
||
if (reg == ES)
|
||
{ reg = 0; // the real reg number
|
||
op = 0x8C; // segment reg mov
|
||
}
|
||
code *c = genc1(NULL,op,modregxrm(2, reg, BPRM),FLcs,slot);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
|
||
return c;
|
||
}
|
||
|
||
/**************************
|
||
* Generate a MOV to,from register instruction.
|
||
* Smart enough to dump redundant register moves, and segment
|
||
* register moves.
|
||
*/
|
||
|
||
code *genmovreg(code *c,unsigned to,unsigned from)
|
||
{
|
||
#if DEBUG
|
||
if (to > ES || from > ES)
|
||
printf("genmovreg(c = %p, to = %d, from = %d)\n",c,to,from);
|
||
#endif
|
||
assert(to <= ES && from <= ES);
|
||
if (to != from)
|
||
{
|
||
if (to == ES)
|
||
c = genregs(c,0x8E,0,from);
|
||
else if (from == ES)
|
||
c = genregs(c,0x8C,0,to);
|
||
else
|
||
c = genregs(c,0x89,from,to);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/***************************************
|
||
* Generate immediate multiply instruction for r1=r2*imm.
|
||
* Optimize it into LEA's if we can.
|
||
*/
|
||
|
||
code *genmulimm(code *c,unsigned r1,unsigned r2,targ_int imm)
|
||
{ code cs;
|
||
|
||
// These optimizations should probably be put into pinholeopt()
|
||
switch (imm)
|
||
{ case 1:
|
||
c = genmovreg(c,r1,r2);
|
||
break;
|
||
case 5:
|
||
cs.Iop = LEA;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
buildEA(&cs,r2,r2,4,0);
|
||
cs.orReg(r1);
|
||
c = gen(c,&cs);
|
||
break;
|
||
default:
|
||
c = genc2(c,0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm
|
||
break;
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/******************************
|
||
* Load CX with the value of _AHSHIFT.
|
||
*/
|
||
|
||
code *genshift(code *c)
|
||
{
|
||
#if SCPP && TX86
|
||
code *c1;
|
||
|
||
// Set up ahshift to trick ourselves into giving the right fixup,
|
||
// which must be seg-relative, external frame, external target.
|
||
c1 = gencs(CNIL,0xC7,modregrm(3,0,CX),FLfunc,rtlsym[RTLSYM_AHSHIFT]);
|
||
c1->Iflags |= CFoff;
|
||
return cat(c,c1);
|
||
#else
|
||
assert(0);
|
||
return 0;
|
||
#endif
|
||
}
|
||
|
||
/******************************
|
||
* Move constant value into reg.
|
||
* Take advantage of existing values in registers.
|
||
* If flags & mPSW
|
||
* set flags based on result
|
||
* Else if flags & 8
|
||
* do not disturb flags
|
||
* Else
|
||
* don't care about flags
|
||
* If flags & 1 then byte move
|
||
* If flags & 2 then short move (for I32 and I64)
|
||
* If flags & 4 then don't disturb unused portion of register
|
||
* If flags & 16 then reg is a byte register AL..BH
|
||
* If flags & 64 (0x40) then 64 bit move (I64 only)
|
||
* Returns:
|
||
* code (if any) generated
|
||
*/
|
||
|
||
code *movregconst(code *c,unsigned reg,targ_size_t value,regm_t flags)
|
||
{ unsigned r;
|
||
regm_t mreg;
|
||
|
||
//printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask[reg]), value, value, flags);
|
||
#define genclrreg(a,r) genregs(a,0x31,r,r)
|
||
|
||
regm_t regm = regcon.immed.mval & mask[reg];
|
||
targ_size_t regv = regcon.immed.value[reg];
|
||
|
||
if (flags & 1) // 8 bits
|
||
{
|
||
value &= 0xFF;
|
||
regm &= BYTEREGS;
|
||
|
||
// If we already have the right value in the right register
|
||
if (regm && (regv & 0xFF) == value)
|
||
goto L2;
|
||
|
||
if (flags & 16 && reg & 4 && // if an H byte register
|
||
regcon.immed.mval & mask[reg & 3] &&
|
||
(((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
|
||
goto L2;
|
||
|
||
/* Avoid byte register loads on Pentium Pro and Pentium II
|
||
* to avoid dependency stalls.
|
||
*/
|
||
if (config.flags4 & CFG4speed &&
|
||
config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
|
||
goto L3;
|
||
|
||
// See if another register has the right value
|
||
r = 0;
|
||
for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
|
||
{
|
||
if (mreg & 1)
|
||
{
|
||
if ((regcon.immed.value[r] & 0xFF) == value)
|
||
{ c = genregs(c,0x8A,reg,r); // MOV regL,rL
|
||
if (I64 && reg >= 4 || r >= 4)
|
||
code_orrex(c, REX);
|
||
goto L2;
|
||
}
|
||
if (!(I64 && reg >= 4) &&
|
||
r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
|
||
{ c = genregs(c,0x8A,reg,r | 4); // MOV regL,rH
|
||
goto L2;
|
||
}
|
||
}
|
||
r++;
|
||
}
|
||
|
||
if (value == 0 && !(flags & 8))
|
||
{
|
||
if (!(flags & 4) && // if we can set the whole register
|
||
!(flags & 16 && reg & 4)) // and reg is not an H register
|
||
{ c = genregs(c,0x31,reg,reg); // XOR reg,reg
|
||
regimmed_set(reg,value);
|
||
regv = 0;
|
||
}
|
||
else
|
||
c = genregs(c,0x30,reg,reg); // XOR regL,regL
|
||
flags &= ~mPSW; // flags already set by XOR
|
||
}
|
||
else
|
||
{ c = genc2(c,0xC6,modregrmx(3,0,reg),value); /* MOV regL,value */
|
||
if (reg >= 4 && I64)
|
||
{
|
||
code_orrex(c, REX);
|
||
}
|
||
}
|
||
L2:
|
||
if (flags & mPSW)
|
||
genregs(c,0x84,reg,reg); // TEST regL,regL
|
||
|
||
if (regm)
|
||
// Set just the 'L' part of the register value
|
||
regimmed_set(reg,(regv & ~(targ_size_t)0xFF) | value);
|
||
else if (flags & 16 && reg & 4 && regcon.immed.mval & mask[reg & 3])
|
||
// Set just the 'H' part of the register value
|
||
regimmed_set((reg & 3),(regv & ~(targ_size_t)0xFF00) | (value << 8));
|
||
return c;
|
||
}
|
||
L3:
|
||
if (I16)
|
||
value = (targ_short) value; /* sign-extend MSW */
|
||
else if (I32)
|
||
value = (targ_int) value;
|
||
|
||
if (!I16 && flags & 2) // load 16 bit value
|
||
{
|
||
value &= 0xFFFF;
|
||
if (value == 0)
|
||
goto L1;
|
||
else
|
||
{
|
||
if (flags & mPSW)
|
||
goto L1;
|
||
code *c1 = genc2(CNIL,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
|
||
c1->Iflags |= CFopsize; // yes, even for I64
|
||
c = cat(c,c1);
|
||
if (regm)
|
||
// High bits of register are not affected by 16 bit load
|
||
regimmed_set(reg,(regv & ~(targ_size_t)0xFFFF) | value);
|
||
}
|
||
return c;
|
||
}
|
||
L1:
|
||
|
||
/* If we already have the right value in the right register */
|
||
if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
|
||
{ if (flags & mPSW)
|
||
c = gentstreg(c,reg);
|
||
}
|
||
else if (flags & 64 && regm && regv == value)
|
||
{ // Look at the full 64 bits
|
||
if (flags & mPSW)
|
||
{
|
||
c = gentstreg(c,reg);
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
if (flags & mPSW)
|
||
{
|
||
switch (value)
|
||
{ case 0:
|
||
c = genclrreg(c,reg);
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
break;
|
||
case 1:
|
||
if (I64)
|
||
goto L4;
|
||
c = genclrreg(c,reg);
|
||
goto inc;
|
||
case -1:
|
||
if (I64)
|
||
goto L4;
|
||
c = genclrreg(c,reg);
|
||
goto dec;
|
||
default:
|
||
L4:
|
||
if (flags & 64)
|
||
{
|
||
c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
|
||
gentstreg(c,reg);
|
||
code_orrex(c, REX_W);
|
||
}
|
||
else
|
||
{ c = genc2(c,0xC7,modregrmx(3,0,reg),value); /* MOV reg,value */
|
||
gentstreg(c,reg);
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
else
|
||
{
|
||
/* Look for single byte conversion */
|
||
if (regcon.immed.mval & mAX)
|
||
{
|
||
if (I32)
|
||
{ if (reg == AX && value == (targ_short) regv)
|
||
{ c = gen1(c,0x98); /* CWDE */
|
||
goto done;
|
||
}
|
||
if (reg == DX &&
|
||
value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
|
||
)
|
||
{ c = gen1(c,0x99); /* CDQ */
|
||
goto done;
|
||
}
|
||
}
|
||
else if (I16)
|
||
{
|
||
if (reg == AX &&
|
||
(targ_short) value == (signed char) regv)
|
||
{ c = gen1(c,0x98); /* CBW */
|
||
goto done;
|
||
}
|
||
|
||
if (reg == DX &&
|
||
(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? (targ_short) 0xFFFF : (targ_short) 0) &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
|
||
)
|
||
{ c = gen1(c,0x99); /* CWD */
|
||
goto done;
|
||
}
|
||
}
|
||
}
|
||
if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
|
||
{ c = genclrreg(c,reg); // CLR reg
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
goto done;
|
||
}
|
||
|
||
if (!I64 && regm && !(flags & 8))
|
||
{ if (regv + 1 == value ||
|
||
/* Catch case of (0xFFFF+1 == 0) for 16 bit compiles */
|
||
(I16 && (targ_short)(regv + 1) == (targ_short)value))
|
||
{
|
||
inc:
|
||
c = gen1(c,0x40 + reg); /* INC reg */
|
||
goto done;
|
||
}
|
||
if (regv - 1 == value)
|
||
{
|
||
dec:
|
||
c = gen1(c,0x48 + reg); /* DEC reg */
|
||
goto done;
|
||
}
|
||
}
|
||
|
||
/* See if another register has the right value */
|
||
r = 0;
|
||
for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
|
||
{
|
||
#ifdef DEBUG
|
||
assert(!I16 || regcon.immed.value[r] == (targ_short)regcon.immed.value[r]);
|
||
#endif
|
||
if (mreg & 1 && regcon.immed.value[r] == value)
|
||
{ c = genmovreg(c,reg,r);
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
goto done;
|
||
}
|
||
r++;
|
||
}
|
||
|
||
if (value == 0 && !(flags & 8))
|
||
{ c = genclrreg(c,reg); // CLR reg
|
||
if (flags & 64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
else
|
||
{ /* See if we can just load a byte */
|
||
if (regm & BYTEREGS &&
|
||
!(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
|
||
)
|
||
{
|
||
if ((regv & ~(targ_size_t)0xFF) == (value & ~(targ_size_t)0xFF))
|
||
{ c = movregconst(c,reg,value,(flags & 8) |4|1); // load regL
|
||
return c;
|
||
}
|
||
if (regm & (mAX|mBX|mCX|mDX) &&
|
||
(regv & ~(targ_size_t)0xFF00) == (value & ~(targ_size_t)0xFF00) &&
|
||
!I64)
|
||
{ c = movregconst(c,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
|
||
return c;
|
||
}
|
||
}
|
||
if (flags & 64)
|
||
c = genc2(c,0xC7,(REX_W << 16) | modregrmx(3,0,reg),value); // MOV reg,value64
|
||
else
|
||
c = genc2(c,0xC7,modregrmx(3,0,reg),value); // MOV reg,value
|
||
}
|
||
}
|
||
done:
|
||
regimmed_set(reg,value);
|
||
}
|
||
return c;
|
||
}
|
||
|
||
/**************************
|
||
* Generate a jump instruction.
|
||
*/
|
||
|
||
code *genjmp(code *c,unsigned op,unsigned fltarg,block *targ)
|
||
{ code cs;
|
||
code *cj;
|
||
code *cnop;
|
||
|
||
cs.Iop = op & 0xFF;
|
||
cs.Iflags = 0;
|
||
cs.Irex = 0;
|
||
if (op != JMP && op != 0xE8) // if not already long branch
|
||
cs.Iflags = CFjmp16; /* assume long branch for op = 0x7x */
|
||
cs.IFL2 = fltarg; /* FLblock (or FLcode) */
|
||
cs.IEV2.Vblock = targ; /* target block (or code) */
|
||
if (fltarg == FLcode)
|
||
((code *)targ)->Iflags |= CFtarg;
|
||
|
||
if (config.flags4 & CFG4fastfloat) // if fast floating point
|
||
return gen(c,&cs);
|
||
|
||
cj = gen(CNIL,&cs);
|
||
switch (op & 0xFF00) /* look at second jump opcode */
|
||
{
|
||
/* The JP and JNP come from floating point comparisons */
|
||
case JP << 8:
|
||
cs.Iop = JP;
|
||
gen(cj,&cs);
|
||
break;
|
||
case JNP << 8:
|
||
/* Do a JP around the jump instruction */
|
||
cnop = gennop(CNIL);
|
||
c = genjmp(c,JP,FLcode,(block *) cnop);
|
||
cat(cj,cnop);
|
||
break;
|
||
case 1 << 8: /* toggled no jump */
|
||
case 0 << 8:
|
||
break;
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("jop = x%x\n",op);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
return cat(c,cj);
|
||
}
|
||
|
||
/*******************************
|
||
* Generate code for a function start.
|
||
* Input:
|
||
* Coffset address of start of code
|
||
* Output:
|
||
* Coffset adjusted for size of code generated
|
||
* EBPtoESP
|
||
* hasframe
|
||
* BPoff
|
||
*/
|
||
|
||
code *prolog()
|
||
{
|
||
SYMIDX si;
|
||
unsigned reg;
|
||
char enter;
|
||
unsigned Foffset;
|
||
unsigned xlocalsize; // amount to subtract from ESP to make room for locals
|
||
unsigned pushallocreg;
|
||
char guessneedframe;
|
||
regm_t namedargs = 0;
|
||
|
||
//printf("cod3.prolog(), needframe = %d, Aalign = %d\n", needframe, Aalign);
|
||
debugx(debugw && printf("funcstart()\n"));
|
||
regcon.immed.mval = 0; /* no values in registers yet */
|
||
EBPtoESP = -REGSIZE;
|
||
hasframe = 0;
|
||
char pushds = 0;
|
||
BPoff = 0;
|
||
code *c = CNIL;
|
||
int pushalloc = 0;
|
||
tym_t tyf = funcsym_p->ty();
|
||
tym_t tym = tybasic(tyf);
|
||
unsigned farfunc = tyfarfunc(tym);
|
||
pushallocreg = (tyf == TYmfunc) ? CX : AX;
|
||
if (config.flags & CFGalwaysframe || funcsym_p->Sfunc->Fflags3 & Ffakeeh)
|
||
needframe = 1;
|
||
|
||
Lagain:
|
||
guessneedframe = needframe;
|
||
// if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & ~NTEHjmonitor))
|
||
// usednteh |= NTEHpassthru;
|
||
|
||
/* Compute BP offsets for variables on stack.
|
||
* The organization is:
|
||
* Poff parameters
|
||
* seg of return addr (if far function)
|
||
* IP of return addr
|
||
* BP-> caller's BP
|
||
* DS (if Windows prolog/epilog)
|
||
* exception handling context symbol
|
||
* Aoff autos and regs
|
||
* regsave.off any saved registers
|
||
* Foff floating register
|
||
* AAoff alloca temporary
|
||
* CSoff common subs
|
||
* NDPoff any 8087 saved registers
|
||
* Toff temporaries
|
||
* monitor context record
|
||
* any saved registers
|
||
*/
|
||
|
||
if (tym == TYifunc)
|
||
Poff = 26;
|
||
else if (I64)
|
||
Poff = 16;
|
||
else if (I32)
|
||
Poff = farfunc ? 12 : 8;
|
||
else
|
||
Poff = farfunc ? 6 : 4;
|
||
|
||
Aoff = 0;
|
||
#if NTEXCEPTIONS == 2
|
||
Aoff -= nteh_contextsym_size();
|
||
#if MARS
|
||
if (funcsym_p->Sfunc->Fflags3 & Ffakeeh && nteh_contextsym_size() == 0)
|
||
Aoff -= 5 * 4;
|
||
#endif
|
||
#endif
|
||
Aoff = -align(0,-Aoff + Aoffset);
|
||
|
||
regsave.off = Aoff - align(0,regsave.top);
|
||
Foffset = floatreg ? (config.fpxmmregs ? 16 : DOUBLESIZE) : 0;
|
||
Foff = regsave.off - align(0,Foffset);
|
||
assert(usedalloca != 1);
|
||
AAoff = usedalloca ? (Foff - REGSIZE) : Foff;
|
||
CSoff = AAoff - align(0,cstop * REGSIZE);
|
||
NDPoff = CSoff - align(0,NDP::savetop * NDPSAVESIZE);
|
||
Toff = NDPoff - align(0,Toffset);
|
||
|
||
if (Foffset > Aalign)
|
||
Aalign = Foffset;
|
||
if (Aalign > REGSIZE)
|
||
{
|
||
// Adjust Aoff so that it is Aalign byte aligned, assuming that
|
||
// before function parameters were pushed the stack was
|
||
// Aalign byte aligned
|
||
targ_size_t psize = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
int sz = psize + -Aoff + Poff + (needframe ? 0 : REGSIZE);
|
||
if (sz & (Aalign - 1))
|
||
{ int adj = Aalign - (sz & (Aalign - 1));
|
||
Aoff -= adj;
|
||
regsave.off -= adj;
|
||
Foff -= adj;
|
||
AAoff -= adj;
|
||
CSoff -= adj;
|
||
NDPoff -= adj;
|
||
Toff -= adj;
|
||
}
|
||
}
|
||
|
||
localsize = -Toff;
|
||
|
||
regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving
|
||
int npush = 0; // number of registers that need saving
|
||
for (regm_t x = topush; x; x >>= 1)
|
||
npush += x & 1;
|
||
|
||
// Keep the stack aligned by 8 for any subsequent function calls
|
||
if (!I16 && calledafunc &&
|
||
(STACKALIGN == 16 || config.flags4 & CFG4stackalign))
|
||
{
|
||
//printf("npush = %d Poff = x%x needframe = %d localsize = x%x\n", npush, Poff, needframe, localsize);
|
||
|
||
int sz = Poff + (needframe ? 0 : -REGSIZE) + localsize + npush * REGSIZE;
|
||
if (STACKALIGN == 16)
|
||
{
|
||
if (sz & (8|4))
|
||
localsize += STACKALIGN - (sz & (8|4));
|
||
}
|
||
else if (sz & 4)
|
||
localsize += 4;
|
||
}
|
||
|
||
//printf("Foff x%02x Aoff x%02x Toff x%02x NDPoff x%02x CSoff x%02x Poff x%02x localsize x%02x\n",
|
||
//(int)Foff,(int)Aoff,(int)Toff,(int)NDPoff,(int)CSoff,(int)Poff,(int)localsize);
|
||
|
||
xlocalsize = localsize;
|
||
|
||
if (tyf & mTYnaked) // if no prolog/epilog for function
|
||
{
|
||
hasframe = 1;
|
||
return NULL;
|
||
}
|
||
|
||
if (tym == TYifunc)
|
||
{ static unsigned char ops2[] = { 0x60,0x1E,0x06,0 };
|
||
static unsigned char ops0[] = { 0x50,0x51,0x52,0x53,
|
||
0x54,0x55,0x56,0x57,
|
||
0x1E,0x06,0 };
|
||
|
||
unsigned char *p;
|
||
|
||
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
|
||
do
|
||
c = gen1(c,*p);
|
||
while (*++p);
|
||
c = genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (localsize)
|
||
c = genc2(c,0x81,modregrm(3,5,SP),localsize); // SUB SP,localsize
|
||
tyf |= mTYloadds;
|
||
hasframe = 1;
|
||
goto Lcont;
|
||
}
|
||
|
||
/* Determine if we need BP set up */
|
||
if (config.flags & CFGalwaysframe)
|
||
needframe = 1;
|
||
else
|
||
{
|
||
if (localsize)
|
||
{
|
||
if (I16 ||
|
||
!(config.flags4 & CFG4speed) ||
|
||
config.target_cpu < TARGET_Pentium ||
|
||
farfunc ||
|
||
config.flags & CFGstack ||
|
||
xlocalsize >= 0x1000 ||
|
||
(usednteh & ~NTEHjmonitor) ||
|
||
anyiasm ||
|
||
usedalloca
|
||
)
|
||
needframe = 1;
|
||
}
|
||
if (refparam && (anyiasm || I16))
|
||
needframe = 1;
|
||
}
|
||
|
||
if (needframe)
|
||
{ assert(mfuncreg & mBP); // shouldn't have used mBP
|
||
|
||
if (!guessneedframe) // if guessed wrong
|
||
goto Lagain;
|
||
}
|
||
|
||
if (I16 && config.wflags & WFwindows && farfunc)
|
||
{ int wflags;
|
||
int segreg;
|
||
|
||
#if SCPP
|
||
// alloca() can't be used because the 'special' parameter won't be at
|
||
// a known offset from BP.
|
||
if (usedalloca == 1)
|
||
synerr(EM_alloca_win); // alloca() can't be in Windows functions
|
||
#endif
|
||
|
||
wflags = config.wflags;
|
||
if (wflags & WFreduced && !(tyf & mTYexport))
|
||
{ // reduced prolog/epilog for non-exported functions
|
||
wflags &= ~(WFdgroup | WFds | WFss);
|
||
}
|
||
|
||
c = getregs(mAX);
|
||
assert(!c); /* should not have any value in AX */
|
||
|
||
switch (wflags & (WFdgroup | WFds | WFss))
|
||
{ case WFdgroup: // MOV AX,DGROUP
|
||
if (wflags & WFreduced)
|
||
tyf &= ~mTYloadds; // remove redundancy
|
||
c = genc(c,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0);
|
||
c->Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg
|
||
break;
|
||
case WFss:
|
||
segreg = 2; // SS
|
||
goto Lmovax;
|
||
case WFds:
|
||
segreg = 3; // DS
|
||
Lmovax:
|
||
c = gen2(c,0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
|
||
if (wflags & WFds)
|
||
gen1(c,0x90); // NOP
|
||
break;
|
||
case 0:
|
||
break;
|
||
default:
|
||
#ifdef DEBUG
|
||
printf("config.wflags = x%x\n",config.wflags);
|
||
#endif
|
||
assert(0);
|
||
}
|
||
if (wflags & WFincbp)
|
||
c = gen1(c,0x40 + BP); // INC BP
|
||
c = gen1(c,0x50 + BP); // PUSH BP
|
||
genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
|
||
{ gen1(c,0x1E); // PUSH DS
|
||
pushds = TRUE;
|
||
BPoff = -REGSIZE;
|
||
}
|
||
if (wflags & (WFds | WFss | WFdgroup))
|
||
gen2(c,0x8E,modregrm(3,3,AX)); // MOV DS,AX
|
||
|
||
enter = FALSE; /* don't use ENTER instruction */
|
||
hasframe = 1; /* we have a stack frame */
|
||
}
|
||
else
|
||
if (needframe) // if variables or parameters
|
||
{
|
||
if (config.wflags & WFincbp && farfunc)
|
||
c = gen1(c,0x40 + BP); /* INC BP */
|
||
if (config.target_cpu < TARGET_80286 ||
|
||
config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_SOLARIS | EX_SOLARIS64) ||
|
||
!localsize ||
|
||
config.flags & CFGstack ||
|
||
(xlocalsize >= 0x1000 && config.exe & EX_flat) ||
|
||
localsize >= 0x10000 ||
|
||
#if NTEXCEPTIONS == 2
|
||
(usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh)) ||
|
||
#endif
|
||
(config.target_cpu >= TARGET_80386 &&
|
||
config.flags4 & CFG4speed)
|
||
)
|
||
{
|
||
c = gen1(c,0x50 + BP); // PUSH BP
|
||
genregs(c,0x8B,BP,SP); // MOV BP,SP
|
||
if (I64)
|
||
code_orrex(c, REX_W); // MOV RBP,RSP
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
// Don't reorder instructions, as dwarf CFA relies on it
|
||
code_orflag(c, CFvolatile);
|
||
#endif
|
||
enter = FALSE; /* do not use ENTER instruction */
|
||
#if NTEXCEPTIONS == 2
|
||
if (usednteh & ~NTEHjmonitor && (config.flags2 & CFG2seh))
|
||
{
|
||
code *ce = nteh_prolog();
|
||
c = cat(c,ce);
|
||
int sz = nteh_contextsym_size();
|
||
assert(sz != 0); // should be 5*4, not 0
|
||
xlocalsize -= sz; // sz is already subtracted from ESP
|
||
// by nteh_prolog()
|
||
}
|
||
#endif
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
{ int off = I64 ? 16 : 8;
|
||
dwarf_CFA_set_loc(1); // address after PUSH EBP
|
||
dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
|
||
dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP]
|
||
dwarf_CFA_set_loc(3); // address after MOV EBP,ESP
|
||
// Yes, I know the parameter is 8 when we mean 0!
|
||
// But this gets the cfa register set to EBP correctly
|
||
dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP]
|
||
}
|
||
#endif
|
||
}
|
||
else
|
||
enter = TRUE;
|
||
hasframe = 1;
|
||
}
|
||
|
||
if (config.flags & CFGstack) /* if stack overflow check */
|
||
goto Ladjstack;
|
||
|
||
if (needframe) /* if variables or parameters */
|
||
{
|
||
if (xlocalsize) /* if any stack offset */
|
||
{
|
||
Ladjstack:
|
||
#if !TARGET_LINUX // seems that Linux doesn't need to fault in stack pages
|
||
if ((config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
|
||
#if TARGET_WINDOS
|
||
|| (xlocalsize >= 0x1000 && config.exe & EX_flat)
|
||
#endif
|
||
)
|
||
{
|
||
if (I16)
|
||
{
|
||
// BUG: Won't work if parameter is passed in AX
|
||
c = movregconst(c,AX,xlocalsize,FALSE); // MOV AX,localsize
|
||
makeitextern(rtlsym[RTLSYM_CHKSTK]);
|
||
// CALL _chkstk
|
||
gencs(c,(LARGECODE) ? 0x9A : CALL,0,FLfunc,rtlsym[RTLSYM_CHKSTK]);
|
||
useregs((ALLREGS | mBP | mES) & ~rtlsym[RTLSYM_CHKSTK]->Sregsaved);
|
||
}
|
||
else
|
||
{
|
||
/* Watch out for 64 bit code where EDX is passed as a register parameter
|
||
*/
|
||
int reg = I64 ? R11 : DX; // scratch register
|
||
|
||
/* MOV EDX, xlocalsize/0x1000
|
||
* L1: SUB ESP, 0x1000
|
||
* TEST [ESP],ESP
|
||
* DEC EDX
|
||
* JNE L1
|
||
* SUB ESP, xlocalsize % 0x1000
|
||
*/
|
||
c = movregconst(c, reg, xlocalsize / 0x1000, FALSE);
|
||
code *csub = genc2(NULL,0x81,modregrm(3,5,SP),0x1000);
|
||
if (I64)
|
||
code_orrex(csub, REX_W);
|
||
code_orflag(csub, CFtarg2);
|
||
gen2sib(csub, 0x85, modregrm(0,SP,4),modregrm(0,4,SP));
|
||
if (I64)
|
||
{ gen2(csub, 0xFF, (REX_W << 16) | modregrmx(3,0,R11)); // DEC R11
|
||
genc2(csub,JNE,0,(targ_uns)-14);
|
||
}
|
||
else
|
||
{ gen1(csub, 0x48 + DX); // DEC EDX
|
||
genc2(csub,JNE,0,(targ_uns)-12);
|
||
}
|
||
regimmed_set(reg,0); // reg is now 0
|
||
genc2(csub,0x81,modregrm(3,5,SP),xlocalsize & 0xFFF);
|
||
if (I64)
|
||
code_orrex(csub, REX_W);
|
||
c = cat(c,csub);
|
||
useregs(mask[reg]);
|
||
}
|
||
}
|
||
else
|
||
#endif
|
||
{
|
||
if (enter)
|
||
{ // ENTER xlocalsize,0
|
||
c = genc(c,0xC8,0,FLconst,xlocalsize,FLconst,(targ_uns) 0);
|
||
#if ELFOBJ || MACHOBJ
|
||
assert(!config.fulltypes); // didn't emit Dwarf data
|
||
#endif
|
||
}
|
||
else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
// Do this to prevent an -x[EBP] to be moved in
|
||
// front of the push.
|
||
code_orflag(c,CFvolatile);
|
||
pushalloc = 1;
|
||
}
|
||
else
|
||
{ // SUB SP,xlocalsize
|
||
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
|
||
if (usedalloca)
|
||
{
|
||
// Set up magic parameter for alloca()
|
||
// MOV -REGSIZE[BP],localsize - BPoff
|
||
//c = genc(c,0xC7,modregrm(2,0,BPRM),FLconst,-REGSIZE,FLconst,localsize - BPoff);
|
||
c = genc(c,0xC7,modregrm(2,0,BPRM),
|
||
FLconst,AAoff + BPoff,
|
||
FLconst,localsize - BPoff);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else
|
||
assert(usedalloca == 0);
|
||
}
|
||
else if (xlocalsize)
|
||
{
|
||
assert(I32);
|
||
|
||
if (xlocalsize == REGSIZE)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
pushalloc = 1;
|
||
}
|
||
else if (xlocalsize == 2 * REGSIZE)
|
||
{ c = gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
gen1(c,0x50 + pushallocreg); // PUSH AX
|
||
pushalloc = 1;
|
||
}
|
||
else
|
||
{ // SUB ESP,xlocalsize
|
||
c = genc2(c,0x81,modregrm(3,5,SP),xlocalsize);
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
BPoff += REGSIZE;
|
||
}
|
||
else
|
||
assert((localsize | usedalloca) == 0 || (usednteh & NTEHjmonitor));
|
||
EBPtoESP += xlocalsize;
|
||
|
||
/* The idea is to generate trace for all functions if -Nc is not thrown.
|
||
* If -Nc is thrown, generate trace only for global COMDATs, because those
|
||
* are relevant to the FUNCTIONS statement in the linker .DEF file.
|
||
* This same logic should be in epilog().
|
||
*/
|
||
if (config.flags & CFGtrace &&
|
||
(!(config.flags4 & CFG4allcomdat) ||
|
||
funcsym_p->Sclass == SCcomdat ||
|
||
funcsym_p->Sclass == SCglobal ||
|
||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
|
||
)
|
||
)
|
||
{
|
||
if (STACKALIGN == 16 && npush)
|
||
{ /* This could be avoided by moving the function call to after the
|
||
* registers are saved. But I don't remember why the call is here
|
||
* and not there.
|
||
*/
|
||
c = genc2(c,0x81,modregrm(3,5,SP),npush * REGSIZE); // SUB ESP,npush * REGSIZE
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
|
||
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N];
|
||
makeitextern(s);
|
||
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace
|
||
if (!I16)
|
||
code_orflag(c,CFoff | CFselfrel);
|
||
/* Embedding the function name inline after the call works, but it
|
||
* makes disassembling the code annoying.
|
||
*/
|
||
#if ELFOBJ || MACHOBJ
|
||
size_t len = strlen(funcsym_p->Sident);
|
||
char *buffer = (char *)malloc(len + 4);
|
||
assert(buffer);
|
||
if (len <= 254)
|
||
{ buffer[0] = len;
|
||
memcpy(buffer + 1, funcsym_p->Sident, len);
|
||
len++;
|
||
}
|
||
else
|
||
{ buffer[0] = 0xFF;
|
||
buffer[1] = 0;
|
||
buffer[2] = len & 0xFF;
|
||
buffer[3] = len >> 8;
|
||
memcpy(buffer + 4, funcsym_p->Sident, len);
|
||
len += 4;
|
||
}
|
||
genasm(c, buffer, len); // append func name
|
||
free(buffer);
|
||
#else
|
||
char name[IDMAX+IDOHD+1];
|
||
size_t len = obj_mangle(funcsym_p,name);
|
||
assert(len < sizeof(name));
|
||
genasm(c,name,len); // append func name
|
||
#endif
|
||
if (STACKALIGN == 16 && npush)
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,0,SP),npush * REGSIZE); // ADD ESP,npush * REGSIZE
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
|
||
}
|
||
|
||
#if MARS
|
||
if (usednteh & NTEHjmonitor)
|
||
{ Symbol *sthis;
|
||
|
||
for (si = 0; 1; si++)
|
||
{ assert(si < globsym.top);
|
||
sthis = globsym.tab[si];
|
||
if (strcmp(sthis->Sident,"this") == 0)
|
||
break;
|
||
}
|
||
c = cat(c,nteh_monitor_prolog(sthis));
|
||
EBPtoESP += 3 * 4;
|
||
}
|
||
#endif
|
||
|
||
while (topush) /* while registers to push */
|
||
{ reg = findreg(topush);
|
||
topush &= ~mask[reg];
|
||
c = gen1(c,0x50 + (reg & 7));
|
||
if (reg & 8)
|
||
code_orrex(c, REX_B);
|
||
EBPtoESP += REGSIZE;
|
||
#if ELFOBJ || MACHOBJ
|
||
if (config.fulltypes)
|
||
{ // Emit debug_frame data giving location of saved register
|
||
// relative to 0[EBP]
|
||
pinholeopt(c, NULL);
|
||
dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg
|
||
dwarf_CFA_offset(reg, -EBPtoESP - REGSIZE);
|
||
}
|
||
#endif
|
||
}
|
||
|
||
Lcont:
|
||
|
||
/* Determine if we need to reload DS */
|
||
if (tyf & mTYloadds)
|
||
{ code *c1;
|
||
|
||
if (!pushds) // if not already pushed
|
||
c = gen1(c,0x1E); // PUSH DS
|
||
c1 = genc(CNIL,0xC7,modregrm(3,0,AX),0,0,FLdatseg,(targ_uns) 0); /* MOV AX,DGROUP */
|
||
c1->Iflags ^= CFseg | CFoff; /* turn off CFoff, on CFseg */
|
||
c = cat(c,c1);
|
||
gen2(c,0x8E,modregrm(3,3,AX)); /* MOV DS,AX */
|
||
useregs(mAX);
|
||
}
|
||
|
||
if (tym == TYifunc)
|
||
c = gen1(c,0xFC); // CLD
|
||
|
||
#if NTEXCEPTIONS == 2
|
||
if (usednteh & NTEH_except)
|
||
c = cat(c,nteh_setsp(0x89)); // MOV __context[EBP].esp,ESP
|
||
#endif
|
||
|
||
// Load register parameters off of the stack. Do not use
|
||
// assignaddr(), as it will replace the stack reference with
|
||
// the register!
|
||
for (si = 0; si < globsym.top; si++)
|
||
{ symbol *s = globsym.tab[si];
|
||
code *c2;
|
||
unsigned sz = type_size(s->Stype);
|
||
|
||
if ((s->Sclass == SCregpar || s->Sclass == SCparameter) &&
|
||
s->Sfl == FLreg &&
|
||
(refparam
|
||
#if MARS
|
||
// This variable has been referenced by a nested function
|
||
|| s->Stype->Tty & mTYvolatile
|
||
#endif
|
||
))
|
||
{
|
||
/* MOV reg,param[BP] */
|
||
//assert(refparam);
|
||
if (mask[s->Sreglsw] & XMMREGS)
|
||
{
|
||
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,mem
|
||
unsigned xreg = s->Sreglsw - XMM0;
|
||
code *c2 = genc1(CNIL,op,modregxrm(2,xreg,BPRM),FLconst,Poff + s->Soffset);
|
||
if (!hasframe)
|
||
{ // Convert to ESP relative address rather than EBP
|
||
c2->Irm = modregxrm(2,xreg,4);
|
||
c2->Isib = modregrm(0,4,SP);
|
||
c2->IEVpointer1 += EBPtoESP;
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
else
|
||
{
|
||
code *c2 = genc1(CNIL,0x8B ^ (sz == 1),
|
||
modregxrm(2,s->Sreglsw,BPRM),FLconst,Poff + s->Soffset);
|
||
if (!I16 && sz == SHORTSIZE)
|
||
c2->Iflags |= CFopsize; // operand size
|
||
if (I64 && sz >= REGSIZE)
|
||
c2->Irex |= REX_W;
|
||
if (!hasframe)
|
||
{ /* Convert to ESP relative address rather than EBP */
|
||
assert(!I16);
|
||
c2->Irm = modregxrm(2,s->Sreglsw,4);
|
||
c2->Isib = modregrm(0,4,SP);
|
||
c2->IEVpointer1 += EBPtoESP;
|
||
}
|
||
if (sz > REGSIZE)
|
||
{
|
||
code *c3 = genc1(CNIL,0x8B,
|
||
modregxrm(2,s->Sregmsw,BPRM),FLconst,Poff + s->Soffset + REGSIZE);
|
||
if (I64)
|
||
c3->Irex |= REX_W;
|
||
if (!hasframe)
|
||
{ /* Convert to ESP relative address rather than EBP */
|
||
assert(!I16);
|
||
c3->Irm = modregxrm(2,s->Sregmsw,4);
|
||
c3->Isib = modregrm(0,4,SP);
|
||
c3->IEVpointer1 += EBPtoESP;
|
||
}
|
||
c2 = cat(c2,c3);
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
}
|
||
else if (s->Sclass == SCfastpar)
|
||
{ // Argument is passed in a register
|
||
unsigned preg = s->Spreg;
|
||
|
||
namedargs |= mask[preg];
|
||
|
||
if (s->Sfl == FLreg)
|
||
{ // MOV reg,preg
|
||
if (mask[preg] & XMMREGS)
|
||
{
|
||
unsigned op = xmmload(s->Stype->Tty); // MOVSS/D xreg,preg
|
||
unsigned xreg = s->Sreglsw - XMM0;
|
||
c = gen2(c,op,modregxrmx(3,xreg,preg - XMM0));
|
||
}
|
||
else
|
||
{
|
||
c = genmovreg(c,s->Sreglsw,preg);
|
||
if (I64 && sz == 8)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
else if (s->Sflags & SFLdead ||
|
||
(!anyiasm && !(s->Sflags & SFLread) && s->Sflags & SFLunambig &&
|
||
#if MARS
|
||
// This variable has been referenced by a nested function
|
||
!(s->Stype->Tty & mTYvolatile) &&
|
||
#endif
|
||
(config.flags4 & CFG4optimized || !config.fulltypes)))
|
||
{
|
||
// Ignore it, as it is never referenced
|
||
;
|
||
}
|
||
else
|
||
{
|
||
targ_size_t offset = Aoff + BPoff + s->Soffset;
|
||
int op = 0x89; // MOV x[EBP],preg
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
op = xmmstore(s->Stype->Tty);
|
||
}
|
||
if (hasframe)
|
||
{
|
||
if (!(pushalloc && preg == pushallocreg))
|
||
{
|
||
// MOV x[EBP],preg
|
||
c2 = genc1(CNIL,op,
|
||
modregxrm(2,preg,BPRM),FLconst, offset);
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
}
|
||
else
|
||
{
|
||
//printf("%s Aoff = %d, BPoff = %d, Soffset = %d, sz = %d\n", s->Sident, (int)Aoff, (int)BPoff, (int)s->Soffset, (int)sz);
|
||
// if (offset & 2)
|
||
// c2->Iflags |= CFopsize;
|
||
if (I64 && sz == 8)
|
||
code_orrex(c2, REX_W);
|
||
}
|
||
c = cat(c, c2);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
offset += EBPtoESP;
|
||
if (!(pushalloc && preg == pushallocreg))
|
||
{
|
||
// MOV offset[ESP],preg
|
||
// BUG: byte size?
|
||
c2 = genc1(CNIL,op,
|
||
(modregrm(0,4,SP) << 8) |
|
||
modregxrm(2,preg,4),FLconst,offset);
|
||
if (preg >= XMM0 && preg <= XMM15)
|
||
{
|
||
}
|
||
else
|
||
{
|
||
if (I64 && sz == 8)
|
||
c2->Irex |= REX_W;
|
||
// if (offset & 2)
|
||
// c2->Iflags |= CFopsize;
|
||
}
|
||
c = cat(c,c2);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Load arguments passed in registers into the varargs save area
|
||
* so they can be accessed by va_arg().
|
||
*/
|
||
if (I64 && variadic(funcsym_p->Stype))
|
||
{
|
||
/* Look for __va_argsave
|
||
*/
|
||
symbol *sv = NULL;
|
||
for (SYMIDX si = 0; si < globsym.top; si++)
|
||
{ symbol *s = globsym.tab[si];
|
||
if (s->Sident[0] == '_' && strcmp(s->Sident, "__va_argsave") == 0)
|
||
{ sv = s;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (sv && !(sv->Sflags & SFLdead))
|
||
{
|
||
/* Generate code to move any arguments passed in registers into
|
||
* the stack variable __va_argsave,
|
||
* so we can reference it via pointers through va_arg().
|
||
* struct __va_argsave_t {
|
||
* size_t[6] regs;
|
||
* real[8] fpregs;
|
||
* uint offset_regs;
|
||
* uint offset_fpregs;
|
||
* void* stack_args;
|
||
* void* reg_args;
|
||
* }
|
||
* The MOVAPS instructions seg fault if data is not aligned on
|
||
* 16 bytes, so this gives us a nice check to ensure no mistakes.
|
||
MOV voff+0*8[RBP],EDI
|
||
MOV voff+1*8[RBP],ESI
|
||
MOV voff+2*8[RBP],RDX
|
||
MOV voff+3*8[RBP],RCX
|
||
MOV voff+4*8[RBP],R8
|
||
MOV voff+5*8[RBP],R9
|
||
MOVZX EAX,AL // AL = 0..8, # of XMM registers used
|
||
SHL EAX,2 // 4 bytes for each MOVAPS
|
||
LEA RDX,offset L2[RIP]
|
||
SUB RDX,RAX
|
||
LEA RAX,voff+6*8+0x7F[RBP]
|
||
JMP EDX
|
||
MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used
|
||
MOVAPS -0x1F[RAX],XMM6
|
||
MOVAPS -0x2F[RAX],XMM5
|
||
MOVAPS -0x3F[RAX],XMM4
|
||
MOVAPS -0x4F[RAX],XMM3
|
||
MOVAPS -0x5F[RAX],XMM2
|
||
MOVAPS -0x6F[RAX],XMM1
|
||
MOVAPS -0x7F[RAX],XMM0
|
||
L2:
|
||
MOV 1[RAX],offset_regs // set __va_argsave.offset_regs
|
||
MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs
|
||
LEA RDX, Poff+Poffset[RBP]
|
||
MOV 9[RAX],RDX // set __va_argsave.stack_args
|
||
SUB RAX,6*8+0x7F // point to start of __va_argsave
|
||
MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
|
||
*/
|
||
targ_size_t voff = Aoff + BPoff + sv->Soffset; // EBP offset of start of sv
|
||
const int vregnum = 6;
|
||
const unsigned vsize = vregnum * 8 + 8 * 16;
|
||
code *cv = CNIL;
|
||
|
||
static unsigned char regs[vregnum] = { DI,SI,DX,CX,R8,R9 };
|
||
|
||
if (!hasframe)
|
||
voff += EBPtoESP;
|
||
for (int i = 0; i < vregnum; i++)
|
||
{
|
||
unsigned r = regs[i];
|
||
if (!(mask[r] & namedargs)) // named args are already dealt with
|
||
{ unsigned ea = (REX_W << 16) | modregxrm(2,r,BPRM);
|
||
if (!hasframe)
|
||
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
|
||
cv = genc1(cv,0x89,ea,FLconst,voff + i*8);
|
||
}
|
||
}
|
||
|
||
cv = genregs(cv,0x0FB6,AX,AX); // MOVZX EAX,AL
|
||
genc2(cv,0xC1,modregrm(3,4,AX),2); // SHL EAX,2
|
||
int raxoff = voff+6*8+0x7F;
|
||
unsigned L2offset = (raxoff < -0x7F) ? 0x2C : 0x29;
|
||
if (!hasframe)
|
||
L2offset += 1; // +1 for sib byte
|
||
// LEA RDX,offset L2[RIP]
|
||
genc1(cv,0x8D,(REX_W << 16) | modregrm(0,DX,5),FLconst,L2offset);
|
||
genregs(cv,0x29,AX,DX); // SUB RDX,RAX
|
||
code_orrex(cv, REX_W);
|
||
// LEA RAX,voff+vsize-6*8-16+0x7F[RBP]
|
||
unsigned ea = (REX_W << 16) | modregrm(2,AX,BPRM);
|
||
if (!hasframe)
|
||
// add sib byte for [RSP] addressing
|
||
ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
|
||
genc1(cv,0x8D,ea,FLconst,raxoff);
|
||
gen2(cv,0xFF,modregrm(3,4,DX)); // JMP EDX
|
||
for (int i = 0; i < 8; i++)
|
||
{
|
||
// MOVAPS -15-16*i[RAX],XMM7-i
|
||
genc1(cv,0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
|
||
}
|
||
|
||
/* Compute offset_regs and offset_fpregs
|
||
*/
|
||
unsigned offset_regs = 0;
|
||
unsigned offset_fpregs = vregnum * 8;
|
||
for (int i = AX; i <= XMM7; i++)
|
||
{ regm_t m = mask[i];
|
||
if (m & namedargs)
|
||
{
|
||
if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
|
||
offset_regs += 8;
|
||
else if (m & XMMREGS)
|
||
offset_fpregs += 16;
|
||
namedargs &= ~m;
|
||
if (!namedargs)
|
||
break;
|
||
}
|
||
}
|
||
// MOV 1[RAX],offset_regs
|
||
genc(cv,0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);
|
||
|
||
// MOV 5[RAX],offset_fpregs
|
||
genc(cv,0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);
|
||
|
||
// LEA RDX, Poff+Poffset[RBP]
|
||
ea = modregrm(2,DX,BPRM);
|
||
if (!hasframe)
|
||
ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
|
||
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
genc1(cv,0x8D,(REX_W << 16) | ea,FLconst,Poff + Poffset);
|
||
|
||
// MOV 9[RAX],RDX
|
||
genc1(cv,0x89,(REX_W << 16) | modregrm(2,DX,AX),FLconst,9);
|
||
|
||
// SUB RAX,6*8+0x7F // point to start of __va_argsave
|
||
genc2(cv,0x2D,0,6*8+0x7F);
|
||
code_orrex(cv, REX_W);
|
||
|
||
// MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args
|
||
genc1(cv,0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);
|
||
|
||
pinholeopt(cv, NULL);
|
||
useregs(mDX|mAX);
|
||
c = cat(c,cv);
|
||
}
|
||
}
|
||
|
||
#if 0 && TARGET_LINUX
|
||
if (gotref)
|
||
{ // position independent reference
|
||
c = cat(c, cod3_load_got());
|
||
}
|
||
#endif
|
||
|
||
return c;
|
||
}
|
||
|
||
/*******************************
|
||
* Generate and return function epilog.
|
||
* Output:
|
||
* retsize Size of function epilog
|
||
*/
|
||
|
||
static targ_size_t spoff;
|
||
|
||
void epilog(block *b)
|
||
{ code *c;
|
||
code *cr;
|
||
code *ce;
|
||
code *cpopds;
|
||
unsigned reg;
|
||
unsigned regx; // register that's not a return reg
|
||
regm_t topop,regm;
|
||
tym_t tyf,tym;
|
||
int op;
|
||
char farfunc;
|
||
targ_size_t xlocalsize = localsize;
|
||
|
||
c = CNIL;
|
||
ce = b->Bcode;
|
||
tyf = funcsym_p->ty();
|
||
tym = tybasic(tyf);
|
||
farfunc = tyfarfunc(tym);
|
||
if (!(b->Bflags & BFLepilog)) // if no epilog code
|
||
goto Lret; // just generate RET
|
||
regx = (b->BC == BCret) ? AX : CX;
|
||
|
||
spoff = 0;
|
||
retsize = 0;
|
||
|
||
if (tyf & mTYnaked) // if no prolog/epilog
|
||
return;
|
||
|
||
if (tym == TYifunc)
|
||
{ static unsigned char ops2[] = { 0x07,0x1F,0x61,0xCF,0 };
|
||
static unsigned char ops0[] = { 0x07,0x1F,0x5F,0x5E,
|
||
0x5D,0x5B,0x5B,0x5A,
|
||
0x59,0x58,0xCF,0 };
|
||
unsigned char *p;
|
||
|
||
c = genregs(c,0x8B,SP,BP); // MOV SP,BP
|
||
p = (config.target_cpu >= TARGET_80286) ? ops2 : ops0;
|
||
do
|
||
gen1(c,*p);
|
||
while (*++p);
|
||
goto Lopt;
|
||
}
|
||
|
||
if (config.flags & CFGtrace &&
|
||
(!(config.flags4 & CFG4allcomdat) ||
|
||
funcsym_p->Sclass == SCcomdat ||
|
||
funcsym_p->Sclass == SCglobal ||
|
||
(config.flags2 & CFG2comdat && SymInline(funcsym_p))
|
||
)
|
||
)
|
||
{
|
||
symbol *s = rtlsym[farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N];
|
||
makeitextern(s);
|
||
c = gencs(c,I16 ? 0x9A : CALL,0,FLfunc,s); // CALLF _trace
|
||
if (!I16)
|
||
code_orflag(c,CFoff | CFselfrel);
|
||
useregs((ALLREGS | mBP | mES) & ~s->Sregsaved);
|
||
}
|
||
|
||
if (usednteh & ~NTEHjmonitor && (config.exe == EX_NT || MARS))
|
||
c = cat(c,nteh_epilog());
|
||
|
||
cpopds = CNIL;
|
||
if (tyf & mTYloadds)
|
||
{ cpopds = gen1(cpopds,0x1F); // POP DS
|
||
c = cat(c,cpopds);
|
||
spoff += intsize;
|
||
}
|
||
|
||
/* Pop all the general purpose registers saved on the stack
|
||
* by the prolog code. Remember to do them in the reverse
|
||
* order they were pushed.
|
||
*/
|
||
reg = I64 ? R15 : DI;
|
||
regm = 1 << reg;
|
||
topop = fregsaved & ~mfuncreg;
|
||
#ifdef DEBUG
|
||
if (topop & ~0xFFFF)
|
||
printf("fregsaved = x%x, mfuncreg = x%x\n",fregsaved,mfuncreg);
|
||
#endif
|
||
assert(!(topop & ~0xFFFF));
|
||
while (topop)
|
||
{ if (topop & regm)
|
||
{ c = gen1(c,0x58 + (reg & 7)); // POP reg
|
||
if (reg & 8)
|
||
code_orrex(c, REX_B);
|
||
topop &= ~regm;
|
||
spoff += REGSIZE;
|
||
}
|
||
regm >>= 1;
|
||
reg--;
|
||
}
|
||
|
||
#if MARS
|
||
if (usednteh & NTEHjmonitor)
|
||
{
|
||
regm_t retregs = 0;
|
||
if (b->BC == BCretexp)
|
||
retregs = regmask(b->Belem->Ety, tym);
|
||
code *cn = nteh_monitor_epilog(retregs);
|
||
c = cat(c,cn);
|
||
xlocalsize += 8;
|
||
}
|
||
#endif
|
||
|
||
if (config.wflags & WFwindows && farfunc)
|
||
{
|
||
int wflags = config.wflags;
|
||
if (wflags & WFreduced && !(tyf & mTYexport))
|
||
{ // reduced prolog/epilog for non-exported functions
|
||
wflags &= ~(WFdgroup | WFds | WFss);
|
||
if (!(wflags & WFsaveds))
|
||
goto L4;
|
||
}
|
||
|
||
if (localsize | usedalloca)
|
||
{
|
||
c = genc1(c,0x8D,modregrm(1,SP,6),FLconst,(targ_uns)-2); /* LEA SP,-2[BP] */
|
||
}
|
||
if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
|
||
{ if (cpopds)
|
||
cpopds->Iop = NOP; // don't need previous one
|
||
c = gen1(c,0x1F); // POP DS
|
||
}
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
if (config.wflags & WFincbp)
|
||
gen1(c,0x48 + BP); // DEC BP
|
||
assert(hasframe);
|
||
}
|
||
else
|
||
{
|
||
if (needframe || (xlocalsize && hasframe))
|
||
{
|
||
L4:
|
||
assert(hasframe);
|
||
if (xlocalsize | usedalloca)
|
||
{ if (config.target_cpu >= TARGET_80286 &&
|
||
!(config.target_cpu >= TARGET_80386 &&
|
||
config.flags4 & CFG4speed)
|
||
)
|
||
c = gen1(c,0xC9); // LEAVE
|
||
else if (0 && xlocalsize == REGSIZE && !usedalloca && I32)
|
||
{ // This doesn't work - I should figure out why
|
||
mfuncreg &= ~mask[regx];
|
||
c = gen1(c,0x58 + regx); // POP regx
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
}
|
||
else
|
||
{ c = genregs(c,0x8B,SP,BP); // MOV SP,BP
|
||
if (I64)
|
||
code_orrex(c, REX_W); // MOV RSP,RBP
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
}
|
||
}
|
||
else
|
||
c = gen1(c,0x58 + BP); // POP BP
|
||
if (config.wflags & WFincbp && farfunc)
|
||
gen1(c,0x48 + BP); // DEC BP
|
||
}
|
||
else if (xlocalsize == REGSIZE && (!I16 || b->BC == BCret))
|
||
{ mfuncreg &= ~mask[regx];
|
||
c = gen1(c,0x58 + regx); // POP regx
|
||
}
|
||
else if (xlocalsize)
|
||
{
|
||
c = genc2(c,0x81,modregrm(3,0,SP),xlocalsize); // ADD SP,xlocalsize
|
||
if (I64)
|
||
code_orrex(c, REX_W);
|
||
}
|
||
}
|
||
if (b->BC == BCret || b->BC == BCretexp)
|
||
{
|
||
Lret:
|
||
op = tyfarfunc(tym) ? 0xCA : 0xC2;
|
||
if (tym == TYhfunc)
|
||
{
|
||
c = genc2(c,0xC2,0,4); // RET 4
|
||
}
|
||
else if (!typfunc(tym) || // if caller cleans the stack
|
||
Poffset == 0) // or nothing pushed on the stack anyway
|
||
{ op++; // to a regular RET
|
||
c = gen1(c,op);
|
||
}
|
||
else
|
||
{ // Stack is always aligned on register size boundary
|
||
Poffset = (Poffset + (REGSIZE - 1)) & ~(REGSIZE - 1);
|
||
c = genc2(c,op,0,Poffset); // RET Poffset
|
||
}
|
||
}
|
||
|
||
Lopt:
|
||
// If last instruction in ce is ADD SP,imm, and first instruction
|
||
// in c sets SP, we can dump the ADD.
|
||
cr = code_last(ce);
|
||
if (cr && c && !I64)
|
||
{
|
||
if (cr->Iop == 0x81 && cr->Irm == modregrm(3,0,SP)) // if ADD SP,imm
|
||
{
|
||
if (
|
||
c->Iop == 0xC9 || // LEAVE
|
||
(c->Iop == 0x8B && c->Irm == modregrm(3,SP,BP)) || // MOV SP,BP
|
||
(c->Iop == 0x8D && c->Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP]
|
||
)
|
||
cr->Iop = NOP;
|
||
else if (c->Iop == 0x58 + BP) // if POP BP
|
||
{ cr->Iop = 0x8B;
|
||
cr->Irm = modregrm(3,SP,BP); // MOV SP,BP
|
||
}
|
||
}
|
||
#if 0 // These optimizations don't work if the called function
|
||
// cleans off the stack.
|
||
else if (c->Iop == 0xC3 && cr->Iop == CALL) // CALL near
|
||
{ cr->Iop = 0xE9; // JMP near
|
||
c->Iop = NOP;
|
||
}
|
||
else if (c->Iop == 0xCB && cr->Iop == 0x9A) // CALL far
|
||
{ cr->Iop = 0xEA; // JMP far
|
||
c->Iop = NOP;
|
||
}
|
||
#endif
|
||
}
|
||
|
||
retsize += calcblksize(c); // compute size of function epilog
|
||
b->Bcode = cat(ce,c);
|
||
}
|
||
|
||
/*******************************
|
||
* Return offset of SP from BP.
|
||
*/
|
||
|
||
targ_size_t cod3_spoff()
|
||
{
|
||
return spoff + localsize;
|
||
}
|
||
|
||
/**********************************
|
||
* Load value of _GLOBAL_OFFSET_TABLE_ into EBX
|
||
*/
|
||
|
||
code *cod3_load_got()
|
||
{
|
||
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
|
||
code *c;
|
||
code *cgot;
|
||
|
||
c = genc2(NULL,CALL,0,0); // CALL L1
|
||
gen1(c, 0x58 + BX); // L1: POP EBX
|
||
|
||
// ADD EBX,_GLOBAL_OFFSET_TABLE_+3
|
||
symbol *gotsym = elfobj_getGOTsym();
|
||
cgot = gencs(CNIL,0x81,0xC3,FLextern,gotsym);
|
||
cgot->Iflags = CFoff;
|
||
cgot->IEVoffset2 = 3;
|
||
|
||
makeitextern(gotsym);
|
||
return cat(c,cgot);
|
||
#else
|
||
assert(0);
|
||
return NULL;
|
||
#endif
|
||
}
|
||
|
||
/**********************************
 * Generate the code that moves symbol s between its assigned register(s)
 * and its memory location.
 * Input:
 *      s       symbol whose register(s) are Sreglsw (and Sregmsw when the
 *              symbol is larger than REGSIZE)
 *      toreg   true:  load the register(s) from memory
 *              false: store the register(s) to memory
 * Returns:
 *      the generated code list
 */
code* gen_spill_reg(Symbol* s, bool toreg)
{
    code cs;
    code *c;
    const regm_t keepmsk = toreg ? RMload : RMstore;
    const int sz = type_size(s->Stype);

    // A temporary variable elem lets getlvalue() compute the addressing mode
    elem *evar = el_var(s);

    if (!(mask[s->Sreglsw] & XMMREGS))
    {
        // General purpose register:
        //      MOV reg,mem[ESP]   or   MOV mem[ESP],reg
        // XOR with (sz == 1) selects the byte form of the opcode.
        cs.Iop = (toreg ? 0x8B : 0x89) ^ (sz == 1);
        c = getlvalue(&cs,evar,keepmsk);
        cs.orReg(s->Sreglsw);
        if (I64 && sz == 1 && s->Sreglsw >= 4)
            cs.Irex |= REX;
        c = gen(c,&cs);

        if (sz > REGSIZE)
        {
            // Same instruction again for the most significant half
            cs.setReg(s->Sregmsw);
            getlvalue_msw(&cs);
            c = gen(c,&cs);
        }
    }
    else
    {
        // XMM register:
        //      MOVSS/D xreg,mem   or   MOVSS/D mem,xreg
        cs.Iop = toreg ? xmmload(s->Stype->Tty) : xmmstore(s->Stype->Tty);
        c = getlvalue(&cs,evar,keepmsk);
        cs.orReg(s->Sreglsw - XMM0);
        c = gen(c,&cs);
    }

    el_free(evar);
    return c;
}
|
||
|
||
/****************************
|
||
* Generate code for, and output a thunk.
|
||
* Input:
|
||
* thisty Type of this pointer
|
||
* p ESP parameter offset to this pointer
|
||
* d offset to add to 'this' pointer
|
||
* d2 offset from 'this' to vptr
|
||
* i offset into vtbl[]
|
||
*/
|
||
|
||
void cod3_thunk(symbol *sthunk,symbol *sfunc,unsigned p,tym_t thisty,
        targ_size_t d,int i,targ_size_t d2)
{
    /* Emit the thunk 'sthunk': adjust the 'this' pointer by d, then either
     * jump directly to sfunc, or (when the low 16 bits of i are not 0xFFFF)
     * load the vptr found at offset d2 from 'this' and jump through slot i.
     * Where 'this' lives is chosen from the thunk's calling convention:
     *   TYmfunc                        ECX
     *   TYjfunc, or TYnfunc when I64   EAX (EDI/RDI when I64)
     *   anything else                  on the stack at p[ESP]
     */
    code *c,*c1;
    targ_size_t thunkoffset;
    tym_t thunkty;

    cod3_align();

    /* Skip over return address */
    thunkty = tybasic(sthunk->ty());
#if TARGET_SEGMENTED
    if (tyfarfunc(thunkty))
        p += I32 ? 8 : tysize[TYfptr];          /* far function */
    else
#endif
        p += tysize[TYnptr];

    if (!I16)
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this->vptr
            JMP i[EAX]                          jump to virtual function
         */
        unsigned reg = 0;
        if ((targ_ptrdiff_t)d < 0)
        {
            d = -d;
            reg = 5;                            // switch from ADD to SUB
        }
        if (thunkty == TYmfunc)
        {                                       // ADD ECX,d
            c = CNIL;
            if (d)
                c = genc2(c,0x81,modregrm(3,reg,CX),d);
        }
        else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
        {                                       // ADD EAX,d  (EDI when I64)
            c = CNIL;
            if (d)
                c = genc2(c,0x81,modregrm(3,reg,I64 ? DI : AX),d);
        }
        else
        {
            c = genc(CNIL,0x81,modregrm(2,reg,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            c->Isib = modregrm(0,4,SP);
        }
        if (I64 && c)
            c->Irex |= REX_W;
    }
    else
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this->vptr
            JMP i[BX]                           jump to virtual function
         */

        c = genregs(CNIL,0x89,SP,BX);                   /* MOV BX,SP    */
        c1 = genc(CNIL,0x81,modregrm(2,0,7),
            FLconst,p,                                  /* to this      */
            FLconst,d);                                 /* ADD p[BX],d  */
        if (config.wflags & WFssneds ||
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            (config.wflags & WFss && LARGEDATA))
            c1->Iflags |= CFss;                         /* SS:          */
        c = cat(c,c1);
    }

    if ((i & 0xFFFF) != 0xFFFF)                 /* if virtual call      */
    {   code *c2,*c3;

#define FARTHIS (tysize(thisty) > REGSIZE)
#define FARVPTR FARTHIS

#if TARGET_SEGMENTED
        assert(thisty != TYvptr);               /* can't handle this case */
#endif

        if (!I16)
        {
            assert(!FARTHIS && !LARGECODE);
            if (thunkty == TYmfunc)     // if 'this' is in ECX
            {   c1 = CNIL;

                // MOV EAX,d2[ECX]
                c2 = genc1(CNIL,0x8B,modregrm(2,AX,CX),FLconst,d2);
            }
            else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
            {   // 'this' is in a register: the same one that was adjusted
                // above (EAX, or EDI when I64), so no reload from the stack.
                c1 = CNIL;

                // MOV EAX,d2[EAX]  (d2[EDI] when I64)
                c2 = genc1(CNIL,0x8B,modregrm(2,AX,I64 ? DI : AX),FLconst,d2);
            }
            else
            {
                // MOV EAX,p[ESP]
                c1 = genc1(CNIL,0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,(targ_uns) p);
                if (I64)
                    c1->Irex |= REX_W;

                // MOV EAX,d2[EAX]
                c2 = genc1(CNIL,0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            if (I64)
                code_orrex(c2, REX_W);
                                                                /* JMP i[EAX]   */
            c3 = genc1(CNIL,0xFF,modregrm(2,4,0),FLconst,(targ_uns) i);
        }
        else
        {
            /* MOV/LES BX,[SS:] p[BX]   */
            c1 = genc1(CNIL,(FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,(targ_uns) p);
            if (config.wflags & WFssneds ||
                // If DS needs reloading from SS,
                // then assume SS != DS on thunk entry
                (config.wflags & WFss && LARGEDATA))
                c1->Iflags |= CFss;                     /* SS:          */

            /* MOV/LES BX,[ES:]d2[BX] */
            c2 = genc1(CNIL,(FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
            if (FARTHIS)
                c2->Iflags |= CFes;                     /* ES:          */

                                                        /* JMP i[BX]    */
            c3 = genc1(CNIL,0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,(targ_uns) i);
            if (FARVPTR)
                c3->Iflags |= CFes;                     /* ES:          */
        }
        c = cat4(c,c1,c2,c3);
    }
    else
    {
        c1 = gencs(CNIL,(LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); /* JMP sfunc */
        c1->Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
        c = cat(c,c1);
    }

    // Emit the thunk and record where it landed in the code segment.
    thunkoffset = Coffset;
    pinholeopt(c,NULL);
    codout(c);
    code_free(c);

    sthunk->Soffset = thunkoffset;
    sthunk->Ssize = Coffset - thunkoffset;      /* size of thunk        */
    sthunk->Sseg = cseg;
#if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
    objpubdef(cseg,sthunk,sthunk->Soffset);
#endif
    searchfixlist(sthunk);                      /* resolve forward refs */
}
|
||
|
||
/*****************************
|
||
* Assume symbol s is extern.
|
||
*/
|
||
|
||
void makeitextern(symbol *s)
{
    // Only symbols whose Sxtrnnum is still 0 are turned into externs.
    if (s->Sxtrnnum != 0)
        return;

    s->Sclass = SCextern;               // external
    objextern(s);
}
|
||
|
||
|
||
/*******************************
|
||
* Replace JMPs in Bgotocode with JMP SHORTs wherever possible.
|
||
* This routine depends on FLcode jumps to only be forward
|
||
* referenced.
|
||
* BFLjmpoptdone is set to TRUE if nothing more can be done
|
||
* with this block.
|
||
* Input:
|
||
* flag !=0 means don't have correct Boffsets yet
|
||
* Returns:
|
||
* number of bytes saved
|
||
*/
|
||
|
||
int branch(block *bl,int flag)
{
    // Walks the code list of block bl looking for JMPs and long-form
    // conditional jumps (CFjmp16) that can be deleted or shortened.
    // Returns the number of bytes saved.
    code *c;
    code *cnext;
    code *ctarget;
    targ_size_t offset;
    targ_size_t disp;
    targ_size_t csize;
    int saved = 0;

    if (!flag)
        bl->Bflags |= BFLjmpoptdone;            // assume this will be all

    c = bl->Bcode;
    if (!c)
        return 0;

    offset = bl->Boffset;                       // offset of start of block
    for (;;)
    {
        unsigned char op;

        csize = calccodsize(c);
        cnext = code_next(c);
        op = c->Iop;
        if (op == JMP ||
            ((op & ~0x0F) == 0x70 && (c->Iflags & CFjmp16)))
        {
          Lretarget:
            switch (c->IFL2)
            {
                case FLblock:
                    if (flag)                   // no offsets yet, don't optimize
                        goto Lnext;
                    disp = c->IEV2.Vblock->Boffset - offset - csize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (disp >= 0x7F-4 && c->IEV2.Vblock->Boffset > offset)
                    {
                        // Look for intervening alignment
                        for (block *b = bl->Bnext; b; b = b->Bnext)
                        {
                            if (b->Balign)
                            {
                                bl->Bflags &= ~BFLjmpoptdone;   // some JMPs left
                                goto Lnext;
                            }
                            if (b == c->IEV2.Vblock)
                                break;
                        }
                    }
                    break;

                case FLcode:
                {
                    code *cscan;

                    disp = 0;
                    ctarget = c->IEV2.Vcode;    // target of branch
                    assert(ctarget->Iflags & (CFtarg | CFtarg2));

                    // Forward search: sum the sizes between here and the target
                    for (cscan = cnext; cscan && cscan != ctarget; cscan = code_next(cscan))
                        disp += calccodsize(cscan);

                    if (!cscan)
                    {
                        // Didn't find it in forward search. Try backwards jump
                        int found = 0;

                        disp = 0;
                        for (cscan = bl->Bcode; cscan != cnext; cscan = code_next(cscan))
                        {
                            assert(cscan != NULL);      // must have found it
                            if (cscan == ctarget)
                                found = 1;
                            if (found)
                                disp += calccodsize(cscan);
                        }
                    }

                    if ((config.flags4 & CFG4optimized) && !flag)
                    {
                        // Propagate branch forward past junk
                        while (ctarget->Iop == NOP ||
                               ctarget->Iop == (ESCAPE | ESClinnum))
                        {
                            ctarget = code_next(ctarget);
                            if (!ctarget)
                                goto Lnochain;
                        }
                        c->IEV2.Vcode = ctarget;
                        ctarget->Iflags |= CFtarg;

                        // And eliminate jmps to jmps
                        if ((op == ctarget->Iop || ctarget->Iop == JMP) &&
                            (op == JMP || (c->Iflags & CFjmp16)))
                        {
                            c->IFL2 = ctarget->IFL2;
                            c->IEV2.Vcode = ctarget->IEV2.Vcode;
                            goto Lretarget;
                        }
                      Lnochain: ;
                    }
                }
                    break;

                default:
                    goto Lnext;
            }

            if (disp == 0)                      // bra to next instruction
            {
                saved += csize;
                c->Iop = NOP;                   // del branch instruction
                c->IEV2.Vcode = NULL;
                c = cnext;
                if (!c)
                    break;
                continue;
            }
            else if ((targ_size_t)(targ_schar)(disp - 2) == (disp - 2) &&
                     (targ_size_t)(targ_schar)disp == disp)
            {
                if (op == JMP)
                {
                    c->Iop = JMPS;              // JMP SHORT
                    saved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    c->Iflags &= ~CFjmp16;      // a branch is ok
                    saved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if ((config.flags4 & CFG4optimized) &&
                        config.target_cpu >= TARGET_80386 &&
                        disp == (I16 ? 3 : 5) &&
                        cnext &&
                        cnext->Iop == CALL &&
                        cnext->IFL2 == FLfunc &&
                        (cnext->IEVsym2->Sflags & SFLexit) &&
                        !(cnext->Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        cnext->Iop = 0x0F00 | ((c->Iop & 0x0F) ^ 0x81);
                        c->Iop = NOP;
                        c->IEV2.Vcode = NULL;
                        saved++;

                        // If nobody else points to ctarget, we can remove the CFtarg
                        if (flag && ctarget)
                        {
                            code *cuse = bl->Bcode;

                            while (cuse && cuse->IEV2.Vcode != ctarget)
                                cuse = code_next(cuse);
                            if (!cuse)
                                ctarget->Iflags &= ~CFtarg;
                        }
                    }
                }
                csize = calccodsize(c);
            }
            else
                bl->Bflags &= ~BFLjmpoptdone;   // some JMPs left
        }
      Lnext:
        if (!cnext)
            break;
        offset += csize;
        c = cnext;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return saved;
}
|
||
|
||
/************************************************
|
||
* Adjust all Soffset's of stack variables so they
|
||
* are all relative to the frame pointer.
|
||
*/
|
||
|
||
#if MARS
|
||
|
||
void cod3_adjSymOffsets()
|
||
{ SYMIDX si;
|
||
|
||
//printf("cod3_adjSymOffsets()\n");
|
||
for (si = 0; si < globsym.top; si++)
|
||
{ //printf("globsym.tab[%d] = %p\n",si,globsym.tab[si]);
|
||
symbol *s = globsym.tab[si];
|
||
|
||
switch (s->Sclass)
|
||
{
|
||
case SCparameter:
|
||
case SCregpar:
|
||
//printf("s = '%s', Soffset = x%x, Poff = x%x, EBPtoESP = x%x\n", s->Sident, s->Soffset, Poff, EBPtoESP);
|
||
s->Soffset += Poff;
|
||
if (0 && !(funcsym_p->Sfunc->Fflags3 & Fmember))
|
||
{
|
||
if (!hasframe)
|
||
s->Soffset += EBPtoESP;
|
||
if (funcsym_p->Sfunc->Fflags3 & Fnested)
|
||
s->Soffset += REGSIZE;
|
||
}
|
||
break;
|
||
case SCauto:
|
||
case SCfastpar:
|
||
case SCregister:
|
||
case_auto:
|
||
//printf("s = '%s', Soffset = x%x, Aoff = x%x, BPoff = x%x EBPtoESP = x%x\n", s->Sident, s->Soffset, Aoff, BPoff, EBPtoESP);
|
||
// if (!(funcsym_p->Sfunc->Fflags3 & Fnested))
|
||
s->Soffset += Aoff + BPoff;
|
||
break;
|
||
case SCbprel:
|
||
break;
|
||
default:
|
||
continue;
|
||
}
|
||
#if 0
|
||
if (!hasframe)
|
||
s->Soffset += EBPtoESP;
|
||
#endif
|
||
}
|
||
}
|
||
|
||
#endif
|
||
|
||
/*******************************
|
||
* Take symbol info in union ev and replace it with a real address
|
||
* in Vpointer.
|
||
*/
|
||
|
||
void assignaddr(block *bl)
{
    // Run assignaddrc() over the block's code. The file-level frame state
    // (hasframe, EBPtoESP) is saved and restored around the call; blocks
    // flagged BFLoutsideprolog are processed as frameless with EBPtoESP
    // set to -REGSIZE.
    const int savedHasframe = hasframe;
    const int savedEBPtoESP = EBPtoESP;

    if (bl->Bflags & BFLoutsideprolog)
    {
        hasframe = 0;
        EBPtoESP = -REGSIZE;
    }

    assignaddrc(bl->Bcode);

    EBPtoESP = savedEBPtoESP;
    hasframe = savedHasframe;
}
|
||
|
||
void assignaddrc(code *c)
{
    /* For every instruction in the list starting at c, replace symbolic
     * operand descriptions (IFL1/IFL2 together with IEV1/IEV2) by concrete
     * offsets. ESCAPE pseudo-instructions are handled here too: ESCadjesp
     * updates the running EBPtoESP and becomes a NOP, ESCframeptr becomes
     * a MOV reg,EBP or an LEA reg,EBPtoESP[ESP].
     */
    int sn;
    symbol *s;
    unsigned char ins,rm;
    targ_size_t soff;
    targ_size_t base;

    base = EBPtoESP;            // value of EBPtoESP on entry, used by FLstack
    for (; c; c = code_next(c))
    {
#ifdef DEBUG
        if (0)
        {       printf("assignaddrc()\n");
                c->print();
        }
        // Catch a two-element cycle in the code list
        if (code_next(c) && code_next(code_next(c)) == c)
            assert(0);
#endif
        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((c->Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c->Iop >> 8) & 0xFF];
        else if ((c->Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c->Iop & 0xFF];
        else if ((c->Iop & 0xFF) == ESCAPE)
        {
            if (c->Iop == (ESCAPE | ESCadjesp))
            {
                //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,c->IEV1.Vint);
                EBPtoESP += c->IEV1.Vint;
                c->Iop = NOP;
            }
            if (c->Iop == (ESCAPE | ESCframeptr))
            {   // Convert to load of frame pointer
                // c->Irm is the register to use
                if (hasframe)
                {   // MOV reg,EBP
                    c->Iop = 0x89;
                    if (c->Irm & 8)
                        c->Irex |= REX_B;
                    c->Irm = modregrm(3,BP,c->Irm & 7);
                }
                else
                {   // LEA reg,EBPtoESP[ESP]
                    c->Iop = 0x8D;
                    if (c->Irm & 8)
                        c->Irex |= REX_R;
                    c->Irm = modregrm(2,c->Irm & 7,4);
                    c->Isib = modregrm(0,4,SP);
                    c->Iflags = CFoff;
                    c->IFL1 = FLconst;
                    c->IEV1.Vuns = EBPtoESP;
                }
            }
            if (I64)
                c->Irex |= REX_W;
            continue;
        }
        else
            ins = inssize[c->Iop & 0xFF];
        if (!(ins & M) ||
            ((rm = c->Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c->Iflags))
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c->Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        s = c->IEVsym1;
        switch (c->IFL1)
        {
#if OMFOBJ
            case FLdata:
                if (s->Sclass == SCcomdat)
                {   c->IFL1 = FLextern;
                    goto do2;
                }
#if MARS
                c->IEVseg1 = s->Sseg;
#else
                c->IEVseg1 = DATA;
#endif
                c->IEVpointer1 += s->Soffset;
                c->IFL1 = FLdatseg;
                goto do2;
            case FLudata:
#if MARS
                c->IEVseg1 = s->Sseg;
#else
                c->IEVseg1 = UDATA;
#endif
                c->IEVpointer1 += s->Soffset;
                c->IFL1 = FLdatseg;
                goto do2;
#else                                   // don't lose symbol information
            case FLdata:
            case FLudata:
            case FLtlsdata:
                c->IFL1 = FLextern;
                goto do2;
#endif
            case FLdatseg:
                c->IEVseg1 = DATA;
                goto do2;

#if TARGET_SEGMENTED
            case FLfardata:
            case FLcsdata:
#endif
            case FLpseudo:
                goto do2;

            case FLstack:
                //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
                //s->Soffset,EBPtoESP,base,c->IEVpointer1);
                c->IEVpointer1 += s->Soffset + EBPtoESP - base - EEoffset;
                break;

            case FLreg:
            case FLauto:
                soff = Aoff;
            L1:
                if (s->Sflags & SFLunambig && !(s->Sflags & SFLread) && // if never loaded
                    !anyiasm &&
                    // if not optimized, leave it in for debuggability
                    (config.flags4 & CFG4optimized || !config.fulltypes))
                {   c->Iop = NOP;               // remove references to it
                    continue;
                }
                if (s->Sfl == FLreg && c->IEVpointer1 < 2)
                {       int reg = s->Sreglsw;

                        assert(!(s->Sregm & ~mask[reg]));
                        if (c->IEVpointer1 == 1)
                        {   assert(reg < 4);    /* must be a BYTEREGS   */
                            reg |= 4;           /* convert to high byte reg */
                        }
                        if (reg & 8)
                        {   assert(I64);
                            c->Irex |= REX_B;
                            reg &= 7;
                        }
                        c->Irm = (c->Irm & modregrm(0,7,0))
                                | modregrm(3,0,reg);
                        assert(c->Iop != LES && c->Iop != LEA);
                        goto do2;
                }
                else
                {   c->IEVpointer1 += s->Soffset + soff + BPoff;
                    if (s->Sflags & SFLunambig)
                        c->Iflags |= CFunambig;
            L2:
                    if (!hasframe)
                    {   /* Convert to ESP relative address instead of EBP */
                        assert(!I16);
                        c->IEVpointer1 += EBPtoESP;
                        rm = c->Irm;
                        if ((rm & 7) == 4)              // if SIB byte
                        {
                            assert((c->Isib & 7) == BP);
                            assert((rm & 0xC0) != 0);
                            c->Isib = (c->Isib & ~7) | modregrm(0,0,SP);
                        }
                        else
                        {
                            assert((rm & 7) == 5);
                            c->Irm = (rm & modregrm(0,7,0))
                                    | modregrm(2,0,4);
                            c->Isib = modregrm(0,4,SP);
                        }
                    }
                }
                break;
            case FLpara:
                soff = Poff - BPoff;    // cancel out add of BPoff
                goto L1;
            case FLtmp:
                soff = Toff;
                goto L1;
            case FLfltreg:
                c->IEVpointer1 += Foff + BPoff;
                c->Iflags |= CFunambig;
                goto L2;
            case FLallocatmp:
                c->IEVpointer1 += AAoff + BPoff;
                goto L2;
            case FLbprel:
                c->IEVpointer1 += s->Soffset;
                break;
            case FLcs:
                sn = c->IEV1.Vuns;
                if (!CSE_loaded(sn))            // if never loaded
                {       c->Iop = NOP;
                        continue;
                }
                c->IEVpointer1 = sn * REGSIZE + CSoff + BPoff;
                c->Iflags |= CFunambig;
                goto L2;
            case FLregsave:
                sn = c->IEV1.Vuns;
                c->IEVpointer1 = sn + regsave.off + BPoff;
                c->Iflags |= CFunambig;
                goto L2;
            case FLndp:
#if MARS
                assert(c->IEV1.Vuns < NDP::savetop);
#endif
                c->IEVpointer1 = c->IEV1.Vuns * NDPSAVESIZE + NDPoff + BPoff;
                c->Iflags |= CFunambig;
                goto L2;
            case FLoffset:
                break;
            case FLlocalsize:
                c->IEVpointer1 += localsize;
                break;
            case FLconst:
            default:
                goto do2;
        }
        c->IFL1 = FLconst;              // first operand is now a plain constant
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T)) goto done;      /* if no second operand */
        s = c->IEVsym2;
        switch (c->IFL2)
        {
#if ELFOBJ || MACHOBJ
            case FLdata:
            case FLudata:
            case FLtlsdata:
                // Keep the symbol; nothing further to resolve for FLextern.
                c->IFL2 = FLextern;
                goto done;
#else
            case FLdata:
                if (s->Sclass == SCcomdat)
                {   c->IFL2 = FLextern;
                    goto done;
                }
#if MARS
                c->IEVseg2 = s->Sseg;
#else
                c->IEVseg2 = DATA;
#endif
                c->IEVpointer2 += s->Soffset;
                c->IFL2 = FLdatseg;
                goto done;
            case FLudata:
#if MARS
                c->IEVseg2 = s->Sseg;
#else
                c->IEVseg2 = UDATA;
#endif
                c->IEVpointer2 += s->Soffset;
                c->IFL2 = FLdatseg;
                goto done;
#endif
            case FLdatseg:
                c->IEVseg2 = DATA;
                goto done;
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
                goto done;
#endif
            case FLreg:
            case FLpseudo:
                assert(0);
                /* NOTREACHED */
            case FLauto:
                c->IEVpointer2 += s->Soffset + Aoff + BPoff;
                break;
            case FLpara:
                c->IEVpointer2 += s->Soffset + Poff;
                break;
            case FLtmp:
                c->IEVpointer2 += s->Soffset + Toff + BPoff;
                break;
            case FLfltreg:
                c->IEVpointer2 += Foff + BPoff;
                break;
            case FLallocatmp:
                c->IEVpointer2 += AAoff + BPoff;
                break;
            case FLbprel:
                c->IEVpointer2 += s->Soffset;
                break;

            case FLstack:
                c->IEVpointer2 += s->Soffset + EBPtoESP - base;
                break;

            case FLcs:
            case FLndp:
            case FLregsave:
                assert(0);
                /* NOTREACHED */

            case FLconst:
                break;

            case FLlocalsize:
                c->IEVpointer2 += localsize;
                break;

            default:
                goto done;
        }
        c->IFL2 = FLconst;              // second operand is now a plain constant
    done:
        ;
    }
}
|
||
|
||
/*******************************
|
||
* Return offset from BP of symbol s.
|
||
*/
|
||
|
||
targ_size_t cod3_bpoffset(symbol *s)
{
    symbol_debug(s);

    // Start from the symbol's own offset and add the base that matches
    // its storage kind.
    targ_size_t bpoff = s->Soffset;

    switch (s->Sfl)
    {
        case FLpara:
            bpoff += Poff;
            break;

        case FLauto:
            bpoff += Aoff + BPoff;
            break;

        case FLtmp:
            bpoff += Toff + BPoff;
            break;

        default:
            // Any other storage kind is not supported here
#ifdef DEBUG
            WRFL((enum FL)s->Sfl);
            symbol_print(s);
#endif
            assert(0);
    }

    assert(hasframe);
    return bpoff;
}
|
||
|
||
|
||
/*******************************
|
||
* Find shorter versions of the same instructions.
|
||
* Does these optimizations:
|
||
* replaces jmps to the next instruction with NOPs
|
||
* sign extension of modregrm displacement
|
||
* sign extension of immediate data (can't do it for OR, AND, XOR
|
||
* as the opcodes are not defined)
|
||
* short versions for AX EA
|
||
* short versions for reg EA
|
||
* Input:
|
||
* b -> block for code (or NULL)
|
||
*/
|
||
|
||
void pinholeopt(code *c,block *b)
{
    /* Peephole pass over the code list starting at c. Each instruction is
     * re-examined (goto L1) after every rewrite until no rule applies.
     * b may be NULL; when given, it supplies the following block (for
     * deleting a trailing jump to it) and disables the size-driven rewrites
     * for BCasm blocks.
     *
     * Byte-register note: throughout this function "!c->Irex && ereg < 4"
     * is the condition under which register numbers 4..7 in a byte
     * instruction denote AH/CH/DH/BH. Rewrites that turn a 32/64 bit
     * register operand with number 4..7 (SP/BP/SI/DI) into a byte-register
     * operand are therefore only done when c->Irex is already nonzero.
     */
    targ_size_t a;
    unsigned op,mod;
    unsigned char ins;
    int usespace;
    int useopsize;
    block *bn;

#ifdef DEBUG
    static int tested; if (!tested) { tested++; pinholeopt_unittest(); }
#endif

#if 0
    code *cstart = c;
    if (debugc)
    {
        printf("+pinholeopt(%p)\n",c);
    }
#endif

    if (b)
    {   bn = b->Bnext;
        usespace = (config.flags4 & CFG4space && b->BC != BCasm);
        useopsize = (I16 || (config.flags4 & CFG4space && b->BC != BCasm));
    }
    else
    {   bn = NULL;
        usespace = (config.flags4 & CFG4space);
        useopsize = (I16 || config.flags4 & CFG4space);
    }
    for (; c; c = code_next(c))
    {
    L1:
        op = c->Iop;
        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];
        if (ins & M)            // if modregrm byte
        {   int shortop = (c->Iflags & CFopsize) ? !I16 : I16;
            int local_BPRM = BPRM;

            if (c->Iflags & CFaddrsize)
                local_BPRM ^= 5 ^ 6;    // toggle between 5 and 6

            unsigned rm = c->Irm;
            unsigned reg = rm & modregrm(0,7,0);          // isolate reg field
            unsigned ereg = rm & 7;
            //printf("c = %p, op = %02x rm = %02x\n", c, op, rm);

            /* If immediate second operand      */
            if ((ins & T ||
                 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
                ) &&
                c->IFL2 == FLconst)
            {
                int flags = c->Iflags & CFpsw;      /* if want result in flags */
                targ_long u = c->IEV2.Vuns;
                if (ins & E)
                    u = (signed char) u;
                else if (shortop)
                    u = (short) u;

                // Replace CMP reg,0 with TEST reg,reg
                if ((op & 0xFE) == 0x80 &&              // 80 is CMP R8,imm8; 81 is CMP reg,imm
                    rm >= modregrm(3,7,AX) &&
                    u == 0)
                {   c->Iop = (op & 1) | 0x84;
                    c->Irm = modregrm(3,ereg,ereg);
                    if (c->Irex & REX_B)
                        c->Irex |= REX_R;
                    goto L1;
                }

                /* Optimize ANDs with an immediate constant             */
                if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
                {
                    if (rm >= modregrm(3,4,AX))         // AND reg,imm
                    {
                        if (u == 0)
                        {       /* Replace with XOR reg,reg     */
                                c->Iop = 0x30 | (op & 1);
                                c->Irm = modregrm(3,ereg,ereg);
                                if (c->Irex & REX_B)
                                    c->Irex |= REX_R;
                                goto L1;
                        }
                        if (u == 0xFFFFFFFF && !flags)
                        {       c->Iop = NOP;
                                goto L1;
                        }
                    }
                    if (op == 0x81 && !flags)
                    {   // If we can do the operation in one byte

                        // If EA is not SI or DI
                        if ((rm < modregrm(3,4,SP) || I64) &&
                            (config.flags4 & CFG4space ||
                             config.target_cpu < TARGET_PentiumPro)
                           )
                        {
                            // Byte form of SP/BP/SI/DI is only valid when a
                            // REX prefix is already present (see note above).
                            if ((u & 0xFFFFFF00) == 0xFFFFFF00 &&
                                (rm < modregrm(3,4,SP) || c->Irex))
                                goto L2;
                            else if (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4))
                            {   if (!shortop)
                                {   if ((u & 0xFFFF00FF) == 0xFFFF00FF)
                                        goto L3;
                                }
                                else
                                {
                                    if ((u & 0xFF) == 0xFF)
                                        goto L3;
                                }
                            }
                        }
                        if (!shortop && useopsize)
                        {
                            if ((u & 0xFFFF0000) == 0xFFFF0000)
                            {   c->Iflags ^= CFopsize;
                                goto L1;
                            }
                            if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
                            {   c->IEVoffset1 += 2; /* address MSW      */
                                c->IEV2.Vuns >>= 16;
                                c->Iflags ^= CFopsize;
                                goto L1;
                            }
                            if (rm >= modregrm(3,4,AX))
                            {
                                // MOVZX reads a byte register: for SP/BP/SI/DI
                                // that needs an existing REX prefix.
                                if (u == 0xFF &&
                                    (rm <= modregrm(3,4,BX) || (I64 && c->Irex)))
                                {   c->Iop = 0x0FB6;     // MOVZX
                                    c->Irm = modregrm(3,ereg,ereg);
                                    if (c->Irex & REX_B)
                                        c->Irex |= REX_R;
                                    goto L1;
                                }
                                if (u == 0xFFFF)
                                {   c->Iop = 0x0FB7;     // MOVZX
                                    c->Irm = modregrm(3,ereg,ereg);
                                    if (c->Irex & REX_B)
                                        c->Irex |= REX_R;
                                    goto L1;
                                }
                            }
                        }
                    }
                }

                /* Look for ADD,OR,SUB,XOR with u that we can eliminate */
                if (!flags &&
                    (op == 0x81 || op == 0x80) &&
                    (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) ||  // ADD,OR
                     reg == modregrm(0,5,0) || reg == modregrm(0,6,0))    // SUB, XOR
                   )
                {
                    if (u == 0)
                    {
                        c->Iop = NOP;
                        goto L1;
                    }
                    if (u == ~0 && reg == modregrm(0,6,0))  /* XOR  */
                    {
                        c->Iop = 0xF6 | (op & 1);       /* NOT  */
                        c->Irm ^= modregrm(0,6^2,0);
                        goto L1;
                    }
                    if (!shortop &&
                        useopsize &&
                        op == 0x81 &&
                        (u & 0xFFFF0000) == 0 &&
                        (reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
                    {    c->Iflags ^= CFopsize;
                         goto L1;
                    }
                }

                /* Look for TEST or OR or XOR with an immediate constant */
                /* that we can replace with a byte operation            */
                if (op == 0xF7 && reg == modregrm(0,0,0) ||
                    op == 0x81 && reg == modregrm(0,6,0) && !flags ||
                    op == 0x81 && reg == modregrm(0,1,0))
                {
                    // See if we can replace a dword with a word
                    // (avoid for 32 bit instructions, because CFopsize
                    //  is too slow)
                    if (!shortop && useopsize)
                    {   if ((u & 0xFFFF0000) == 0)
                        {   c->Iflags ^= CFopsize;
                            goto L1;
                        }
                        /* If memory (not register) addressing mode     */
                        if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
                        {   c->IEVoffset1 += 2; /* address MSW  */
                            c->IEV2.Vuns >>= 16;
                            c->Iflags ^= CFopsize;
                            goto L1;
                        }
                    }

                    // If EA is not SI or DI
                    if (rm < (modregrm(3,0,SP) | reg) &&
                        (usespace ||
                         config.target_cpu < TARGET_PentiumPro)
                       )
                    {
                        if ((u & 0xFFFFFF00) == 0)
                        {
                          L2:   c->Iop--;       /* to byte instruction  */
                                c->Iflags &= ~CFopsize;
                                goto L1;
                        }
                        if (((u & 0xFFFF00FF) == 0 ||
                             (shortop && (u & 0xFF) == 0)) &&
                            (rm < modregrm(3,0,0) || (!c->Irex && ereg < 4)))
                        {
                          L3:
                            c->IEV2.Vuns >>= 8;
                            if (rm >= (modregrm(3,0,AX) | reg))
                                c->Irm |= 4;    /* AX->AH, BX->BH, etc. */
                            else
                                c->IEVoffset1 += 1;
                            goto L2;
                        }
                    }
#if 0
                    // BUG: which is right?
                    else if ((u & 0xFFFF0000) == 0)
#else
                    else if (0 && op == 0xF7 &&
                             rm >= modregrm(3,0,SP) &&
                             (u & 0xFFFF0000) == 0)
#endif
                        c->Iflags &= ~CFopsize;
                }

                // Try to replace TEST reg,-1 with TEST reg,reg
                if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
                {   if ((u & 0xFF) == 0xFF)
                    {
                      L4:   c->Iop = 0x84;              // TEST regL,regL
                            c->Irm = modregrm(3,ereg,ereg);
                            if (c->Irex & REX_B)
                                c->Irex |= REX_R;
                            c->Iflags &= ~CFopsize;
                            goto L1;
                    }
                }
                if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
                {
                    // TEST r32,0xFF -> TEST r8,r8. For SP/BP/SI/DI the byte
                    // register only exists with a REX prefix; without one
                    // the encoding would test AH/CH/DH/BH instead.
                    if (u == 0xFF && (ereg < 4 || c->Irex))
                        goto L4;
                    if ((u & 0xFFFF) == 0xFF00 && shortop && !c->Irex && ereg < 4)
                    {   ereg |= 4;                /* to regH      */
                        goto L4;
                    }
                }

                /* Look for sign extended immediate data */
                if ((signed char) u == u)
                {
                    if (op == 0x81)
                    {   if (reg != 0x08 && reg != 0x20 && reg != 0x30)
                            c->Iop = op = 0x83;         /* 8 bit sgn ext */
                    }
                    else if (op == 0x69)                /* IMUL rw,ew,dw */
                        c->Iop = op = 0x6B;             /* IMUL rw,ew,db */
                }

                // Look for SHIFT EA,imm8 we can replace with short form
                if (u == 1 && ((op & 0xFE) == 0xC0))
                    c->Iop |= 0xD0;

            } /* if immediate second operand */

            /* Look for AX short form */
            if (ins & A)
            {   if (rm == modregrm(0,AX,local_BPRM) &&
                    !(c->Irex & REX_R) &&               // and it's AX, not R8
                    (op & ~3) == 0x88 &&
                    !I64)
                {       op = ((op & 3) + 0xA0) ^ 2;
                        /* 8A-> A0 */
                        /* 8B-> A1 */
                        /* 88-> A2 */
                        /* 89-> A3 */
                        c->Iop = op;
                        c->IFL2 = c->IFL1;
                        c->IEV2 = c->IEV1;
                }

                /* Replace MOV REG1,REG2 with MOV EREG1,EREG2   */
                else if (!I16 &&
                         (op == 0x89 || op == 0x8B) &&
                         (rm & 0xC0) == 0xC0 &&
                         (!b || b->BC != BCasm)
                        )
                    c->Iflags &= ~CFopsize;

                // If rm is AX
                else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c->Irex & (REX_R | REX_B)))
                {       switch (op)
                        {   case 0x80:  op = reg | 4; break;
                            case 0x81:  op = reg | 5; break;
                            case 0x87:  op = 0x90 + (reg>>3); break;    // XCHG
                            case 0xF6:
                                if (reg == 0)
                                    op = 0xA8;  /* TEST AL,immed8       */
                                break;
                            case 0xF7:
                                if (reg == 0)
                                    op = 0xA9;  /* TEST AX,immed16      */
                                break;
                        }
                        c->Iop = op;
                }
            }

            /* Look for reg short form */
            if ((ins & R) && (rm & 0xC0) == 0xC0)
            {   switch (op)
                {   case 0xC6:  op = 0xB0 + ereg; break;
                    case 0xC7:  op = 0xB8 + ereg; break;
                    case 0xFF:
                        switch (reg)
                        {   case 6<<3: op = 0x50+ereg; break;/* PUSH*/
                            case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
                            case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
                        }
                        break;
                    case 0x8F:  op = 0x58 + ereg; break;
                    case 0x87:
                        if (reg == 0) op = 0x90 + ereg;
                        break;
                }
                c->Iop = op;
            }

            // Look to replace SHL reg,1 with ADD reg,reg
            if ((op & ~1) == 0xD0 &&
                     (rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                     config.target_cpu >= TARGET_80486)
            {
                c->Iop &= 1;
                c->Irm = (rm & modregrm(3,0,7)) | (ereg << 3);
                if (c->Irex & REX_B)
                    c->Irex |= REX_R;
                if (!(c->Iflags & CFpsw) && !I16)
                    c->Iflags &= ~CFopsize;
                goto L1;
            }

            /* Look for sign extended modregrm displacement, or 0
             * displacement.
             */

            if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
                c->IFL1 == FLconst)      // and it's a constant
            {
                a = c->IEVpointer1;
                if (a == 0 && (rm & 7) != local_BPRM &&         // if 0[disp]
                    !(local_BPRM == 5 && (rm & 7) == 4 && (c->Isib & 7) == BP)
                   )
                    c->Irm &= 0x3F;
                else if (!I16)
                {
                    if ((targ_size_t)(targ_schar)a == a)
                        c->Irm ^= 0xC0;                 /* do 8 sx      */
                }
                else if (((targ_size_t)(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
                    c->Irm ^= 0xC0;                     /* do 8 sx      */
            }

            /* Look for LEA reg,[ireg], replace with MOV reg,ireg       */
            else if (op == 0x8D)
            {   rm = c->Irm & 7;
                mod = c->Irm & modregrm(3,0,0);
                if (mod == 0)
                {
                    if (!I16)
                    {
                        switch (rm)
                        {
                            case 4:
                            case 5:
                                break;
                            default:
                                c->Irm |= modregrm(3,0,0);
                                c->Iop = 0x8B;
                                break;
                        }
                    }
                    else
                    {
                        switch (rm)
                        {
                            case 4:     rm = modregrm(3,0,SI);  goto L6;
                            case 5:     rm = modregrm(3,0,DI);  goto L6;
                            case 7:     rm = modregrm(3,0,BX);  goto L6;
                            L6:     c->Irm = rm + reg;
                                    c->Iop = 0x8B;
                                    break;
                        }
                    }
                }

                /* replace LEA reg,0[BP] with MOV reg,BP        */
                else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
                        c->IFL1 == FLconst && c->IEVpointer1 == 0)
                {       c->Iop = 0x8B;          /* MOV reg,BP   */
                        c->Irm = modregrm(3,0,BP) + reg;
                }
            }

            // Replace [R13] with 0[R13]
            if (c->Irex & REX_B && (c->Irm & modregrm(3,0,5)) == modregrm(0,0,5))
            {
                c->Irm |= modregrm(1,0,0);
                c->IFL1 = FLconst;
                c->IEVpointer1 = 0;
            }
        }
        else if (!(c->Iflags & CFvex))
        {
            switch (op)
            {
                default:
                    if ((op & ~0x0F) != 0x70)
                        break;
                    /* conditional jump: fall through to the JMP handling */
                case JMP:
                    switch (c->IFL2)
                    {   case FLcode:
                                if (c->IEV2.Vcode == code_next(c))
                                {       c->Iop = NOP;
                                        continue;
                                }
                                break;
                        case FLblock:
                                if (!code_next(c) && c->IEV2.Vblock == bn)
                                {       c->Iop = NOP;
                                        continue;
                                }
                                break;
                        case FLconst:
                        case FLfunc:
                        case FLextern:
                                break;
                        default:
#ifdef DEBUG
                                WRFL((enum FL)c->IFL2);
#endif
                                assert(0);
                    }
                    break;

                case 0x68:                      // PUSH immed16
                    if (c->IFL2 == FLconst)
                    {
                        targ_long u = c->IEV2.Vuns;
                        if (I64 ||
                            ((c->Iflags & CFopsize) ? I16 : I32))
                        {   // PUSH 32/64 bit operand
                            if (u == (signed char) u)
                                c->Iop = 0x6A;          // PUSH immed8
                        }
                        else // PUSH 16 bit operand
                        {   if ((short)u == (signed char) u)
                                c->Iop = 0x6A;          // PUSH immed8
                        }
                    }
                    break;
            }
        }
    }
#if 0
    if (1 || debugc) {
        printf("-pinholeopt(%p)\n",cstart);
        for (c = cstart; c; c = code_next(c))
            c->print();
    }
#endif
}
|
||
|
||
#ifdef DEBUG
|
||
STATIC void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");

    // Each row is { input instruction, expected instruction after pinholeopt }.
    // A nonzero 'model' on the input restricts the row to that memory model
    // (16, 32 or 64); 0 means the row runs under every model.
    struct CS { unsigned model,op,ea,ev1,ev2,flags; } tests[][2] =
    {
        // XOR reg,immed                            NOT regL
        {{ 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF }},

        // MOV 0[BX],3                               MOV [BX],3
        {{ 16,0xC7,modregrm(2,0,7),0,3},           { 0,0xC7,modregrm(0,0,7),0,3 }},

#if 0   // only if config.flags4 & CFG4space
        // TEST regL,immed8
        {{ 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }},
        {{ 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
        {{ 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }},
#endif

        // PUSH immed => PUSH immed8
        {{ 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }},
        {{ 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }},
        {{ 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }},
        {{ 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }},
        {{ 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }},
        {{ 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }},
        {{ 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }},
        {{ 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }},
        {{ 32,0x68,0,0,0x10000,CFopsize },    { 0,0x6A,0,0,0x10000,CFopsize }},
        {{ 32,0x68,0,0,0x8000,CFopsize },     { 0,0x68,0,0,0x8000,CFopsize }},
    };

    //config.flags4 |= CFG4space;
    const int ntests = (int)(sizeof(tests) / sizeof(tests[0]));

    for (int i = 0; i < ntests; i++)
    {
        CS *given = &tests[i][0];
        CS *want  = &tests[i][1];

        // Skip rows written for a memory model other than the current one
        if (given->model &&
            ((I16 && given->model != 16) ||
             (I32 && given->model != 32) ||
             (I64 && given->model != 64)))
        {
            continue;
        }
        //printf("[%d]\n", i);

        // Build a single instruction with both operands marked constant
        code cs;
        memset(&cs, 0, sizeof(cs));
        cs.Iop       = given->op;
        cs.Iea       = given->ea;
        cs.IFL1      = FLconst;
        cs.IFL2      = FLconst;
        cs.IEV1.Vuns = given->ev1;
        cs.IEV2.Vuns = given->ev2;
        cs.Iflags    = given->flags;

        pinholeopt(&cs, NULL);

        if (cs.Iop != want->op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, want->op);
            assert(0);
        }
        assert(cs.Iea == want->ea);
        assert(cs.IEV1.Vuns == want->ev1);
        assert(cs.IEV2.Vuns == want->ev2);
        assert(cs.Iflags == want->flags);
    }
}
|
||
#endif
|
||
|
||
/**************************
 * Compute jump addresses for FLcode.
 * Note: only works for forward referenced code.
 *       only direct jumps and branches are detected.
 *       LOOP instructions only work for backward refs.
 */
|
||
|
||
/* Resolve FLcode (pointer-to-code) jump operands in the list starting at c
 * into constant displacements.
 *
 * For each conditional jump (0x70..0x7F), JMP, JMPS, JCXZ or CALL whose
 * second operand is FLcode, the sizes (per calccodsize()) of the codes lying
 * between the instruction and its target are summed and stored in
 * IEVpointer2, and IFL2 becomes FLconst.  If the target is not found ahead of
 * the instruction, it is looked for from the start of the list and a negative
 * 8 bit displacement is stored instead.  LOOP operands are always treated as
 * backward references.
 *
 * The list may be modified: in 16 bit mode a conditional jump flagged CFjmp16
 * is replaced by the inverse condition branching around a newly inserted JMP.
 */
void jmpaddr(code *c)
{
    code *ci,*cn,*ctarg,*cstart;
    targ_size_t ad;
    unsigned op;

    //printf("jmpaddr()\n");
    cstart = c;                         /* remember start of code       */
    while (c)
    {
        op = c->Iop;
        if (op <= 0xEB &&
            inssize[op] & T &&          // if second operand
            c->IFL2 == FLcode &&
            ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
        {
            ci = code_next(c);
            ctarg = c->IEV2.Vcode;      /* target code                  */
            ad = 0;                     /* IP displacement              */
            while (ci && ci != ctarg)
            {
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            if (!ci)
                goto Lbackjmp;          // couldn't find it ahead of c
            if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
                c->IEVpointer2 = ad;
            else                        /* else conditional             */
            {
                if (!(c->Iflags & CFjmp16))     /* if branch            */
                    c->IEVpointer2 = ad;
                else                    /* branch around a long jump    */
                {
                    // Insert a fresh code right after c; it becomes the JMP.
                    cn = code_next(c);
                    code_next(c) = code_calloc();
                    code_next(code_next(c)) = cn;
                    c->Iop = op ^ 1;    /* converse jmp                 */
                    c->Iflags &= ~CFjmp16;
                    c->IEVpointer2 = I16 ? 3 : 5;   // hop over the inserted JMP
                    cn = code_next(c);
                    cn->Iop = JMP;      /* long jump                    */
                    cn->IFL2 = FLconst;
                    // ad was measured from the code that now follows the JMP
                    cn->IEVpointer2 = ad;
                }
            }
            c->IFL2 = FLconst;
        }
        if (op == LOOP && c->IFL2 == FLcode)    /* backwards refs       */
        {
        Lbackjmp:
            ctarg = c->IEV2.Vcode;
            // Locate the target from the start; it must come before c.
            for (ci = cstart; ci != ctarg; ci = code_next(ci))
                if (!ci || ci == c)
                    assert(0);
            ad = 2;                     /* - IP displacement            */
            while (ci != c)
            {
                assert(ci);
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            c->IEVpointer2 = (-ad) & 0xFF;
            c->IFL2 = FLconst;
        }
        c = code_next(c);
    }
}
|
||
|
||
/*******************************
 * Calculate bl->Bsize.
 */
|
||
|
||
/* Return the sum of calccodsize() over every code in the list starting at c.
 * An empty list (c == NULL) yields 0.
 */
unsigned calcblksize(code *c)
{
    unsigned size;

    for (size = 0; c; c = code_next(c))
    {
        unsigned sz = calccodsize(c);
        //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c->Iop);
        size += sz;
    }
    //printf("calcblksize(c = x%x) = %d\n", c, size);
    return size;
}
|
||
|
||
/*****************************
 * Calculate and return code size of a code.
 * Note that NOPs are sometimes used as markers, but are
 * never output. LINNUMs are never output.
 * Note: This routine must be fast. Profiling shows it is significant.
 */
|
||
|
||
/* Compute the size in bytes of a single code c.
 *
 * The base size comes from vex_inssize() for CFvex codes, from inssize2[] for
 * 0F / 0F38 / 0F3A opcodes, and from inssize[] (inssize32[] when not in 16 bit
 * mode) otherwise.  To that are added prefix bytes, an operand-size
 * correction, ModRM displacement/SIB bytes and a REX byte as applicable.
 * NOP and ESCAPE codes have size 0; ASM codes report their byte count
 * (or NPTRSIZE for the CFaddrsize kludge).
 */
unsigned calccodsize(code *c)
{
    unsigned size;
    unsigned op;
    unsigned char rm,mod,ins;
    unsigned iflags;
    unsigned i32 = I32 || I64;
    unsigned a32 = i32;                 // 1 if 32 bit addressing is in effect

#ifdef DEBUG
    assert((a32 & ~1) == 0);
#endif
    iflags = c->Iflags;
    op = c->Iop;
    if (iflags & CFvex)
    {
        ins = vex_inssize(c);
        size = ins & 7;
        goto Lmodrm;                    // prefixes are not counted for VEX codes
    }
    else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
        op = 0x0F;
    else
        op &= 0xFF;
    switch (op)
    {
        case 0x0F:
            if ((c->Iop & 0xFFFD00) == 0x0F3800)
            {   // 3 byte op ( 0F38-- or 0F3A-- )
                ins = inssize2[(c->Iop >> 8) & 0xFF];
                size = ins & 7;
                if (c->Iop & 0xFF000000)        // extra leading byte stored in Iop
                    size++;
            }
            else
            {   // 2 byte op ( 0F-- )
                ins = inssize2[c->Iop & 0xFF];
                size = ins & 7;
                if (c->Iop & 0xFF0000)          // extra leading byte stored in Iop
                    size++;
            }
            break;

        case NOP:
        case ESCAPE:
            size = 0;                   // since these won't be output
            goto Lret2;

        case ASM:
            if (c->Iflags == CFaddrsize)        // kludge for DA inline asm
                size = NPTRSIZE;
            else
                size = c->IEV1.as.len;
            goto Lret2;

        case 0xA1:
        case 0xA3:
            if (c->Irex)
            {
                size = 9;               // 64 bit immediate value for MOV to/from RAX
                goto Lret;
            }
            goto Ldefault;

        case 0xF6:                      /* TEST mem8,immed8             */
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
            if ((c->Irm & (7<<3)) == 0)
                size++;                 /* size of immed8               */
            break;

        case 0xF7:
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
            if ((c->Irm & (7<<3)) == 0)
                size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
            break;

        default:
        Ldefault:
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
    }

    if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
    {
        if (iflags & CFwait)            // if add FWAIT prefix
            size++;
        if (iflags & CFSEG)             // if segment override
            size++;

        // If the instruction has a second operand that is not an 8 bit,
        // and the operand size prefix is present, then fix the size computation
        // because the operand size will be different.
        // Walter, I had problems with this bit at the end.  There can still be
        // an ADDRSIZE prefix for these and it does indeed change the operand size.

        if (iflags & (CFopsize | CFaddrsize))
        {
            if ((ins & (T|E)) == T)
            {
                if ((op & 0xAC) == 0xA0)
                {
                    if ((iflags & CFaddrsize) && !I64)
                    {
                        if (I32)
                            size -= 2;
                        else
                            size += 2;
                    }
                }
                else if (iflags & CFopsize)
                {
                    if (I16)
                        size += 2;
                    else
                        size -= 2;
                }
            }
            if (iflags & CFaddrsize)
            {
                if (!I64)
                    a32 ^= 1;           // prefix toggles the addressing mode
                size++;
            }
            if (iflags & CFopsize)
                size++;                 /* +1 for OPSIZE prefix         */
        }
    }

Lmodrm:
    if ((op & ~0x0F) == 0x70)
    {
        if (iflags & CFjmp16)           // if long branch
            size += I16 ? 3 : 4;        // + 3(4) bytes for JMP
    }
    else if (ins & M)                   // if modregrm byte
    {
        rm = c->Irm;
        mod = rm & 0xC0;
        if (a32 || I64)
        {   // 32 bit addressing
            if (issib(rm))
                size++;
            switch (mod)
            {
                case 0:
                    if ((issib(rm) && (c->Isib & 7) == 5) ||
                        (rm & 7) == 5)
                        size += 4;      /* disp32                       */
                    if ((c->Irex & REX_B) && (rm & 7) == 5)
                        /* Instead of selecting R13, this mode is an [RIP] relative
                         * address. Although valid, it's redundant, and should not
                         * be generated. Instead, generate 0[R13] instead of [R13].
                         */
                        assert(0);
                    break;
                case 0x40:
                    size++;             /* disp8                        */
                    break;
                case 0x80:
                    size += 4;          /* disp32                       */
                    break;
            }
        }
        else
        {   // 16 bit addressing
            if (mod == 0x40)            /* 01: 8 bit displacement       */
                size++;
            else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
                size += 2;
        }
    }

Lret:
    if (!(iflags & CFvex) && c->Irex)
    {
        size++;                         // REX prefix byte
        if ((c->Irex & REX_W) && (op & ~7) == 0xB8)
            size += 4;                  // immediate is 4 bytes longer with REX_W
    }
Lret2:
    //printf("op = x%02x, size = %d\n",op,size);
    return size;
}
|
||
|
||
/********************************
 * Return !=0 if codes match.
 */
|
||
|
||
#if 0
|
||
|
||
/* Compare two codes; return 1 if they match, 0 if not.
 *
 * Opcode, flags, ModRM/SIB bytes and whichever of the two operands the
 * instruction actually has are compared.  ESCctor/ESCdtor escapes never
 * match; NOPs and all other ESCAPE codes match once the opcodes are equal.
 *
 * NOTE(review): this function sits inside `#if 0`.  It previously referred to
 * an undeclared `rm` and `c` and to a misspelled `EV1` member; these now read
 * from cs1 so the block would compile if re-enabled.
 */
int code_match(code *c1,code *c2)
{
    code cs1,cs2;
    unsigned char ins;
    unsigned char rm;

    if (c1 == c2)
        goto match;
    cs1 = *c1;
    cs2 = *c2;
    if (cs1.Iop != cs2.Iop)
        goto nomatch;
    switch (cs1.Iop)
    {
        case ESCAPE | ESCctor:
        case ESCAPE | ESCdtor:
            goto nomatch;

        case NOP:
            goto match;

        case ASM:
            if (cs1.IEV1.as.len == cs2.IEV1.as.len &&
                memcmp(cs1.IEV1.as.bytes,cs2.IEV1.as.bytes,cs1.IEV1.as.len) == 0)
                goto match;
            else
                goto nomatch;

        default:
            if ((cs1.Iop & 0xFF) == ESCAPE)
                goto match;
            break;
    }
    if (cs1.Iflags != cs2.Iflags)
        goto nomatch;

    // Pick the instruction-info byte from the table matching the opcode map.
    ins = inssize[cs1.Iop & 0xFF];
    if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
    {
        ins = inssize2[(cs1.Iop >> 8) & 0xFF];
    }
    else if ((cs1.Iop & 0xFF00) == 0x0F00)
    {
        ins = inssize2[cs1.Iop & 0xFF];
    }

    if (ins & M)                        // if modregrm byte
    {
        if (cs1.Irm != cs2.Irm)
            goto nomatch;
        if ((cs1.Irm & 0xC0) == 0xC0)
            goto do2;
        rm = cs1.Irm;
        if (is32bitaddr(I32,cs1.Iflags))
        {
            if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
                goto nomatch;
            if (
                ((rm & 0xC0) == 0 && !(((rm & 7) == 4 && (cs1.Isib & 7) == 5) || (rm & 7) == 5))
               )
                goto do2;               /* if no first operand  */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;               /* if no first operand  */
        }
        if (cs1.IFL1 != cs2.IFL1)
            goto nomatch;
        if (flinsymtab[cs1.IFL1] && cs1.IEVsym1 != cs2.IEVsym1)
            goto nomatch;
        if (cs1.IEVoffset1 != cs2.IEVoffset1)
            goto nomatch;
    }

do2:
    if (!(ins & T))                     // if no second operand
        goto match;
    if (cs1.IFL2 != cs2.IFL2)
        goto nomatch;
    if (flinsymtab[cs1.IFL2] && cs1.IEVsym2 != cs2.IEVsym2)
        goto nomatch;
    if (cs1.IEVoffset2 != cs2.IEVoffset2)
        goto nomatch;

match:
    return 1;

nomatch:
    return 0;
}
|
||
|
||
#endif
|
||
|
||
/**************************
 * Write code to intermediate file.
 * Code starts at offset.
 * Returns:
 *      addr of end of code
 */
|
||
|
||
/* State shared by codout() and the doNNbit() helpers. */
static targ_size_t offset;              /* to save code use a global    */
static char bytes[100];                 /* staging buffer for emitted bytes */
static char *pgen;                      /* next free position in bytes[] */

/* Append one byte to the staging buffer. */
#define GEN(c)          (*pgen++ = (c))
/* Append n bytes starting at p to the staging buffer. */
#define GENP(n,p)       (memcpy(pgen,(p),(n)), pgen += (n))
/* Emit the staged bytes, if any.  Statement-form macro: it is wrapped in
 * do/while(0) so it composes safely with if/else, and it no longer relies on
 * `expr && cod3_flush()`, which is ill-formed because cod3_flush() returns void.
 */
#define FLUSH()         do { if (pgen != bytes) cod3_flush(); } while (0)
/* Code-segment offset of the next byte to be generated. */
#define OFFSET()        (offset + (pgen - bytes))
|
||
|
||
/* Hand the bytes staged in bytes[] to obj_bytes() for segment cseg at the
 * current offset, advance offset by obj_bytes()'s return value, and reset the
 * staging pointer to the start of the buffer.
 */
STATIC void cod3_flush()
{
    // Emit accumulated bytes to code segment
#ifdef DEBUG
    assert(pgen - bytes < sizeof(bytes));
#endif
    offset += obj_bytes(cseg,offset,pgen - bytes,bytes);
    pgen = bytes;
}
|
||
|
||
/* Emit the code list starting at c into segment cseg, beginning at Coffset.
 *
 * Each code is written in order: prefix bytes, opcode byte(s) (with REX or
 * VEX where applicable), ModRM/SIB and displacement, then the second operand
 * if the instruction has one.  Bytes are staged in bytes[] and flushed via
 * FLUSH(); operands needing fixups go through do8bit/do16bit/do32bit/do64bit.
 * ESCAPE and NOP codes emit no bytes; ASM codes are copied verbatim.
 *
 * The list may be modified: a conditional jump flagged CFjmp16 is rewritten to
 * the 0F 8x form (non-16-bit modes) or to an inverted branch around a newly
 * inserted JMP (16 bit mode), and PUSH of an FLblock operand is retagged
 * FLblockoff.
 *
 * Returns the ending offset, which is also stored in Coffset.
 */
unsigned codout(code *c)
{
    unsigned op;
    unsigned char rm,mod;
    unsigned char ins;
    code *cn;
    unsigned flags;
    symbol *s;

#ifdef DEBUG
    if (debugc) printf("codout(%p), Coffset = x%llx\n",c,(unsigned long long)Coffset);
#endif

    pgen = bytes;
    offset = Coffset;
    for (; c; c = code_next(c))
    {
#ifdef DEBUG
        if (debugc) { printf("off=%02lx, sz=%ld, ",(long)OFFSET(),(long)calccodsize(c)); c->print(); }
        unsigned startoffset = OFFSET();
#endif
        op = c->Iop;
        ins = inssize[op & 0xFF];
        switch (op & 0xFF)
        {
            case ESCAPE:
                /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
                if(op == 0x660F383E || c->Iflags & CFvex) break;

                switch (op & 0xFFFF00)
                {
                    case ESClinnum:
                        /* put out line number stuff    */
                        objlinnum(c->IEV1.Vsrcpos,OFFSET());
                        break;
#if SCPP
#if 1
                    case ESCctor:
                    case ESCdtor:
                    case ESCoffset:
                        if (config.exe != EX_NT)
                            except_pair_setoffset(c,OFFSET() - funcoffset);
                        break;
                    case ESCmark:
                    case ESCrelease:
                    case ESCmark2:
                    case ESCrelease2:
                        break;
#else
                    case ESCctor:
                        except_push(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
                        break;
                    case ESCdtor:
                        except_pop(OFFSET() - funcoffset,c->IEV1.Vtor,NULL);
                        break;
                    case ESCmark:
                        except_mark();
                        break;
                    case ESCrelease:
                        except_release();
                        break;
#endif
#endif
                }
#ifdef DEBUG
                assert(calccodsize(c) == 0);
#endif
                continue;
            case NOP:                   /* don't send them out          */
                if (op != NOP)
                    break;
#ifdef DEBUG
                assert(calccodsize(c) == 0);
#endif
                continue;
            case ASM:
                if (op != ASM)
                    break;
                FLUSH();
                if (c->Iflags == CFaddrsize)    // kludge for DA inline asm
                {
                    do32bit(FLblockoff,&c->IEV1,0);
                }
                else
                {
                    offset += obj_bytes(cseg,offset,c->IEV1.as.len,c->IEV1.as.bytes);
                }
#ifdef DEBUG
                assert(calccodsize(c) == c->IEV1.as.len);
#endif
                continue;
        }
        flags = c->Iflags;

        // See if we need to flush (don't have room for largest code sequence)
        if (pgen - bytes > sizeof(bytes) - (1+4+4+8+8))
            FLUSH();

        // see if we need to put out prefix bytes
        if (flags & (CFwait | CFPREFIX | CFjmp16))
        {
            int override;

            if (flags & CFwait)
                GEN(0x9B);                      // FWAIT
                                                /* ? SEGES : SEGSS      */
            switch (flags & CFSEG)
            {
                case CFes:      override = SEGES;       goto segover;
                case CFss:      override = SEGSS;       goto segover;
                case CFcs:      override = SEGCS;       goto segover;
                case CFds:      override = SEGDS;       goto segover;
                case CFfs:      override = SEGFS;       goto segover;
                case CFgs:      override = SEGGS;       goto segover;
                segover:        GEN(override);
                                break;
            }

            if (flags & CFaddrsize)
                GEN(0x67);

            // Do this last because of instructions like ADDPD
            if (flags & CFopsize)
                GEN(0x66);                      /* operand size         */

            if ((op & ~0x0F) == 0x70 && (flags & CFjmp16)) /* long condit jmp */
            {
                if (!I16)
                {   // Put out 16 bit conditional jump
                    c->Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
                }
                else
                {
                    cn = code_calloc();
                    /*cxcalloc++;*/
                    code_next(cn) = code_next(c);
                    code_next(c) = cn;          // link into code
                    cn->Iop = JMP;              // JMP block
                    cn->IFL2 = c->IFL2;
                    cn->IEV2.Vblock = c->IEV2.Vblock;
                    c->Iop = op ^= 1;           // toggle condition
                    c->IFL2 = FLconst;
                    c->IEVpointer2 = I16 ? 3 : 5; // skip over JMP block
                    c->Iflags &= ~CFjmp16;
                }
            }
        }

        if (flags & CFvex)
        {
            if (flags & CFvex3)
            {
                GEN(0xC4);
                GEN(VEX3_B1(c->Ivex));
                GEN(VEX3_B2(c->Ivex));
                GEN(c->Ivex.op);
            }
            else
            {
                GEN(0xC5);
                GEN(VEX2_B1(c->Ivex));
                GEN(c->Ivex.op);
            }
            ins = vex_inssize(c);
            goto Lmodrm;
        }

        if (op > 0xFF)
        {
            if ((op & 0xFFFD00) == 0x0F3800)
                ins = inssize2[(op >> 8) & 0xFF];
            else if ((op & 0xFF00) == 0x0F00)
                ins = inssize2[op & 0xFF];

            if (op & 0xFF000000)
            {
                unsigned char op1 = op >> 24;
                // F2/F3/66 are written ahead of REX; any other byte follows it
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    GEN(op1);
                    if (c->Irex)
                        GEN(c->Irex | REX);
                }
                else
                {
                    if (c->Irex)
                        GEN(c->Irex | REX);
                    GEN(op1);
                }
                GEN((op >> 16) & 0xFF);
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
            else if (op & 0xFF0000)
            {
                unsigned char op1 = op >> 16;
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    GEN(op1);
                    if (c->Irex)
                        GEN(c->Irex | REX);
                }
                else
                {
                    if (c->Irex)
                        GEN(c->Irex | REX);
                    GEN(op1);
                }
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
            else
            {
                if (c->Irex)
                    GEN(c->Irex | REX);
                GEN((op >> 8) & 0xFF);
                GEN(op & 0xFF);
            }
        }
        else
        {
            if (c->Irex)
                GEN(c->Irex | REX);
            GEN(op);
        }
  Lmodrm:
        if (ins & M)            /* if modregrm byte             */
        {
            rm = c->Irm;
            GEN(rm);

            // Look for an address size override when working with the
            // MOD R/M and SIB bytes

            if (is32bitaddr( I32, flags))
            {
                if (issib(rm))
                    GEN(c->Isib);
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit((enum FL) c->IFL1,&c->IEV1);     // 8 bit
                        break;
                    case 0:
                        if (!((issib(rm) && (c->Isib & 7) == 5) ||
                              (rm & 7) == 5))
                            break;
                        /* fallthrough: this mod 0 form carries a disp32 */
                    case 0x80:
                    {
                        int flags = CFoff;      // NB: shadows the outer flags
                        targ_size_t val = 0;
                        if (I64)
                        {
                            if ((rm & modregrm(3,0,7)) == modregrm(0,0,5))      // if disp32[RIP]
                            {
                                flags |= CFpc32;
                                val = -4;
                                unsigned reg = rm & modregrm(0,7,0);
                                if ((ins & T) ||
                                    ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
                                {
                                    // an immediate follows the displacement
                                    if (ins & E)
                                        val = -5;
                                    else if (c->Iflags & CFopsize)
                                        val = -6;
                                    else
                                        val = -8;
                                }
#if TARGET_OSX
                                // Mach-O linkage already takes the 4 byte size into account
                                val += 4;
#endif
                            }
                        }
                        do32bit((enum FL)c->IFL1,&c->IEV1,flags,val);
                        break;
                    }
                }
            }
            else
            {
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit((enum FL) c->IFL1,&c->IEV1);     // 8 bit
                        break;
                    case 0:
                        if ((rm & 7) != 6)
                            break;
                        /* fallthrough: mod 0 with rm 6 carries a disp16 */
                    case 0x80:
                        do16bit((enum FL)c->IFL1,&c->IEV1,CFoff);
                        break;
                }
            }
        }
        else
        {
            if (op == 0xC8)
                do16bit((enum FL)c->IFL1,&c->IEV1,0);
        }
        flags &= CFseg | CFoff | CFselfrel;
        if (ins & T)                    /* if second operand            */
        {
            if (ins & E)                /* if data-8                    */
                do8bit((enum FL) c->IFL2,&c->IEV2);
            else if (!I16)
            {
                switch (op)
                {
                    case 0xC2:              /* RETN imm16           */
                    case 0xCA:              /* RETF imm16           */
                    do16:
                        do16bit((enum FL)c->IFL2,&c->IEV2,flags);
                        break;

                    case 0xA1:
                    case 0xA3:
                        if (I64 && c->Irex)
                        {
                    do64:
                            do64bit((enum FL)c->IFL2,&c->IEV2,flags);
                            break;
                        }
                        /* fallthrough */
                    case 0xA0:              /* MOV AL,byte ptr []   */
                    case 0xA2:
                        if ((c->Iflags & CFaddrsize) && !I64)
                            goto do16;
                        else
                    do32:
                            do32bit((enum FL)c->IFL2,&c->IEV2,flags);
                        break;
                    case 0x9A:
                    case 0xEA:
                        if (c->Iflags & CFopsize)
                            goto ptr1616;
                        else
                            goto ptr1632;

                    case 0x68:              // PUSH immed32
                        if ((enum FL)c->IFL2 == FLblock)
                        {
                            c->IFL2 = FLblockoff;
                            goto do32;
                        }
                        else
                            goto case_default;

                    case CALL:              // CALL rel
                    case JMP:               // JMP  rel
                        flags |= CFselfrel;
                        goto case_default;

                    default:
                        if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
                            flags |= CFselfrel;
                        if (I64 && (op & ~7) == 0xB8 && (c->Irex & REX_W))
                            goto do64;
                    case_default:
                        if (c->Iflags & CFopsize)
                            goto do16;
                        else
                            goto do32;
                        break;
                }
            }
            else
            {
                // The do16/do32 labels jumped to below are in the switch above.
                switch (op) {
                    case 0xC2:
                    case 0xCA:
                        goto do16;
                    case 0xA0:
                    case 0xA1:
                    case 0xA2:
                    case 0xA3:
                        if (c->Iflags & CFaddrsize)
                            goto do32;
                        else
                            goto do16;
                        break;
                    case 0x9A:
                    case 0xEA:
                        if (c->Iflags & CFopsize)
                            goto ptr1632;
                        else
                            goto ptr1616;

                    ptr1616:
                    ptr1632:
                        //assert(c->IFL2 == FLfunc);
                        FLUSH();
                        if (c->IFL2 == FLdatseg)
                        {
                            reftodatseg(cseg,offset,c->IEVpointer2,
                                    c->IEVseg2,flags);
                            offset += 4;
                        }
                        else
                        {
                            s = c->IEVsym2;
                            offset += reftoident(cseg,offset,s,0,flags);
                        }
                        break;

                    case 0x68:              // PUSH immed16
                        if ((enum FL)c->IFL2 == FLblock)
                        {
                            c->IFL2 = FLblockoff;
                            goto do16;
                        }
                        else
                            goto case_default16;

                    case CALL:
                    case JMP:
                        flags |= CFselfrel;
                        /* fallthrough */
                    default:
                    case_default16:
                        if (c->Iflags & CFopsize)
                            goto do32;
                        else
                            goto do16;
                        break;
                }
            }
        }
        else if (op == 0xF6)            /* TEST mem8,immed8             */
        {
            if ((rm & (7<<3)) == 0)
                do8bit((enum FL)c->IFL2,&c->IEV2);
        }
        else if (op == 0xF7)
        {
            if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32     */
            {
                if ((I32 || I64) ^ ((c->Iflags & CFopsize) != 0))
                    do32bit((enum FL)c->IFL2,&c->IEV2,flags);
                else
                    do16bit((enum FL)c->IFL2,&c->IEV2,flags);
            }
        }
#ifdef DEBUG
        // Cross-check the bytes actually produced against calccodsize()
        if (OFFSET() - startoffset != calccodsize(c))
        {
            printf("actual: %d, calc: %d\n", (int)(OFFSET() - startoffset), (int)calccodsize(c));
            c->print();
            assert(0);
        }
#endif
    }
    FLUSH();
    Coffset = offset;
    //printf("-codout(), Coffset = x%x\n", Coffset);
    return offset;                      /* ending address               */
}
|
||
|
||
|
||
/* Emit an 8 byte operand of kind fl taken from *uev.
 *
 * Constant-like kinds (FLconst, FLframehandler, FLblock, and FLgot on OSX)
 * are written straight into the staging buffer.  Every other kind flushes
 * the buffer, records a reference through reftodatseg/reftocodseg/reftoident,
 * and then advances offset by 8.  Only valid when I64 is set.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the reference routines
 */
STATIC void do64bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            ad = * (targ_size_t *) uev;
        L1:
            GENP(8,&ad);
            return;
        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,CFoffset64 | flags);
            break;
        case FLframehandler:
            framehandleroffset = OFFSET();      // remember where the operand lies
            ad = 0;
            goto L1;
        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;
#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from an
            // un-named external which is the start of .rodata or .data
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset,CFoffset64 | flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            goto L1;
#endif

        case FLfunc:                    /* function call                */
            s = uev->sp.Vsym;           /* symbol pointer               */
            assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
            FLUSH();
            reftoident(cseg,offset,s,0,CFoffset64 | flags);
            break;

        case FLblock:                   /* displacement to another block */
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            goto L1;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 8;
}
|
||
|
||
|
||
/* Emit a 4 byte operand of kind fl taken from *uev.
 *
 * Constant-like kinds are written straight into the staging buffer.  Every
 * other kind flushes the buffer, records a reference through
 * reftodatseg/reftocodseg/reftoident, and then advances offset by 4.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the reference routines
 *      val   = extra addend applied to symbol references (FLextern and
 *              friends, and FLfunc)
 */
STATIC void do32bit(enum FL fl,union evc *uev,int flags, targ_size_t val)
{
    symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(sizeof(targ_size_t) == 4 || sizeof(targ_size_t) == 8);
            ad = * (targ_size_t *) uev;
        L1:
            GENP(4,&ad);
            return;
        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;
        case FLframehandler:
            framehandleroffset = OFFSET();      // remember where the operand lies
            ad = 0;
            goto L1;
        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;
#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#if DEBUG
            symbol_print(uev->sp.Vsym);
#endif
#endif
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from an
            // un-named external which is the start of .rodata or .data
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
#if TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS
        case FLgot:
        case FLgotoff:
#endif
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset + val,flags);
            break;

#if TARGET_OSX
        case FLgot:
            funcsym_p->Slocalgotoffset = OFFSET();
            ad = 0;
            goto L1;
#endif

        case FLfunc:                    /* function call                */
            s = uev->sp.Vsym;           /* symbol pointer               */
#if TARGET_SEGMENTED
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute    */
                FLUSH();
                // the common `offset += 4` below completes the advance
                offset += reftoident(cseg,offset,s,0,flags) - 4;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && (flags & CFselfrel))
            {   /* if we know it's relative address     */
                ad = s->Soffset - OFFSET() - 4;
                goto L1;
            }
            else
#endif
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s->ty()));
                FLUSH();
                reftoident(cseg,offset,s,val,flags);
            }
            break;

        case FLblock:                   /* displacement to another block */
            ad = uev->Vblock->Boffset - OFFSET() - 4;
            //printf("FLblock: funcoffset = %x, OFFSET = %x, Boffset = %x, ad = %x\n", funcoffset, OFFSET(), uev->Vblock->Boffset, ad);
            goto L1;

        case FLblockoff:
            FLUSH();
            assert(uev->Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", offset, uev->Vblock->Boffset, funcoffset);
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 4;
}
|
||
|
||
|
||
/* Emit a 2 byte operand of kind fl taken from *uev.
 *
 * FLconst and known-relative displacements (FLblock, and self-relative
 * references to functions in this segment) are written straight into the
 * staging buffer.  Every other kind flushes the buffer, records a reference
 * through reftodatseg/reftocodseg/reftoident, and then advances offset by 2.
 *
 * Params:
 *      fl    = operand kind
 *      uev   = operand value
 *      flags = CFxxx flags passed on to the reference routines
 */
STATIC void do16bit(enum FL fl,union evc *uev,int flags)
{
    symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            GENP(2,(char *) uev);
            return;
        case FLdatseg:
            FLUSH();
            reftodatseg(cseg,offset,uev->_EP.Vpointer,uev->_EP.Vseg,flags);
            break;
        case FLswitch:
            FLUSH();
            ad = uev->Vswitch->Btableoffset;
            if (config.flags & CFGromable)
                reftocodseg(cseg,offset,ad);
            else
                reftodatseg(cseg,offset,ad,JMPSEG,CFoff);
            break;
#if TARGET_SEGMENTED
        case FLcsdata:
        case FLfardata:
#endif
        case FLextern:                  /* external data symbol         */
        case FLtlsdata:
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            FLUSH();
            s = uev->sp.Vsym;           /* symbol pointer               */
            reftoident(cseg,offset,s,uev->sp.Voffset,flags);
            break;
        case FLfunc:                    /* function call                */
            assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev->sp.Vsym;           /* symbol pointer               */
            if (tyfarfunc(s->ty()))
            {   /* Large code references are always absolute    */
                FLUSH();
                // the common `offset += 2` below completes the advance
                offset += reftoident(cseg,offset,s,0,flags) - 2;
            }
            else if (s->Sseg == cseg &&
                     (s->Sclass == SCstatic || s->Sclass == SCglobal) &&
                     s->Sxtrnnum == 0 && (flags & CFselfrel))
            {   /* if we know it's relative address     */
                ad = s->Soffset - OFFSET() - 2;
                goto L1;
            }
            else
            {
                FLUSH();
                reftoident(cseg,offset,s,0,flags);
            }
            break;
        case FLblock:                   /* displacement to another block */
            ad = uev->Vblock->Boffset - OFFSET() - 2;
#ifdef DEBUG
            {
                // the displacement must fit in a signed 16 bit value
                targ_ptrdiff_t delta = uev->Vblock->Boffset - OFFSET() - 2;
                assert((signed short)delta == delta);
            }
#endif
        L1:
            GENP(2,&ad);                // displacement
            return;

        case FLblockoff:
            FLUSH();
            reftocodseg(cseg,offset,uev->Vblock->Boffset);
            break;

        default:
#ifdef DEBUG
            WRFL(fl);
#endif
            assert(0);
    }
    offset += 2;
}
|
||
|
||
/* Emit a 1 byte operand of kind fl taken from *uev into the staging buffer.
 *
 * FLconst writes the low byte of uev->Vuns.  FLblock writes the displacement
 * to the target block; if it does not fit in a signed byte, a diagnostic is
 * printed to stderr and err_exit() is called.  Any other kind is a
 * programming error (assert).
 */
STATIC void do8bit(enum FL fl,union evc *uev)
{
    char c = 0;         // initialised so GEN(c) is defined if asserts are compiled out
    targ_ptrdiff_t delta;

    switch (fl)
    {
        case FLconst:
            c = uev->Vuns;
            break;
        case FLblock:
            delta = uev->Vblock->Boffset - OFFSET() - 1;
            if ((signed char)delta != delta)
            {
#if MARS
                if (uev->Vblock->Bsrcpos.Slinnum)
                    fprintf(stderr, "%s(%d): ", uev->Vblock->Bsrcpos.Sfilename, uev->Vblock->Bsrcpos.Slinnum);
#endif
                fprintf(stderr, "block displacement of %lld exceeds the maximum offset of -128 to 127.\n", (long long)delta);
                err_exit();
            }
            c = delta;
#ifdef DEBUG
            assert(uev->Vblock->Boffset > OFFSET() || c != 0x7F);
#endif
            break;
        default:
#ifdef DEBUG
            fprintf(stderr,"fl = %d\n",fl);
#endif
            assert(0);
    }
    GEN(c);
}
|
||
|
||
|
||
/**********************************
|
||
*/
|
||
|
||
#if HYDRATE
|
||
/* Hydrate every code in the list whose head pointer is *pc.
 *
 * Each link is hydrated with ph_hydrate(); then, depending on the opcode and
 * on which operands the instruction has, the pointers held in IEV1 and IEV2
 * are hydrated according to their FL kind (symbol, code, block, elem or asm
 * bytes).  Operand kinds that hold no pointer are left untouched.
 */
void code_hydrate(code **pc)
{
    code *c;
    unsigned char ins,rm;
    enum FL fl;

    assert(pc);
    while (*pc)
    {
        c = (code *) ph_hydrate(pc);
        // Pick the instruction-info byte from the table matching the opcode map.
        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((c->Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c->Iop >> 8) & 0xFF];
        else if ((c->Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c->Iop & 0xFF];
        else
            ins = inssize[c->Iop & 0xFF];
        switch (c->Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c->IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c->IEV1.Vtor);
                goto done;

            case ASM:
                ph_hydrate(&c->IEV1.as.bytes);
                goto done;
        }
        if (!(ins & M) ||
            ((rm = c->Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c->Iflags))
        {
            if (
                ((rm & 0xC0) == 0 && !(((rm & 7) == 4 && (c->Isib & 7) == 5) || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        fl = (enum FL) c->IFL1;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c->IEVsym1);
                symbol_debug(c->IEVsym1);
                break;
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);        // nothing to hydrate
                break;
            case FLcode:
                (void) ph_hydrate(&c->IEV1.Vcode);
                break;
            case FLblock:
            case FLblockoff:
                (void) ph_hydrate(&c->IEV1.Vblock);
                break;
#if SCPP
            case FLctor:
            case FLdtor:
                el_hydrate(&c->IEV1.Vtor);
                break;
#endif
            case FLasm:
                (void) ph_hydrate(&c->IEV1.as.bytes);
                break;
            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        fl = (enum FL) c->IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
            case FLtmp:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c->IEVsym2);
                symbol_debug(c->IEVsym2);
                break;
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);        // nothing to hydrate
                break;
            case FLcode:
                (void) ph_hydrate(&c->IEV2.Vcode);
                break;
            case FLblock:
            case FLblockoff:
                (void) ph_hydrate(&c->IEV2.Vblock);
                break;
            default:
#ifdef DEBUG
                WRFL(fl);
#endif
                assert(0);
                break;
        }
    done:
        ;

        pc = &code_next(c);     // advance to the next link
    }
}
|
||
#endif
|
||
|
||
/**********************************
|
||
*/
|
||
|
||
#if DEHYDRATE
|
||
/**********************************
 * Walk a linked list of code instructions and pass every pointer-bearing
 * field (the list link itself plus the operand payloads selected by
 * IFL1/IFL2) to the matching *_dehydrate routine.
 * Input:
 *      pc      address of the pointer to the first instruction of the list
 */
void code_dehydrate(code **pc)
{
    code *c;

    /* The increment expression is reached both by falling off the end of
     * the body and by every 'continue' below, so the walk always advances
     * to the next link.
     */
    for (; (c = *pc) != NULL; pc = &code_next(c))
    {
        /* c was read before the link is handed to ph_dehydrate(), so the
         * instruction stays reachable through c for the rest of the body.
         */
        ph_dehydrate(pc);

        unsigned op = c->Iop;
        unsigned char ins;              /* flag byte from the size tables */

        if (c->Iflags & CFvex)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)   /* matches 0x0F38xx and 0x0F3Axx */
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];

        /* These opcodes carry one special payload and no ordinary operands */
        switch (op)
        {
            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c->IEV1.Vsrcpos);
                continue;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c->IEV1.Vtor);
                continue;

            case ASM:
                ph_dehydrate(&c->IEV1.as.bytes);
                continue;

            default:
                break;
        }

        /* Decide whether there is a first operand to process.  There is
         * none when the instruction lacks the M flag or when the top two
         * bits of Irm are both set.  When those bits are both clear, an
         * operand exists only for the specific r/m (and sib) encodings
         * tested below, which differ between 32 bit and 16 bit addressing.
         */
        int hasfirst = 0;
        if (ins & M)
        {
            unsigned char rm = c->Irm;
            unsigned mod = rm & 0xC0;

            if (mod != 0xC0)
            {
                if (is32bitaddr(I32,c->Iflags))
                    hasfirst = mod != 0 ||
                               ((rm & 7) == 4 && (c->Isib & 7) == 5) ||
                               (rm & 7) == 5;
                else
                    hasfirst = mod != 0 || (rm & 7) == 6;
            }
        }

        if (hasfirst)
        {
            enum FL fl1 = (enum FL) c->IFL1;

            switch (fl1)
            {
                /* Operand refers to a symbol */
                case FLdata:
                case FLudata:
                case FLtlsdata:
#if TARGET_SEGMENTED
                case FLcsdata:
                case FLfardata:
#endif
                case FLreg:
                case FLauto:
                case FLbprel:
                case FLpara:
                case FLtmp:
                case FLpseudo:
                case FLfunc:
                case FLextern:
                    assert(flinsymtab[fl1]);
                    symbol_dehydrate(&c->IEVsym1);
                    break;

                /* Nothing to dehydrate for these */
                case FLconst:
                case FLoffset:
                case FLlocalsize:
                case FLdatseg:
                case FLfltreg:
                case FLallocatmp:
                case FLcs:
                case FLndp:
                case FLframehandler:
                    assert(!flinsymtab[fl1]);
                    break;

                case FLcode:
                    ph_dehydrate(&c->IEV1.Vcode);
                    break;

                case FLblock:
                case FLblockoff:
                    ph_dehydrate(&c->IEV1.Vblock);
                    break;

#if SCPP
                case FLctor:
                case FLdtor:
                    el_dehydrate(&c->IEV1.Vtor);
                    break;
#endif

                case FLasm:
                    ph_dehydrate(&c->IEV1.as.bytes);
                    break;

                default:
#ifdef DEBUG
                    WRFL(fl1);
#endif
                    assert(0);
                    break;
            }
        }

        /* Ignore TEST (F6 and F7) opcodes */
        if (!(ins & T))
            continue;                   /* no second operand */

        enum FL fl2 = (enum FL) c->IFL2;

        switch (fl2)
        {
            /* Operand refers to a symbol */
            case FLdata:
            case FLudata:
            case FLtlsdata:
#if TARGET_SEGMENTED
            case FLcsdata:
            case FLfardata:
#endif
            case FLreg:
            case FLauto:
            case FLbprel:
            case FLpara:
            case FLtmp:
            case FLpseudo:
            case FLfunc:
            case FLextern:
                assert(flinsymtab[fl2]);
                symbol_dehydrate(&c->IEVsym2);
                break;

            /* Nothing to dehydrate for these */
            case FLconst:
            case FLoffset:
            case FLlocalsize:
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLframehandler:
                assert(!flinsymtab[fl2]);
                break;

            case FLcode:
                ph_dehydrate(&c->IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c->IEV2.Vblock);
                break;

            default:
#ifdef DEBUG
                WRFL(fl2);
#endif
                assert(0);
                break;
        }
    }
}
|
||
#endif
|
||
|
||
/***************************
|
||
* Debug code to dump code structure.
|
||
*/
|
||
|
||
#if DEBUG
|
||
|
||
/***************************
 * Dump a list of instructions: call print() on each element, following
 * code_next() until the end of the list. A null list prints nothing.
 */
void WRcodlst(code *c)
{
    while (c)
    {
        c->print();
        c = code_next(c);
    }
}
|
||
|
||
void code::print()
|
||
{
|
||
unsigned char ins;
|
||
unsigned char rexb;
|
||
code *c = this;
|
||
|
||
if (c == CNIL)
|
||
{ printf("code 0\n");
|
||
return;
|
||
}
|
||
|
||
unsigned op = c->Iop;
|
||
if (c->Iflags & CFvex)
|
||
ins = vex_inssize(c);
|
||
else if ((c->Iop & 0xFFFD00) == 0x0F3800)
|
||
ins = inssize2[(op >> 8) & 0xFF];
|
||
else if ((c->Iop & 0xFF00) == 0x0F00)
|
||
ins = inssize2[op & 0xFF];
|
||
else
|
||
ins = inssize[op & 0xFF];
|
||
|
||
printf("code %p: nxt=%p ",c,code_next(c));
|
||
|
||
if (c->Iflags & CFvex)
|
||
{
|
||
if (c->Iflags & CFvex3)
|
||
{ printf("vex=0xC4");
|
||
printf(" 0x%02X", VEX3_B1(c->Ivex));
|
||
printf(" 0x%02X", VEX3_B2(c->Ivex));
|
||
rexb =
|
||
( c->Ivex.w ? REX_W : 0) |
|
||
(!c->Ivex.r ? REX_R : 0) |
|
||
(!c->Ivex.x ? REX_X : 0) |
|
||
(!c->Ivex.b ? REX_B : 0);
|
||
}
|
||
else
|
||
{ printf("vex=0xC5");
|
||
printf(" 0x%02X", VEX2_B1(c->Ivex));
|
||
rexb = !c->Ivex.r ? REX_R : 0;
|
||
}
|
||
printf(" ");
|
||
}
|
||
else
|
||
rexb = c->Irex;
|
||
|
||
if (rexb)
|
||
{ printf("rex=0x%02X ", c->Irex);
|
||
if (rexb & REX_W)
|
||
printf("W");
|
||
if (rexb & REX_R)
|
||
printf("R");
|
||
if (rexb & REX_X)
|
||
printf("X");
|
||
if (rexb & REX_B)
|
||
printf("B");
|
||
printf(" ");
|
||
}
|
||
printf("op=0x%02X",op);
|
||
|
||
if ((op & 0xFF) == ESCAPE)
|
||
{ if ((op & 0xFF00) == ESClinnum)
|
||
{ printf(" linnum = %d\n",c->IEV1.Vsrcpos.Slinnum);
|
||
return;
|
||
}
|
||
printf(" ESCAPE %d",c->Iop >> 8);
|
||
}
|
||
if (c->Iflags)
|
||
printf(" flg=%x",c->Iflags);
|
||
if (ins & M)
|
||
{ unsigned rm = c->Irm;
|
||
printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
|
||
if (!I16 && issib(rm))
|
||
{ unsigned char sib = c->Isib;
|
||
printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
|
||
}
|
||
if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
|
||
{
|
||
switch (c->IFL1)
|
||
{
|
||
case FLconst:
|
||
case FLoffset:
|
||
printf(" int = %4d",c->IEV1.Vuns);
|
||
break;
|
||
case FLblock:
|
||
printf(" block = %p",c->IEV1.Vblock);
|
||
break;
|
||
case FLswitch:
|
||
case FLblockoff:
|
||
case FLlocalsize:
|
||
case FLframehandler:
|
||
case 0:
|
||
break;
|
||
case FLdatseg:
|
||
printf(" %d.%llx",c->IEVseg1,(unsigned long long)c->IEVpointer1);
|
||
break;
|
||
case FLauto:
|
||
case FLreg:
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLpara:
|
||
case FLtmp:
|
||
case FLbprel:
|
||
case FLtlsdata:
|
||
printf(" sym='%s'",c->IEVsym1->Sident);
|
||
break;
|
||
case FLextern:
|
||
printf(" FLextern offset = %4d",(int)c->IEVoffset1);
|
||
break;
|
||
default:
|
||
WRFL((enum FL)c->IFL1);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (ins & T)
|
||
{ printf(" "); WRFL((enum FL)c->IFL2);
|
||
switch (c->IFL2)
|
||
{
|
||
case FLconst:
|
||
printf(" int = %4d",c->IEV2.Vuns);
|
||
break;
|
||
case FLblock:
|
||
printf(" block = %p",c->IEV2.Vblock);
|
||
break;
|
||
case FLswitch:
|
||
case FLblockoff:
|
||
case 0:
|
||
case FLlocalsize:
|
||
case FLframehandler:
|
||
break;
|
||
case FLdatseg:
|
||
printf(" %d.%llx",c->IEVseg2,(unsigned long long)c->IEVpointer2);
|
||
break;
|
||
case FLauto:
|
||
case FLreg:
|
||
case FLpara:
|
||
case FLtmp:
|
||
case FLbprel:
|
||
case FLfunc:
|
||
case FLdata:
|
||
case FLudata:
|
||
case FLtlsdata:
|
||
printf(" sym='%s'",c->IEVsym2->Sident);
|
||
break;
|
||
case FLcode:
|
||
printf(" code = %p",c->IEV2.Vcode);
|
||
break;
|
||
default:
|
||
WRFL((enum FL)c->IFL2);
|
||
break;
|
||
}
|
||
}
|
||
printf("\n");
|
||
}
|
||
#endif
|
||
|
||
#endif // !SPP
|