// Copyright (C) 1985-1998 by Symantec // Copyright (C) 2000-2011 by Digital Mars // All Rights Reserved // http://www.digitalmars.com // Written by Walter Bright /* * This source file is made available for personal use * only. The license is in /dmd/src/dmd/backendlicense.txt * or /dm/src/dmd/backendlicense.txt * For any other uses, please contact Digital Mars. */ #if (SCPP || MARS) && !HTOD #include #include #include #include "cc.h" #include "el.h" #include "go.h" #include "oper.h" #include "global.h" #include "type.h" static char __file__[] = __FILE__; /* for tassert.h */ #include "tassert.h" /*#define vec_copy(t,f) (dbg_printf("line %d\n",__LINE__),vec_copy((t),(f)))*/ extern mftype mfoptim; struct Iv; /********************************* * Loop data structure. */ struct loop { loop *Lnext; // Next loop in list (startloop -> start of list) vec_t Lloop; // Vector of blocks in this loop vec_t Lexit; // Vector of exit blocks of loop block *Lhead; // Pointer to header of loop block *Ltail; // Pointer to tail block *Lpreheader; // Pointer to preheader (if any) list_t Llis; // loop invariant elems moved to Lpreheader, so // redundant temporaries aren't created Iv *Livlist; // basic induction variables Iv *Lopeqlist; // list of other op= variables void print(); static loop *mycalloc(); static loop *freelist; }; struct famlist { elem **FLpelem; /* parent of elem in the family */ elem *c1,*c2; /* c1*(basic IV) + c2 */ #define FLELIM ((symbol *)-1) symbol *FLtemp; // symbol index of temporary (FLELIM if */ /* this entry has no temporary) */ tym_t FLty; /* type of this induction variable */ tym_t FLivty; /* type of the basic IV elem (which is */ /* not necessarilly the type of the IV */ /* elem!) 
*/ famlist *FLnext; // next in list void print(); static famlist *mycalloc(); static famlist *freelist; }; struct Iv { symbol *IVbasic; // symbol of basic IV elem **IVincr; // pointer to parent of IV increment elem famlist *IVfamily; // variables in this family Iv *IVnext; // next iv in list void print(); static Iv *mycalloc(); static Iv *freelist; }; STATIC void freeloop(loop **pl); STATIC void buildloop(loop **pl, block *head, block *tail); STATIC void insert(block *b , vec_t lv); STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit); STATIC int looprotate(loop *l); STATIC void markinvar(elem *n , vec_t rd); STATIC bool refs(symbol *v , elem *n , elem *nstop); STATIC void appendelem(elem *n , elem **pn); STATIC void freeivlist(Iv *biv); STATIC void unmarkall(elem *e); void filterrd(vec_t f,vec_t rd,symbol *s); STATIC void filterrdind(vec_t f,vec_t rd,elem *e); STATIC famlist * simfl(famlist *fl , tym_t tym); STATIC famlist * newfamlist(tym_t ty); STATIC void loopiv(loop *l); STATIC void findbasivs(loop *l); STATIC void findopeqs(loop *l); STATIC void findivfams(loop *l); STATIC void ivfamelems(Iv *biv , elem **pn); STATIC void elimfrivivs(loop *l); STATIC void intronvars(loop *l); STATIC bool funcprev(Iv *biv , famlist *fl); STATIC void elimbasivs(loop *l); STATIC void elimopeqs(loop *l); STATIC famlist * flcmp(famlist *f1 , famlist *f2); STATIC elem ** onlyref(symbol *x , loop *l , elem *incn , int *prefcount); STATIC void countrefs(elem **pn , bool flag); STATIC int countrefs2(elem *e); STATIC void elimspec(loop *l); STATIC void elimspecwalk(elem **pn); static bool addblk; /* if TRUE, then we added a block */ /* is elem loop invariant? */ #define isLI(n) ((n)->Nflags & NFLli) /* make elem loop invariant */ #define makeLI(n) ((n)->Nflags |= NFLli) /****************************** * UNAMBIG being defined means that: * Only variables that could only be unambiguously defined * are candidates for loop invariant removal and induction * variables. 
* This means only variables that have the SFLunambig flag * set for them. * Doing this will still cover 90% (I hope) of the cases, and * is a lot faster to compute. */ #define UNAMBIG 1 /**************************** */ void famlist::print() { #ifdef DEBUG dbg_printf("famlist:\n"); dbg_printf("*FLpelem:\n"); elem_print(*FLpelem); dbg_printf("c1:"); elem_print(c1); dbg_printf("c2:"); elem_print(c2); dbg_printf("FLty = "); WRTYxx(FLty); dbg_printf("\nFLivty = "); WRTYxx(FLivty); dbg_printf("\n"); #endif } /**************************** */ void Iv::print() { #ifdef DEBUG dbg_printf("IV: '%s'\n",IVbasic->Sident); dbg_printf("*IVincr:\n"); elem_print(*IVincr); #endif } /*********************** * Write loop. */ void loop::print() { #ifdef DEBUG loop *l = this; dbg_printf("loop %p, next = %p\n",l,(l) ? l->Lnext : (loop *) NULL); if (!l) return; dbg_printf("\thead: B%d, tail: B%d, prehead: B%d\n",l->Lhead->Bdfoidx, l->Ltail->Bdfoidx,(l->Lpreheader ) ? l->Lpreheader->Bdfoidx : (unsigned)-1); dbg_printf("\tLloop "); vec_println(l->Lloop); dbg_printf("\tLexit "); vec_println(l->Lexit); #endif } /*************************** * Allocate loop. */ loop *loop::freelist = NULL; loop *loop::mycalloc() { loop *l; if (freelist) { l = freelist; freelist = l->Lnext; memset(l,0,sizeof(loop)); } else l = (loop *) mem_calloc(sizeof(loop)); return l; } /************* * Free loops. */ STATIC void freeloop(loop **pl) { loop *ln; loop *l; for (l = *pl; l; l = ln) { ln = l->Lnext; vec_free(l->Lloop); vec_free(l->Lexit); list_free(&l->Llis); l->Lnext = loop::freelist; loop::freelist = l; } *pl = NULL; } /********************************** * Initialize block information. 
* Returns: * !=0 contains BCasm block */ int blockinit() { register unsigned i; register block *b; int hasasm = 0; assert(dfo); for (i = 0, b = startblock; b; i++, b = b->Bnext) { #ifdef DEBUG /* check integrity of Bpred and Bsucc */ register list_t blp; for (blp = b->Bpred; blp; blp = list_next(blp)) { register list_t bls; for (bls = list_block(blp)->Bsucc; bls; bls = list_next(bls)) if (list_block(bls) == b) goto L1; assert(0); L1: ; } #endif if (b->BC == BCasm) hasasm = 1; ; /* compute number of blocks */ } assert(numblks == i && maxblks); assert(i <= maxblks); for (i = 0; i < dfotop; i++) { assert(dfo[i]->Bdfoidx == i); if (!dfo[i]->Bdom) dfo[i]->Bdom = vec_calloc(maxblks); /* alloc Bdom vectors */ } return hasasm; } /**************************************** * Compute dominators (Bdom) for each block. * See Aho & Ullman Fig. 13.5. * Note that flow graph is reducible if there is only one * pass through the loop. * Input: * dfo[] * Output: * fills in the Bdom vector for each block */ void compdom() { unsigned i; unsigned cntr; vec_t t1; list_t bl; bool chgs; block *sb; assert(dfo); sb = dfo[0]; // starting block t1 = vec_calloc(vec_numbits(sb->Bdom)); // allocate a temporary vec_clear(sb->Bdom); vec_setbit(0,sb->Bdom); // starting block only doms itself for (i = 1; i < dfotop; i++) // for all except startblock vec_set(dfo[i]->Bdom); // dominate all blocks cntr = 0; // # of times thru loop do { chgs = FALSE; for (i = 1; i < dfotop; ++i) // for each block in dfo[] { // except startblock bl = dfo[i]->Bpred; if (bl) // if there are predecessors { vec_copy(t1,list_block(bl)->Bdom); while ((bl = list_next(bl)) != NULL) vec_andass(t1,list_block(bl)->Bdom); } else vec_clear(t1); // no predecessors to dominate vec_setbit(i,t1); // each block doms itself if (chgs) vec_copy(dfo[i]->Bdom,t1); else if (!vec_equal(dfo[i]->Bdom,t1)) // if any changes { vec_copy(dfo[i]->Bdom,t1); chgs = TRUE; } } cntr++; assert(cntr < 50); // should have converged by now } while (chgs); 
vec_free(t1); if (cntr <= 2) cmes("Flow graph is reducible\n"); else cmes("Flow graph is not reducible\n"); } /*************************** * Return !=0 if block A dominates block B. */ HINT dom(block *A,block *B) { assert(A && B && dfo && dfo[A->Bdfoidx] == A); return vec_testbit(A->Bdfoidx,B->Bdom); } /********************** * Find all the loops. */ STATIC void findloops(loop **ploops) { unsigned i; list_t bl; block *b,*s; freeloop(ploops); //printf("findloops()\n"); for (i = 0; i < dfotop; i++) dfo[i]->Bweight = 1; /* reset Bweights */ for (i = dfotop; i--;) /* for each block (note reverse */ /* dfo order, so most nested */ /* loops are found first) */ { b = dfo[i]; assert(b); for (bl = b->Bsucc; bl; bl = list_next(bl)) { s = list_block(bl); /* each successor s to b */ assert(s); if (dom(s,b)) /* if s dominates b */ buildloop(ploops,s,b); // we found a loop } } #ifdef DEBUG if (debugc) { loop *l; for (l = *ploops; l; l = l->Lnext) { l->print(); } } #endif } /******************************** */ STATIC void loop_weight(block *b,int factor) { // Be careful not to overflow if (b->Bweight < 0x10000) b->Bweight *= 10 * factor; else if (b->Bweight < 0x100000) b->Bweight *= 2 * factor; else b->Bweight += factor; } /***************************** * Construct natural loop. * Algorithm 13.1 from Aho & Ullman. * Note that head dom tail. */ STATIC void buildloop(loop **ploops,block *head,block *tail) { loop *l; unsigned i; list_t bl; //printf("buildloop()\n"); /* See if this is part of an existing loop. If so, merge the two. */ for (l = *ploops; l; l = l->Lnext) if (l->Lhead == head) /* two loops with same header */ { vec_t v; // Calculate loop contents separately so we get the Bweights // done accurately. 
v = vec_calloc(maxblks); vec_setbit(head->Bdfoidx,v); loop_weight(head,1); insert(tail,v); vec_orass(l->Lloop,v); // merge into existing loop vec_free(v); vec_clear(l->Lexit); // recompute exit blocks goto L1; } /* Allocate loop entry */ l = loop::mycalloc(); l->Lnext = *ploops; *ploops = l; // put l at beginning of list l->Lloop = vec_calloc(maxblks); /* allocate loop bit vector */ l->Lexit = vec_calloc(maxblks); /* bit vector for exit blocks */ l->Lhead = head; l->Ltail = tail; vec_setbit(head->Bdfoidx,l->Lloop); /* add head to the loop */ loop_weight(head,2); // *20 usage for loop header insert(tail,l->Lloop); /* insert tail in loop */ L1: /* Find all the exit blocks (those blocks with * successors outside the loop). */ foreach (i,dfotop,l->Lloop) /* for each block in this loop */ { if (dfo[i]->BC == BCret || dfo[i]->BC == BCretexp || dfo[i]->BC == BCexit) vec_setbit(i,l->Lexit); /* ret blocks are exit blocks */ else { for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) if (!vec_testbit(list_block(bl)->Bdfoidx,l->Lloop)) { vec_setbit(i,l->Lexit); break; } } } /* Find preheader, if any, to the loop. The preheader is a block that has only the head as a successor. All other predecessors of head must be inside the loop. */ l->Lpreheader = NULL; for (bl = head->Bpred; bl; bl = list_next(bl)) { block *b = list_block(bl); if (!vec_testbit(b->Bdfoidx,l->Lloop)) /* if not in loop */ { if (l->Lpreheader) /* if already one */ { l->Lpreheader = NULL; /* can only be one */ break; } else { if (list_next(b->Bsucc)) // if more than 1 successor break; // b can't be a preheader l->Lpreheader = b; } } } } /******************************** * Support routine for buildloop(). * Add a block b and all its predecessors to loop lv. 
*/ STATIC void insert(register block *b, register vec_t lv) { register list_t bl; assert(b && lv); if (!vec_testbit(b->Bdfoidx,lv)) /* if block is not in loop */ { vec_setbit(b->Bdfoidx,lv); /* add block to loop */ loop_weight(b,1); // *10 usage count for (bl = b->Bpred; bl; bl = list_next(bl)) insert(list_block(bl),lv); /* insert all its predecessors */ } } /************************************** * Perform loop rotations. * Loop starts as: * * prehead * | * v * +->head----> * | | * | v * | body * | | * | v * +--tail * * Two types are done: * 1) Header is moved to be past the tail. * * prehead * | * +---+ * | * | body<-+ * | | | * | v | * | tail | * | | | * | v | * +->head--+ * | * v * * 2) Header is copied past the tail (done only if MFtime is set). * * prehead * | * v * head1-----+ * | | * v | * body<--+ | * | | | * v | | * tail | | * | | | * v | | * head2--+ | * | | * +--------+ * v * * Input: * Loop information (do not depend on the preheader information) * Output: * Revised list of blocks, a new dfo and new loop information * Returns: * TRUE need to recompute loop data */ STATIC int looprotate(loop *l) { register block *tail = l->Ltail; register block *head = l->Lhead; register block *b; //printf("looprotate(%p)\n",l); // Do not rotate loop if: if (head == tail || // loop is only one block big !vec_testbit(head->Bdfoidx,l->Lexit)) // header is not an exit block goto Lret; if (//iter != 1 && vec_testbit(tail->Bdfoidx,l->Lexit)) // tail is an exit block goto Lret; // Do not rotate if already rotated for (b = tail->Bnext; b; b = b->Bnext) if (b == head) // if loop already rotated goto Lret; #if SCPP if (head->BC == BCtry) goto Lret; #endif if (head->BC == BC_try) goto Lret; #ifdef DEBUG //if (debugc) { dbg_printf("looprotate: "); l->print(); } #endif if ((mfoptim & MFtime) && head->BC != BCswitch && head->BC != BCasm) { // Duplicate the header past the tail (but doing // switches would be too expensive in terms of code // generated). 
register block *head2; register list_t bl, *pbl2, *pbl, *pbln; head2 = block_calloc(); // create new head block numblks++; // number of blocks in existence head2->Btry = head->Btry; head2->Bflags = head->Bflags; head->Bflags = BFLnomerg; // move flags over to head2 head2->Bflags |= BFLnomerg; head2->BC = head->BC; assert(head2->BC != BCswitch); if (head->Belem) // copy expression tree head2->Belem = el_copytree(head->Belem); head2->Bnext = tail->Bnext; tail->Bnext = head2; // pred(head1) = pred(head) outside loop // pred(head2) = pred(head) inside loop pbl2 = &(head2->Bpred); for (pbl = &(head->Bpred); *pbl; pbl = pbln) { if (vec_testbit(list_block(*pbl)->Bdfoidx, l->Lloop)) { // if this predecessor is inside the loop *pbl2 = *pbl; *pbl = list_next(*pbl); pbln = pbl; // don't skip this next one list_next(*pbl2) = NULL; bl = list_block(*pbl2)->Bsucc; pbl2 = &(list_next(*pbl2)); for (; bl; bl = list_next(bl)) if (list_block(bl) == head) { list_ptr(bl) = (void *)head2; goto L2; } assert(0); L2: ; } else pbln = &(list_next(*pbl)); // next predecessor in list } // for each pred(head) // succ(head2) = succ(head) for (bl = head->Bsucc; bl; bl = list_next(bl)) { list_append(&(head2->Bsucc),list_block(bl)); list_append(&(list_block(bl)->Bpred),head2); } changes++; return TRUE; } else if (startblock != head /* This screws up the OPctor/OPdtor sequence for: * struct CString * { CString(); * ~CString(); * int GetLength(); * }; * * void f(void) * { for(;;) * { CString s ; * if(s.GetLength()!=0) * break ; * } * } */ && !(config.flags3 & CFG3eh) ) { // optimize for space // Simply position the header past the tail for (b = startblock; b; b = b->Bnext) if (b->Bnext == head) goto L1; // found parent b of head assert(0); L1: b->Bnext = head->Bnext; head->Bnext = tail->Bnext; tail->Bnext = head; cmes2( "Rotated loop %p\n", l); changes++; } Lret: return FALSE; } static int gref; // parameter for markinvar() static block *gblock; // parameter for markinvar() static vec_t lv; // 
parameter for markinvar() static vec_t gin; // parameter for markinvar() static bool doflow; // TRUE if flow analysis has to be redone /********************************* * Loop invariant and induction variable elimination. * Input: * iter which optimization iteration we are on */ void loopopt() { list_t bl; loop *l; loop *ln; vec_t rd; loop *startloop; cmes("loopopt()\n"); startloop = NULL; restart: file_progress(); if (blockinit()) // init block data { findloops(&startloop); // Compute Bweights freeloop(&startloop); // free existing loops return; // can't handle ASM blocks } compdom(); // compute dominators findloops(&startloop); // find the loops for (l = startloop; l; l = ln) { ln = l->Lnext; if (looprotate(l)) // rotate the loop { compdfo(); blockinit(); compdom(); findloops(&startloop); // may trash l->Lnext if (ln) { ln = startloop; // start over file_progress(); } } } // Make sure there is a preheader for each loop. addblk = FALSE; /* assume no blocks added */ for (l = startloop; l; l = l->Lnext)/* for each loop */ { #ifdef DEBUG //if (debugc) l->print(); #endif if (!l->Lpreheader) /* if no preheader */ { register block *h, *p; cmes("Generating preheader for loop\n"); addblk = TRUE; /* add one */ p = block_calloc(); // the preheader numblks++; assert (numblks <= maxblks); h = l->Lhead; /* loop header */ /* Find parent of h */ if (h == startblock) startblock = p; else { register block *ph; for (ph = startblock; 1; ph = ph->Bnext) { assert(ph); /* should have found it */ if (ph->Bnext == h) break; } /* Link p into block list between ph and h */ ph->Bnext = p; } p->Bnext = h; l->Lpreheader = p; p->BC = BCgoto; assert(p->Bsucc == NULL); list_append(&(p->Bsucc),h); /* only successor is h */ p->Btry = h->Btry; cmes3("Adding preheader %p to loop %p\n",p,l); // Move preds of h that aren't in the loop to preds of p for (bl = h->Bpred; bl;) { register block *b = list_block(bl); if (!vec_testbit (b->Bdfoidx, l->Lloop)) { register list_t bls; list_append(&(p->Bpred), 
b); list_subtract(&(h->Bpred), b); bl = h->Bpred; /* dunno what subtract did */ /* Fix up successors of predecessors */ for (bls = b->Bsucc; bls; bls = list_next(bls)) if (list_block(bls) == h) list_ptr(bls) = (void *)p; } else bl = list_next(bl); } list_append(&(h->Bpred),p); /* p is a predecessor to h */ } } /* for */ if (addblk) /* if any blocks were added */ { compdfo(); /* compute depth-first order */ blockinit(); compdom(); findloops(&startloop); // recompute block info addblk = FALSE; } /* Do the loop optimizations. Note that accessing the loops */ /* starting from startloop will access them in least nested */ /* one first, thus moving LIs out as far as possible. */ doflow = TRUE; /* do flow analysis */ cmes("Starting loop invariants\n"); for (l = startloop; l; l = l->Lnext) { register unsigned i,j; #ifdef DEBUG //if (debugc) l->print(); #endif file_progress(); assert(l->Lpreheader); if (doflow) { flowrd(); /* compute reaching definitions */ flowlv(); /* compute live variables */ flowae(); // compute available expressions doflow = FALSE; /* no need to redo it */ if (deftop == 0) /* if no definition elems */ break; /* no need to optimize */ } lv = l->Lloop; cmes2("...Loop %p start...\n",l); /* Unmark all elems in this loop */ foreach (i,dfotop,lv) if (dfo[i]->Belem) unmarkall(dfo[i]->Belem); /* unmark all elems */ /* Find & mark all LIs */ gin = vec_clone(l->Lpreheader->Bout); rd = vec_calloc(deftop); /* allocate our running RD vector */ foreach (i,dfotop,lv) /* for each block in loop */ { block *b = dfo[i]; cmes2("B%d\n",i); if (b->Belem) { vec_copy(rd, b->Binrd); // IN reaching defs #if 0 dbg_printf("i = %d\n",i); { int j; for (j = 0; j < deftop; j++) elem_print(defnod[j].DNelem); } dbg_printf("rd : "); vec_println(rd); #endif gblock = b; gref = 0; if (b != l->Lhead) gref = 1; markinvar(b->Belem, rd); #if 0 dbg_printf("i = %d\n",i); { int j; for (j = 0; j < deftop; j++) elem_print(defnod[j].DNelem); } dbg_printf("rd : "); vec_println(rd); 
dbg_printf("Boutrd: "); vec_println(b->Boutrd); #endif assert(vec_equal(rd, b->Boutrd)); } else assert(vec_equal(b->Binrd, b->Boutrd)); } vec_free(rd); vec_free(gin); /* Move loop invariants */ foreach (i,dfotop,lv) { int domexit; // TRUE if this block dominates all // exit blocks of the loop foreach (j,dfotop,l->Lexit) /* for each exit block */ { if (!vec_testbit (i, dfo[j]->Bdom)) { domexit = 0; goto L1; // break if !(i dom j) } } // if i dom (all exit blocks) domexit = 1; L1: ; if (dfo[i]->Belem) { // If there is any hope of making an improvement if (domexit || l->Llis) { if (dfo[i] != l->Lhead) ; //domexit |= 2; movelis(dfo[i]->Belem, dfo[i], l, &domexit); } } } //list_free(&l->Llis,FPNULL); cmes2("...Loop %p done...\n",l); if (mfoptim & MFliv) { loopiv(l); /* induction variables */ if (addblk) /* if we added a block */ { compdfo(); goto restart; /* play it safe and start over */ } } } /* for */ freeloop(&startloop); } /***************************** * If elem is loop invariant, mark it. * Input: * lv = vector of all the blocks in this loop. * rd = vector of loop invariants for this elem. This must be * continually updated. * Note that we do not iterate until no more LIs are found. The only * thing this would buy us is stuff that depends on LI assignments. 
*/ STATIC void markinvar(elem *n,vec_t rd) { vec_t tmp; unsigned i; symbol *v; elem *n1; assert(n && rd); assert(vec_numbits(rd) == deftop); switch (n->Eoper) { case OPaddass: case OPminass: case OPmulass: case OPandass: case OPorass: case OPxorass: case OPdivass: case OPmodass: case OPshlass: case OPshrass: case OPashrass: case OPpostinc: case OPpostdec: case OPcall: markinvar(n->E2,rd); case OPnegass: n1 = n->E1; if (n1->Eoper == OPind) markinvar(n1->E1,rd); else if (OTbinary(n1->Eoper)) { markinvar(n1->E1,rd); markinvar(n1->E2,rd); } L2: if (n->Eoper == OPcall || gblock->Btry || !(n1->Eoper == OPvar && symbol_isintab(n1->EV.sp.Vsym))) { gref = 1; } updaterd(n,rd,NULL); break; case OPcallns: markinvar(n->E2,rd); markinvar(n->E1,rd); break; case OPstrcpy: case OPstrcat: case OPmemcpy: case OPmemset: markinvar(n->E2,rd); markinvar(n->E1,rd); updaterd(n,rd,NULL); break; case OPbtc: case OPbtr: case OPbts: markinvar(n->E1,rd); markinvar(n->E2,rd); updaterd(n,rd,NULL); break; case OPucall: markinvar(n->E1,rd); /* FALL-THROUGH */ case OPasm: gref = 1; updaterd(n,rd,NULL); break; case OPucallns: case OPstrpar: case OPstrctor: case OPvector: case OPvoid: case OPstrlen: #if TX86 case OPinp: #endif markinvar(n->E1,rd); break; case OPcond: case OPparam: case OPstrcmp: case OPmemcmp: case OPbt: // OPbt is like OPind, assume not LI #if TX86 case OPoutp: #endif markinvar(n->E1,rd); markinvar(n->E2,rd); break; case OPandand: case OPoror: markinvar(n->E1,rd); tmp = vec_clone(rd); markinvar(n->E2,tmp); vec_orass(rd,tmp); /* rd |= tmp */ vec_free(tmp); break; case OPcolon: case OPcolon2: tmp = vec_clone(rd); markinvar(n->E1,rd); markinvar(n->E2,tmp); vec_orass(rd,tmp); /* rd |= tmp */ vec_free(tmp); break; case OPaddr: // mark addresses of OPvars as LI markinvar(n->E1,rd); if (n->E1->Eoper == OPvar || isLI(n->E1)) makeLI(n); break; case OPmsw: case OPneg: case OPbool: case OPnot: case OPcom: case OPs16_32: case OPd_s32: case OPs32_d: case OPd_s16: case OPs16_d: case OPd_f: case 
OPf_d: case OP32_16: case OPu8_16: case OPld_d: case OPd_ld: case OPld_u64: case OPc_r: case OPc_i: case OParraylength: case OPnullcheck: case OPu16_32: case OPu16_d: case OPd_u16: case OPs8_16: case OP16_8: case OPd_u32: case OPu32_d: #if LONGLONG case OPs32_64: case OPu32_64: case OP64_32: case OPd_s64: case OPd_u64: case OPs64_d: case OPu64_d: case OP128_64: case OPs64_128: case OPu64_128: #endif case OPabs: case OPsqrt: case OPrndtol: case OPsin: case OPcos: case OPrint: case OPsetjmp: case OPbsf: case OPbsr: case OPbswap: #if TARGET_SEGMENTED case OPvp_fp: /* BUG for MacHandles */ case OPnp_f16p: case OPf16p_np: case OPoffset: case OPnp_fp: case OPcvp_fp: #endif markinvar(n->E1,rd); if (isLI(n->E1)) /* if child is LI */ makeLI(n); break; case OPeq: case OPstreq: markinvar(n->E2,rd); n1 = n->E1; markinvar(n1,rd); /* Determine if assignment is LI. Conditions are: */ /* 1) Rvalue is LI */ /* 2) Lvalue is a variable (simplifies things a lot) */ /* 3) Lvalue can only be affected by unambiguous defs */ /* 4) No rd's of lvalue that are within the loop (other */ /* than the current def) */ if (isLI(n->E2) && n1->Eoper == OPvar) /* 1 & 2 */ { v = n1->EV.sp.Vsym; #if UNAMBIG if (v->Sflags & SFLunambig) #endif { tmp = vec_calloc(deftop); //filterrd(tmp,rd,v); listrds(rd,n1,tmp); foreach (i,deftop,tmp) if (defnod[i].DNelem != n && vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) goto L3; makeLI(n); // then the def is LI L3: vec_free(tmp); } } goto L2; case OPadd: case OPmin: case OPmul: case OPand: case OPor: case OPxor: case OPdiv: case OPmod: case OPshl: case OPshr: case OPeqeq: case OPne: case OPlt: case OPle: case OPgt: case OPge: case OPashr: case OPror: case OProl: case OPunord: case OPlg: case OPleg: case OPule: case OPul: case OPuge: case OPug: case OPue: case OPngt: case OPnge: case OPnlt: case OPnle: case OPord: case OPnlg: case OPnleg: case OPnule: case OPnul: case OPnuge: case OPnug: case OPnue: case OPinstanceof: case OPfinalinstanceof: case OPcheckcast: case 
OPcomma: case OPpair: case OPrpair: case OPscale: case OPremquo: case OPyl2x: case OPyl2xp1: markinvar(n->E1,rd); markinvar(n->E2,rd); if (isLI(n->E2) && isLI(n->E1)) makeLI(n); break; case OPind: /* must assume this is not LI */ markinvar(n->E1,rd); if (isLI(n->E1)) { #if 0 // This doesn't work with C++, because exp2_ptrtocomtype() will // transfer const to where it doesn't belong. if (n->Ety & mTYconst) { makeLI(n); } #endif #if 0 // This was disabled because it was marking as LI // the loop dimension for the [i] array if // a[j][i] was in a loop. This meant the a[j] array bounds // check for the a[j].length was skipped. else if (n->Ejty) { tmp = vec_calloc(deftop); filterrdind(tmp,rd,n); // only the RDs pertaining to n // if (no RDs within loop) // then it's loop invariant foreach (i,deftop,tmp) // for each RD if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) goto L10; // found a RD in the loop // If gref has occurred, this can still be LI // if n is an AE that was also an AE at the // point of gref. // We can catch a subset of these cases by looking // at the AEs at the start of the loop. 
if (gref) { int j; //printf("\tn is: "); WReqn(n); printf("\n"); foreach (j,exptop,gin) { elem *e = expnod[j]; //printf("\t\texpnod[%d] = %p\n",j,e); //printf("\t\tAE is: "); WReqn(e); printf("\n"); if (el_match2(n,e)) { makeLI(n); //printf("Ind LI: "); WReqn(n); printf("\n"); break; } } } else makeLI(n); L10: vec_free(tmp); break; } #endif } break; case OPvar: v = n->EV.sp.Vsym; #if UNAMBIG if (v->Sflags & SFLunambig) // must be unambiguous to be LI #endif { tmp = vec_calloc(deftop); //filterrd(tmp,rd,v); // only the RDs pertaining to v listrds(rd,n,tmp); // only the RDs pertaining to v // if (no RDs within loop) // then it's loop invariant foreach (i,deftop,tmp) // for each RD if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv)) goto L1; // found a RD in the loop makeLI(n); L1: vec_free(tmp); } break; case OPstring: case OPrelconst: case OPconst: /* constants are always LI */ case OPhstring: case OPframeptr: makeLI(n); break; case OPinfo: markinvar(n->E2,rd); break; case OPstrthis: case OPmark: case OPctor: case OPdtor: case OPdctor: case OPddtor: case OPhalt: case OPgot: // shouldn't OPgot be makeLI ? break; default: #ifdef DEBUG WROP(n->Eoper); #endif //printf("n->Eoper = %d, OPconst = %d\n", n->Eoper, OPconst); assert(0); } #ifdef DEBUG if (debugc && isLI(n)) { dbg_printf(" LI elem: "); WReqn(n); dbg_printf("\n"); } #endif } /******************** * Update rd vector. * Input: * n assignment elem or function call elem or OPasm elem * rd reaching def vector to update * (clear bits for defs we kill, set bit for n (which is the * def we are genning)) * vecdim deftop */ void updaterd(elem *n,vec_t GEN,vec_t KILL) { unsigned op = n->Eoper; unsigned i; unsigned ni; elem *t; assert(OTdef(op)); assert(GEN); elem_debug(n); // If unambiguous def if (OTassign(op) && (t = n->E1)->Eoper == OPvar) { symbol *d = t->EV.sp.Vsym; targ_size_t toff = t->EV.sp.Voffset; targ_size_t tsize; targ_size_t ttop; tsize = (op == OPstreq) ? 
type_size(n->ET) : tysize(t->Ety); ttop = toff + tsize; //printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize); ni = (unsigned)-1; /* for all unambig defs in defnod[] */ for (i = 0; i < deftop; i++) { elem *tn = defnod[i].DNelem; elem *tn1; targ_size_t tn1size; if (tn == n) ni = i; if (!OTassign(tn->Eoper)) continue; // If def of same variable, kill that def tn1 = tn->E1; if (tn1->Eoper != OPvar || d != tn1->EV.sp.Vsym) continue; // If t completely overlaps tn1 tn1size = (tn->Eoper == OPstreq) ? type_size(tn->ET) : tysize(tn1->Ety); if (toff <= tn1->EV.sp.Voffset && tn1->EV.sp.Voffset + tn1size <= ttop) { if (KILL) vec_setbit(i,KILL); vec_clearbit(i,GEN); } } assert(ni != -1); } #if 0 else if (OTassign(op) && t->Eoper != OPvar && t->Ejty) { ni = -1; // for all unambig defs in defnod[] for (i = 0; i < deftop; i++) { elem *tn = defnod[i].DNelem; elem *tn1; if (tn == n) ni = i; if (!OTassign(tn->Eoper)) continue; // If def of same variable, kill that def tn1 = tn->E1; if (tn1->Eoper != OPind || t->Ejty != tn1->Ejty) continue; if (KILL) vec_setbit(i,KILL); vec_clearbit(i,GEN); } assert(ni != -1); } #endif else { /* Set bit in GEN for this def */ for (i = 0; 1; i++) { assert(i < deftop); // should find n in defnod[] if (defnod[i].DNelem == n) { ni = i; break; } } } vec_setbit(ni,GEN); // set bit in GEN for this def } /*************************** * Mark all elems as not being loop invariant. */ STATIC void unmarkall(elem *e) { for (; 1; e = e->E1) { assert(e); e->Nflags &= ~NFLli; /* unmark this elem */ if (OTunary(e->Eoper)) continue; else if (OTbinary(e->Eoper)) { unmarkall(e->E2); continue; } return; } } /******************************* * Take a RD vector and filter out all RDs but * ones that are defs of symbol s. 
* Output: * f */ #if 0 // replaced by listrds() void filterrd(vec_t f,vec_t rd,symbol *s) { register unsigned i; register elem *n; vec_copy(f,rd); #if UNAMBIG foreach (i,deftop,f) /* for each def in f */ { n = defnod[i].DNelem; /* the definition elem */ elem_debug(n); if (n->Eoper == OPasm) // OPasm defs always reach (sigh) continue; /* Clear bit if it's not an unambiguous def of si */ if (OTassign(n->Eoper)) /* if assignment elem */ { if (!(n->E1->Eoper == OPvar && n->E1->EV.sp.Vsym == s )) vec_clearbit(i,f); } else /* else ambiguous def */ vec_clearbit(i,f); // and couldn't def this var } #else assert(0); /* not implemented */ #endif } #endif /******************************* * Take a RD vector and filter out all RDs but * ones that are possible defs of OPind elem e. * Output: * f */ #if 0 STATIC void filterrdind(vec_t f,vec_t rd,elem *e) { unsigned i; elem *n; tym_t jty = e->Ejty; vec_copy(f,rd); #if UNAMBIG foreach (i,deftop,f) // for each def in f { n = defnod[i].DNelem; // the definition elem elem_debug(n); if (n->Eoper == OPasm) // OPasm defs always reach (sigh) continue; // Clear bit if it's not an unambiguous def of si if (OTassign(n->Eoper)) // if assignment elem { elem *n1 = n->E1; if (n1->Eoper == OPind) { if (jty && n1->Ejty && jty != n1->Ejty) vec_clearbit(i,f); } else if (n1->Eoper == OPvar) { if (jty || n1->EV.sp.Vsym->Sflags & SFLunambig) vec_clearbit(i,f); } } else if (OTcall(n->Eoper) && el_noreturn(n)) vec_clearbit(i,f); } #else assert(0); // not implemented #endif } #endif /******************************** * Return TRUE if there are any refs of v in n before nstop is encountered. 
* Input: * refstop = -1 */ static int refstop; /* flag to stop refs() */ STATIC bool refs(symbol *v,elem *n,elem *nstop) { register bool f; register unsigned op; symbol_debug(v); elem_debug(n); assert(symbol_isintab(v)); assert(v->Ssymnum < globsym.top); assert(n); op = n->Eoper; #if UNAMBIG if (refstop == 0) return FALSE; f = FALSE; if (OTunary(op)) f = refs(v,n->E1,nstop); else if (OTbinary(op)) { if (ERTOL(n)) /* watch order of evaluation */ { /* Note that (OPvar = e) is not a ref of OPvar, whereas */ /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */ /* a ref of OPvar, etc. */ f = refs(v,n->E2,nstop); if (!f) { if (op == OPeq) { if (n->E1->Eoper != OPvar) f = refs(v,n->E1->E1,nstop); } else f = refs(v,n->E1,nstop); } } else f = refs(v,n->E1,nstop) || refs(v,n->E2,nstop); } if (n == nstop) refstop = 0; else if (n->Eoper == OPvar) /* if variable reference */ return v == n->EV.sp.Vsym; else if (op == OPasm) /* everything is referenced */ return TRUE; return f; #else assert(0); #endif } /************************* * Move LIs to preheader. * Conditions to be satisfied for code motion are: * 1) All exit blocks are dominated (TRUE before this is called). * -- OR -- * 2) Variable assigned by a statement is not live on entering * any successor outside the loop of any exit block of the * loop. * * 3) Cannot move assignment to variable if there are any other * assignments to that variable within the loop (TRUE or * assignment would not have been marked LI). * 4) Cannot move assignments to a variable if there is a use * of that variable in this loop that is reached by any other * def of it. * 5) Cannot move expressions that have side effects. * 6) Do not move assignments to variables that could be affected * by ambiguous defs. 
* 7) It is not worth it to move expressions of the form:
 *              (var == const)
 * Input:
 *      n       the elem we're considering moving
 *      b       the block this elem is in
 *      l       the loop we're in
 *      *pdomexit flags
 *              bit 0:  1       this branch is always executed
 *                      0       this branch is only sometimes executed
 *              bit 1:  1       do not move LIs that could throw exceptions
 *                              or cannot be moved past possibly thrown exceptions
 * Output:
 *      *pdomexit       revised flags (the function itself returns nothing);
 *                      bit 1 gets set when an OPcall/OPucall is walked, or
 *                      an assignment through a non-OPvar lvalue
 */

STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit)
{
    register unsigned i,j,op;
    register vec_t tmp;
    register elem *ne,*t,*n2;
    register list_t nl;
    symbol *v;
    tym_t ty;

Lnextlis:                               // tail "recursion": n has been replaced by a child
    //if (isLI(n)) { printf("movelis("); WReqn(n); printf(")\n"); }
    assert(l && n);
    elem_debug(n);
    op = n->Eoper;
    switch (op)
    {
        case OPvar:
        case OPconst:
        case OPrelconst:
            goto Lret;

        case OPandand:
        case OPoror:
        case OPcond:
        {
            int domexit;

            movelis(n->E1,b,l,pdomexit);        // always executed
            domexit = *pdomexit & ~1;           // sometimes executed
            movelis(n->E2,b,l,&domexit);
            *pdomexit |= domexit & 2;           // propagate only bit 1 back up
            goto Lret;
        }

        case OPeq:
            // Do loop invariant assignments
            if (isLI(n) && n->E1->Eoper == OPvar)
            {
                v = n->E1->EV.sp.Vsym;          // the symbol being assigned

#ifdef UNAMBIG
                if (!(v->Sflags & SFLunambig)) goto L3;         // case 6
#endif

                // If case 4 is not satisfied, return

                // Function parameters have an implied definition prior to the
                // first block of the function. Unfortunately, the rd vector
                // does not take this into account. Therefore, we assume the
                // worst and reject assignments to function parameters.
                if (v->Sclass == SCparameter || v->Sclass == SCregpar || v->Sclass == SCfastpar)
                    goto L3;

                if (el_sideeffect(n->E2)) goto L3;              // case 5

                // If case 1 or case 2 is not satisfied, return

                if (!(*pdomexit & 1))                   // if not case 1
                {
                    foreach (i,dfotop,l->Lexit)         // for each exit block
                    {
                        register list_t bl;

                        for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
                        {
                            block *s;           // successor to exit block

                            s = list_block(bl);
                            if (!vec_testbit(s->Bdfoidx,l->Lloop) &&
                                (!symbol_isintab(v) ||
                                 vec_testbit(v->Ssymnum,s->Binlv))) // if v is live on exit
                                    goto L3;
                        }
                    }
                }

                tmp = vec_calloc(deftop);
                foreach (i,dfotop,l->Lloop)     // for each block in loop
                {
                    if (dfo[i] == b)            // except this one
                        continue;

                    // if there are any RDs of v in Binrd other than n
                    //    and there are any refs of v in that block
                    //        can't move the assignment

                    //filterrd(tmp,dfo[i]->Binrd,v);
                    listrds(dfo[i]->Binrd,n->E1,tmp);
                    foreach (j,deftop,tmp)      // for each RD of v in Binrd
                    {
                        if (defnod[j].DNelem == n)
                            continue;
                        refstop = -1;           // arm refs()
                        if (dfo[i]->Belem &&
                            refs(v,dfo[i]->Belem,(elem *)NULL)) //if refs of v
                        {
                            vec_free(tmp);
                            goto L3;
                        }
                        break;                  // one foreign RD is enough to decide
                    }
                } // foreach

                // if there are any RDs of v in b->Binrd other than n
                //    and there are any refs of v in b before the assignment n
                //        can't move this assignment

                //filterrd(tmp,b->Binrd,v);
                listrds(b->Binrd,n->E1,tmp);
                foreach (j,deftop,tmp)          // for each RD of v in Binrd
                {
                    if (defnod[j].DNelem == n)
                        continue;
                    refstop = -1;               // arm refs(); it stops at n
                    if (b->Belem && refs(v,b->Belem,n))
                    {
                        vec_free(tmp);
                        goto L3;                // can't move it
                    }
                    break;                      // avoid redundant looping
                }
                vec_free(tmp);

                // We have an LI assignment, n.
                // Check to see if the rvalue is already in the preheader.
                for (nl = l->Llis; nl; nl = list_next(nl))
                {
                    if (el_match(n->E2,list_elem(nl)->E2))
                    {
                        // Reuse the variable that already holds this value
                        el_free(n->E2);
                        n->E2 = el_calloc();
                        el_copy(n->E2,list_elem(nl)->E1);
                        cmes("LI assignment rvalue was replaced\n");
                        doflow = TRUE;
                        changes++;
                        break;
                    }
                }

                // move assignment elem to preheader
                cmes("Moved LI assignment ");
#ifdef DEBUG
                if (debugc)
                {
                    WReqn(n);
                    dbg_printf(";\n");
                }
#endif
                changes++;
                doflow = TRUE;                  // redo flow analysis
                ne = el_calloc();
                el_copy(ne,n);                  // create assignment elem
                assert(l->Lpreheader);          // make sure there is one
                appendelem(ne,&(l->Lpreheader->Belem)); // append ne to preheader
                list_prepend(&l->Llis,ne);      // remember it for later matches

                el_copy(n,ne->E1);      // replace n with just a reference to v
                goto Lret;
            } // if
            break;

        case OPcall:
        case OPucall:
            *pdomexit |= 2;
            break;
    }

L3:
    // Do leaves of non-LI expressions, leaves of = elems that didn't
    // meet the invariant assignment removal criteria, and don't do leaves
    if (OTleaf(op))
        goto Lret;
    if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
      // These are usually addressing modes, so moving them is a net loss
      (I32 && op == OPshl && n->E2->Eoper == OPconst && el_tolong(n->E2) <= 3ull)
     )
    {
        // n itself is not moved; descend into its operands instead
        if (OTassign(op))
        {
            elem *n1 = n->E1;
            elem *n11;

            if (OTbinary(op))
                movelis(n->E2,b,l,pdomexit);

            // Do lvalue only if it is an expression
            if (n1->Eoper == OPvar)
                goto Lret;
            n11 = n1->E1;
            if (OTbinary(n1->Eoper))
            {
                movelis(n11,b,l,pdomexit);
                n = n1->E2;
            }
            // If *(x + c), just make x the LI, not the (x + c).
            // The +c comes free with the addressing mode.
            else if (n1->Eoper == OPind &&
                    isLI(n11) &&
                    n11->Eoper == OPadd &&
                    n11->E2->Eoper == OPconst
                    )
            {
                n = n11->E1;
            }
            else
                n = n11;
            movelis(n,b,l,pdomexit);
            // n1 is never an OPvar here (that case returned above)
            if (b->Btry || !(n1->Eoper == OPvar && symbol_isintab(n1->EV.sp.Vsym)))
            {
                //printf("assign to global => domexit |= 2\n");
                *pdomexit |= 2;
            }
        }
        else if (OTunary(op))
        {
            elem *e1 = n->E1;

            // If *(x + c), just make x the LI, not the (x + c).
            // The +c comes free with the addressing mode.
            if (op == OPind &&
                isLI(e1) &&
                e1->Eoper == OPadd &&
                e1->E2->Eoper == OPconst
               )
            {
                n = e1->E1;
            }
            else
                n = e1;
        }
        else if (OTbinary(op))
        {
            movelis(n->E1,b,l,pdomexit);
            n = n->E2;
        }
        goto Lnextlis;
    }

    if (el_sideeffect(n))
        goto Lret;

#if 0
    printf("*pdomexit = %d\n",*pdomexit);
    if (*pdomexit & 2)
    {
        // If any indirections, can't LI it

        // If this operand has already been indirected, we can let
        // it pass.
        Symbol *s;

        printf("looking at:\n");
        elem_print(n);
        s = el_basesym(n->E1);
        if (s)
        {
            for (nl = l->Llis; nl; nl = list_next(nl))
            {
                elem *el;
                tym_t ty2;

                el = list_elem(nl);
                el = el->E2;
                elem_print(el);
                if (el->Eoper == OPind && el_basesym(el->E1) == s)
                {
                    printf(" pass!\n");
                    goto Lpass;
                }
            }
        }
        printf(" skip!\n");
        goto Lret;

     Lpass:
        ;
    }
#endif

    // Move the LI expression to the preheader
    cmes("Moved LI expression ");
#ifdef DEBUG
    if (debugc)
    {
        WReqn(n);
        dbg_printf(";\n");
    }
#endif

    // See if it's already been moved
    ty = n->Ety;
    for (nl = l->Llis; nl; nl = list_next(nl))
    {
        elem *el;
        tym_t ty2;

        el = list_elem(nl);
        //printf("existing LI: "); WReqn(el); printf("\n");
        ty2 = el->E2->Ety;
        if (tysize(ty) == tysize(ty2))
        {
            // Temporarily give the saved rvalue n's type so el_match()
            // compares like with like; restored to ty2 on both paths
            el->E2->Ety = ty;
            if (el_match(n,el->E2))
            {
                el->E2->Ety = ty2;
                if (!OTleaf(n->Eoper))
                {
                    el_free(n->E1);
                    if (OTbinary(n->Eoper))
                        el_free(n->E2);
                }
                el_copy(n,el->E1);      // make copy of temp
                n->Ety = ty;
#ifdef DEBUG
                if (debugc)
                {
                    dbg_printf("Already moved: LI expression replaced with ");
                    WReqn(n);
                    dbg_printf("\nPreheader %d expression %p ",
                    l->Lpreheader->Bdfoidx,l->Lpreheader->Belem);
                    WReqn(l->Lpreheader->Belem);
                    dbg_printf("\n");
                }
#endif
                changes++;
                doflow = TRUE;          // redo flow analysis
                goto Lret;
            }
            el->E2->Ety = ty2;
        }
    }

    if (!(*pdomexit & 1))               // if only sometimes executed
    {
        cmes(" doesn't dominate exit\n");
        goto Lret;                      // don't move LI
    }

    if (tyaggregate(n->Ety))            // no temporaries of aggregate type
        goto Lret;

    changes++;
    doflow = TRUE;                      // redo flow analysis

    t = el_alloctmp(n->Ety);            /* allocate temporary t */
#if DEBUG
    cmes2("movelis() introduced new variable '%s' of type ",t->EV.sp.Vsym->Sident);
    if (debugc) WRTYxx(t->Ety);
    cmes("\n");
#endif
    n2 = el_calloc();
    el_copy(n2,n);                      /* create copy n2 of n  */
    ne = el_bin(OPeq,t->Ety,t,n2);      /* create elem t=n2     */
    assert(l->Lpreheader);              /* make sure there is one */
    appendelem(ne,&(l->Lpreheader->Belem)); /* append ne to preheader */
#ifdef DEBUG
    if (debugc)
    {
        dbg_printf("Preheader %d expression %p\n\t",
        l->Lpreheader->Bdfoidx,l->Lpreheader->Belem);
        WReqn(l->Lpreheader->Belem);
        dbg_printf("\nLI expression replaced with "); WReqn(t);
        dbg_printf("\n");
    }
#endif
    el_copy(n,t);                       /* replace this elem with t */

    // Remember LI expression in elem list
    list_prepend(&l->Llis,ne);

Lret:
    ;
}

/***************************
 * Append elem to existing elem using an OPcomma elem.
 * Every OPcomma passed on the way down the right side gets its
 * type set to n's type.
 * Input:
 *      n       elem to append
 *      *pn     elem to append to
 */

STATIC void appendelem(register elem *n,elem **pn)
{
    assert(n && pn);
    if (*pn)                                    /* if this elem exists  */
    {
        while ((*pn)->Eoper == OPcomma)         /* while we see OPcomma elems */
        {
            (*pn)->Ety = n->Ety;
            pn = &((*pn)->E2);                  /* cruise down right side */
        }
        *pn = el_bin(OPcomma,n->Ety,*pn,n);
    }
    else
        *pn = n;                                /* else create a elem   */
}

/************** LOOP INDUCTION VARIABLES **********************/

/***************************
 * Allocate famlist.
 * Takes a zeroed entry from the free list if there is one,
 * otherwise gets a new one from mem_calloc().
 */

famlist *famlist::freelist = NULL;

famlist *famlist::mycalloc()
{
    famlist *fl;

    if (freelist)
    {
        fl = freelist;
        freelist = fl->FLnext;
        memset(fl,0,sizeof(famlist));
    }
    else
        fl = (famlist *) mem_calloc(sizeof(famlist));
    return fl;
}

/***************************
 * Allocate Iv.
 * Takes a zeroed entry from the free list if there is one,
 * otherwise gets a new one from mem_calloc().
 */

Iv *Iv::freelist = NULL;

Iv *Iv::mycalloc()
{
    Iv *iv;

    if (freelist)
    {
        iv = freelist;
        freelist = iv->IVnext;
        memset(iv,0,sizeof(Iv));
    }
    else
        iv = (Iv *) mem_calloc(sizeof(Iv));
    return iv;
}

/*********************
 * Free iv list.
*/ STATIC void freeivlist(register Iv *biv) { register Iv *bivnext; while (biv) { register famlist *fl,*fln; for (fl = biv->IVfamily; fl; fl = fln) { el_free(fl->c1); el_free(fl->c2); fln = fl->FLnext; fl->FLnext = famlist::freelist; famlist::freelist = fl; } bivnext = biv->IVnext; biv->IVnext = Iv::freelist; Iv::freelist = biv; biv = bivnext; } } /**************************** * Create a new famlist entry. */ STATIC famlist * newfamlist(tym_t ty) { register famlist *fl; union eve c; memset(&c,0,sizeof(c)); fl = famlist::mycalloc(); fl->FLty = ty; switch (tybasic(ty)) { case TYfloat: c.Vfloat = 1; break; case TYdouble: case TYdouble_alias: c.Vdouble = 1; break; case TYldouble: c.Vldouble = 1; break; #if _MSDOS || __OS2__ || _WIN32 // if no byte ordering problems #if JHANDLE case TYjhandle: #endif #if TARGET_SEGMENTED case TYsptr: case TYcptr: case TYnptr: case TYfptr: case TYvptr: #endif /* Convert pointers to integrals to avoid things like */ /* multiplying pointers */ ty = TYptrdiff; /* FALL-THROUGH */ default: c.Vlong = 1; break; #if TARGET_SEGMENTED case TYhptr: ty = TYlong; c.Vlong = 1; break; #endif #else case TYbool: case TYchar: case TYschar: case TYuchar: c.Vchar = 1; break; case TYshort: case TYushort: case TYchar16: case TYwchar_t: // BUG: what about 4 byte wchar_t's? 
c.Vshort = 1; break; #if JHANDLE case TYjhandle: #endif #if TARGET_SEGMENTED case TYsptr: case TYcptr: case TYfptr: case TYvptr: #endif case TYnptr: case TYnullptr: ty = TYint; if (I64) ty = TYllong; /* FALL-THROUGH */ case TYint: case TYuint: c.Vint = 1; break; #if TARGET_SEGMENTED case TYhptr: ty = TYlong; #endif case TYlong: case TYulong: case TYdchar: default: c.Vlong = 1; break; #if 0 default: printf("ty = x%x\n", tybasic(ty)); assert(0); #endif #endif } fl->c1 = el_const(ty,&c); /* c1 = 1 */ c.Vldouble = 0; if (typtr(ty)) { ty = TYint; #if TARGET_SEGMENTED if (tybasic(ty) == TYhptr) ty = TYlong; #endif if (I64) ty = TYllong; } fl->c2 = el_const(ty,&c); /* c2 = 0 */ return fl; } /*************************** * Remove induction variables from loop l. * Loop invariant removal should have been done just previously. */ STATIC void loopiv(register loop *l) { cmes2("loopiv(%p)\n",l); assert(l->Livlist == NULL && l->Lopeqlist == NULL); elimspec(l); if (doflow) { flowrd(); /* compute reaching defs */ flowlv(); /* compute live variables */ flowae(); // compute available expressions doflow = FALSE; } findbasivs(l); /* find basic induction variables */ findopeqs(l); // find op= variables findivfams(l); /* find IV families */ elimfrivivs(l); /* eliminate less useful family IVs */ intronvars(l); /* introduce new variables */ elimbasivs(l); /* eliminate basic IVs */ if (!addblk) // adding a block changes the Binlv elimopeqs(l); // eliminate op= variables freeivlist(l->Livlist); // free up IV list l->Livlist = NULL; freeivlist(l->Lopeqlist); // free up list l->Lopeqlist = NULL; /* Do copy propagation and dead assignment elimination */ /* upon return to optfunc() */ } /************************************* * Find basic IVs of loop l. * A basic IV x of loop l is a variable x which has * exactly one assignment within l of the form: * x += c or x -= c, where c is either a constant * or a LI. 
* Input: * defnod[] loaded with all the definition elems of the loop */ STATIC void findbasivs(loop *l) { vec_t poss,notposs; elem *n; unsigned i,j; bool ambdone; assert(l); ambdone = FALSE; poss = vec_calloc(globsym.top); notposs = vec_calloc(globsym.top); /* vector of all variables */ /* (initially all unmarked) */ /* for each def in defnod[] that is within loop l */ for (i = 0; i < deftop; i++) { if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop)) continue; /* def is not in the loop */ n = defnod[i].DNelem; elem_debug(n); if (OTassign(n->Eoper) && n->E1->Eoper == OPvar) { symbol *s; /* if unambiguous def */ s = n->E1->EV.sp.Vsym; if (symbol_isintab(s)) { SYMIDX v; v = n->E1->EV.sp.Vsym->Ssymnum; if ((n->Eoper == OPaddass || n->Eoper == OPminass || n->Eoper == OPpostinc || n->Eoper == OPpostdec) && (cnst(n->E2) || /* if x += c or x -= c */ n->E2->Eoper == OPvar && isLI(n->E2))) { if (vec_testbit(v,poss)) /* We've already seen this def elem, */ /* therefore there is more than one */ /* def of v within the loop, therefore */ /* v is not a basic IV. */ vec_setbit(v,notposs); else vec_setbit(v,poss); } else /* else mark as not possible */ vec_setbit(v,notposs); } } else /* else ambiguous def */ { /* mark any vars that could be affected by */ /* this def as not possible */ if (!ambdone) /* avoid redundant loops */ { for (j = 0; j < globsym.top; j++) { if (!(globsym.tab[j]->Sflags & SFLunambig)) vec_setbit(j,notposs); } ambdone = TRUE; } } } #if 0 dbg_printf("poss "); vec_println(poss); dbg_printf("notposs "); vec_println(notposs); #endif vec_subass(poss,notposs); /* poss = poss - notposs */ /* create list of IVs */ foreach (i,globsym.top,poss) /* for each basic IV */ { register Iv *biv; symbol *s; /* Skip if we don't want it to be a basic IV (see funcprev()) */ s = globsym.tab[i]; assert(symbol_isintab(s)); if (s->Sflags & SFLnotbasiciv) continue; // Do not use aggregates as basic IVs. 
This is because the other loop // code doesn't check offsets into symbols, (assuming everything // is at offset 0). We could perhaps amend this by allowing basic IVs // if the struct consists of only one data member. if (tyaggregate(s->ty())) continue; biv = Iv::mycalloc(); biv->IVnext = l->Livlist; l->Livlist = biv; // link into list of IVs biv->IVbasic = s; // symbol of basic IV cmes3("Symbol '%s' (%d) is a basic IV, ",s->Sident ? (char *)s->Sident : "",i); /* We have the sym idx of the basic IV. We need to find */ /* the parent of the increment elem for it. */ /* First find the defnod[] */ for (j = 0; j < deftop; j++) { /* If defnod is a def of i and it is in the loop */ if (defnod[j].DNelem->E1 && /* OPasm are def nodes */ defnod[j].DNelem->E1->EV.sp.Vsym == s && vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop)) goto L1; } assert(0); /* should have found it */ /* NOTREACHED */ L1: biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem)); assert(s == (*biv->IVincr)->E1->EV.sp.Vsym); #ifdef DEBUG if (debugc) { dbg_printf("Increment elem is: "); WReqn(*biv->IVincr); dbg_printf("\n"); } #endif } vec_free(poss); vec_free(notposs); } /************************************* * Find op= elems of loop l. * Analogous to findbasivs(). * Used to eliminate useless loop code normally found in benchmark programs. 
* Input: * defnod[] loaded with all the definition elems of the loop */ STATIC void findopeqs(loop *l) { vec_t poss,notposs; elem *n; unsigned i,j; bool ambdone; assert(l); ambdone = FALSE; poss = vec_calloc(globsym.top); notposs = vec_calloc(globsym.top); // vector of all variables // (initially all unmarked) // for each def in defnod[] that is within loop l for (i = 0; i < deftop; i++) { if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop)) continue; // def is not in the loop n = defnod[i].DNelem; elem_debug(n); if (OTopeq(n->Eoper) && n->E1->Eoper == OPvar) { symbol *s; // if unambiguous def s = n->E1->EV.sp.Vsym; if (symbol_isintab(s)) { SYMIDX v; v = n->E1->EV.sp.Vsym->Ssymnum; { if (vec_testbit(v,poss)) // We've already seen this def elem, // therefore there is more than one // def of v within the loop, therefore // v is not a basic IV. vec_setbit(v,notposs); else vec_setbit(v,poss); } } } else // else ambiguous def { // mark any vars that could be affected by // this def as not possible if (!ambdone) // avoid redundant loops { for (j = 0; j < globsym.top; j++) { if (!(globsym.tab[j]->Sflags & SFLunambig)) vec_setbit(j,notposs); } ambdone = TRUE; } } } // Don't use symbols already in Livlist for (Iv *iv = l->Livlist; iv; iv = iv->IVnext) { symbol *s; s = iv->IVbasic; vec_setbit(s->Ssymnum,notposs); } #if 0 dbg_printf("poss "); vec_println(poss); dbg_printf("notposs "); vec_println(notposs); #endif vec_subass(poss,notposs); // poss = poss - notposs // create list of IVs foreach (i,globsym.top,poss) // for each opeq IV { register Iv *biv; symbol *s; s = globsym.tab[i]; assert(symbol_isintab(s)); // Do not use aggregates as basic IVs. This is because the other loop // code doesn't check offsets into symbols, (assuming everything // is at offset 0). We could perhaps amend this by allowing basic IVs // if the struct consists of only one data member. 
if (tyaggregate(s->ty())) continue; biv = Iv::mycalloc(); biv->IVnext = l->Lopeqlist; l->Lopeqlist = biv; // link into list of IVs biv->IVbasic = s; // symbol of basic IV cmes3("Symbol '%s' (%d) is an opeq IV, ",s->Sident ? (char *)s->Sident : "",i); // We have the sym idx of the basic IV. We need to find // the parent of the increment elem for it. // First find the defnod[] for (j = 0; j < deftop; j++) { // If defnod is a def of i and it is in the loop if (defnod[j].DNelem->E1 && // OPasm are def nodes defnod[j].DNelem->E1->EV.sp.Vsym == s && vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop)) goto L1; } assert(0); // should have found it // NOTREACHED L1: biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem)); assert(s == (*biv->IVincr)->E1->EV.sp.Vsym); #ifdef DEBUG if (debugc) { dbg_printf("Opeq elem is: "); WReqn(*biv->IVincr); dbg_printf("\n"); } #endif Lcont: ; } vec_free(poss); vec_free(notposs); } /***************************** * Find families for each basic IV. * An IV family is a list of elems of the form * c1*X+c2, where X is a basic induction variable. * Note that we do not do divides, because of roundoff error problems. */ STATIC void findivfams(register loop *l) { register Iv *biv; register unsigned i; register famlist *fl; cmes2("findivfams(%p)\n",l); for (biv = l->Livlist; biv; biv = biv->IVnext) { foreach (i,dfotop,l->Lloop) /* for each block in loop */ if (dfo[i]->Belem) ivfamelems(biv,&(dfo[i]->Belem)); /* Fold all the constant expressions in c1 and c2. */ for (fl = biv->IVfamily; fl; fl = fl->FLnext) { fl->c1 = doptelem(fl->c1,GOALvalue | GOALagain); fl->c2 = doptelem(fl->c2,GOALvalue | GOALagain); } } } /************************* * Tree walking support routine for findivfams(). 
* biv =  basic induction variable pointer
 *      pn      pointer to elem
 * The walk is bottom-up: operands are visited before *pn itself, so a
 * family entry created for an operand can be extended by its parent.
 */

STATIC void ivfamelems(register Iv *biv,register elem **pn)
{
    register unsigned op;
    register tym_t ty,c2ty;
    register famlist *f;
    register elem *n,*n1,*n2;

    assert(pn);
    n = *pn;
    assert(biv && n);
    op = n->Eoper;
    if (OTunary(op))
    {
        ivfamelems(biv,&n->E1);
        n1 = n->E1;
        n2 = NULL;                      // only dereferenced below for binary ops
    }
    else if (OTbinary(op))
    {
        ivfamelems(biv,&n->E1);
        ivfamelems(biv,&n->E2);         /* LTOR or RTOL order is unimportant */
        n1 = n->E1;
        n2 = n->E2;
    }
    else                                /* else leaf elem               */
        return;                         /* which can't be in the family */

    if (op == OPmul || op == OPadd || op == OPmin ||
        op == OPneg || op == OPshl)
    {
        /* Note that we are wimping out and not considering             */
        /* LI variables as part of c1 and c2, but only constants.       */

        ty = n->Ety;

        /* Since trees are canonicalized, basic induction variables     */
        /* will only appear on the left.                                */

        /* Improvement:                                                 */
        /* We wish to pick up the cases (biv + li), (biv - li) and      */
        /* (li + biv). OPmul and LS with bivs are out, since if we      */
        /* try to eliminate the biv, and the loop test is a >, >=,      */
        /* <, <=, we have a problem since we don't know if the li       */
        /* is negative. (Would have to call swaprel() on it.)           */

        /* If we have (li + var), swap the leaves.                      */
        if (op == OPadd && isLI(n1) && n1->Eoper == OPvar && n2->Eoper == OPvar)
        {
            n->E1 = n2;
            n2 = n->E2 = n1;
            n1 = n->E1;
        }

#if TARGET_SEGMENTED
        // Get rid of case where we painted a far pointer to a long
        if (op == OPadd || op == OPmin)
        {
            int sz;

            sz = tysize(ty);
            if (sz == tysize[TYfptr] && !tyfv(ty) &&
                (sz != tysize(n1->Ety) || sz != tysize(n2->Ety)))
                return;
        }
#endif

        /* Look for function of basic IV (-biv or biv op const)         */
        if (n1->Eoper == OPvar && n1->EV.sp.Vsym == biv->IVbasic)
        {
            if (op == OPneg)
            {
                register famlist *fl;

                cmes2("found (-biv), elem %p\n",n);
                fl = newfamlist(ty);    // starts out as c1 = 1, c2 = 0
                fl->FLivty = n1->Ety;
                fl->FLpelem = pn;
                fl->FLnext = biv->IVfamily;
                biv->IVfamily = fl;     // link in at head of family
                fl->c1 = el_una(op,ty,fl->c1); /* c1 = -1       */
            }
            else if (n2->Eoper == OPconst ||
                     isLI(n2) && (op == OPadd || op == OPmin))
            {
                register famlist *fl;

#ifdef DEBUG
                if (debugc)
                {
                    dbg_printf("found (biv op const), elem (");
                    WReqn(n);
                    dbg_printf(");\n");
                    dbg_printf("Types: n1=");
                    WRTYxx(n1->Ety);
                    dbg_printf(" ty=");
                    WRTYxx(ty);
                    dbg_printf(" n2=");
                    WRTYxx(n2->Ety);
                    dbg_printf("\n");
                }
#endif
                fl = newfamlist(ty);    // starts out as c1 = 1, c2 = 0
                fl->FLivty = n1->Ety;
                fl->FLpelem = pn;
                fl->FLnext = biv->IVfamily;
                biv->IVfamily = fl;     // link in at head of family
                switch (op)
                {
                    case OPadd:         /* c2 = right           */
                        c2ty = n2->Ety;
                        if (typtr(fl->c2->Ety))
                            c2ty = fl->c2->Ety;
                        goto L1;
                    case OPmin:         /* c2 = -right          */
                        c2ty = fl->c2->Ety;
                        /* Check for subtracting two pointers */
                        if (typtr(c2ty) && typtr(n2->Ety))
                        {
#if TARGET_SEGMENTED
                            if (tybasic(c2ty) == TYhptr)
                                c2ty = TYlong;
                            else
#endif
                                c2ty = I64 ? TYllong : TYint;
                        }
                    L1:
                        // c2 = (c2 op right), folded later by findivfams()
                        fl->c2 = el_bin(op,c2ty,fl->c2,el_copytree(n2));
                        break;
                    case OPmul:         /* c1 = right           */
                    case OPshl:         /* c1 = 1 << right      */
                        fl->c1 = el_bin(op,ty,fl->c1,el_copytree(n2));
                        break;
                    default:
                        assert(0);
                }
            }
        }

        /* Look for function of existing IV                             */

        for (f = biv->IVfamily; f; f = f->FLnext)
        {
            if (*f->FLpelem != n1)      /* not it               */
                continue;

            /* Look for (f op constant)     */
            if (op == OPneg)
            {
                cmes2("found (-f), elem %p\n",n);
                /* c1 = -c1; c2 = -c2; */
                f->c1 = el_una(OPneg,ty,f->c1);
                f->c2 = el_una(OPneg,ty,f->c2);
                f->FLty = ty;
                f->FLpelem = pn;        /* replace with new IV  */
            }
            else if (n2->Eoper == OPconst ||
                     isLI(n2) && (op == OPadd || op == OPmin))
            {
#ifdef DEBUG
                if (debugc)
                {
                    dbg_printf("found (f op const), elem (");
                    WReqn(n);
                    assert(*pn == n);
                    dbg_printf(");\n");
                    elem_print(n);
                }
#endif
                // OPmul/OPshl apply to both c1 and c2;
                // OPadd/OPmin apply to c2 only
                switch (op)
                {
                    case OPmul:
                    case OPshl:
                        f->c1 = el_bin(op,ty,f->c1,el_copytree(n2));
                        break;
                    case OPadd:
                    case OPmin:
                        break;
                    default:
                        assert(0);
                }
                f->c2 = el_bin(op,ty,f->c2,el_copytree(n2));
                f->FLty = ty;
                f->FLpelem = pn;        /* replace with new IV  */
            } /* else if */
        } /* for */
    } /* if */
}

/*********************************
 * Eliminate frivolous family ivs, that is,
 * if we can't eliminate the BIV, then eliminate family ivs that
 * differ from it only by a constant.
*/

// "Eliminated" here means the family entry's FLtemp is set to FLELIM,
// which makes intronvars() skip it.
STATIC void elimfrivivs(loop *l)
{
    Iv *biv;

    for (biv = l->Livlist; biv; biv = biv->IVnext)
    {
        int nfams;
        famlist *fl;
        int nrefs;

        cmes("elimfrivivs()\n");
        /* Compute number of family ivs for biv */
        nfams = 0;
        for (fl = biv->IVfamily; fl; fl = fl->FLnext)
            nfams++;
        cmes2("nfams = %d\n",nfams);

        /* Compute number of references to biv  */
        if (onlyref(biv->IVbasic,l,*biv->IVincr,&nrefs))
            nrefs--;
        cmes2("nrefs = %d\n",nrefs);
        assert(nrefs + 1 >= nfams);
        if (nrefs > nfams ||            // if we won't eliminate the biv
            (!I16 && nrefs == nfams))
        {
            /* Eliminate any family ivs that only differ by a constant  */
            /* from biv                                                 */
            for (fl = biv->IVfamily; fl; fl = fl->FLnext)
            {
                elem *ec1 = fl->c1;
                targ_llong c;

                if (elemisone(ec1) ||
                    // Eliminate fl's that can be represented by
                    // an addressing mode
                    (!I16 && ec1->Eoper == OPconst && tyintegral(ec1->Ety) &&
                     ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
                    )
                   )
                {
                    fl->FLtemp = FLELIM;
#ifdef DEBUG
                    if (debugc)
                    {
                        dbg_printf("Eliminating frivolous IV ");
                        WReqn(*fl->FLpelem);
                        dbg_printf("\n");
                    }
#endif
                }
            }
        }
    }
}

/******************************
 * Introduce new variables.
 * For each family member not already eliminated, and not expressible
 * via funcprev(), a temporary T is allocated, initialized in the
 * preheader, stepped alongside the basic IV, and substituted for the
 * family elem.
 */

STATIC void intronvars(loop *l)
{
    famlist *fl;
    Iv *biv;
    elem *T, *ne, *t2, *C2, *cmul;
    tym_t ty,tyr;

    cmes2("intronvars(%p)\n",l);
    for (biv = l->Livlist; biv; biv = biv->IVnext)      // for each basic IV
    {
        register elem *bivinc = *biv->IVincr;   /* ptr to increment elem */

        for (fl = biv->IVfamily; fl; fl = fl->FLnext)
        {                               /* for each IV in family of biv  */
            if (fl->FLtemp == FLELIM)   /* if already eliminated         */
                continue;

            /* If induction variable can be written as a simple function */
            /* of a previous induction variable, skip it.                */
            if (funcprev(biv,fl))
                continue;

            ty = fl->FLty;
            T = el_alloctmp(ty);        /* allocate temporary T          */
            fl->FLtemp = T->EV.sp.Vsym;
#if DEBUG
            cmes2("intronvars() introduced new variable '%s' of type ",T->EV.sp.Vsym->Sident);
            if (debugc) WRTYxx(ty);
            cmes("\n");
#endif

            /* append elem T=biv*C1+C2 to preheader */
            /* ne = biv*C1                          */
            tyr = fl->FLivty;                   /* type of biv              */
            ne = el_var(biv->IVbasic);
            ne->Ety = tyr;
            if (!elemisone(fl->c1))             /* don't multiply ptrs by 1 */
                ne = el_bin(OPmul,tyr,ne,el_copytree(fl->c1));
            if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
                ne = el_una(OP32_16,ty,ne);
            C2 = el_copytree(fl->c2);
            t2 = el_bin(OPadd,ty,ne,C2);        /* t2 = ne + C2         */
            ne = el_bin(OPeq,ty,el_copytree(T),t2);
            appendelem(ne, &(l->Lpreheader->Belem));

            /* prefix T+=C1*C to elem biv+=C                            */
            /* Must prefix in case the value of the expression (biv+=C) */
            /* is used by somebody up the tree.                         */
            cmul = el_bin(OPmul,fl->c1->Ety,el_copytree(fl->c1),
                                          el_copytree(bivinc->E2));
            t2 = el_bin(bivinc->Eoper,ty,el_copytree(T),cmul);
            t2 = doptelem(t2,GOALvalue | GOALagain);
            // The increment elem becomes (t2 , bivinc); IVincr is then
            // re-pointed at the E2 slot, which holds bivinc again
            *biv->IVincr = el_bin(OPcomma,bivinc->Ety,t2,bivinc);
            biv->IVincr = &((*biv->IVincr)->E2);
#ifdef DEBUG
            if (debugc)
            {
                dbg_printf("Replacing elem (");
                WReqn(*fl->FLpelem);
                dbg_printf(") with '%s'\n",T->EV.sp.Vsym->Sident);
                dbg_printf("The init elem is (");
                WReqn(ne);
                dbg_printf(");\nThe increment elem is (");
                WReqn(t2);
                dbg_printf(")\n");
            }
#endif
            el_free(*fl->FLpelem);
            *fl->FLpelem = T;           /* replace elem n with ref to T  */
            doflow = TRUE;              /* redo flow analysis            */
            changes++;
        } /* for */
    } /* for */
}

/*******************************
 * Determine if induction variable can be rewritten as a simple
 * function of a previously generated temporary.
 * This can frequently
 * generate less code than that of an all-new temporary (especially
 * if it is the same as a previous temporary!).
 * Input:
 *      biv             Basic induction variable
 *      fl              Item in biv's family list we are looking at
 * Returns:
 *      FALSE           Caller should create a new induction variable.
* TRUE            *FLpelem is replaced with function of a previous
 *                      induction variable. FLtemp is set to FLELIM to
 *                      indicate this.
 */

STATIC bool funcprev(Iv *biv,famlist *fl)
{
    tym_t tymin;
    int sz;
    famlist *fls;
    elem *e1,*e2,*flse1;

#ifdef DEBUG
    if (debugc)
        dbg_printf("funcprev\n");
#endif
    // Only family members that precede fl in the list are candidates
    for (fls = biv->IVfamily; fls != fl; fls = fls->FLnext)
    {
        assert(fls);                    /* fl must be in list           */
        if (fls->FLtemp == FLELIM)      /* no iv we can use here        */
            continue;

        /* The multipliers must match   */
        if (!el_match(fls->c1,fl->c1))
            continue;

        /* If the c2's match also, we got it easy */
        if (el_match(fls->c2,fl->c2))
        {
            if (tysize(fl->FLty) > tysize(fls->FLtemp->ty()))
                continue;               /* can't increase size of var   */
            flse1 = el_var(fls->FLtemp);
            flse1->Ety = fl->FLty;
            goto L2;
        }

        /* The difference is only in the addition. Therefore, replace
           *fl->FLpelem with:
                case 1:         (fl->c2 + (fls->FLtemp - fls->c2))
                case 2:         (fls->FLtemp + (fl->c2 - fls->c2))
         */
        e1 = fl->c2;
        /* Subtracting relocatables usually generates slow code for     */
        /* linkers that can't handle arithmetic on relocatables.        */
        if (typtr(fls->c2->Ety))
        {
            if (fls->c2->Eoper == OPrelconst &&
                !(fl->c2->Eoper == OPrelconst &&
                  fl->c2->EV.sp.Vsym == fls->c2->EV.sp.Vsym)
               )
                continue;
        }
        flse1 = el_var(fls->FLtemp);
        e2 = flse1;                     /* assume case 1                */
        tymin = e2->Ety;
        if (typtr(fls->c2->Ety))
        {
            if (!typtr(tymin))
            {
                if (typtr(e1->Ety))
                {
                    e1 = e2;
                    e2 = fl->c2;        /* case 2                       */
                }
                else                    /* can't subtract fptr          */
                    goto L1;
            }
#if TARGET_SEGMENTED
            if (tybasic(fls->c2->Ety) == TYhptr)
                tymin = TYlong;
            else
#endif
                tymin = I64 ? TYllong : TYint;         /* type of (ptr - ptr) */
        }

#if TARGET_SEGMENTED
        /* If e1 and fls->c2 are fptrs, and are not from the same       */
        /* segment, we cannot subtract them.                            */
        if (tyfv(e1->Ety) && tyfv(fls->c2->Ety))
        {
            if (e1->Eoper != OPrelconst || fls->c2->Eoper != OPrelconst)
                goto L1;                /* assume expressions have diff segs */
            if (e1->EV.sp.Vsym->Sclass != fls->c2->EV.sp.Vsym->Sclass)
            {
               L1:
                el_free(flse1);
                continue;
            }
        }
#else
        // NOTE(review): when TARGET_SEGMENTED is off this arm is not
        // guarded by any condition, so control always frees flse1 and
        // continues here; the statements from "Some more type checking"
        // down to the L2 label are then reachable only through the
        // "goto L2" above. Confirm whether that is intended.
        L1:
            el_free(flse1);
            continue;
#endif
        /* Some more type checking...   */
        sz = tysize(fl->FLty);
        if (sz != tysize(e1->Ety) &&
            sz != tysize(tymin))
            goto L1;

        /* Do some type checking (can't add pointers and get a pointer!) */
        //if (typtr(fl->FLty) && typtr(e1->Ety) && typtr(tymin))
            //goto L1;
        /* Construct (e1 + (e2 - fls->c2))      */
        flse1 = el_bin(OPadd,fl->FLty,
                            e1,
                            el_bin(OPmin,tymin,
                                    e2,
                                    el_copytree(fls->c2)));
        if (sz < tysize(tymin) && sz == tysize(e1->Ety))
#if TARGET_SEGMENTED
            flse1->E2 = el_una(OPoffset,fl->FLty,flse1->E2);
#else
            assert(0);
#endif

        flse1 = doptelem(flse1,GOALvalue | GOALagain);
        fl->c2 = NULL;                  // fl->c2 is now part of the flse1 tree
    L2:
#ifdef DEBUG
        if (debugc)
        {
            dbg_printf("Replacing ");
            WReqn(*fl->FLpelem);
            dbg_printf(" with ");
            WReqn(flse1);
            dbg_printf("\n");
        }
#endif
        el_free(*fl->FLpelem);
        *fl->FLpelem = flse1;

        /* Fix the iv so when we do loops again, we won't create        */
        /* yet another iv, which is just what funcprev() is supposed    */
        /* to prevent.                                                  */
        fls->FLtemp->Sflags |= SFLnotbasiciv;

        fl->FLtemp = FLELIM;            /* mark iv as being gone        */
        changes++;
        doflow = TRUE;
        return TRUE;                    /* it was replaced              */
    }
    return FALSE;                       /* need to create a new variable */
}

/***********************
 * Eliminate basic IVs.
 * Two cases are handled for each basic IV X whose increment elem has
 * NFLnogoal set:
 *  - onlyref() returns a reference and refcount <= 1: the comparison
 *    is rewritten in terms of a family temporary T, the increment is
 *    dumped, and X is recomputed from T in loop exit successors
 *    where X is live;
 *  - refcount == 0 and X is not live in any exit successor: the
 *    increment elem is replaced using el_selecte2().
 */

STATIC void elimbasivs(register loop *l)
{
    famlist *fl;
    register Iv *biv;
    register unsigned i;
    register tym_t ty;
    register elem **pref,*fofe,*C2;
    symbol *X;
    int refcount;

    cmes2("elimbasivs(%p)\n",l);
    for (biv = l->Livlist; biv; biv = biv->IVnext)      // for each basic IV
    {
        /* Can't eliminate this basic IV if we have a goal for the      */
        /* increment elem.                                              */
        // Be careful about Nflags being in a union...
        if (!((*biv->IVincr)->Nflags & NFLnogoal))
            continue;

        X = biv->IVbasic;
        assert(symbol_isintab(X));
        ty = X->ty();
        pref = onlyref(X,l,*biv->IVincr,&refcount);

        /* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
        if (pref != NULL && refcount <= 1)
        {
            elem *ref;
            tym_t flty;

            fl = biv->IVfamily;
            if (!fl)                    // if no elems in family of biv
                continue;

            ref = *pref;

            /* Replace (X) with (X != 0)                                */
            if (ref->Eoper == OPvar)
                ref = *pref = el_bin(OPne,TYint,ref,el_long(ref->Ety,0L));

            fl = simfl(fl,ty);          /* find simplest elem in family */
            if (!fl)
                continue;

            // Don't do the replacement if we would replace a
            // signed comparison with an unsigned one
            flty = fl->FLty;
            if (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety))
                flty = touns(flty);

            if (ref->Eoper >= OPle && ref->Eoper <= OPge &&
#if 1
                !(tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) &&
                 tyuns(flty))
#else
                (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) !=
                 tyuns(flty))
#endif
                    continue;

            /* if we have (e relop X), replace it with (X relop e)      */
            if (ref->E2->Eoper == OPvar && ref->E2->EV.sp.Vsym == X)
            {
                register elem *tmp;

                tmp = ref->E2;
                ref->E2 = ref->E1;
                ref->E1 = tmp;
                ref->Eoper = swaprel(ref->Eoper);
            }

            // If e*c1+c2 would result in a sign change or an overflow
            // then we can't do it
            if (fl->c1->Eoper == OPconst)
            {
#if LONGLONG
                targ_llong c1;
#else
                targ_long c1;
#endif
                int sz;

                c1 = el_tolong(fl->c1);
                sz = tysize(ty);
                // Each test rejects when c1, or c1*e for a constant e,
                // has any bit set outside the positive range of the size
                if (sz == SHORTSIZE &&
                    ((ref->E2->Eoper == OPconst &&
                    c1 * el_tolong(ref->E2) & ~0x7FFFL) ||
                     c1 & ~0x7FFFL)
                   )
                    continue;

                if (sz == LONGSIZE &&
                    ((ref->E2->Eoper == OPconst &&
                    c1 * el_tolong(ref->E2) & ~0x7FFFFFFFL) ||
                     c1 & ~0x7FFFFFFFL)
                   )
                    continue;
#if LONGLONG && __INTSIZE >= 4
                if (sz == LLONGSIZE &&
                    ((ref->E2->Eoper == OPconst &&
                    c1 * el_tolong(ref->E2) & ~0x7FFFFFFFFFFFFFFFLL) ||
                     c1 & ~0x7FFFFFFFFFFFFFFFLL)
                   )
                    continue;
#endif
            }

            /* If loop started out with a signed conditional that was
             * replaced with an unsigned one, don't do it if c2
             * is less than 0.
             */
            if (ref->Nflags & NFLtouns && fl->c2->Eoper == OPconst)
            {
                targ_llong c2 = el_tolong(fl->c2);
                if (c2 < 0)
                    continue;
            }

            // Work on copies; ref itself is only modified further down,
            // once it is certain the replacement will be done
            elem *refE2 = el_copytree(ref->E2);
            int refEoper = ref->Eoper;

            /* if c1 < 0 and relop is < <= > >=
               then adjust relop as if both sides were multiplied
               by -1
             */
            if (!tyuns(ty) &&
                (tyintegral(ty) && el_tolong(fl->c1) < 0 ||
                 tyfloating(ty) && el_toldouble(fl->c1) < 0.0))
                refEoper = swaprel(refEoper);

            /* Replace (X relop e) with (X relop (short)e)
               if T is 1 word but e is 2
             */
            if (tysize(flty) == SHORTSIZE &&
                tysize(refE2->Ety) == LONGSIZE)
                refE2 = el_una(OP32_16,flty,refE2);

            /* replace e with e*c1 + c2         */
            C2 = el_copytree(fl->c2);
            fofe = el_bin(OPadd,flty,
                            el_bin(OPmul,refE2->Ety,
                                    refE2,
                                    el_copytree(fl->c1)),
                            C2);
            fofe = doptelem(fofe,GOALvalue | GOALagain);    // fold any constants

            if (tyuns(flty) && refEoper == OPge &&
                fofe->Eoper == OPconst && el_allbits(fofe, 0) &&
                fl->c2->Eoper == OPconst && !el_allbits(fl->c2, 0))
            {
                /* Don't do it if replacement will result in
                 * an unsigned T>=0 which will be an infinite loop.
                 */
                el_free(fofe);
                continue;
            }

            cmes2("Eliminating basic IV '%s'\n",X->Sident);

#ifdef DEBUG
            if (debugc)
            {
                dbg_printf("Comparison replaced: ");
                WReqn(ref);
                dbg_printf(" with ");
            }
#endif

            el_free(ref->E2);
            // NOTE(review): refE2 was handed to el_bin()/doptelem() when
            // fofe was built above; this store is overwritten by
            // "ref->E2 = fofe" below before ref->E2 is read again here.
            ref->E2 = refE2;
            ref->Eoper = refEoper;

            elimass(*biv->IVincr);          // dump the increment elem

            // replace X with T
            assert(ref->E1->EV.sp.Voffset == 0);
            ref->E1->EV.sp.Vsym = fl->FLtemp;
            ref->E1->Ety = flty;
            ref->E2 = fofe;

            /* If sizes of expression worked out wrong...
               Which can happen if we have (int)ptr==e
             */
            if (EBIN(fofe))         /* if didn't optimize it away   */
            {
                int sz;
                tym_t ty,ty1,ty2;   // note: this ty hides the outer ty

                ty = fofe->Ety;
                sz = tysize(ty);
                ty1 = fofe->E1->Ety;
                ty2 = fofe->E2->Ety;
                /* Sizes of + expression must all be the same       */
                if (sz != tysize(ty1) &&
                    sz != tysize(ty2)
                   )
                {
                    if (tyuns(ty))          /* if unsigned comparison */
                        ty1 = touns(ty1);   /* to unsigned type     */
                    fofe->Ety = ty1;
                    ref->E1->Ety = ty1;
                }
            }

#if TARGET_SEGMENTED
            /* Fix if leaves of compare are TYfptrs and the compare */
            /* operator is < <= > >=.                               */
            if (ref->Eoper >= OPle && ref->Eoper <= OPge && tyfv(ref->E1->Ety))
            {
                assert(tyfv(ref->E2->Ety));
                ref->E1 = el_una(OPoffset,TYuint,ref->E1);
                ref->E2 = el_una(OPoffset,TYuint,fofe);
            }
#endif
#ifdef DEBUG
            if (debugc)
            {
                WReqn(ref);
                dbg_printf("\n");
            }
#endif

            changes++;
            doflow = TRUE;                  /* redo flow analysis   */

            /* if X is live on entry to any successor S outside loop */
            /*      prepend elem X=(T-c2)/c1 to S.Belem     */

            foreach (i,dfotop,l->Lexit)     /* for each exit block  */
            {
                register elem *ne;
                register block *b;
                register list_t bl;

                for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
                {                           /* for each successor   */
                    b = list_block(bl);
                    if (vec_testbit(b->Bdfoidx,l->Lloop))
                        continue;           /* inside loop          */
                    if (!vec_testbit(X->Ssymnum,b->Binlv))
                        continue;           /* not live             */

                    C2 = el_copytree(fl->c2);
                    ne = el_bin(OPmin,ty,
                            el_var(fl->FLtemp),
                            C2);
#if TARGET_SEGMENTED
                    if (tybasic(ne->E1->Ety) == TYfptr &&
                        tybasic(ne->E2->Ety) == TYfptr)
                    {
                        ne->Ety = I64 ? TYllong : TYint;
                        if (tylong(ty) && intsize == 2)
                            ne = el_una(OPs16_32,ty,ne);
                    }
#endif

                    ne = el_bin(OPeq,X->ty(),
                            el_var(X),
                            el_bin(OPdiv,ne->Ety,
                                ne,
                                el_copytree(fl->c1)));
#ifdef DEBUG
                    if (debugc)
                    {
                        dbg_printf("Adding (");
                        WReqn(ne);
                        dbg_printf(") to exit block B%d\n",b->Bdfoidx);
                        //elem_print(ne);
                    }
#endif

                    /* We have to add a new block if there is */
                    /* more than one predecessor to b.      */
                    if (list_next(b->Bpred))
                    {
                        block *bn;
                        register list_t bl2;

                        // Splice new block bn onto the edge dfo[i] -> b
                        bn = block_calloc();
                        bn->Btry = b->Btry;
                        numblks++;
                        assert(numblks <= maxblks);
                        bn->BC = BCgoto;
                        bn->Bnext = dfo[i]->Bnext;
                        dfo[i]->Bnext = bn;
                        list_append(&(bn->Bsucc),b);
                        list_append(&(bn->Bpred),dfo[i]);
                        list_ptr(bl) = (void *)bn;
                        // retarget b's predecessor entry for dfo[i] to bn
                        for (bl2 = b->Bpred; bl2;
                             bl2 = list_next(bl2))
                            if (list_block(bl2) == dfo[i])
                            {
                                list_ptr(bl2) = (void *)bn;
                                goto L2;
                            }
                        assert(0);
                    L2:
                        b = bn;
                        addblk = TRUE;
                    }

                    if (b->Belem)
                        b->Belem =
                            el_bin(OPcomma,b->Belem->Ety,
                                ne,b->Belem);
                    else
                        b->Belem = ne;
                    changes++;
                    doflow = TRUE;  /* redo flow analysis   */
                } /* for each successor */
            } /* foreach exit block */
            if (addblk)
                return;
        }
        else if (refcount == 0)                 /* if no uses of IV in loop  */
        {
            /* Eliminate the basic IV if it is not live on any successor */
            foreach (i,dfotop,l->Lexit)         /* for each exit block   */
            {
                register block *b;
                register list_t bl;

                for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
                {                               /* for each successor    */
                    b = list_block(bl);
                    if (vec_testbit(b->Bdfoidx,l->Lloop))
                        continue;               /* inside loop           */
                    if (vec_testbit(X->Ssymnum,b->Binlv))
                        goto L1;                /* live                  */
                }
            }

            cmes3("No uses, eliminating basic IV '%s' (%p)\n",(X->Sident)
                ? (char *)X->Sident : "",X);

            /* Dump the increment elem                              */
            /* (Replace it with an OPconst that only serves as a    */
            /* placeholder in the tree)                             */
            *(biv->IVincr) = el_selecte2(*(biv->IVincr));

            changes++;
            doflow = TRUE;                      /* redo flow analysis    */
        L1: ;
        }
    } /* for */
}

/***********************
 * Eliminate opeq IVs that are not used outside the loop.
 */

STATIC void elimopeqs(register loop *l)
{
    Iv *biv;
    unsigned i;
    elem **pref;
    symbol *X;
    int refcount;

    cmes2("elimopeqs(%p)\n",l);
    for (biv = l->Lopeqlist; biv; biv = biv->IVnext)    // for each opeq IV
    {
        // Can't eliminate this basic IV if we have a goal for the
        // increment elem.
        // Be careful about Nflags being in a union...
if (!((*biv->IVincr)->Nflags & NFLnogoal)) continue; X = biv->IVbasic; assert(symbol_isintab(X)); pref = onlyref(X,l,*biv->IVincr,&refcount); // if only ref of X is of the form (X) or (X relop e) or (e relop X) if (pref != NULL && refcount <= 1) ; else if (refcount == 0) // if no uses of IV in loop { // Eliminate the basic IV if it is not live on any successor foreach (i,dfotop,l->Lexit) // for each exit block { block *b; list_t bl; for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl)) { // for each successor b = list_block(bl); if (vec_testbit(b->Bdfoidx,l->Lloop)) continue; // inside loop if (vec_testbit(X->Ssymnum,b->Binlv)) goto L1; // live } } cmes3("No uses, eliminating opeq IV '%s' (%p)\n",(X->Sident) ? (char *)X->Sident : "",X); // Dump the increment elem // (Replace it with an OPconst that only serves as a // placeholder in the tree) *(biv->IVincr) = el_selecte2(*(biv->IVincr)); changes++; doflow = TRUE; // redo flow analysis L1: ; } } } /************************** * Find simplest elem in family. * Input: * tym type of basic IV * Return NULL if none found. */ STATIC famlist * simfl(famlist *fl,tym_t tym) { famlist *sofar; assert(fl); sofar = NULL; for (; fl; fl = fl->FLnext) { if (fl->FLtemp == FLELIM) /* no variable, so skip it */ continue; /* If size of replacement is less than size of biv, we could */ /* be in trouble due to loss of precision. */ if (size(fl->FLtemp->ty()) < size(tym)) continue; sofar = flcmp(sofar,fl); /* pick simplest */ } return sofar; } /************************** * Return simpler of two family elems. * There is room for improvement, namely if * f1.c1 = 2, f2.c1 = 27 * then pick f1 because it is a shift. 
*/ STATIC famlist * flcmp(famlist *f1,famlist *f2) { tym_t ty; union eve *t1,*t2; assert(f2); if (!f1) goto Lf2; t1 = &(f1->c1->EV); t2 = &(f2->c1->EV); ty = (*f1->FLpelem)->Ety; /* type of elem */ #if 0 printf("f1: c1 = %d, c2 = %d\n",t1->Vshort,f1->c2->EV.Vshort); printf("f2: c1 = %d, c2 = %d\n",t2->Vshort,f2->c2->EV.Vshort); WRTYxx((*f1->FLpelem)->Ety); WRTYxx((*f2->FLpelem)->Ety); #endif /* Wimp out and just pick f1 if the types don't match */ if (tysize(ty) == tysize((*f2->FLpelem)->Ety)) { switch (tybasic(ty)) { case TYbool: case TYchar: case TYschar: case TYuchar: if (t2->Vuchar == 1 || t1->Vuchar != 1 && f2->c2->EV.Vuchar == 0) goto Lf2; break; case TYshort: case TYushort: case TYchar16: case TYwchar_t: // BUG: what about 4 byte wchar_t's? case_short: if (t2->Vshort == 1 || t1->Vshort != 1 && f2->c2->EV.Vshort == 0) goto Lf2; break; #if JHANDLE case TYjhandle: #endif #if TARGET_SEGMENTED case TYsptr: case TYcptr: #endif case TYnptr: // BUG: 64 bit pointers? case TYnullptr: case TYint: case TYuint: if (intsize == SHORTSIZE) goto case_short; else goto case_long; case TYlong: case TYulong: case TYdchar: #if TARGET_SEGMENTED case TYfptr: case TYvptr: case TYhptr: #endif case_long: if (t2->Vlong == 1 || t1->Vlong != 1 && f2->c2->EV.Vlong == 0) goto Lf2; break; case TYfloat: if (t2->Vfloat == 1 || t1->Vfloat != 1 && f2->c2->EV.Vfloat == 0) goto Lf2; break; case TYdouble: case TYdouble_alias: if (t2->Vdouble == 1.0 || t1->Vdouble != 1.0 && f2->c2->EV.Vdouble == 0) goto Lf2; break; case TYldouble: if (t2->Vldouble == 1.0 || t1->Vldouble != 1.0 && f2->c2->EV.Vldouble == 0) goto Lf2; break; case TYllong: case TYullong: if (t2->Vllong == 1 || t1->Vllong != 1 && f2->c2->EV.Vllong == 0) goto Lf2; break; default: assert(0); } } //printf("picking f1\n"); return f1; Lf2: //printf("picking f2\n"); return f2; } /************************************ * Input: * x basic IV symbol * incn increment elem for basic IV X. 
* Output: * *prefcount # of references to X other than the increment elem * Returns: * If ref of X in loop l is of the form (X relop e) or (e relop X) * Return the relop elem * Else * Return NULL */ static int count; static elem **nd,*sincn; static symbol *X; STATIC elem ** onlyref(symbol *x,loop *l,elem *incn,int *prefcount) { register unsigned i; //printf("onlyref('%s')\n", x->Sident); X = x; /* save some parameter passing */ assert(symbol_isintab(x)); sincn = incn; #ifdef DEBUG if (!(X->Ssymnum < globsym.top && l && incn)) dbg_printf("X = %d, globsym.top = %d, l = %p, incn = %p\n",X->Ssymnum,globsym.top,l,incn); #endif assert(X->Ssymnum < globsym.top && l && incn); count = 0; nd = NULL; foreach (i,dfotop,l->Lloop) /* for each block in loop */ { block *b; b = dfo[i]; if (b->Belem) { countrefs(&b->Belem,b->BC == BCiftrue); } } #if 0 dbg_printf("count = %d, nd = ("); if (nd) WReqn(*nd); dbg_printf(")\n"); #endif *prefcount = count; return nd; } /****************************** * Count elems of the form (X relop e) or (e relop X). * Do not count the node if it is the increment node (sincn). * Input: * flag: TRUE if block wants to test the elem */ STATIC void countrefs(register elem **pn,bool flag) { elem *n = *pn; assert(n); if (n == sincn) /* if it is the increment elem */ { if (OTbinary(n->Eoper)) countrefs(&n->E2, FALSE); return; // don't count lvalue } if (OTunary(n->Eoper)) countrefs(&n->E1,FALSE); else if (OTbinary(n->Eoper)) { if (OTrel(n->Eoper)) { elem *e1 = n->E1; assert(e1->Eoper != OPcomma); if (e1 == sincn && (e1->Eoper == OPeq || OTopeq(e1->Eoper))) goto L1; /* Check both subtrees to see if n is the comparison node, * that is, if X is a leaf of the comparison. 
*/ if (e1->Eoper == OPvar && e1->EV.sp.Vsym == X && !countrefs2(n->E2) || n->E2->Eoper == OPvar && n->E2->EV.sp.Vsym == X && !countrefs2(e1)) nd = pn; /* found the relop node */ } L1: countrefs(&n->E1,FALSE); countrefs(&n->E2,(flag && n->Eoper == OPcomma)); } else if ((n->Eoper == OPvar || n->Eoper == OPrelconst) && n->EV.sp.Vsym == X) { if (flag) nd = pn; /* comparing it with 0 */ count++; /* found another reference */ } } /******************************* * Count number of times symbol X appears in elem tree e. */ STATIC int countrefs2(elem *e) { elem_debug(e); while (OTunary(e->Eoper)) e = e->E1; if (OTbinary(e->Eoper)) return countrefs2(e->E1) + countrefs2(e->E2); return ((e->Eoper == OPvar || e->Eoper == OPrelconst) && e->EV.sp.Vsym == X); } /**************************** * Eliminate some special cases. */ STATIC void elimspec(loop *l) { register unsigned i; foreach (i,dfotop,l->Lloop) /* for each block in loop */ { block *b; b = dfo[i]; if (b->Belem) elimspecwalk(&b->Belem); } } /****************************** */ STATIC void elimspecwalk(elem **pn) { elem *n; n = *pn; assert(n); if (OTunary(n->Eoper)) elimspecwalk(&n->E1); else if (OTbinary(n->Eoper)) { elimspecwalk(&n->E1); elimspecwalk(&n->E2); if (OTrel(n->Eoper)) { elem *e1 = n->E1; /* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3). * This will reduce the number of cases for elimbasivs(). * Don't do equivalent with (e1 rel (e2,e3)) because * of potential side effects in e1. 
*/ if (e1->Eoper == OPcomma) { elem *e; #ifdef DEBUG if (debugc) { dbg_printf("3rewriting ("); WReqn(n); dbg_printf(")\n"); } #endif e = n->E2; n->E2 = e1; n->E1 = n->E2->E1; n->E2->E1 = n->E2->E2; n->E2->E2 = e; n->E2->Eoper = n->Eoper; n->E2->Ety = n->Ety; n->Eoper = OPcomma; changes++; doflow = TRUE; elimspecwalk(&n->E1); elimspecwalk(&n->E2); } /* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3)) * Rewrite ((X ++ e2) rel e3) into ((X += e2),(X-e2 rel e3)) * so that the op= will not have a goal, so elimbasivs() * will work on it. */ if ((OTopeq(e1->Eoper) || OTpost(e1->Eoper) ) && !el_sideeffect(e1->E1)) { elem *e; int op; #ifdef DEBUG if (debugc) { dbg_printf("4rewriting ("); WReqn(n); dbg_printf(")\n"); } #endif e = el_calloc(); el_copy(e,n); e->E1 = el_copytree(e1->E1); e->E1->Ety = n->E1->Ety; n->E2 = e; switch (e1->Eoper) { case OPpostinc: e1->Eoper = OPaddass; op = OPmin; goto L3; case OPpostdec: e1->Eoper = OPminass; op = OPadd; L3: e->E1 = el_bin(op,e->E1->Ety,e->E1,el_copytree(e1->E2)); break; } /* increment node is now guaranteed to have no goal */ e1->Nflags |= NFLnogoal; n->Eoper = OPcomma; //changes++; doflow = TRUE; elimspecwalk(&n->E1); elimspecwalk(&n->E2); } } } } #endif