From evan.cheng at apple.com Mon Jun 18 03:37:58 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 03:37:58 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/IfConversion.cpp Message-ID: <200706180837.l5I8bwGS013929@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: IfConversion.cpp updated: 1.55 -> 1.56 --- Log message: Properly remove duplicate instructions as result of diamond if-conversion. Other bug fixes. --- Diffs of the changes: (+176 -103) IfConversion.cpp | 279 ++++++++++++++++++++++++++++++++++--------------------- 1 files changed, 176 insertions(+), 103 deletions(-) Index: llvm/lib/CodeGen/IfConversion.cpp diff -u llvm/lib/CodeGen/IfConversion.cpp:1.55 llvm/lib/CodeGen/IfConversion.cpp:1.56 --- llvm/lib/CodeGen/IfConversion.cpp:1.55 Sat Jun 16 04:34:52 2007 +++ llvm/lib/CodeGen/IfConversion.cpp Mon Jun 18 03:37:25 2007 @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; namespace { @@ -118,15 +119,20 @@ /// Kind - Type of block. See IfcvtKind. /// NeedSubsumsion - True if the to be predicated BB has already been /// predicated. - /// Duplicates - Number of instructions that would be duplicated due - /// to this if-conversion. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. 
struct IfcvtToken { BBInfo &BBI; IfcvtKind Kind; bool NeedSubsumsion; - unsigned Duplicates; - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d) - : BBI(b), Kind(k), NeedSubsumsion(s), Duplicates(d) {} + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumsion(s), NumDups(d), NumDups2(d2) {} }; /// Roots - Basic blocks that do not have successors. These are the starting @@ -152,7 +158,8 @@ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups) const; - bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); BBInfo &AnalyzeBlock(MachineBasicBlock *BB, std::vector &Tokens); @@ -160,19 +167,24 @@ bool isTriangle = false, bool RevBranch = false); bool AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens); - void ReTryPreds(MachineBasicBlock *BB); + void InvalidatePreds(MachineBasicBlock *BB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); - bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); void PredicateBlock(BBInfo &BBI, - std::vector &Cond, - bool IgnoreTerm = false); + MachineBasicBlock::iterator E, + std::vector &Cond); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, std::vector &Cond, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + bool MeetIfcvtSizeLimit(unsigned Size) const { + return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + } + // blockAlwaysFallThrough - Block ends without a terminator. 
bool blockAlwaysFallThrough(BBInfo &BBI) const { return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; @@ -180,13 +192,17 @@ // IfcvtTokenCmp - Used to sort if-conversion candidates. static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { - // Favors subsumsion. - if (C1->NeedSubsumsion == false && C2->NeedSubsumsion == true) + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) return true; - else if (C1->NeedSubsumsion == C2->NeedSubsumsion) { - if (C1->Duplicates > C2->Duplicates) + else if (Incr1 == Incr2) { + // Favors subsumsion. + if (C1->NeedSubsumsion == false && C2->NeedSubsumsion == true) return true; - else if (C1->Duplicates == C2->Duplicates) { + else if (C1->NeedSubsumsion == C2->NeedSubsumsion) { // Favors diamond over triangle, etc. if ((unsigned)C1->Kind < (unsigned)C2->Kind) return true; @@ -305,7 +321,7 @@ DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" << BBI.TrueBB->getNumber() << ",F:" << BBI.FalseBB->getNumber() << ") "; - RetVal = IfConvertDiamond(BBI, Kind); + RetVal = IfConvertDiamond(BBI, Kind, Token->NumDups, Token->NumDups2); DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; if (RetVal) NumDiamonds++; break; @@ -380,7 +396,7 @@ /// in Dups. 
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { Dups = 0; - if (TrueBBI.IsBeingAnalyzed) + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; if (TrueBBI.BB->pred_size() > 1) { @@ -402,7 +418,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups) const { Dups = 0; - if (TrueBBI.IsBeingAnalyzed) + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; if (TrueBBI.BB->pred_size() > 1) { @@ -437,10 +453,26 @@ return TExit && TExit == FalseBBI.BB; } +static +MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, + const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = BB->end(); + while (I != BB->begin()) { + --I; + const TargetInstrDescriptor *TID = I->getInstrDescriptor(); + if ((TID->Flags & M_BRANCH_FLAG) == 0) + break; + } + return I; +} + /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI) const { - if (TrueBBI.IsBeingAnalyzed || FalseBBI.IsBeingAnalyzed) +bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) return false; MachineBasicBlock *TT = TrueBBI.TrueBB; @@ -455,9 +487,33 @@ if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) return false; // FIXME: Allow false block to have an early exit? 
- return (TrueBBI.BB->pred_size() == 1 && - FalseBBI.BB->pred_size() == 1 && - !TrueBBI.FalseBB && !FalseBBI.FalseBB); + if (TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1 || + TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups1; + ++TI; + ++FI; + } + + TI = firstNonBranchInst(TrueBBI.BB, TII); + FI = firstNonBranchInst(FalseBBI.BB, TII); + while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + if (!TI->isIdenticalTo(FI)) + break; + ++Dups2; + --TI; + --FI; + } + + return true; } /// ScanInstructions - Scan all the instructions in the block to determine if @@ -536,10 +592,6 @@ if (BBI.IsDone || BBI.IsUnpredicable) return false; - // Check predication threshold. - if (BBI.NonPredSize == 0 || BBI.NonPredSize > TLI->getIfCvtBlockSizeLimit()) - return false; - // If it is already predicated, check if its predicate subsumes the new // predicate. if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) @@ -606,11 +658,13 @@ bool CanRevCond = !TII->ReverseBranchCondition(RevCond); unsigned Dups = 0; + unsigned Dups2 = 0; bool TNeedSub = TrueBBI.Predicate.size() > 0; bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; - if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI) && - !(TrueBBI.ClobbersPred && FalseBBI.ClobbersPred) && + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -621,11 +675,13 @@ // \ / // TailBB // Note TailBB can be empty. 
- Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups)); + Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); Enqueued = true; } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -639,12 +695,14 @@ } if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } if (ValidSimple(TrueBBI, Dups) && + MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -660,18 +718,21 @@ if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } if (ValidSimple(FalseBBI, Dups) && + MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -718,10 +779,10 @@ return true; } -/// ReTryPreds - Invalidate predecessor BB info so it would be re-analyzed -/// to determine if it can be if-converted. If predecessor is already -/// enqueud, dequeue it! -void IfConverter::ReTryPreds(MachineBasicBlock *BB) { +/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed +/// to determine if it can be if-converted. If predecessor is already enqueued, +/// dequeue it! 
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), E = BB->pred_end(); PI != E; ++PI) { BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; @@ -769,7 +830,8 @@ if (Kind == ICSimpleFalse) std::swap(CvtBBI, NextBBI); - if (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1) { + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; @@ -785,7 +847,7 @@ // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond); } else { - PredicateBlock(*CvtBBI, Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -814,7 +876,7 @@ // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - ReTryPreds(BBI.BB); + InvalidatePreds(BBI.BB); CvtBBI->IsDone = true; // FIXME: Must maintain LiveIns. @@ -833,7 +895,8 @@ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) std::swap(CvtBBI, NextBBI); - if (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1) { + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { // Something has changed. It's no longer safe to predicate this block. BBI.IsAnalyzed = false; CvtBBI->IsAnalyzed = false; @@ -870,7 +933,7 @@ } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); } // If 'true' block has a 'false' successor, add an exit branch to it. @@ -901,7 +964,8 @@ // Only merge them if the true block does not fallthrough to the false // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. 
- if (!HasEarlyExit && NextBBI->BB->pred_size() == 1) { + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { @@ -918,7 +982,7 @@ // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; - ReTryPreds(BBI.BB); + InvalidatePreds(BBI.BB); CvtBBI->IsDone = true; if (FalseBBDead) NextBBI->IsDone = true; @@ -929,47 +993,28 @@ /// IfConvertDiamond - If convert a diamond sub-CFG. /// -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind) { +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; MachineBasicBlock *TailBB = TrueBBI.TrueBB; - - SmallVector Dups; + // True block must fall through or ended with unanalyzable terminator. if (!TailBB) { - // No common merge block. Check if the terminators (e.g. return) are - // the same or predicable. - MachineBasicBlock::iterator TT = BBI.TrueBB->getFirstTerminator(); - MachineBasicBlock::iterator FT = BBI.FalseBB->getFirstTerminator(); - while (TT != BBI.TrueBB->end() && FT != BBI.FalseBB->end()) { - if (TT->isIdenticalTo(FT)) - Dups.push_back(TT); // Will erase these later. - else if ((TT->getInstrDescriptor()->Flags & M_PREDICABLE) == 0 || - (FT->getInstrDescriptor()->Flags & M_PREDICABLE) == 0) - return false; // Can't if-convert. Abort! - ++TT; - ++FT; - } - - // One of the two pathes have more terminators, make sure they are - // all predicable. - while (TT != BBI.TrueBB->end()) { - if ((TT->getInstrDescriptor()->Flags & M_PREDICABLE) == 0) - return false; // Can't if-convert. Abort! - ++TT; - } - while (FT != BBI.FalseBB->end()) { - if ((FT->getInstrDescriptor()->Flags & M_PREDICABLE) == 0) - return false; // Can't if-convert. Abort! 
- ++FT; - } + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); } - // Remove the duplicated instructions from the 'true' block. - for (unsigned i = 0, e = Dups.size(); i != e; ++i) { - Dups[i]->eraseFromParent(); - --TrueBBI.NonPredSize; + if (TrueBBI.IsDone || FalseBBI.IsDone || + TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1) { + // Something has changed. It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; } - + // Merge the 'true' and 'false' blocks by copying the instructions // from the 'false' block to the 'true' block. That is, unless the true // block would clobber the predicate, in that case, do the opposite. @@ -979,11 +1024,8 @@ TII->ReverseBranchCondition(RevCond); std::vector *Cond1 = &BBI.BrCond; std::vector *Cond2 = &RevCond; - // Check the 'true' and 'false' blocks if either isn't ended with a branch. - // Either the block fallthrough to another block or it ends with a - // return. If it's the former, add a branch to its successor. - bool NeedBr1 = !BBI1->TrueBB && BBI1->BB->succ_size() > 0; - bool NeedBr2 = !BBI2->TrueBB && BBI2->BB->succ_size() > 0; + bool NeedBr1 = BBI1->FalseBB != NULL; + bool NeedBr2 = BBI2->FalseBB != NULL; // Figure out the more profitable ordering. bool DoSwap = false; @@ -1003,24 +1045,50 @@ std::swap(NeedBr1, NeedBr2); } + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Remove the duplicated instructions at the beginnings of both paths. 
+ MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); + MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + BBI1->NonPredSize -= NumDups1; + BBI2->NonPredSize -= NumDups1; + while (NumDups1 != 0) { + ++DI1; + ++DI2; + --NumDups1; + } + BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); + BBI2->BB->erase(BBI2->BB->begin(), DI2); + // Predicate the 'true' block after removing its branch. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); - PredicateBlock(*BBI1, *Cond1); + DI1 = BBI1->BB->end(); + for (unsigned i = 0; i != NumDups2; ++i) + --DI1; + BBI1->BB->erase(DI1, BBI1->BB->end()); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); // Add an early exit branch if needed. if (NeedBr1) - TII->InsertBranch(*BBI1->BB, *BBI1->BB->succ_begin(), NULL, *Cond1); + TII->InsertBranch(*BBI1->BB, BBI1->FalseBB, NULL, *Cond1); // Predicate the 'false' block. - PredicateBlock(*BBI2, *Cond2, true); + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); + while (NumDups2 != 0) { + --DI2; + --NumDups2; + } + PredicateBlock(*BBI2, DI2, *Cond2); // Add an unconditional branch from 'false' to to 'false' successor if it // will not be the fallthrough block. if (NeedBr2 && !NeedBr1) { // If BBI2 isn't going to be merged in, then the existing fallthrough // or branch is fine. - if (!canFallThroughTo(BBI.BB, *BBI2->BB->succ_begin())) { - InsertUncondBranch(BBI2->BB, *BBI2->BB->succ_begin(), TII); + if (!canFallThroughTo(BBI.BB, BBI2->FalseBB)) { + InsertUncondBranch(BBI2->BB, BBI2->FalseBB, TII); BBI2->HasFallThrough = false; } } @@ -1029,9 +1097,6 @@ if (!NeedBr1) MergeBlocks(*BBI1, *BBI2); - // Remove the conditional branch from entry to the blocks. - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - // Merge the combined block into the entry of the diamond. 
MergeBlocks(BBI, *BBI1); @@ -1044,34 +1109,42 @@ // If the if-converted block fallthrough or unconditionally branch into the // tail block, and the tail block does not have other predecessors, then - // fold the tail block in as well. - BBInfo *CvtBBI = NeedBr1 ? BBI2 : &BBI; - if (TailBB && - TailBB->pred_size() == 1 && CvtBBI->BB->succ_size() == 1) { - CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + // fold the tail block in as well. Otherwise, unless it falls through to the + // tail, add a unconditional branch to it. + if (TailBB) { BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; - MergeBlocks(*CvtBBI, TailBBI); - TailBBI.IsDone = true; + BBInfo *LastBBI = NeedBr1 ? BBI2 : &BBI; + bool HasEarlyExit = NeedBr1 ? NeedBr2 : false; + if (!HasEarlyExit && + TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { + LastBBI->NonPredSize -= TII->RemoveBranch(*LastBBI->BB); + MergeBlocks(*LastBBI, TailBBI); + TailBBI.IsDone = true; + } else { + bool isFallThrough = canFallThroughTo(LastBBI->BB, TailBB); + if (!isFallThrough) { + InsertUncondBranch(LastBBI->BB, TailBB, TII); + LastBBI->HasFallThrough = false; + } + } } RemoveExtraEdges(BBI); // Update block info. BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); // FIXME: Must maintain LiveIns. return true; } -/// PredicateBlock - Predicate every instruction in the block with the specified -/// condition. If IgnoreTerm is true, skip over all terminator instructions. +/// PredicateBlock - Predicate instructions from the start of the block to the +/// specified end with the specified condition. 
void IfConverter::PredicateBlock(BBInfo &BBI, - std::vector &Cond, - bool IgnoreTerm) { - for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); - I != E; ++I) { - if (IgnoreTerm && TII->isTerminatorInstr(I->getOpcode())) - continue; + MachineBasicBlock::iterator E, + std::vector &Cond) { + for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { From djg at cray.com Mon Jun 18 09:11:54 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 09:11:54 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineInstr.h Message-ID: <200706181411.l5IEBsRl020614@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineInstr.h updated: 1.222 -> 1.223 --- Log message: Add keyword explicit. --- Diffs of the changes: (+1 -1) MachineInstr.h | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/include/llvm/CodeGen/MachineInstr.h diff -u llvm/include/llvm/CodeGen/MachineInstr.h:1.222 llvm/include/llvm/CodeGen/MachineInstr.h:1.223 --- llvm/include/llvm/CodeGen/MachineInstr.h:1.222 Fri Jun 15 14:05:13 2007 +++ llvm/include/llvm/CodeGen/MachineInstr.h Mon Jun 18 09:11:31 2007 @@ -344,7 +344,7 @@ /// MachineInstr ctor - This constructor create a MachineInstr and add the /// implicit operands. It reserves space for number of operands specified by /// TargetInstrDescriptor. 
- MachineInstr(const TargetInstrDescriptor &TID); + explicit MachineInstr(const TargetInstrDescriptor &TID); /// MachineInstr ctor - Work exactly the same as the ctor above, except that /// the MachineInstr is created and added to the end of the specified basic From djg at cray.com Mon Jun 18 09:13:18 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 09:13:18 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrX86-64.td Message-ID: <200706181413.l5IEDIAH020654@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86InstrX86-64.td updated: 1.16 -> 1.17 --- Log message: Define the pushq instruction for x86-64. --- Diffs of the changes: (+2 -0) X86InstrX86-64.td | 2 ++ 1 files changed, 2 insertions(+) Index: llvm/lib/Target/X86/X86InstrX86-64.td diff -u llvm/lib/Target/X86/X86InstrX86-64.td:1.16 llvm/lib/Target/X86/X86InstrX86-64.td:1.17 --- llvm/lib/Target/X86/X86InstrX86-64.td:1.16 Thu May 17 01:35:11 2007 +++ llvm/lib/Target/X86/X86InstrX86-64.td Mon Jun 18 09:12:56 2007 @@ -141,6 +141,8 @@ (ops), "leave", []>, Imp<[RBP,RSP],[RBP,RSP]>; def POP64r : I<0x58, AddRegFrm, (ops GR64:$reg), "pop{q} $reg", []>, Imp<[RSP],[RSP]>; +def PUSH64r : I<0x50, AddRegFrm, + (ops GR64:$reg), "push{q} $reg", []>, Imp<[RSP],[RSP]>; def LEA64_32r : I<0x8D, MRMSrcMem, (ops GR32:$dst, lea64_32mem:$src), From djg at cray.com Mon Jun 18 09:46:32 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 09:46:32 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp Message-ID: <20070618144632.GK20470@village.us.cray.com> > Is this necessary? ARM is already doing this by marking trivially re- > materializable instructions with let isReMaterializable = 1. It's not necessary, but it seems cleaner :-). In the ARM target, other than instructions that are always trivially rematerializable, there's only LDR, with a ReMaterializable clone named LDRcp. 
x86 has a variety of load instructions, so in order to work the way ARM does, x86 would need a variety of clones, and then the clones wouldn't be able to use the automatic instruction selection, so there would still need to be some new custom logic for picking which clone to use for each constant. Dan From djg at cray.com Mon Jun 18 09:54:37 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 09:54:37 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp ScalarEvolutionExpander.cpp Message-ID: <20070618145437.GL20470@village.us.cray.com> > Nifty. Have you done any performance analysis of this? What cases > is it beneficial for? If this helps LSR, for example, can you please > add a testcase to verify this doesn't break in the future? It doesn't actually help LSR much. At this point, a signextend isn't much less opaque than an unknown, though hasComputableLoopEvolution works. Dan -- Dan Gohman, Cray Inc. From djg at cray.com Mon Jun 18 10:30:38 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 10:30:38 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp Message-ID: <200706181530.l5IFUcIl022113@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: SelectionDAGPrinter.cpp updated: 1.46 -> 1.47 --- Log message: Make chain dependencies blue, in addition to being dashed. 
--- Diffs of the changes: (+1 -1) SelectionDAGPrinter.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp:1.46 llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp:1.47 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp:1.46 Fri May 18 12:52:13 2007 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp Mon Jun 18 10:30:16 2007 @@ -51,7 +51,7 @@ if (VT == MVT::Flag) return "color=red,style=bold"; else if (VT == MVT::Other) - return "style=dashed"; + return "color=blue,style=dashed"; return ""; } From clattner at apple.com Mon Jun 18 11:41:23 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 09:41:23 -0700 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp In-Reply-To: <200706180431.l5I4V7jj000418@zion.cs.uiuc.edu> References: <200706180431.l5I4V7jj000418@zion.cs.uiuc.edu> Message-ID: <1C3089B9-B98C-42CF-8FF4-151CF852550F@apple.com> On Jun 17, 2007, at 9:31 PM, Owen Anderson wrote: > --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.34 Fri Jun 15 19:26:54 > 2007 > +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Sun Jun 17 23:30:44 2007 > @@ -329,7 +329,12 @@ > lhsValid = true; > break; > } > - lhsValid &= !dependsOnInvoke(BO->getOperand(0)); > + > + // Check for dependency on invoke insts > + // NOTE: This check is expensive, so don't do it if we > + // don't have to > + if (lhsValid) > + lhsValid = !dependsOnInvoke(BO->getOperand(0)); It's too bad C++ doesn't have an &&= operator eh? 
:) -Chris From clattner at apple.com Mon Jun 18 11:42:33 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 09:42:33 -0700 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp In-Reply-To: <20070618144632.GK20470@village.us.cray.com> References: <20070618144632.GK20470@village.us.cray.com> Message-ID: <2977575E-1CCE-4960-A0FF-38DB07BAFF47@apple.com> On Jun 18, 2007, at 7:46 AM, Dan Gohman wrote: >> Is this necessary? ARM is already doing this by marking trivially re- >> materializable instructions with let isReMaterializable = 1. > > It's not necessary, but it seems cleaner :-). > > In the ARM target, other than instructions that are always trivially > rematerialiable, there's only LDR, with a ReMaterializable clone > named LDRcp. > x86 has a variety of load instructions, so in order to work the way > ARM does > x86 would need a varity of clones, and then the clones wouldn't be > able to > use the automatic instruction selection, so there would still need > to be some > new custom logic for picking which clone to use for each constant. Ah, good point. Evan, do you think the LDRcp instruction can be eliminated now? -Chris From asl at math.spbu.ru Mon Jun 18 12:13:52 2007 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Mon, 18 Jun 2007 12:13:52 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/BasicAliasAnalysis.cpp Message-ID: <200706181713.l5IHDqdk024996@zion.cs.uiuc.edu> Changes in directory llvm/lib/Analysis: BasicAliasAnalysis.cpp updated: 1.111 -> 1.112 --- Log message: Make BasicAliasAnalysis correctly register itself. 
Patch by Devang --- Diffs of the changes: (+2 -0) BasicAliasAnalysis.cpp | 2 ++ 1 files changed, 2 insertions(+) Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp diff -u llvm/lib/Analysis/BasicAliasAnalysis.cpp:1.111 llvm/lib/Analysis/BasicAliasAnalysis.cpp:1.112 --- llvm/lib/Analysis/BasicAliasAnalysis.cpp:1.111 Wed May 2 20:11:53 2007 +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp Mon Jun 18 12:13:29 2007 @@ -38,6 +38,7 @@ struct VISIBILITY_HIDDEN NoAA : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo NoAA() : ImmutablePass((intptr_t)&ID) {} + NoAA(intptr_t PID) : ImmutablePass(PID) { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -93,6 +94,7 @@ /// derives from the NoAA class. struct VISIBILITY_HIDDEN BasicAliasAnalysis : public NoAA { static char ID; // Class identification, replacement for typeinfo + BasicAliasAnalysis() : NoAA((intptr_t)&ID) { } AliasResult alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size); From evan.cheng at apple.com Mon Jun 18 14:12:22 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 12:12:22 -0700 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp In-Reply-To: <2977575E-1CCE-4960-A0FF-38DB07BAFF47@apple.com> References: <20070618144632.GK20470@village.us.cray.com> <2977575E-1CCE-4960-A0FF-38DB07BAFF47@apple.com> Message-ID: We can eliminate LDRcp, that's fine. However, I still don't like the separate hook and targetinstrinfo bit. Dan, can you have just a single isTriviallyReMaterializable hook that encompass all these? Evan On Jun 18, 2007, at 9:42 AM, Chris Lattner wrote: > > On Jun 18, 2007, at 7:46 AM, Dan Gohman wrote: > >>> Is this necessary? ARM is already doing this by marking trivially >>> re- >>> materializable instructions with let isReMaterializable = 1. >> >> It's not necessary, but it seems cleaner :-). 
>> >> In the ARM target, other than instructions that are always trivially >> rematerialiable, there's only LDR, with a ReMaterializable clone >> named LDRcp. >> x86 has a variety of load instructions, so in order to work the way >> ARM does >> x86 would need a varity of clones, and then the clones wouldn't be >> able to >> use the automatic instruction selection, so there would still need >> to be some >> new custom logic for picking which clone to use for each constant. > > Ah, good point. Evan, do you think the LDRcp instruction can be > eliminated now? > > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From djg at cray.com Mon Jun 18 14:30:32 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 14:30:32 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp Message-ID: <200706181930.l5IJUW3i028823@zion.cs.uiuc.edu> Changes in directory llvm/lib/Analysis: ScalarEvolution.cpp updated: 1.119 -> 1.120 --- Log message: In SCEVAddExpr::get, skip over any cast operands before looking for nested add operands after constant operands. The recent change to recognize sign-extend expressions caused this to be exposed more often. --- Diffs of the changes: (+5 -2) ScalarEvolution.cpp | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) Index: llvm/lib/Analysis/ScalarEvolution.cpp diff -u llvm/lib/Analysis/ScalarEvolution.cpp:1.119 llvm/lib/Analysis/ScalarEvolution.cpp:1.120 --- llvm/lib/Analysis/ScalarEvolution.cpp:1.119 Fri Jun 15 09:38:12 2007 +++ llvm/lib/Analysis/ScalarEvolution.cpp Mon Jun 18 14:30:09 2007 @@ -685,8 +685,11 @@ return SCEVAddExpr::get(Ops); } - // Okay, now we know the first non-constant operand. If there are add - // operands they would be next. + // Now we know the first non-constant operand. Skip past any cast SCEVs. 
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) + ++Idx; + + // If there are add operands they would be next. if (Idx < Ops.size()) { bool DeletedAdd = false; while (SCEVAddExpr *Add = dyn_cast(Ops[Idx])) { From clattner at apple.com Mon Jun 18 15:09:14 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 13:09:14 -0700 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp In-Reply-To: <200706181930.l5IJUW3i028823@zion.cs.uiuc.edu> References: <200706181930.l5IJUW3i028823@zion.cs.uiuc.edu> Message-ID: > In SCEVAddExpr::get, skip over any cast operands before looking for > nested > add operands after constant operands. The recent change to recognize > sign-extend expressions caused this to be exposed more often. Ok > + // Now we know the first non-constant operand. Skip past any > cast SCEVs. > + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) > + ++Idx; Can you please introduce a predicate for this? Comparing against scAddExpr is not obvious :) Thanks Dan, -Chris From djg at cray.com Mon Jun 18 15:35:17 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 15:35:17 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp Message-ID: <20070618203517.GQ20470@village.us.cray.com> > > + // Now we know the first non-constant operand. Skip past any > > cast SCEVs. > > + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) > > + ++Idx; > > Can you please introduce a predicate for this? Comparing against > scAddExpr is not obvious :) I think if you read the whole body of SCEVAddExpr::get instead of just what's in this diff, it'll be more obvious. The operands of the SCEVAddExpr are sorted by their getSCEVType() value, and the code is walking through them expecting to deal with each kind in that order. Dan -- Dan Gohman, Cray Inc. 
From clattner at apple.com Mon Jun 18 15:50:51 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 13:50:51 -0700 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp In-Reply-To: <20070618203517.GQ20470@village.us.cray.com> References: <20070618203517.GQ20470@village.us.cray.com> Message-ID: On Jun 18, 2007, at 1:35 PM, Dan Gohman wrote: >>> + // Now we know the first non-constant operand. Skip past any >>> cast SCEVs. >>> + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) >>> + ++Idx; >> >> Can you please introduce a predicate for this? Comparing against >> scAddExpr is not obvious :) > > I think if you read the whole body of SCEVAddExpr::get instead of > just what's > in this diff, it'll be more more obvious. The operands of > theSCEVAddExpr are > sorted by their getSCEVType() value, and the code is walking > through them > expecting to deal with each kind in that order. Ahh, ok, thanks! -Chris From sabre at nondot.org Mon Jun 18 16:28:33 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 16:28:33 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Message-ID: <200706182128.l5ILSXCr031647@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: LegalizeDAG.cpp updated: 1.497 -> 1.498 --- Log message: make ComputeTopDownOrdering significantly faster and use less stack space by making it non-recursive --- Diffs of the changes: (+37 -32) LegalizeDAG.cpp | 69 ++++++++++++++++++++++++++++++-------------------------- 1 files changed, 37 insertions(+), 32 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.497 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.498 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.497 Thu Jun 14 17:58:02 2007 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Mon Jun 18 16:28:10 2007 @@ -302,24 +302,45 @@ "Too many value types for ValueTypeActions to hold!"); } 
-/// ComputeTopDownOrdering - Add the specified node to the Order list if it has -/// not been visited yet and if all of its operands have already been visited. -static void ComputeTopDownOrdering(SDNode *N, SmallVector &Order, - DenseMap &Visited) { - if (++Visited[N] != N->getNumOperands()) - return; // Haven't visited all operands yet - - Order.push_back(N); +/// ComputeTopDownOrdering - Compute a top-down ordering of the dag, where Order +/// contains all of a nodes operands before it contains the node. +static void ComputeTopDownOrdering(SelectionDAG &DAG, + SmallVector &Order) { + + DenseMap Visited; + std::vector Worklist; + Worklist.reserve(128); - if (N->hasOneUse()) { // Tail recurse in common case. - ComputeTopDownOrdering(*N->use_begin(), Order, Visited); - return; + // Compute ordering from all of the leaves in the graphs, those (like the + // entry node) that have no operands. + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + if (I->getNumOperands() == 0) { + Visited[I] = 0 - 1U; + Worklist.push_back(I); + } } - // Now that we have N in, add anything that uses it if all of their operands - // are now done. - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;++UI) - ComputeTopDownOrdering(*UI, Order, Visited); + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + if (++Visited[N] != N->getNumOperands()) + continue; // Haven't visited all operands yet + + Order.push_back(N); + + // Now that we have N in, add anything that uses it if all of their operands + // are now done. 
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + Worklist.push_back(*UI); + } + + assert(Order.size() == Visited.size() && + Order.size() == + (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) && + "Error: DAG is cyclic!"); } @@ -333,24 +354,8 @@ // practice however, this causes us to run out of stack space on large basic // blocks. To avoid this problem, compute an ordering of the nodes where each // node is only legalized after all of its operands are legalized. - DenseMap Visited; SmallVector Order; - - // Compute ordering from all of the leaves in the graphs, those (like the - // entry node) that have no operands. - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - if (I->getNumOperands() == 0) { - Visited[I] = 0 - 1U; - ComputeTopDownOrdering(I, Order, Visited); - } - } - - assert(Order.size() == Visited.size() && - Order.size() == - (unsigned)std::distance(DAG.allnodes_begin(), DAG.allnodes_end()) && - "Error: DAG is cyclic!"); - Visited.clear(); + ComputeTopDownOrdering(DAG, Order); for (unsigned i = 0, e = Order.size(); i != e; ++i) HandleOp(SDOperand(Order[i], 0)); From dpatel at apple.com Mon Jun 18 16:32:51 2007 From: dpatel at apple.com (Devang Patel) Date: Mon, 18 Jun 2007 16:32:51 -0500 Subject: [llvm-commits] CVS: llvm/lib/VMCore/PassManager.cpp Message-ID: <200706182132.l5ILWpv9031744@zion.cs.uiuc.edu> Changes in directory llvm/lib/VMCore: PassManager.cpp updated: 1.157 -> 1.158 --- Log message: Fix quotes in debug messages. 
--- Diffs of the changes: (+6 -6) PassManager.cpp | 12 ++++++------ 1 files changed, 6 insertions(+), 6 deletions(-) Index: llvm/lib/VMCore/PassManager.cpp diff -u llvm/lib/VMCore/PassManager.cpp:1.157 llvm/lib/VMCore/PassManager.cpp:1.158 --- llvm/lib/VMCore/PassManager.cpp:1.157 Wed May 23 00:08:52 2007 +++ llvm/lib/VMCore/PassManager.cpp Mon Jun 18 16:32:29 2007 @@ -850,7 +850,7 @@ cerr << "Executing Pass '" << P->getPassName(); break; case MODIFICATION_MSG: - cerr << "' Made Modification '" << P->getPassName(); + cerr << "Made Modification '" << P->getPassName(); break; case FREEING_MSG: cerr << " Freeing Pass '" << P->getPassName(); @@ -860,19 +860,19 @@ } switch (S2) { case ON_BASICBLOCK_MSG: - cerr << "' on BasicBlock '" << Msg << "...\n"; + cerr << "' on BasicBlock '" << Msg << "'...\n"; break; case ON_FUNCTION_MSG: - cerr << "' on Function '" << Msg << "...\n"; + cerr << "' on Function '" << Msg << "'...\n"; break; case ON_MODULE_MSG: - cerr << "' on Module '" << Msg << "...\n"; + cerr << "' on Module '" << Msg << "'...\n"; break; case ON_LOOP_MSG: - cerr << "' on Loop " << Msg << "...\n"; + cerr << "' on Loop " << Msg << "'...\n"; break; case ON_CG_MSG: - cerr << "' on Call Graph " << Msg << "...\n"; + cerr << "' on Call Graph " << Msg << "'...\n"; break; default: break; From djg at cray.com Mon Jun 18 16:35:38 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 16:35:38 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp Message-ID: <20070618213538.GR20470@village.us.cray.com> > We can eliminate LDRcp, that's fine. > > However, I still don't like the separate hook and targetinstrinfo > bit. Dan, can you have just a single isTriviallyReMaterializable hook > that encompass all these? What do you think about the attached patch? It's not tested yet, but it shows what I think you're asking for. I left LDRcp in, but that can be fixed. Dan -- Dan Gohman, Cray Inc. 
-------------- next part -------------- Index: include/llvm/Target/TargetInstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/include/llvm/Target/TargetInstrInfo.h,v retrieving revision 1.130 diff -u -r1.130 TargetInstrInfo.h --- include/llvm/Target/TargetInstrInfo.h 15 Jun 2007 21:13:54 -0000 1.130 +++ include/llvm/Target/TargetInstrInfo.h 18 Jun 2007 21:17:17 -0000 @@ -78,10 +78,6 @@ // controls execution. It may be set to 'always'. const unsigned M_PREDICABLE = 1 << 12; -// M_REMATERIALIZIBLE - Set if this instruction can be trivally re-materialized -// at any time, e.g. constant generation, load from constant pool. -const unsigned M_REMATERIALIZIBLE = 1 << 13; - // M_CLOBBERS_PRED - Set if this instruction may clobbers the condition code // register and / or registers that are used to predicate instructions. const unsigned M_CLOBBERS_PRED = 1 << 14; @@ -217,9 +213,6 @@ bool clobbersPredicate(MachineOpCode Opcode) const { return get(Opcode).Flags & M_CLOBBERS_PRED; } - bool isReMaterializable(MachineOpCode Opcode) const { - return get(Opcode).Flags & M_REMATERIALIZIBLE; - } bool isCommutableInstr(MachineOpCode Opcode) const { return get(Opcode).Flags & M_COMMUTABLE; } @@ -298,13 +291,13 @@ return 0; } - /// isOtherReMaterializableLoad - If the specified machine instruction is a - /// direct load that is trivially rematerializable, not counting loads from - /// stack slots, return true. If not, return false. This predicate must + /// isTriviallyReMaterializable - If the specified machine instruction can + /// be trivally re-materialized at any time, e.g. constant generation or + /// loads from constant pools. If not, return false. This predicate must /// return false if the instruction has any side effects other than /// producing the value from the load, or if it requres any address /// registers that are not always available. 
- virtual bool isOtherReMaterializableLoad(MachineInstr *MI) const { + virtual bool isTriviallyReMaterializable(MachineInstr *MI) const { return false; } Index: lib/CodeGen/LiveIntervalAnalysis.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp,v retrieving revision 1.247 diff -u -r1.247 LiveIntervalAnalysis.cpp --- lib/CodeGen/LiveIntervalAnalysis.cpp 14 Jun 2007 20:50:44 -0000 1.247 +++ lib/CodeGen/LiveIntervalAnalysis.cpp 18 Jun 2007 21:17:17 -0000 @@ -336,14 +336,13 @@ // time we see a vreg. if (interval.empty()) { // Remember if the definition can be rematerialized. All load's from fixed - // stack slots are re-materializable. The target may permit other loads to - // be re-materialized as well. + // stack slots are re-materializable. The target may permit other + // instructions to be re-materialized as well. int FrameIdx = 0; if (vi.DefInst && - (tii_->isReMaterializable(vi.DefInst->getOpcode()) || + (tii_->isTriviallyReMaterializable(vi.DefInst) || (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) && - mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx)) || - tii_->isOtherReMaterializableLoad(vi.DefInst))) + mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx)))) interval.remat = vi.DefInst; // Get the Idx of the defining instructions. Index: lib/CodeGen/VirtRegMap.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/CodeGen/VirtRegMap.cpp,v retrieving revision 1.112 diff -u -r1.112 VirtRegMap.cpp --- lib/CodeGen/VirtRegMap.cpp 14 Jun 2007 20:50:44 -0000 1.112 +++ lib/CodeGen/VirtRegMap.cpp 18 Jun 2007 21:17:17 -0000 @@ -663,9 +663,8 @@ // If this instruction is being rematerialized, just remove it! 
int FrameIdx; - if ((TID->Flags & M_REMATERIALIZIBLE) || - TII->isLoadFromStackSlot(&MI, FrameIdx) || - TII->isOtherReMaterializableLoad(&MI)) { + if (TII->isTriviallyReMaterializable(&MI) || + TII->isLoadFromStackSlot(&MI, FrameIdx)) { bool Remove = true; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); Index: lib/Target/Target.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/Target.td,v retrieving revision 1.103 diff -u -r1.103 Target.td --- lib/Target/Target.td 13 Jun 2007 22:20:15 -0000 1.103 +++ lib/Target/Target.td 18 Jun 2007 21:17:17 -0000 @@ -186,7 +186,6 @@ bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? - bit isReMaterializable = 0; // Is this instruction re-materializable? bit isPredicable = 0; // Is this instruction predicable? bit hasDelaySlot = 0; // Does this instruction have an delay slot? bit usesCustomDAGSchedInserter = 0; // Pseudo instr needing special help. Index: lib/Target/ARM/ARMInstrInfo.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.cpp,v retrieving revision 1.37 diff -u -r1.37 ARMInstrInfo.cpp --- lib/Target/ARM/ARMInstrInfo.cpp 15 Jun 2007 21:15:00 -0000 1.37 +++ lib/Target/ARM/ARMInstrInfo.cpp 18 Jun 2007 21:17:17 -0000 @@ -130,6 +130,20 @@ return 0; } +bool ARMInstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDRcp: + case ARM::MOVi: + case ARM::MVNi: + case ARM::MOVi2pieces: + case ARM::tLDRcp: + // These instructions are always trivially rematerializable. 
+ return true; + } + return false; +} + static unsigned getUnindexedOpcode(unsigned Opc) { switch (Opc) { default: break; Index: lib/Target/ARM/ARMInstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.h,v retrieving revision 1.14 diff -u -r1.14 ARMInstrInfo.h --- lib/Target/ARM/ARMInstrInfo.h 15 Jun 2007 21:15:00 -0000 1.14 +++ lib/Target/ARM/ARMInstrInfo.h 18 Jun 2007 21:17:17 -0000 @@ -87,6 +87,7 @@ unsigned &SrcReg, unsigned &DstReg) const; virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; + virtual bool isTriviallyReMaterializable(MachineInstr *MI) const; virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.td,v retrieving revision 1.110 diff -u -r1.110 ARMInstrInfo.td --- lib/Target/ARM/ARMInstrInfo.td 6 Jun 2007 10:17:05 -0000 1.110 +++ lib/Target/ARM/ARMInstrInfo.td 18 Jun 2007 21:17:17 -0000 @@ -665,7 +665,6 @@ [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
-let isReMaterializable = 1 in def LDRcp : AI2<(ops GPR:$dst, addrmode2:$addr), "ldr", " $dst, $addr", []>; @@ -799,7 +798,6 @@ def MOVs : AI1<(ops GPR:$dst, so_reg:$src), "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>; -let isReMaterializable = 1 in def MOVi : AI1<(ops GPR:$dst, so_imm:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>; @@ -907,7 +905,6 @@ "mvn", " $dst, $src", [(set GPR:$dst, (not GPR:$src))]>; def MVNs : AI<(ops GPR:$dst, so_reg:$src), "mvn", " $dst, $src", [(set GPR:$dst, (not so_reg:$src))]>; -let isReMaterializable = 1 in def MVNi : AI<(ops GPR:$dst, so_imm:$imm), "mvn", " $dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>; @@ -1177,7 +1174,6 @@ // Large immediate handling. // Two piece so_imms. -let isReMaterializable = 1 in def MOVi2pieces : AI1x2<(ops GPR:$dst, so_imm2part:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm2part:$src)]>; Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrThumb.td,v retrieving revision 1.30 diff -u -r1.30 ARMInstrThumb.td --- lib/Target/ARM/ARMInstrThumb.td 8 Jun 2007 09:13:23 -0000 1.30 +++ lib/Target/ARM/ARMInstrThumb.td 18 Jun 2007 21:17:17 -0000 @@ -266,7 +266,6 @@ [(set GPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. -let isReMaterializable = 1 in def tLDRcp : TIs<(ops GPR:$dst, i32imm:$addr), "ldr $dst, $addr", []>; } // isLoad Index: lib/Target/X86/X86InstrFPStack.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrFPStack.td,v retrieving revision 1.9 diff -u -r1.9 X86InstrFPStack.td --- lib/Target/X86/X86InstrFPStack.td 21 Mar 2007 00:16:56 -0000 1.9 +++ lib/Target/X86/X86InstrFPStack.td 18 Jun 2007 21:17:17 -0000 @@ -413,12 +413,10 @@ def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; // Floating point constant loads. 
-let isReMaterializable = 1 in { def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm0)]>; def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm1)]>; -} def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.cpp,v retrieving revision 1.91 diff -u -r1.91 X86InstrInfo.cpp --- lib/Target/X86/X86InstrInfo.cpp 14 Jun 2007 22:03:45 -0000 1.91 +++ lib/Target/X86/X86InstrInfo.cpp 18 Jun 2007 21:17:17 -0000 @@ -112,9 +112,20 @@ } -bool X86InstrInfo::isOtherReMaterializableLoad(MachineInstr *MI) const { +bool X86InstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; + case X86::FpLD0: + case X86::FpLD1: + case X86::MOV8ri: + case X86::MOV16ri: + case X86::MOV32ri: + case X86::MMX_V_SET0: + case X86::MMX_V_SETALLONES: + case X86::V_SET0: + case X86::V_SETALLONES: + // These instructions are always trivially rematerializable. + return true; case X86::MOV8rm: case X86::MOV16rm: case X86::MOV16_rm: @@ -128,6 +139,7 @@ case X86::MOVAPDrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: + // Loads from constant pools are trivially rematerializable. 
return MI->getOperand(1).isRegister() && MI->getOperand(2).isImmediate() && MI->getOperand(3).isRegister() && MI->getOperand(4).isConstantPoolIndex() && MI->getOperand(1).getReg() == 0 && Index: lib/Target/X86/X86InstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.h,v retrieving revision 1.66 diff -u -r1.66 X86InstrInfo.h --- lib/Target/X86/X86InstrInfo.h 14 Jun 2007 22:03:45 -0000 1.66 +++ lib/Target/X86/X86InstrInfo.h 18 Jun 2007 21:17:17 -0000 @@ -239,7 +239,7 @@ unsigned& destReg) const; unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; - bool isOtherReMaterializableLoad(MachineInstr *MI) const; + bool isTriviallyReMaterializable(MachineInstr *MI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target Index: lib/Target/X86/X86InstrInfo.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.td,v retrieving revision 1.307 diff -u -r1.307 X86InstrInfo.td --- lib/Target/X86/X86InstrInfo.td 6 May 2007 04:00:55 -0000 1.307 +++ lib/Target/X86/X86InstrInfo.td 18 Jun 2007 21:17:17 -0000 @@ -617,7 +617,6 @@ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (ops GR32:$dst, GR32:$src), "mov{l} {$src, $dst|$dst, $src}", []>; -let isReMaterializable = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (ops GR8 :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(set GR8:$dst, imm:$src)]>; @@ -627,7 +626,6 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (ops GR32:$dst, i32imm:$src), "mov{l} {$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; -} def MOV8mi : Ii8 <0xC6, MRM0m, (ops i8mem :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; Index: 
lib/Target/X86/X86InstrMMX.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrMMX.td,v retrieving revision 1.32 diff -u -r1.32 X86InstrMMX.td --- lib/Target/X86/X86InstrMMX.td 16 May 2007 06:08:17 -0000 1.32 +++ lib/Target/X86/X86InstrMMX.td 18 Jun 2007 21:17:18 -0000 @@ -503,14 +503,12 @@ // Alias instructions that map zero vector to pxor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -let isReMaterializable = 1 in { - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), - "pxor $dst, $dst", - [(set VR64:$dst, (v1i64 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), - "pcmpeqd $dst, $dst", - [(set VR64:$dst, (v1i64 immAllOnesV))]>; -} +def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), + "pxor $dst, $dst", + [(set VR64:$dst, (v1i64 immAllZerosV))]>; +def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), + "pcmpeqd $dst, $dst", + [(set VR64:$dst, (v1i64 immAllOnesV))]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns Index: lib/Target/X86/X86InstrSSE.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrSSE.td,v retrieving revision 1.183 diff -u -r1.183 X86InstrSSE.td --- lib/Target/X86/X86InstrSSE.td 17 May 2007 18:44:37 -0000 1.183 +++ lib/Target/X86/X86InstrSSE.td 18 Jun 2007 21:17:18 -0000 @@ -759,7 +759,6 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -let isReMaterializable = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (ops VR128:$dst), "xorps $dst, $dst", [(set VR128:$dst, (v4f32 immAllZerosV))]>; @@ -1819,10 +1818,9 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. 
-let isReMaterializable = 1 in - def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), - "pcmpeqd $dst, $dst", - [(set VR128:$dst, (v2f64 immAllOnesV))]>; +def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), + "pcmpeqd $dst, $dst", + [(set VR128:$dst, (v2f64 immAllOnesV))]>; // FR64 to 128-bit vector conversion. def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src), Index: utils/TableGen/CodeGenInstruction.h =================================================================== RCS file: /var/cvs/llvm/llvm/utils/TableGen/CodeGenInstruction.h,v retrieving revision 1.28 diff -u -r1.28 CodeGenInstruction.h --- utils/TableGen/CodeGenInstruction.h 6 Jun 2007 10:14:55 -0000 1.28 +++ utils/TableGen/CodeGenInstruction.h 18 Jun 2007 21:17:21 -0000 @@ -91,7 +91,6 @@ bool isConvertibleToThreeAddress; bool isCommutable; bool isTerminator; - bool isReMaterializable; bool hasDelaySlot; bool usesCustomDAGSchedInserter; bool hasVariableNumberOfOperands; Index: utils/TableGen/CodeGenTarget.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/utils/TableGen/CodeGenTarget.cpp,v retrieving revision 1.92 diff -u -r1.92 CodeGenTarget.cpp --- utils/TableGen/CodeGenTarget.cpp 13 Jun 2007 22:20:15 -0000 1.92 +++ utils/TableGen/CodeGenTarget.cpp 18 Jun 2007 21:17:21 -0000 @@ -365,7 +365,6 @@ isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress"); isCommutable = R->getValueAsBit("isCommutable"); isTerminator = R->getValueAsBit("isTerminator"); - isReMaterializable = R->getValueAsBit("isReMaterializable"); hasDelaySlot = R->getValueAsBit("hasDelaySlot"); usesCustomDAGSchedInserter = R->getValueAsBit("usesCustomDAGSchedInserter"); hasCtrlDep = R->getValueAsBit("hasCtrlDep"); Index: utils/TableGen/InstrInfoEmitter.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/utils/TableGen/InstrInfoEmitter.cpp,v retrieving revision 1.60 diff -u -r1.60 
InstrInfoEmitter.cpp --- utils/TableGen/InstrInfoEmitter.cpp 6 Jun 2007 10:14:55 -0000 1.60 +++ utils/TableGen/InstrInfoEmitter.cpp 18 Jun 2007 21:17:21 -0000 @@ -240,7 +240,6 @@ if (Inst.isConvertibleToThreeAddress) OS << "|M_CONVERTIBLE_TO_3_ADDR"; if (Inst.isCommutable) OS << "|M_COMMUTABLE"; if (Inst.isTerminator) OS << "|M_TERMINATOR_FLAG"; - if (Inst.isReMaterializable) OS << "|M_REMATERIALIZIBLE"; if (Inst.clobbersPred) OS << "|M_CLOBBERS_PRED"; if (Inst.usesCustomDAGSchedInserter) OS << "|M_USES_CUSTOM_DAG_SCHED_INSERTION"; From sabre at nondot.org Mon Jun 18 16:51:12 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 16:51:12 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/ParameterAttributes.h Message-ID: <200706182151.l5ILpCRF032232@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm: ParameterAttributes.h updated: 1.13 -> 1.14 --- Log message: add helper --- Diffs of the changes: (+7 -0) ParameterAttributes.h | 7 +++++++ 1 files changed, 7 insertions(+) Index: llvm/include/llvm/ParameterAttributes.h diff -u llvm/include/llvm/ParameterAttributes.h:1.13 llvm/include/llvm/ParameterAttributes.h:1.14 --- llvm/include/llvm/ParameterAttributes.h:1.13 Tue Jun 5 00:28:25 2007 +++ llvm/include/llvm/ParameterAttributes.h Mon Jun 18 16:50:49 2007 @@ -47,6 +47,13 @@ struct ParamAttrsWithIndex { uint16_t attrs; ///< The attributes that are set, |'d together uint16_t index; ///< Index of the parameter for which the attributes apply + + static ParamAttrsWithIndex get(uint16_t idx, uint16_t attrs) { + ParamAttrsWithIndex P; + P.index = idx; + P.attrs = attrs; + return P; + } }; /// @brief A vector of attribute/index pairs. From clattner at apple.com Mon Jun 18 16:51:50 2007 From: clattner at apple.com (clattner at apple.com) Date: Mon, 18 Jun 2007 14:51:50 -0700 (PDT) Subject: [llvm-commits] [128564] Use new attribute helper, no functionality change. 
Message-ID: <20070618215150.3E2D093CE814@src> Revision: 128564 Author: clattner Date: 2007-06-18 14:51:50 -0700 (Mon, 18 Jun 2007) Log Message: ----------- Use new attribute helper, no functionality change. Modified Paths: -------------- apple-local/branches/llvm/gcc/llvm-types.cpp Modified: apple-local/branches/llvm/gcc/llvm-types.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-18 19:37:02 UTC (rev 128563) +++ apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-18 21:51:50 UTC (rev 128564) @@ -991,10 +991,8 @@ RAttributes |= ParamAttr::SExt; } } - if (RAttributes != ParamAttr::None) { - ParamAttrsWithIndex PAWI; PAWI.index = 0; PAWI.attrs = RAttributes; - Attrs.push_back(PAWI); - } + if (RAttributes != ParamAttr::None) + Attrs.push_back(ParamAttrsWithIndex::get(0, RAttributes)); unsigned Idx = 1; bool isFirstArg = true; @@ -1008,11 +1006,8 @@ // parameter but that parameter may have other attributes too so we set up // the first Attributes value here based on struct return. This only works // Handle the structure return calling convention - if (ABIConverter.isStructReturn()) { - ParamAttrsWithIndex PAWI; - PAWI.index = Idx++; PAWI.attrs = ParamAttr::StructRet; - Attrs.push_back(PAWI); - } + if (ABIConverter.isStructReturn()) + Attrs.push_back(ParamAttrsWithIndex::get(Idx++, ParamAttr::StructRet)); if (static_chain) // Skip over the static chain. 
@@ -1042,10 +1037,8 @@ isVarArg, lparam); #endif // LLVM_TARGET_ENABLE_REGPARM - if (Attributes != ParamAttr::None) { - ParamAttrsWithIndex PAWI; PAWI.index = Idx; PAWI.attrs = Attributes; - Attrs.push_back(PAWI); - } + if (Attributes != ParamAttr::None) + Attrs.push_back(ParamAttrsWithIndex::get(Idx, Attributes)); Idx++; } From evan.cheng at apple.com Mon Jun 18 17:20:19 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 15:20:19 -0700 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp In-Reply-To: <20070618213538.GR20470@village.us.cray.com> References: <20070618213538.GR20470@village.us.cray.com> Message-ID: <5A1DF86B-DB85-48F9-878A-3C28CD746082@apple.com> Yes, it looks good. I can deal with LDRcp and friends later. Thanks. Evan On Jun 18, 2007, at 2:35 PM, Dan Gohman wrote: >> We can eliminate LDRcp, that's fine. >> >> However, I still don't like the separate hook and targetinstrinfo >> bit. Dan, can you have just a single isTriviallyReMaterializable hook >> that encompasses all these? > > What do you think about the attached patch? It's not tested yet, > but it > shows what I think you're asking for. I left LDRcp in, but that can > be fixed. > > Dan > > -- > Dan Gohman, Cray Inc. > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From tonic at nondot.org Mon Jun 18 17:24:01 2007 From: tonic at nondot.org (Tanya Lattner) Date: Mon, 18 Jun 2007 17:24:01 -0500 Subject: [llvm-commits] CVS: llvm/test/CFrontend/2006-09-25-DebugFilename.c 2007-06-15-AnnotateAttribute.c Message-ID: <200706182224.l5IMO1VO000452@zion.cs.uiuc.edu> Changes in directory llvm/test/CFrontend: 2006-09-25-DebugFilename.c updated: 1.3 -> 1.4 2007-06-15-AnnotateAttribute.c updated: 1.1 -> 1.2 --- Log message: Modify annotate intrinsic to take 2 additional args: file and line number. Fix up Annotate attribute test case. 
Fix DebugFilename.c test case to look for right file name. --- Diffs of the changes: (+2 -3) 2006-09-25-DebugFilename.c | 2 +- 2007-06-15-AnnotateAttribute.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) Index: llvm/test/CFrontend/2006-09-25-DebugFilename.c diff -u llvm/test/CFrontend/2006-09-25-DebugFilename.c:1.3 llvm/test/CFrontend/2006-09-25-DebugFilename.c:1.4 --- llvm/test/CFrontend/2006-09-25-DebugFilename.c:1.3 Mon Apr 16 16:07:00 2007 +++ llvm/test/CFrontend/2006-09-25-DebugFilename.c Mon Jun 18 17:23:39 2007 @@ -1,5 +1,5 @@ // RUN: ignore %llvmgcc -xc %s -S -o /dev/null |& \ -// RUN: grep fluffy | grep 2006-09-25-DebugFilename.c.tr +// RUN: grep fluffy | grep 2006-09-25-DebugFilename.c #include "2006-09-25-DebugFilename.h" int func1() { return hfunc1(); } int func2() { fluffy; return hfunc1(); } Index: llvm/test/CFrontend/2007-06-15-AnnotateAttribute.c diff -u llvm/test/CFrontend/2007-06-15-AnnotateAttribute.c:1.1 llvm/test/CFrontend/2007-06-15-AnnotateAttribute.c:1.2 --- llvm/test/CFrontend/2007-06-15-AnnotateAttribute.c:1.1 Fri Jun 15 15:52:47 2007 +++ llvm/test/CFrontend/2007-06-15-AnnotateAttribute.c Mon Jun 18 17:23:39 2007 @@ -1,5 +1,4 @@ -// RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep llvm.global.annotations | grep foo \ -// RUN: | grep @X | grep @a +// RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep llvm.global.annotations // RUN: %llvmgxx -c -emit-llvm %s -o - | llvm-dis | grep -c llvm.var.annotation | grep 3 #include From tonic at nondot.org Mon Jun 18 17:24:02 2007 From: tonic at nondot.org (Tanya Lattner) Date: Mon, 18 Jun 2007 17:24:02 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Intrinsics.td Message-ID: <200706182224.l5IMO2Td000457@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm: Intrinsics.td updated: 1.56 -> 1.57 --- Log message: Modify annotate intrinsic to take 2 additional args: file and line number. Fix up Annotate attribute test case. 
Fix DebugFilename.c test case to look for right file name. --- Diffs of the changes: (+3 -2) Intrinsics.td | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) Index: llvm/include/llvm/Intrinsics.td diff -u llvm/include/llvm/Intrinsics.td:1.56 llvm/include/llvm/Intrinsics.td:1.57 --- llvm/include/llvm/Intrinsics.td:1.56 Fri Jun 15 15:50:54 2007 +++ llvm/include/llvm/Intrinsics.td Mon Jun 18 17:23:39 2007 @@ -237,8 +237,9 @@ //===---------------- Generic Variable Attribute Intrinsics----------------===// // -def int_var_annotation : Intrinsic<[llvm_void_ty, llvm_ptr_ty, llvm_ptr_ty], - [], "llvm.var.annotation">; +def int_var_annotation : Intrinsic<[llvm_void_ty, llvm_ptr_ty, llvm_ptr_ty, + llvm_ptr_ty, llvm_i32_ty], + [], "llvm.var.annotation">; //===----------------------------------------------------------------------===// // Target-specific intrinsics From lattner at apple.com Mon Jun 18 17:30:01 2007 From: lattner at apple.com (lattner at apple.com) Date: Mon, 18 Jun 2007 15:30:01 -0700 (PDT) Subject: [llvm-commits] [128567] Add file and line number to annotate attribute. Message-ID: <20070618223001.0753493F1BDE@src> Revision: 128567 Author: lattner Date: 2007-06-18 15:30:01 -0700 (Mon, 18 Jun 2007) Log Message: ----------- Add file and line number to annotate attribute. 
Modified Paths: -------------- apple-local/branches/llvm/gcc/llvm-backend.cpp apple-local/branches/llvm/gcc/llvm-convert.cpp apple-local/branches/llvm/gcc/llvm-internal.h Modified: apple-local/branches/llvm/gcc/llvm-backend.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-backend.cpp 2007-06-18 22:03:55 UTC (rev 128566) +++ apple-local/branches/llvm/gcc/llvm-backend.cpp 2007-06-18 22:30:01 UTC (rev 128567) @@ -81,7 +81,7 @@ std::vector > StaticCtors, StaticDtors; std::vector AttributeUsedGlobals; std::vector AttributeNoinlineFunctions; -std::vector > AttributeAnnotateGlobals; +std::vector AttributeAnnotateGlobals; /// PerFunctionPasses - This is the list of cleanup passes run per-function /// as each is compiled. In cases where we are not doing IPO, it includes the @@ -492,21 +492,16 @@ // Add llvm.global.annotations if (!AttributeAnnotateGlobals.empty()) { - std::vector AttrList; - for (unsigned i = 0, e = AttributeAnnotateGlobals.size(); i != e; ++i) { - Constant *Elts[2] = {AttributeAnnotateGlobals[i].first, - AttributeAnnotateGlobals[i].second }; - AttrList.push_back(ConstantStruct::get(Elts, 2, false)); - } - Constant *Array = - ConstantArray::get(ArrayType::get(AttrList[0]->getType(), AttrList.size()), - AttrList); + ConstantArray::get(ArrayType::get(AttributeAnnotateGlobals[0]->getType(), + AttributeAnnotateGlobals.size()), + AttributeAnnotateGlobals); GlobalValue *gv = new GlobalVariable(Array->getType(), false, GlobalValue::AppendingLinkage, Array, "llvm.global.annotations", TheModule); gv->setSection("llvm.metadata"); + AttributeAnnotateGlobals.clear(); } @@ -671,15 +666,39 @@ return; } +// Convert string to global value. Use existing global if possible. +Constant* ConvertMetadataStringToGV(const char *str) { + + Constant *Init = ConstantArray::get(std::string(str)); + + // Use cached string if it exists. 
+ static std::map StringCSTCache; + GlobalVariable *&Slot = StringCSTCache[Init]; + if (Slot) return Slot; + + // Create a new string global. + GlobalVariable *GV = new GlobalVariable(Init->getType(), true, + GlobalVariable::InternalLinkage, + Init, ".str", TheModule); + GV->setSection("llvm.metadata"); + Slot = GV; + return GV; + +} + /// AddAnnotateAttrsToGlobal - Adds decls that have a /// annotate attribute to a vector to be emitted later. void AddAnnotateAttrsToGlobal(GlobalValue *GV, tree decl) { // Handle annotate attribute on global. tree annotateAttr = lookup_attribute("annotate", DECL_ATTRIBUTES (decl)); - if (!annotateAttr) - return; + // Get file and line number + Constant *lineNo = ConstantInt::get(Type::Int32Ty, DECL_SOURCE_LINE(decl)); + Constant *file = ConvertMetadataStringToGV(DECL_SOURCE_FILE(decl)); + const Type *SBP= PointerType::get(Type::Int8Ty); + file = ConstantExpr::getBitCast(file, SBP); + // There may be multiple annotate attributes. Pass return of lookup_attr // to successive lookups. while (annotateAttr) { @@ -698,10 +717,12 @@ assert(TREE_CODE(val) == STRING_CST && "Annotate attribute arg should always be a string"); Constant *strGV = TreeConstantToLLVM::EmitLV_STRING_CST(val); - const Type *SBP= PointerType::get(Type::Int8Ty); - AttributeAnnotateGlobals.push_back( - std::make_pair(ConstantExpr::getBitCast(GV,SBP), - ConstantExpr::getBitCast(strGV,SBP))); + Constant *Element[4] = {ConstantExpr::getBitCast(GV,SBP), + ConstantExpr::getBitCast(strGV,SBP), + file, + lineNo}; + + AttributeAnnotateGlobals.push_back(ConstantStruct::get(Element, 4, false)); } // Get next annotate attribute. 
Modified: apple-local/branches/llvm/gcc/llvm-convert.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-convert.cpp 2007-06-18 22:03:55 UTC (rev 128566) +++ apple-local/branches/llvm/gcc/llvm-convert.cpp 2007-06-18 22:30:01 UTC (rev 128567) @@ -1411,6 +1411,12 @@ Function *annotateFun = Intrinsic::getDeclaration(TheModule, Intrinsic::var_annotation); + + // Get file and line number + Constant *lineNo = ConstantInt::get(Type::Int32Ty, DECL_SOURCE_LINE(decl)); + Constant *file = ConvertMetadataStringToGV(DECL_SOURCE_FILE(decl)); + const Type *SBP= PointerType::get(Type::Int8Ty); + file = ConstantExpr::getBitCast(file, SBP); // There may be multiple annotate attributes. Pass return of lookup_attr // to successive lookups. @@ -1431,12 +1437,14 @@ "Annotate attribute arg should always be a string"); const Type *SBP = PointerType::get(Type::Int8Ty); Constant *strGV = TreeConstantToLLVM::EmitLV_STRING_CST(val); - Value *Ops[2] = { + Value *Ops[4] = { BitCastToType(V, SBP), - BitCastToType(strGV, SBP) + BitCastToType(strGV, SBP), + file, + lineNo }; - Builder.CreateCall(annotateFun, Ops, 2); + Builder.CreateCall(annotateFun, Ops, 4); } // Get next annotate attribute. Modified: apple-local/branches/llvm/gcc/llvm-internal.h =================================================================== --- apple-local/branches/llvm/gcc/llvm-internal.h 2007-06-18 22:03:55 UTC (rev 128566) +++ apple-local/branches/llvm/gcc/llvm-internal.h 2007-06-18 22:30:01 UTC (rev 128567) @@ -92,6 +92,8 @@ /// marked attribute(noinline) extern std::vector AttributeNoinlineFunctions; +extern Constant* ConvertMetadataStringToGV(const char* str); + /// AddAnnotateAttrsToGlobal - Adds decls that have a /// annotate attribute to a vector to be emitted later. 
extern void AddAnnotateAttrsToGlobal(GlobalValue *GV, union tree_node* decl); @@ -611,6 +613,7 @@ static Constant *EmitLV_STRING_CST(tree_node *exp); static Constant *EmitLV_COMPONENT_REF(tree_node *exp); static Constant *EmitLV_ARRAY_REF(tree_node *exp); + }; #endif From evan.cheng at apple.com Mon Jun 18 17:43:53 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 17:43:53 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/BranchFolding.cpp MachineBasicBlock.cpp Message-ID: <200706182243.l5IMhrE5000808@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: BranchFolding.cpp updated: 1.67 -> 1.68 MachineBasicBlock.cpp updated: 1.46 -> 1.47 --- Log message: Move CorrectExtraCFGEdges() from BranchFolding.cpp to a MachineBasicBlock method. --- Diffs of the changes: (+64 -75) BranchFolding.cpp | 79 ++------------------------------------------------ MachineBasicBlock.cpp | 60 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 75 deletions(-) Index: llvm/lib/CodeGen/BranchFolding.cpp diff -u llvm/lib/CodeGen/BranchFolding.cpp:1.67 llvm/lib/CodeGen/BranchFolding.cpp:1.68 --- llvm/lib/CodeGen/BranchFolding.cpp:1.67 Thu Jun 7 20:08:52 2007 +++ llvm/lib/CodeGen/BranchFolding.cpp Mon Jun 18 17:43:31 2007 @@ -87,12 +87,6 @@ char BranchFolder::ID = 0; } -static bool CorrectExtraCFGEdges(MachineBasicBlock &MBB, - MachineBasicBlock *DestA, - MachineBasicBlock *DestB, - bool isCond, - MachineFunction::iterator FallThru); - FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { return new BranchFolder(DefaultEnableTailMerge); } @@ -133,8 +127,7 @@ MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; std::vector Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - EverMadeChange |= CorrectExtraCFGEdges(*MBB, TBB, FBB, - !Cond.empty(), next(I)); + EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } RegInfo = MF.getTarget().getRegisterInfo(); @@ -702,68 +695,6 @@ } -/// CorrectExtraCFGEdges - Various 
pieces of code can cause excess edges in the -/// CFG to be inserted. If we have proven that MBB can only branch to DestA and -/// DestB, remove any other MBB successors from the CFG. DestA and DestB can -/// be null. -/// Besides DestA and DestB, retain other edges leading to LandingPads (currently -/// there can be only one; we don't check or require that here). -/// Note it is possible that DestA and/or DestB are LandingPads. -static bool CorrectExtraCFGEdges(MachineBasicBlock &MBB, - MachineBasicBlock *DestA, - MachineBasicBlock *DestB, - bool isCond, - MachineFunction::iterator FallThru) { - bool MadeChange = false; - bool AddedFallThrough = false; - - // If this block ends with a conditional branch that falls through to its - // successor, set DestB as the successor. - if (isCond) { - if (DestB == 0 && FallThru != MBB.getParent()->end()) { - DestB = FallThru; - AddedFallThrough = true; - } - } else { - // If this is an unconditional branch with no explicit dest, it must just be - // a fallthrough into DestB. - if (DestA == 0 && FallThru != MBB.getParent()->end()) { - DestA = FallThru; - AddedFallThrough = true; - } - } - - MachineBasicBlock::succ_iterator SI = MBB.succ_begin(); - MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; - while (SI != MBB.succ_end()) { - if (*SI == DestA && DestA == DestB) { - DestA = DestB = 0; - ++SI; - } else if (*SI == DestA) { - DestA = 0; - ++SI; - } else if (*SI == DestB) { - DestB = 0; - ++SI; - } else if ((*SI)->isLandingPad() && - *SI!=OrigDestA && *SI!=OrigDestB) { - ++SI; - } else { - // Otherwise, this is a superfluous edge, remove it. 
- MBB.removeSuccessor(SI); - MadeChange = true; - } - } - if (!AddedFallThrough) { - assert(DestA == 0 && DestB == 0 && - "MachineCFG is missing edges!"); - } else if (isCond) { - assert(DestA == 0 && "MachineCFG is missing edges!"); - } - return MadeChange; -} - - /// CanFallThrough - Return true if the specified block (with the specified /// branch condition) can implicitly transfer control to the block after it by /// falling off the end of it. This should return false if it can reach the @@ -880,8 +811,8 @@ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); if (!PriorUnAnalyzable) { // If the CFG for the prior block has extra edges, remove them. - MadeChange |= CorrectExtraCFGEdges(PrevBB, PriorTBB, PriorFBB, - !PriorCond.empty(), MBB); + MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, + !PriorCond.empty()); // If the previous branch is conditional and both conditions go to the same // destination, remove the branch, replacing it with an unconditional one or @@ -993,9 +924,7 @@ bool CurUnAnalyzable = TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond); if (!CurUnAnalyzable) { // If the CFG for the prior block has extra edges, remove them. - MadeChange |= CorrectExtraCFGEdges(*MBB, CurTBB, CurFBB, - !CurCond.empty(), - ++MachineFunction::iterator(MBB)); + MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); // If this is a two-way branch, and the FBB branches to this block, reverse // the condition so the single-basic-block loop is faster. Instead of: Index: llvm/lib/CodeGen/MachineBasicBlock.cpp diff -u llvm/lib/CodeGen/MachineBasicBlock.cpp:1.46 llvm/lib/CodeGen/MachineBasicBlock.cpp:1.47 --- llvm/lib/CodeGen/MachineBasicBlock.cpp:1.46 Mon Jun 4 01:44:01 2007 +++ llvm/lib/CodeGen/MachineBasicBlock.cpp Mon Jun 18 17:43:31 2007 @@ -225,3 +225,63 @@ addSuccessor(New); } +/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the +/// CFG to be inserted. 
If we have proven that MBB can only branch to DestA and +/// DestB, remove any other MBB successors from the CFG. DestA and DestB can +/// be null. +/// Besides DestA and DestB, retain other edges leading to LandingPads (currently +/// there can be only one; we don't check or require that here). +/// Note it is possible that DestA and/or DestB are LandingPads. +bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, + MachineBasicBlock *DestB, + bool isCond) { + bool MadeChange = false; + bool AddedFallThrough = false; + + MachineBasicBlock *FallThru = getNext(); + + // If this block ends with a conditional branch that falls through to its + // successor, set DestB as the successor. + if (isCond) { + if (DestB == 0 && FallThru != getParent()->end()) { + DestB = FallThru; + AddedFallThrough = true; + } + } else { + // If this is an unconditional branch with no explicit dest, it must just be + // a fallthrough into DestB. + if (DestA == 0 && FallThru != getParent()->end()) { + DestA = FallThru; + AddedFallThrough = true; + } + } + + MachineBasicBlock::succ_iterator SI = succ_begin(); + MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; + while (SI != succ_end()) { + if (*SI == DestA && DestA == DestB) { + DestA = DestB = 0; + ++SI; + } else if (*SI == DestA) { + DestA = 0; + ++SI; + } else if (*SI == DestB) { + DestB = 0; + ++SI; + } else if ((*SI)->isLandingPad() && + *SI!=OrigDestA && *SI!=OrigDestB) { + ++SI; + } else { + // Otherwise, this is a superfluous edge, remove it. 
+ removeSuccessor(SI); + MadeChange = true; + } + } + if (!AddedFallThrough) { + assert(DestA == 0 && DestB == 0 && + "MachineCFG is missing edges!"); + } else if (isCond) { + assert(DestA == 0 && "MachineCFG is missing edges!"); + } + return MadeChange; +} From evan.cheng at apple.com Mon Jun 18 17:44:21 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 17:44:21 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/MachineBasicBlock.h Message-ID: <200706182244.l5IMiLsq000833@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: MachineBasicBlock.h updated: 1.65 -> 1.66 --- Log message: Move CorrectExtraCFGEdges() from BranchFolding.cpp to a MachineBasicBlock method. --- Diffs of the changes: (+10 -0) MachineBasicBlock.h | 10 ++++++++++ 1 files changed, 10 insertions(+) Index: llvm/include/llvm/CodeGen/MachineBasicBlock.h diff -u llvm/include/llvm/CodeGen/MachineBasicBlock.h:1.65 llvm/include/llvm/CodeGen/MachineBasicBlock.h:1.66 --- llvm/include/llvm/CodeGen/MachineBasicBlock.h:1.65 Mon Jun 4 01:41:17 2007 +++ llvm/include/llvm/CodeGen/MachineBasicBlock.h Mon Jun 18 17:43:58 2007 @@ -250,6 +250,16 @@ /// 'Old', change the code and CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); + /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in + /// the CFG to be inserted. If we have proven that MBB can only branch to + /// DestA and DestB, remove any other MBB successors from the CFG. DestA and + /// DestB can be null. Besides DestA and DestB, retain other edges leading + /// to LandingPads (currently there can be only one; we don't check or require + /// that here). Note it is possible that DestA and/or DestB are LandingPads. + bool CorrectExtraCFGEdges(MachineBasicBlock *DestA, + MachineBasicBlock *DestB, + bool isCond); + // Debugging methods. 
void dump() const; void print(std::ostream &OS) const; From evan.cheng at apple.com Mon Jun 18 17:45:20 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 17:45:20 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/IfConversion.cpp Message-ID: <200706182245.l5IMjKCf000863@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: IfConversion.cpp updated: 1.56 -> 1.57 --- Log message: Fix some fragile code wrt CFG edge updating. --- Diffs of the changes: (+39 -74) IfConversion.cpp | 113 ++++++++++++++++++------------------------------------- 1 files changed, 39 insertions(+), 74 deletions(-) Index: llvm/lib/CodeGen/IfConversion.cpp diff -u llvm/lib/CodeGen/IfConversion.cpp:1.56 llvm/lib/CodeGen/IfConversion.cpp:1.57 --- llvm/lib/CodeGen/IfConversion.cpp:1.56 Mon Jun 18 03:37:25 2007 +++ llvm/lib/CodeGen/IfConversion.cpp Mon Jun 18 17:44:57 2007 @@ -486,11 +486,12 @@ return false; if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) return false; - // FIXME: Allow false block to have an early exit? - if (TrueBBI.BB->pred_size() > 1 || - FalseBBI.BB->pred_size() > 1 || - TrueBBI.FalseBB || FalseBBI.FalseBB || - (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? 
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) return false; MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); @@ -806,16 +807,8 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { MachineBasicBlock *TBB = NULL, *FBB = NULL; std::vector Cond; - bool isAnalyzable = !TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond); - bool CanFallthrough = isAnalyzable && (TBB == NULL || FBB == NULL); - if (BBI.TrueBB && BBI.BB->isSuccessor(BBI.TrueBB)) - if (!(BBI.TrueBB == TBB || BBI.TrueBB == FBB || - (CanFallthrough && getNextBlock(BBI.BB) == BBI.TrueBB))) - BBI.BB->removeSuccessor(BBI.TrueBB); - if (BBI.FalseBB && BBI.BB->isSuccessor(BBI.FalseBB)) - if (!(BBI.FalseBB == TBB || BBI.FalseBB == FBB || - (CanFallthrough && getNextBlock(BBI.BB) == BBI.FalseBB))) - BBI.BB->removeSuccessor(BBI.FalseBB); + if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) + BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. @@ -936,23 +929,19 @@ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); } + if (!DupBB) { + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + // If 'true' block has a 'false' successor, add an exit branch to it. if (HasEarlyExit) { std::vector RevCond(CvtBBI->BrCond); if (TII->ReverseBranchCondition(RevCond)) assert(false && "Unable to reverse branch condition!"); - if (DupBB) { - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); - BBI.BB->addSuccessor(CvtBBI->FalseBB); - } else { - TII->InsertBranch(*CvtBBI->BB, CvtBBI->FalseBB, NULL, RevCond); - } - } - - if (!DupBB) { - // Now merge the entry of the triangle with the true block. 
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - MergeBlocks(BBI, *CvtBBI); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + BBI.BB->addSuccessor(CvtBBI->FalseBB); } // Merge in the 'false' block if the 'false' block has no other @@ -1024,25 +1013,18 @@ TII->ReverseBranchCondition(RevCond); std::vector *Cond1 = &BBI.BrCond; std::vector *Cond2 = &RevCond; - bool NeedBr1 = BBI1->FalseBB != NULL; - bool NeedBr2 = BBI2->FalseBB != NULL; // Figure out the more profitable ordering. bool DoSwap = false; if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) DoSwap = true; else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { - if (!NeedBr1 && NeedBr2) + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) DoSwap = true; - else if (NeedBr1 == NeedBr2) { - if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) - DoSwap = true; - } } if (DoSwap) { std::swap(BBI1, BBI2); std::swap(Cond1, Cond2); - std::swap(NeedBr1, NeedBr2); } // Remove the conditional branch from entry to the blocks. @@ -1069,10 +1051,6 @@ BBI1->BB->erase(DI1, BBI1->BB->end()); PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); - // Add an early exit branch if needed. - if (NeedBr1) - TII->InsertBranch(*BBI1->BB, BBI1->FalseBB, NULL, *Cond1); - // Predicate the 'false' block. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); @@ -1082,30 +1060,9 @@ } PredicateBlock(*BBI2, DI2, *Cond2); - // Add an unconditional branch from 'false' to to 'false' successor if it - // will not be the fallthrough block. - if (NeedBr2 && !NeedBr1) { - // If BBI2 isn't going to be merged in, then the existing fallthrough - // or branch is fine. - if (!canFallThroughTo(BBI.BB, BBI2->FalseBB)) { - InsertUncondBranch(BBI2->BB, BBI2->FalseBB, TII); - BBI2->HasFallThrough = false; - } - } - - // Keep them as two separate blocks if there is an early exit. - if (!NeedBr1) - MergeBlocks(*BBI1, *BBI2); - - // Merge the combined block into the entry of the diamond. 
+ // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1); - - // 'True' and 'false' aren't combined, see if we need to add a unconditional - // branch to the 'false' block. - if (NeedBr1 && !canFallThroughTo(BBI.BB, BBI2->BB)) { - InsertUncondBranch(BBI.BB, BBI2->BB, TII); - BBI1->HasFallThrough = false; - } + MergeBlocks(BBI, *BBI2); // If the if-converted block fallthrough or unconditionally branch into the // tail block, and the tail block does not have other predecessors, then @@ -1113,19 +1070,13 @@ // tail, add a unconditional branch to it. if (TailBB) { BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; - BBInfo *LastBBI = NeedBr1 ? BBI2 : &BBI; - bool HasEarlyExit = NeedBr1 ? NeedBr2 : false; - if (!HasEarlyExit && - TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { - LastBBI->NonPredSize -= TII->RemoveBranch(*LastBBI->BB); - MergeBlocks(*LastBBI, TailBBI); + if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { - bool isFallThrough = canFallThroughTo(LastBBI->BB, TailBB); - if (!isFallThrough) { - InsertUncondBranch(LastBBI->BB, TailBB, TII); - LastBBI->HasFallThrough = false; - } + InsertUncondBranch(BBI.BB, TailBB, TII); + BBI.HasFallThrough = false; } } @@ -1185,6 +1136,20 @@ } } + std::vector Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. 
+ if (Succ == FallThrough) + continue; + if (!ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->addSuccessor(Succ); + } + std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), std::back_inserter(ToBBI.Predicate)); std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate)); @@ -1227,7 +1192,7 @@ } // Now FromBBI always fall through to the next block! - if (NBB) + if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), From tonic at nondot.org Mon Jun 18 18:42:59 2007 From: tonic at nondot.org (Tanya Lattner) Date: Mon, 18 Jun 2007 18:42:59 -0500 Subject: [llvm-commits] CVS: llvm/docs/LangRef.html Message-ID: <200706182342.l5INgx8n001978@zion.cs.uiuc.edu> Changes in directory llvm/docs: LangRef.html updated: 1.255 -> 1.256 --- Log message: Update docs for llvm.var.annotation intrinsic. --- Diffs of the changes: (+5 -4) LangRef.html | 9 +++++---- 1 files changed, 5 insertions(+), 4 deletions(-) Index: llvm/docs/LangRef.html diff -u llvm/docs/LangRef.html:1.255 llvm/docs/LangRef.html:1.256 --- llvm/docs/LangRef.html:1.255 Fri Jun 15 15:50:54 2007 +++ llvm/docs/LangRef.html Mon Jun 18 18:42:37 2007 @@ -4844,7 +4844,7 @@
Syntax:
-  declare void @llvm.var.annotation(i8* <val>, i8* <str>)
+  declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int> )
 
Overview:
@@ -4856,8 +4856,9 @@
Arguments:

-The first argument is a pointer to a value, and the second is a pointer to a -global string. +The first argument is a pointer to a value, the second is a pointer to a +global string, the third is a pointer to a global string which is the source +file name, and the last argument is the line number.

Semantics:
@@ -4880,7 +4881,7 @@ Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2007/06/15 20:50:54 $ + Last modified: $Date: 2007/06/18 23:42:37 $ From evan.cheng at apple.com Mon Jun 18 19:03:18 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 19:03:18 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp Message-ID: <200706190003.l5J03IVh002385@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelLowering.cpp updated: 1.411 -> 1.412 --- Log message: Look for VECTOR_SHUFFLE that's identity operation on either LHS or RHS. This can happen before DAGCombiner catches it. --- Diffs of the changes: (+19 -2) X86ISelLowering.cpp | 21 +++++++++++++++++++-- 1 files changed, 19 insertions(+), 2 deletions(-) Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.411 llvm/lib/Target/X86/X86ISelLowering.cpp:1.412 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.411 Sat Jun 16 18:57:15 2007 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Mon Jun 18 19:02:56 2007 @@ -1954,6 +1954,16 @@ return HasHi; } +/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a identity operation on the LHS or RHS. +static bool isIdentityMask(SDNode *N, bool RHS = false) { + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 0; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + return false; + return true; +} + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies /// a splat of a single element. static bool isSplatMask(SDNode *N) { @@ -2630,6 +2640,11 @@ if (isZeroShuffle(Op.Val)) return getZeroVector(VT, DAG); + if (isIdentityMask(PermMask.Val)) + return V1; + else if (isIdentityMask(PermMask.Val, true)) + return V2; + if (isSplatMask(PermMask.Val)) { if (NumElems <= 4) return Op; // Promote it to a v4i32 splat. @@ -4316,12 +4331,14 @@ // Only do shuffles on 128-bit vector types for now. 
if (MVT::getSizeInBits(VT) == 64) return false; return (Mask.Val->getNumOperands() <= 4 || + isIdentityMask(Mask.Val) || + isIdentityMask(Mask.Val, true) || isSplatMask(Mask.Val) || isPSHUFHW_PSHUFLWMask(Mask.Val) || X86::isUNPCKLMask(Mask.Val) || + X86::isUNPCKHMask(Mask.Val) || X86::isUNPCKL_v_undef_Mask(Mask.Val) || - X86::isUNPCKH_v_undef_Mask(Mask.Val) || - X86::isUNPCKHMask(Mask.Val)); + X86::isUNPCKH_v_undef_Mask(Mask.Val)); } bool X86TargetLowering::isVectorClearMaskLegal(std::vector &BVOps, From evan.cheng at apple.com Mon Jun 18 19:06:30 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 19:06:30 -0500 Subject: [llvm-commits] CVS: llvm/test/CodeGen/X86/vec_shuffle-11.ll Message-ID: <200706190006.l5J06UMm002452@zion.cs.uiuc.edu> Changes in directory llvm/test/CodeGen/X86: vec_shuffle-11.ll added (r1.1) --- Log message: New test case: identity operation of RHS / LHS of a VECTOR_SHUFFLE. --- Diffs of the changes: (+11 -0) vec_shuffle-11.ll | 11 +++++++++++ 1 files changed, 11 insertions(+) Index: llvm/test/CodeGen/X86/vec_shuffle-11.ll diff -c /dev/null llvm/test/CodeGen/X86/vec_shuffle-11.ll:1.1 *** /dev/null Mon Jun 18 19:06:18 2007 --- llvm/test/CodeGen/X86/vec_shuffle-11.ll Mon Jun 18 19:06:08 2007 *************** *** 0 **** --- 1,11 ---- + ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 + ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep mov + + define <4 x i32> @test() { + %tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 ) ; <<2 x i64>> [#uses=1] + %tmp137 = bitcast <2 x i64> %tmp131 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp138 = and <4 x i32> %tmp137, bitcast (<2 x i64> < i64 -1, i64 -1 > to <4 x i32>) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp138 + } + + declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) From sabre at nondot.org Mon Jun 18 19:09:04 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:09:04 -0500 Subject: [llvm-commits] CVS: 
llvm/test/CFrontend/2007-06-18-SextAttrAggregate.c Message-ID: <200706190009.l5J094Nx002546@zion.cs.uiuc.edu> Changes in directory llvm/test/CFrontend: 2007-06-18-SextAttrAggregate.c added (r1.1) --- Log message: testcase for PR1513: http://llvm.org/PR1513 --- Diffs of the changes: (+11 -0) 2007-06-18-SextAttrAggregate.c | 11 +++++++++++ 1 files changed, 11 insertions(+) Index: llvm/test/CFrontend/2007-06-18-SextAttrAggregate.c diff -c /dev/null llvm/test/CFrontend/2007-06-18-SextAttrAggregate.c:1.1 *** /dev/null Mon Jun 18 19:08:52 2007 --- llvm/test/CFrontend/2007-06-18-SextAttrAggregate.c Mon Jun 18 19:08:42 2007 *************** *** 0 **** --- 1,11 ---- + // RUN: llvm-gcc %s -o - -S -emit-llvm -O3 | grep {i8 sext} + // PR1513 + + struct s{ + long a; + long b; + }; + + void f(struct s a, char *b, char C) { + + } From sabre at nondot.org Mon Jun 18 19:10:47 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:10:47 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/CallingConvLower.h Message-ID: <200706190010.l5J0Alof002658@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: CallingConvLower.h updated: 1.4 -> 1.5 --- Log message: add isvararg tracking to CCState --- Diffs of the changes: (+3 -1) CallingConvLower.h | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/include/llvm/CodeGen/CallingConvLower.h diff -u llvm/include/llvm/CodeGen/CallingConvLower.h:1.4 llvm/include/llvm/CodeGen/CallingConvLower.h:1.5 --- llvm/include/llvm/CodeGen/CallingConvLower.h:1.4 Wed Feb 28 01:09:40 2007 +++ llvm/include/llvm/CodeGen/CallingConvLower.h Mon Jun 18 19:10:25 2007 @@ -105,6 +105,7 @@ /// stack slots are used. It provides accessors to allocate these values. 
class CCState { unsigned CallingConv; + bool IsVarArg; const TargetMachine &TM; const MRegisterInfo &MRI; SmallVector &Locs; @@ -112,7 +113,7 @@ unsigned StackOffset; SmallVector UsedRegs; public: - CCState(unsigned CC, const TargetMachine &TM, + CCState(unsigned CC, bool isVarArg, const TargetMachine &TM, SmallVector &locs); void addLoc(const CCValAssign &V) { @@ -121,6 +122,7 @@ const TargetMachine &getTarget() const { return TM; } unsigned getCallingConv() const { return CallingConv; } + bool isVarArg() const { return IsVarArg; } unsigned getNextStackOffset() const { return StackOffset; } From sabre at nondot.org Mon Jun 18 19:11:31 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:11:31 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp Message-ID: <200706190011.l5J0BV6C002719@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: CallingConvLower.cpp updated: 1.4 -> 1.5 --- Log message: add isVarArg to CCState --- Diffs of the changes: (+3 -2) CallingConvLower.cpp | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp diff -u llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp:1.4 llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp:1.5 --- llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp:1.4 Wed Feb 28 01:09:40 2007 +++ llvm/lib/CodeGen/SelectionDAG/CallingConvLower.cpp Mon Jun 18 19:11:09 2007 @@ -18,9 +18,10 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -CCState::CCState(unsigned CC, const TargetMachine &tm, +CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, SmallVector &locs) - : CallingConv(CC), TM(tm), MRI(*TM.getRegisterInfo()), Locs(locs) { + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + MRI(*TM.getRegisterInfo()), Locs(locs) { // No stack is used. 
StackOffset = 0; From sabre at nondot.org Mon Jun 18 19:13:33 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:13:33 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/TargetCallingConv.td Message-ID: <200706190013.l5J0DXeg002860@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target: TargetCallingConv.td updated: 1.5 -> 1.6 --- Log message: If a function is vararg, never pass inreg arguments in registers. Thanks to Anton for half of this patch. --- Diffs of the changes: (+2 -0) TargetCallingConv.td | 2 ++ 1 files changed, 2 insertions(+) Index: llvm/lib/Target/TargetCallingConv.td diff -u llvm/lib/Target/TargetCallingConv.td:1.5 llvm/lib/Target/TargetCallingConv.td:1.6 --- llvm/lib/Target/TargetCallingConv.td:1.5 Wed Mar 7 10:25:08 2007 +++ llvm/lib/Target/TargetCallingConv.td Mon Jun 18 19:13:10 2007 @@ -40,6 +40,8 @@ /// the specified action. class CCIfInReg : CCIf<"ArgFlags & ISD::ParamFlags::InReg", A> {} +/// CCIfNotVarArg - If the current function is not vararg - apply the action +class CCIfNotVarArg : CCIf<"!State.isVarArg()", A> {} /// CCAssignToReg - This action matches if there is a register in the specified /// list that is still available. If so, it assigns the value to the first From sabre at nondot.org Mon Jun 18 19:13:34 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:13:34 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Message-ID: <200706190013.l5J0DYKM002865@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelLowering.cpp updated: 1.272 -> 1.273 --- Log message: If a function is vararg, never pass inreg arguments in registers. Thanks to Anton for half of this patch. 
--- Diffs of the changes: (+2 -1) PPCISelLowering.cpp | 3 ++- 1 files changed, 2 insertions(+), 1 deletion(-) Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.272 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.273 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.272 Thu Jun 14 17:58:02 2007 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Mon Jun 18 19:13:10 2007 @@ -1830,7 +1830,8 @@ static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CC, TM, RVLocs); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, TM, RVLocs); CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC); // If this is the first return lowered for this function, add the regs to the From sabre at nondot.org Mon Jun 18 19:13:40 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:13:40 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86CallingConv.td X86ISelLowering.cpp Message-ID: <200706190013.l5J0DeUd002884@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86CallingConv.td updated: 1.13 -> 1.14 X86ISelLowering.cpp updated: 1.412 -> 1.413 --- Log message: If a function is vararg, never pass inreg arguments in registers. Thanks to Anton for half of this patch. --- Diffs of the changes: (+18 -14) X86CallingConv.td | 6 +++--- X86ISelLowering.cpp | 26 +++++++++++++++----------- 2 files changed, 18 insertions(+), 14 deletions(-) Index: llvm/lib/Target/X86/X86CallingConv.td diff -u llvm/lib/Target/X86/X86CallingConv.td:1.13 llvm/lib/Target/X86/X86CallingConv.td:1.14 --- llvm/lib/Target/X86/X86CallingConv.td:1.13 Sat Mar 31 04:36:12 2007 +++ llvm/lib/Target/X86/X86CallingConv.td Mon Jun 18 19:13:10 2007 @@ -148,9 +148,9 @@ // Promote i8/i16 arguments to i32. 
CCIfType<[i8, i16], CCPromoteToType>, - // The first 3 integer arguments, if marked 'inreg', are passed in integer - // registers. - CCIfInReg>>, + // The first 3 integer arguments, if marked 'inreg' and if the call is not + // a vararg call, are passed in integer registers. + CCIfNotVarArg>>>, // Otherwise, same as everything else. CCDelegateTo Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.412 llvm/lib/Target/X86/X86ISelLowering.cpp:1.413 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.412 Mon Jun 18 19:02:56 2007 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Mon Jun 18 19:13:10 2007 @@ -502,7 +502,8 @@ SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CC, getTargetMachine(), RVLocs); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); @@ -582,7 +583,8 @@ // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallingConv, getTargetMachine(), RVLocs); + bool isVarArg = cast(TheCall->getOperand(2))->getValue() != 0; + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); @@ -667,8 +669,8 @@ // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); SmallVector ArgValues; @@ -764,7 +766,7 @@ // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); // Get a count of how many bytes are to be pushed on the stack. 
@@ -919,11 +921,12 @@ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); SmallVector ArgValues; @@ -1003,11 +1006,12 @@ unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; SDOperand Callee = Op.getOperand(4); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall); // Get a count of how many bytes are to be pushed on the stack. @@ -1156,8 +1160,8 @@ // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); SmallVector ArgValues; @@ -1292,7 +1296,7 @@ // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); // Get a count of how many bytes are to be pushed on the stack. 
From sabre at nondot.org Mon Jun 18 19:13:40 2007 From: sabre at nondot.org (Chris Lattner) Date: Mon, 18 Jun 2007 19:13:40 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/README Message-ID: <200706190013.l5J0De0i002877@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: README updated: 1.5 -> 1.6 --- Log message: If a function is vararg, never pass inreg arguments in registers. Thanks to Anton for half of this patch. --- Diffs of the changes: (+2 -0) README | 2 ++ 1 files changed, 2 insertions(+) Index: llvm/lib/Target/IA64/README diff -u llvm/lib/Target/IA64/README:1.5 llvm/lib/Target/IA64/README:1.6 --- llvm/lib/Target/IA64/README:1.5 Mon Oct 31 23:49:08 2005 +++ llvm/lib/Target/IA64/README Mon Jun 18 19:13:10 2007 @@ -102,3 +102,5 @@ things LLVM. + + From clattner at apple.com Mon Jun 18 19:15:05 2007 From: clattner at apple.com (clattner at apple.com) Date: Mon, 18 Jun 2007 17:15:05 -0700 (PDT) Subject: [llvm-commits] [128583] Merge parameter attribute handling into the same loop that Message-ID: <20070619001505.9CA2F946543F@src> Revision: 128583 Author: clattner Date: 2007-06-18 17:15:05 -0700 (Mon, 18 Jun 2007) Log Message: ----------- Merge parameter attribute handling into the same loop that translates the types for the arguments. 
This fixes PR1513 and CFrontend/2007-06-18-SextAttrAggregate.c Modified Paths: -------------- apple-local/branches/llvm/gcc/config/i386/llvm-i386-target.h apple-local/branches/llvm/gcc/llvm-types.cpp Modified: apple-local/branches/llvm/gcc/config/i386/llvm-i386-target.h =================================================================== --- apple-local/branches/llvm/gcc/config/i386/llvm-i386-target.h 2007-06-19 00:11:50 UTC (rev 128582) +++ apple-local/branches/llvm/gcc/config/i386/llvm-i386-target.h 2007-06-19 00:15:05 UTC (rev 128583) @@ -48,9 +48,9 @@ } #define LLVM_ADJUST_REGPARM_ATTRIBUTE(Attribute, Size, \ - isVarArg, local_regparm) \ + local_regparm) \ { \ - if (!isVarArg && !TARGET_64BIT) { \ + if (!TARGET_64BIT) { \ int words = (Size + BITS_PER_WORD - 1) / BITS_PER_WORD; \ local_regparm -= words; \ if (local_regparm>=0) { \ @@ -59,3 +59,6 @@ local_regparm = 0; \ } \ } + +/* APPLE LOCAL end LLVM (ENTIRE FILE!) */ + Modified: apple-local/branches/llvm/gcc/llvm-types.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-19 00:11:50 UTC (rev 128582) +++ apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-19 00:15:05 UTC (rev 128583) @@ -941,16 +941,44 @@ #ifdef TARGET_ADJUST_LLVM_CC TARGET_ADJUST_LLVM_CC(CallingConv, type); #endif - + + // Compute whether the result needs to be zext or sext'd, adding an attribute + // if so. + ParamAttrsVector Attrs; + if (isa(RetTy)) { + uint16_t RAttributes = ParamAttr::None; + tree ResultTy = TREE_TYPE(type); + if (TREE_INT_CST_LOW(TYPE_SIZE(ResultTy)) < INT_TYPE_SIZE) { + if (TYPE_UNSIGNED(ResultTy) || TREE_CODE(ResultTy) == BOOLEAN_TYPE) + Attrs.push_back(ParamAttrsWithIndex::get(0, ParamAttr::ZExt)); + else + Attrs.push_back(ParamAttrsWithIndex::get(0, ParamAttr::SExt)); + } + } + + // If this is a struct-return function, the dest loc is passed in as a + // pointer. Mark that pointer as structret. 
+ if (ABIConverter.isStructReturn()) + Attrs.push_back(ParamAttrsWithIndex::get(ArgTypes.size(), + ParamAttr::StructRet)); + if (static_chain) // Pass the static chain as the first parameter. ABIConverter.HandleArgument(TREE_TYPE(static_chain)); + // If the target has regparam parameters, allow it to inspect the function + // type. + int local_regparam = 0; +#ifdef LLVM_TARGET_ENABLE_REGPARM + LLVM_TARGET_INIT_REGPARM(local_regparam, type); +#endif // LLVM_TARGET_ENABLE_REGPARM + // Loop over all of the arguments, adding them as we go. tree Args = TYPE_ARG_TYPES(type); for (; Args && TREE_VALUE(Args) != void_type_node; Args = TREE_CHAIN(Args)){ - if (!isPassedByInvisibleReference(TREE_VALUE(Args)) && - isa(ConvertType(TREE_VALUE(Args)))) { + tree ArgTy = TREE_VALUE(Args); + if (!isPassedByInvisibleReference(ArgTy) && + isa(ConvertType(ArgTy))) { // If we are passing an opaque struct by value, we don't know how many // arguments it will turn into. Because we can't handle this yet, // codegen the prototype as (...). @@ -962,87 +990,40 @@ break; } - ABIConverter.HandleArgument(TREE_VALUE(Args)); - } - - // If the argument list ends with a void type node, it isn't vararg. - isVarArg = (Args == 0); - - assert(RetTy && "Return type not specified!"); + ABIConverter.HandleArgument(ArgTy); - // If this is the C Calling Convention then scan the FunctionType's result - // type and argument types looking for integers less than 32-bits and set - // the parameter attribute in the FunctionType so any arguments passed to - // the function will be correctly sign or zero extended to 32-bits by - // the LLVM code gen. 
- ParamAttrsVector Attrs; - uint16_t RAttributes = ParamAttr::None; - if (CallingConv == CallingConv::C) { - tree ResultTy = TREE_TYPE(type); - if (TREE_CODE(ResultTy) == BOOLEAN_TYPE) { - if (TREE_INT_CST_LOW(TYPE_SIZE(ResultTy)) < INT_TYPE_SIZE) - RAttributes |= ParamAttr::ZExt; - } else { - if (TREE_CODE(ResultTy) == INTEGER_TYPE && - TREE_INT_CST_LOW(TYPE_SIZE(ResultTy)) < INT_TYPE_SIZE) - if (TYPE_UNSIGNED(ResultTy)) - RAttributes |= ParamAttr::ZExt; - else - RAttributes |= ParamAttr::SExt; - } - } - if (RAttributes != ParamAttr::None) - Attrs.push_back(ParamAttrsWithIndex::get(0, RAttributes)); - - unsigned Idx = 1; - bool isFirstArg = true; - - int lparam = 0; -#ifdef LLVM_TARGET_ENABLE_REGPARM - LLVM_TARGET_INIT_REGPARM(lparam, type); -#endif // LLVM_TARGET_ENABLE_REGPARM - - // The struct return attribute must be associated with the first - // parameter but that parameter may have other attributes too so we set up - // the first Attributes value here based on struct return. This only works - // Handle the structure return calling convention - if (ABIConverter.isStructReturn()) - Attrs.push_back(ParamAttrsWithIndex::get(Idx++, ParamAttr::StructRet)); - - if (static_chain) - // Skip over the static chain. - Idx++; - - for (tree Args = TYPE_ARG_TYPES(type); - Args && TREE_VALUE(Args) != void_type_node; Args = TREE_CHAIN(Args)) { - tree Ty = TREE_VALUE(Args); + // Determine if there are any attributes for this param. + // Compute zext/sext attributes. 
unsigned Attributes = ParamAttr::None; - if (CallingConv == CallingConv::C) { - if (TREE_CODE(Ty) == BOOLEAN_TYPE) { - if (TREE_INT_CST_LOW(TYPE_SIZE(Ty)) < INT_TYPE_SIZE) - Attributes |= ParamAttr::ZExt; - } else if (TREE_CODE(Ty) == INTEGER_TYPE && - TREE_INT_CST_LOW(TYPE_SIZE(Ty)) < INT_TYPE_SIZE) { - if (TYPE_UNSIGNED(Ty)) - Attributes |= ParamAttr::ZExt; - else - Attributes |= ParamAttr::SExt; - } + if (TREE_CODE(ArgTy) == BOOLEAN_TYPE) { + if (TREE_INT_CST_LOW(TYPE_SIZE(ArgTy)) < INT_TYPE_SIZE) + Attributes |= ParamAttr::ZExt; + } else if (TREE_CODE(ArgTy) == INTEGER_TYPE && + TREE_INT_CST_LOW(TYPE_SIZE(ArgTy)) < INT_TYPE_SIZE) { + if (TYPE_UNSIGNED(ArgTy)) + Attributes |= ParamAttr::ZExt; + else + Attributes |= ParamAttr::SExt; } - + #ifdef LLVM_TARGET_ENABLE_REGPARM - if (TREE_CODE(Ty) == INTEGER_TYPE || TREE_CODE(Ty) == POINTER_TYPE) - LLVM_ADJUST_REGPARM_ATTRIBUTE(Attributes, TREE_INT_CST_LOW(TYPE_SIZE(Ty)), - isVarArg, lparam); + // Allow the target to mark this as inreg. + if (TREE_CODE(ArgTy) == INTEGER_TYPE || TREE_CODE(ArgTy) == POINTER_TYPE) + LLVM_ADJUST_REGPARM_ATTRIBUTE(Attributes, + TREE_INT_CST_LOW(TYPE_SIZE(ArgTy)), + local_regparam); #endif // LLVM_TARGET_ENABLE_REGPARM - + if (Attributes != ParamAttr::None) - Attrs.push_back(ParamAttrsWithIndex::get(Idx, Attributes)); - Idx++; + Attrs.push_back(ParamAttrsWithIndex::get(ArgTypes.size(), Attributes)); } + + // If the argument list ends with a void type node, it isn't vararg. + isVarArg = (Args == 0); + assert(RetTy && "Return type not specified!"); - // Only instantiate the parameter attributes if we got some + // Only instantiate the parameter attributes if we got some. 
ParamAttrsList *PAL = 0; if (!Attrs.empty()) PAL = ParamAttrsList::get(Attrs); From evan.cheng at apple.com Mon Jun 18 20:22:03 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 20:22:03 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetInstrInfo.h Message-ID: <200706190122.l5J1M3Dv004301@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetInstrInfo.h updated: 1.130 -> 1.131 --- Log message: Replace CanBeDuplicated() with a M_NOT_DUPLICABLE bit. --- Diffs of the changes: (+20 -16) TargetInstrInfo.h | 36 ++++++++++++++++++++---------------- 1 files changed, 20 insertions(+), 16 deletions(-) Index: llvm/include/llvm/Target/TargetInstrInfo.h diff -u llvm/include/llvm/Target/TargetInstrInfo.h:1.130 llvm/include/llvm/Target/TargetInstrInfo.h:1.131 --- llvm/include/llvm/Target/TargetInstrInfo.h:1.130 Fri Jun 15 16:13:54 2007 +++ llvm/include/llvm/Target/TargetInstrInfo.h Mon Jun 18 20:21:41 2007 @@ -86,6 +86,10 @@ // register and / or registers that are used to predicate instructions. const unsigned M_CLOBBERS_PRED = 1 << 14; +// M_NOT_DUPLICABLE - Set if this instruction cannot be safely duplicated. +// (e.g. instructions with unique labels attached). +const unsigned M_NOT_DUPLICABLE = 1 << 15; + // Machine operand flags // M_LOOK_UP_PTR_REG_CLASS - Set if this operand is a pointer value and it // requires a callback to look up its register class. 
@@ -211,15 +215,6 @@ return get(Opcode).Flags & M_RET_FLAG; } - bool isPredicable(MachineOpCode Opcode) const { - return get(Opcode).Flags & M_PREDICABLE; - } - bool clobbersPredicate(MachineOpCode Opcode) const { - return get(Opcode).Flags & M_CLOBBERS_PRED; - } - bool isReMaterializable(MachineOpCode Opcode) const { - return get(Opcode).Flags & M_REMATERIALIZIBLE; - } bool isCommutableInstr(MachineOpCode Opcode) const { return get(Opcode).Flags & M_COMMUTABLE; } @@ -265,6 +260,22 @@ return get(Opcode).Flags & M_VARIABLE_OPS; } + bool isPredicable(MachineOpCode Opcode) const { + return get(Opcode).Flags & M_PREDICABLE; + } + + bool isReMaterializable(MachineOpCode Opcode) const { + return get(Opcode).Flags & M_REMATERIALIZIBLE; + } + + bool clobbersPredicate(MachineOpCode Opcode) const { + return get(Opcode).Flags & M_CLOBBERS_PRED; + } + + bool isNotDuplicable(MachineOpCode Opcode) const { + return get(Opcode).Flags & M_NOT_DUPLICABLE; + } + /// getOperandConstraint - Returns the value of the specific constraint if /// it is set. Returns -1 if it is not set. int getOperandConstraint(MachineOpCode Opcode, unsigned OpNum, @@ -415,13 +426,6 @@ return false; } - /// CanBeDuplicated - Returns true if the instruction can be duplicated - /// without causing unforseenable side-effect (e.g. instructions with unique - /// labels attached). - virtual bool CanBeDuplicated(const MachineInstr *MI) const { - return false; - } - /// isUnpredicatedTerminator - Returns true if the instruction is a /// terminator instruction that has not been predicated. 
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; From evan.cheng at apple.com Mon Jun 18 20:25:53 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 20:25:53 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMInstrInfo.cpp ARMInstrInfo.h ARMInstrInfo.td ARMInstrThumb.td Message-ID: <200706190125.l5J1Prpx004387@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMInstrInfo.cpp updated: 1.37 -> 1.38 ARMInstrInfo.h updated: 1.14 -> 1.15 ARMInstrInfo.td updated: 1.110 -> 1.111 ARMInstrThumb.td updated: 1.30 -> 1.31 --- Log message: Replace TargetInstrInfo::CanBeDuplicated() with a M_NOT_DUPLICABLE bit. --- Diffs of the changes: (+7 -31) ARMInstrInfo.cpp | 29 ----------------------------- ARMInstrInfo.h | 2 -- ARMInstrInfo.td | 6 ++++++ ARMInstrThumb.td | 1 + 4 files changed, 7 insertions(+), 31 deletions(-) Index: llvm/lib/Target/ARM/ARMInstrInfo.cpp diff -u llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.37 llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.38 --- llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.37 Fri Jun 15 16:15:00 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.cpp Mon Jun 18 20:25:30 2007 @@ -446,35 +446,6 @@ return PIdx != -1 && MI->getOperand(PIdx).getImmedValue() != ARMCC::AL; } -bool ARMInstrInfo::CanBeDuplicated(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return true; - // These have unique labels. - case ARM::PICADD: - case ARM::PICLD: - case ARM::PICLDZH: - case ARM::PICLDZB: - case ARM::PICLDH: - case ARM::PICLDB: - case ARM::PICLDSH: - case ARM::PICLDSB: - case ARM::PICSTR: - case ARM::PICSTRH: - case ARM::PICSTRB: - case ARM::LEApcrel: - case ARM::LEApcrelJT: - case ARM::tPICADD: - case ARM::tLEApcrel: - case ARM::tLEApcrelJT: - case ARM::CONSTPOOL_ENTRY: - // These embed jumptables. 
- case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - return false; - } -} - bool ARMInstrInfo::PredicateInstruction(MachineInstr *MI, const std::vector &Pred) const { unsigned Opc = MI->getOpcode(); Index: llvm/lib/Target/ARM/ARMInstrInfo.h diff -u llvm/lib/Target/ARM/ARMInstrInfo.h:1.14 llvm/lib/Target/ARM/ARMInstrInfo.h:1.15 --- llvm/lib/Target/ARM/ARMInstrInfo.h:1.14 Fri Jun 15 16:15:00 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.h Mon Jun 18 20:25:30 2007 @@ -106,8 +106,6 @@ // Predication support. virtual bool isPredicated(const MachineInstr *MI) const; - virtual bool CanBeDuplicated(const MachineInstr *MI) const; - virtual bool PredicateInstruction(MachineInstr *MI, const std::vector &Pred) const; Index: llvm/lib/Target/ARM/ARMInstrInfo.td diff -u llvm/lib/Target/ARM/ARMInstrInfo.td:1.110 llvm/lib/Target/ARM/ARMInstrInfo.td:1.111 --- llvm/lib/Target/ARM/ARMInstrInfo.td:1.110 Wed Jun 6 05:17:05 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.td Mon Jun 18 20:25:30 2007 @@ -533,6 +533,7 @@ /// the function. The first operand is the ID# for this instruction, the second /// is the index into the MachineConstantPool that this is, the third is the /// size in bytes of this constant pool entry. +let isNotDuplicable = 1 in def CONSTPOOL_ENTRY : PseudoInst<(ops cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), "${instid:label} ${cpidx:cpentry}", []>; @@ -552,6 +553,7 @@ ".loc $file, $line, $col", [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; +let isNotDuplicable = 1 in { def PICADD : AXI1<(ops GPR:$dst, GPR:$a, pclabel:$cp, pred:$p), "$cp:\n\tadd$p $dst, pc, $a", [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; @@ -598,6 +600,7 @@ "${addr:label}:\n\tstr${p}b $src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } +} //===----------------------------------------------------------------------===// // Control Flow Instructions. 
@@ -637,6 +640,7 @@ def B : AXI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>; + let isNotDuplicable = 1 in { def BR_JTr : JTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id), "mov pc, $dst \n$jt", [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>; @@ -649,6 +653,7 @@ [(ARMbrjt (add GPR:$dst, GPR:$idx), tjumptable:$jt, imm:$id)]>; } + } def Bcc : AXI<(ops brtarget:$dst, ccop:$cc), "b$cc $dst", [(ARMbrcond bb:$dst, imm:$cc)]>; @@ -1152,6 +1157,7 @@ !strconcat("${:private}PCRELL${:uid}:\n\t", "add$p $dst, pc, #PCRELV${:uid}")), []>; + //===----------------------------------------------------------------------===// // TLS Instructions // Index: llvm/lib/Target/ARM/ARMInstrThumb.td diff -u llvm/lib/Target/ARM/ARMInstrThumb.td:1.30 llvm/lib/Target/ARM/ARMInstrThumb.td:1.31 --- llvm/lib/Target/ARM/ARMInstrThumb.td:1.30 Fri Jun 8 04:13:23 2007 +++ llvm/lib/Target/ARM/ARMInstrThumb.td Mon Jun 18 20:25:30 2007 @@ -170,6 +170,7 @@ "@ tADJCALLSTACKDOWN $amt", [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>, Requires<[IsThumb]>; +let isNotDuplicable = 1 in def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp), "$cp:\n\tadd $dst, pc", [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>; From evan.cheng at apple.com Mon Jun 18 20:25:52 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 20:25:52 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/Target.td Message-ID: <200706190125.l5J1Pqut004376@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target: Target.td updated: 1.103 -> 1.104 --- Log message: Replace TargetInstrInfo::CanBeDuplicated() with a M_NOT_DUPLICABLE bit. --- Diffs of the changes: (+1 -0) Target.td | 1 + 1 files changed, 1 insertion(+) Index: llvm/lib/Target/Target.td diff -u llvm/lib/Target/Target.td:1.103 llvm/lib/Target/Target.td:1.104 --- llvm/lib/Target/Target.td:1.103 Wed Jun 13 17:20:15 2007 +++ llvm/lib/Target/Target.td Mon Jun 18 20:25:30 2007 @@ -193,6 +193,7 @@ bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains? 
bit noResults = 0; // Does this instruction produce no results? bit clobbersPred = 0; // Does it clobbers condition code / predicate? + bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction? InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling. From evan.cheng at apple.com Mon Jun 18 20:26:34 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 20:26:34 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/IfConversion.cpp Message-ID: <200706190126.l5J1QYQS004413@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: IfConversion.cpp updated: 1.57 -> 1.58 --- Log message: Replace TargetInstrInfo::CanBeDuplicated() with a M_NOT_DUPLICABLE bit. --- Diffs of the changes: (+2 -2) IfConversion.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/CodeGen/IfConversion.cpp diff -u llvm/lib/CodeGen/IfConversion.cpp:1.57 llvm/lib/CodeGen/IfConversion.cpp:1.58 --- llvm/lib/CodeGen/IfConversion.cpp:1.57 Mon Jun 18 17:44:57 2007 +++ llvm/lib/CodeGen/IfConversion.cpp Mon Jun 18 20:26:12 2007 @@ -547,10 +547,10 @@ bool SeenCondBr = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { - if (!BBI.CannotBeCopied && !TII->CanBeDuplicated(I)) + const TargetInstrDescriptor *TID = I->getInstrDescriptor(); + if ((TID->Flags & M_NOT_DUPLICABLE) != 0) BBI.CannotBeCopied = true; - const TargetInstrDescriptor *TID = I->getInstrDescriptor(); bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && (TID->Flags & M_BRANCH_FLAG) != 0 && (TID->Flags & M_BARRIER_FLAG) == 0; From evan.cheng at apple.com Mon Jun 18 20:27:14 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 20:27:14 -0500 Subject: [llvm-commits] CVS: llvm/utils/TableGen/CodeGenInstruction.h CodeGenTarget.cpp InstrInfoEmitter.cpp Message-ID: <200706190127.l5J1REUK004442@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: CodeGenInstruction.h updated: 1.28 -> 
1.29 CodeGenTarget.cpp updated: 1.92 -> 1.93 InstrInfoEmitter.cpp updated: 1.60 -> 1.61 --- Log message: Replace TargetInstrInfo::CanBeDuplicated() with a M_NOT_DUPLICABLE bit. --- Diffs of the changes: (+3 -0) CodeGenInstruction.h | 1 + CodeGenTarget.cpp | 1 + InstrInfoEmitter.cpp | 1 + 3 files changed, 3 insertions(+) Index: llvm/utils/TableGen/CodeGenInstruction.h diff -u llvm/utils/TableGen/CodeGenInstruction.h:1.28 llvm/utils/TableGen/CodeGenInstruction.h:1.29 --- llvm/utils/TableGen/CodeGenInstruction.h:1.28 Wed Jun 6 05:14:55 2007 +++ llvm/utils/TableGen/CodeGenInstruction.h Mon Jun 18 20:26:51 2007 @@ -98,6 +98,7 @@ bool hasCtrlDep; bool noResults; bool clobbersPred; + bool isNotDuplicable; /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar", /// where $foo is a whole operand and $foo.bar refers to a suboperand. Index: llvm/utils/TableGen/CodeGenTarget.cpp diff -u llvm/utils/TableGen/CodeGenTarget.cpp:1.92 llvm/utils/TableGen/CodeGenTarget.cpp:1.93 --- llvm/utils/TableGen/CodeGenTarget.cpp:1.92 Wed Jun 13 17:20:15 2007 +++ llvm/utils/TableGen/CodeGenTarget.cpp Mon Jun 18 20:26:51 2007 @@ -371,6 +371,7 @@ hasCtrlDep = R->getValueAsBit("hasCtrlDep"); noResults = R->getValueAsBit("noResults"); clobbersPred = R->getValueAsBit("clobbersPred"); + isNotDuplicable = R->getValueAsBit("isNotDuplicable"); hasVariableNumberOfOperands = false; DagInit *DI; Index: llvm/utils/TableGen/InstrInfoEmitter.cpp diff -u llvm/utils/TableGen/InstrInfoEmitter.cpp:1.60 llvm/utils/TableGen/InstrInfoEmitter.cpp:1.61 --- llvm/utils/TableGen/InstrInfoEmitter.cpp:1.60 Wed Jun 6 05:14:55 2007 +++ llvm/utils/TableGen/InstrInfoEmitter.cpp Mon Jun 18 20:26:51 2007 @@ -242,6 +242,7 @@ if (Inst.isTerminator) OS << "|M_TERMINATOR_FLAG"; if (Inst.isReMaterializable) OS << "|M_REMATERIALIZIBLE"; if (Inst.clobbersPred) OS << "|M_CLOBBERS_PRED"; + if (Inst.isNotDuplicable) OS << "|M_NOT_DUPLICABLE"; if (Inst.usesCustomDAGSchedInserter) OS << "|M_USES_CUSTOM_DAG_SCHED_INSERTION"; 
if (Inst.hasVariableNumberOfOperands) From djg at cray.com Mon Jun 18 20:48:41 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:41 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetInstrInfo.h Message-ID: <200706190148.l5J1mfs0004881@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetInstrInfo.h updated: 1.131 -> 1.132 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. --- Diffs of the changes: (+4 -12) TargetInstrInfo.h | 16 ++++------------ 1 files changed, 4 insertions(+), 12 deletions(-) Index: llvm/include/llvm/Target/TargetInstrInfo.h diff -u llvm/include/llvm/Target/TargetInstrInfo.h:1.131 llvm/include/llvm/Target/TargetInstrInfo.h:1.132 --- llvm/include/llvm/Target/TargetInstrInfo.h:1.131 Mon Jun 18 20:21:41 2007 +++ llvm/include/llvm/Target/TargetInstrInfo.h Mon Jun 18 20:48:04 2007 @@ -78,10 +78,6 @@ // controls execution. It may be set to 'always'. const unsigned M_PREDICABLE = 1 << 12; -// M_REMATERIALIZIBLE - Set if this instruction can be trivally re-materialized -// at any time, e.g. constant generation, load from constant pool. -const unsigned M_REMATERIALIZIBLE = 1 << 13; - // M_CLOBBERS_PRED - Set if this instruction may clobbers the condition code // register and / or registers that are used to predicate instructions. 
const unsigned M_CLOBBERS_PRED = 1 << 14; @@ -264,10 +260,6 @@ return get(Opcode).Flags & M_PREDICABLE; } - bool isReMaterializable(MachineOpCode Opcode) const { - return get(Opcode).Flags & M_REMATERIALIZIBLE; - } - bool clobbersPredicate(MachineOpCode Opcode) const { return get(Opcode).Flags & M_CLOBBERS_PRED; } @@ -309,13 +301,13 @@ return 0; } - /// isOtherReMaterializableLoad - If the specified machine instruction is a - /// direct load that is trivially rematerializable, not counting loads from - /// stack slots, return true. If not, return false. This predicate must + /// isTriviallyReMaterializable - If the specified machine instruction can + /// be trivally re-materialized at any time, e.g. constant generation or + /// loads from constant pools. If not, return false. This predicate must /// return false if the instruction has any side effects other than /// producing the value from the load, or if it requres any address /// registers that are not always available. - virtual bool isOtherReMaterializableLoad(MachineInstr *MI) const { + virtual bool isTriviallyReMaterializable(MachineInstr *MI) const { return false; } From djg at cray.com Mon Jun 18 20:48:41 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:41 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp VirtRegMap.cpp Message-ID: <200706190148.l5J1mfGm004895@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: LiveIntervalAnalysis.cpp updated: 1.247 -> 1.248 VirtRegMap.cpp updated: 1.112 -> 1.113 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. 
--- Diffs of the changes: (+6 -8) LiveIntervalAnalysis.cpp | 9 ++++----- VirtRegMap.cpp | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) Index: llvm/lib/CodeGen/LiveIntervalAnalysis.cpp diff -u llvm/lib/CodeGen/LiveIntervalAnalysis.cpp:1.247 llvm/lib/CodeGen/LiveIntervalAnalysis.cpp:1.248 --- llvm/lib/CodeGen/LiveIntervalAnalysis.cpp:1.247 Thu Jun 14 15:50:44 2007 +++ llvm/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Jun 18 20:48:04 2007 @@ -336,14 +336,13 @@ // time we see a vreg. if (interval.empty()) { // Remember if the definition can be rematerialized. All load's from fixed - // stack slots are re-materializable. The target may permit other loads to - // be re-materialized as well. + // stack slots are re-materializable. The target may permit other + // instructions to be re-materialized as well. int FrameIdx = 0; if (vi.DefInst && - (tii_->isReMaterializable(vi.DefInst->getOpcode()) || + (tii_->isTriviallyReMaterializable(vi.DefInst) || (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) && - mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx)) || - tii_->isOtherReMaterializableLoad(vi.DefInst))) + mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx)))) interval.remat = vi.DefInst; // Get the Idx of the defining instructions. Index: llvm/lib/CodeGen/VirtRegMap.cpp diff -u llvm/lib/CodeGen/VirtRegMap.cpp:1.112 llvm/lib/CodeGen/VirtRegMap.cpp:1.113 --- llvm/lib/CodeGen/VirtRegMap.cpp:1.112 Thu Jun 14 15:50:44 2007 +++ llvm/lib/CodeGen/VirtRegMap.cpp Mon Jun 18 20:48:04 2007 @@ -663,9 +663,8 @@ // If this instruction is being rematerialized, just remove it! 
int FrameIdx; - if ((TID->Flags & M_REMATERIALIZIBLE) || - TII->isLoadFromStackSlot(&MI, FrameIdx) || - TII->isOtherReMaterializableLoad(&MI)) { + if (TII->isTriviallyReMaterializable(&MI) || + TII->isLoadFromStackSlot(&MI, FrameIdx)) { bool Remove = true; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); From djg at cray.com Mon Jun 18 20:48:42 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMInstrInfo.cpp ARMInstrInfo.h ARMInstrInfo.td ARMInstrThumb.td Message-ID: <200706190148.l5J1mgNU004914@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMInstrInfo.cpp updated: 1.38 -> 1.39 ARMInstrInfo.h updated: 1.15 -> 1.16 ARMInstrInfo.td updated: 1.111 -> 1.112 ARMInstrThumb.td updated: 1.31 -> 1.32 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. 
--- Diffs of the changes: (+15 -5) ARMInstrInfo.cpp | 14 ++++++++++++++ ARMInstrInfo.h | 1 + ARMInstrInfo.td | 4 ---- ARMInstrThumb.td | 1 - 4 files changed, 15 insertions(+), 5 deletions(-) Index: llvm/lib/Target/ARM/ARMInstrInfo.cpp diff -u llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.38 llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.39 --- llvm/lib/Target/ARM/ARMInstrInfo.cpp:1.38 Mon Jun 18 20:25:30 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.cpp Mon Jun 18 20:48:04 2007 @@ -130,6 +130,20 @@ return 0; } +bool ARMInstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDRcp: + case ARM::MOVi: + case ARM::MVNi: + case ARM::MOVi2pieces: + case ARM::tLDRcp: + // These instructions are always trivially rematerializable. + return true; + } + return false; +} + static unsigned getUnindexedOpcode(unsigned Opc) { switch (Opc) { default: break; Index: llvm/lib/Target/ARM/ARMInstrInfo.h diff -u llvm/lib/Target/ARM/ARMInstrInfo.h:1.15 llvm/lib/Target/ARM/ARMInstrInfo.h:1.16 --- llvm/lib/Target/ARM/ARMInstrInfo.h:1.15 Mon Jun 18 20:25:30 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.h Mon Jun 18 20:48:04 2007 @@ -87,6 +87,7 @@ unsigned &SrcReg, unsigned &DstReg) const; virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; + virtual bool isTriviallyReMaterializable(MachineInstr *MI) const; virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, Index: llvm/lib/Target/ARM/ARMInstrInfo.td diff -u llvm/lib/Target/ARM/ARMInstrInfo.td:1.111 llvm/lib/Target/ARM/ARMInstrInfo.td:1.112 --- llvm/lib/Target/ARM/ARMInstrInfo.td:1.111 Mon Jun 18 20:25:30 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.td Mon Jun 18 20:48:04 2007 @@ -670,7 +670,6 @@ [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
-let isReMaterializable = 1 in def LDRcp : AI2<(ops GPR:$dst, addrmode2:$addr), "ldr", " $dst, $addr", []>; @@ -804,7 +803,6 @@ def MOVs : AI1<(ops GPR:$dst, so_reg:$src), "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>; -let isReMaterializable = 1 in def MOVi : AI1<(ops GPR:$dst, so_imm:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>; @@ -912,7 +910,6 @@ "mvn", " $dst, $src", [(set GPR:$dst, (not GPR:$src))]>; def MVNs : AI<(ops GPR:$dst, so_reg:$src), "mvn", " $dst, $src", [(set GPR:$dst, (not so_reg:$src))]>; -let isReMaterializable = 1 in def MVNi : AI<(ops GPR:$dst, so_imm:$imm), "mvn", " $dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>; @@ -1183,7 +1180,6 @@ // Large immediate handling. // Two piece so_imms. -let isReMaterializable = 1 in def MOVi2pieces : AI1x2<(ops GPR:$dst, so_imm2part:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm2part:$src)]>; Index: llvm/lib/Target/ARM/ARMInstrThumb.td diff -u llvm/lib/Target/ARM/ARMInstrThumb.td:1.31 llvm/lib/Target/ARM/ARMInstrThumb.td:1.32 --- llvm/lib/Target/ARM/ARMInstrThumb.td:1.31 Mon Jun 18 20:25:30 2007 +++ llvm/lib/Target/ARM/ARMInstrThumb.td Mon Jun 18 20:48:04 2007 @@ -267,7 +267,6 @@ [(set GPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. 
-let isReMaterializable = 1 in def tLDRcp : TIs<(ops GPR:$dst, i32imm:$addr), "ldr $dst, $addr", []>; } // isLoad From djg at cray.com Mon Jun 18 20:48:39 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:39 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td Message-ID: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86InstrFPStack.td updated: 1.9 -> 1.10 X86InstrInfo.cpp updated: 1.91 -> 1.92 X86InstrInfo.h updated: 1.66 -> 1.67 X86InstrInfo.td updated: 1.307 -> 1.308 X86InstrMMX.td updated: 1.32 -> 1.33 X86InstrSSE.td updated: 1.183 -> 1.184 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. --- Diffs of the changes: (+23 -19) X86InstrFPStack.td | 2 -- X86InstrInfo.cpp | 14 +++++++++++++- X86InstrInfo.h | 2 +- X86InstrInfo.td | 2 -- X86InstrMMX.td | 14 ++++++-------- X86InstrSSE.td | 8 +++----- 6 files changed, 23 insertions(+), 19 deletions(-) Index: llvm/lib/Target/X86/X86InstrFPStack.td diff -u llvm/lib/Target/X86/X86InstrFPStack.td:1.9 llvm/lib/Target/X86/X86InstrFPStack.td:1.10 --- llvm/lib/Target/X86/X86InstrFPStack.td:1.9 Tue Mar 20 19:16:56 2007 +++ llvm/lib/Target/X86/X86InstrFPStack.td Mon Jun 18 20:48:04 2007 @@ -413,12 +413,10 @@ def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; // Floating point constant loads. 
-let isReMaterializable = 1 in { def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm0)]>; def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm1)]>; -} def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; Index: llvm/lib/Target/X86/X86InstrInfo.cpp diff -u llvm/lib/Target/X86/X86InstrInfo.cpp:1.91 llvm/lib/Target/X86/X86InstrInfo.cpp:1.92 --- llvm/lib/Target/X86/X86InstrInfo.cpp:1.91 Thu Jun 14 17:03:45 2007 +++ llvm/lib/Target/X86/X86InstrInfo.cpp Mon Jun 18 20:48:04 2007 @@ -112,9 +112,20 @@ } -bool X86InstrInfo::isOtherReMaterializableLoad(MachineInstr *MI) const { +bool X86InstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { switch (MI->getOpcode()) { default: break; + case X86::FpLD0: + case X86::FpLD1: + case X86::MOV8ri: + case X86::MOV16ri: + case X86::MOV32ri: + case X86::MMX_V_SET0: + case X86::MMX_V_SETALLONES: + case X86::V_SET0: + case X86::V_SETALLONES: + // These instructions are always trivially rematerializable. + return true; case X86::MOV8rm: case X86::MOV16rm: case X86::MOV16_rm: @@ -128,6 +139,7 @@ case X86::MOVAPDrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: + // Loads from constant pools are trivially rematerializable. 
return MI->getOperand(1).isRegister() && MI->getOperand(2).isImmediate() && MI->getOperand(3).isRegister() && MI->getOperand(4).isConstantPoolIndex() && MI->getOperand(1).getReg() == 0 && Index: llvm/lib/Target/X86/X86InstrInfo.h diff -u llvm/lib/Target/X86/X86InstrInfo.h:1.66 llvm/lib/Target/X86/X86InstrInfo.h:1.67 --- llvm/lib/Target/X86/X86InstrInfo.h:1.66 Thu Jun 14 17:03:45 2007 +++ llvm/lib/Target/X86/X86InstrInfo.h Mon Jun 18 20:48:04 2007 @@ -239,7 +239,7 @@ unsigned& destReg) const; unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; - bool isOtherReMaterializableLoad(MachineInstr *MI) const; + bool isTriviallyReMaterializable(MachineInstr *MI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target Index: llvm/lib/Target/X86/X86InstrInfo.td diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.307 llvm/lib/Target/X86/X86InstrInfo.td:1.308 --- llvm/lib/Target/X86/X86InstrInfo.td:1.307 Sat May 5 23:00:55 2007 +++ llvm/lib/Target/X86/X86InstrInfo.td Mon Jun 18 20:48:04 2007 @@ -617,7 +617,6 @@ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (ops GR32:$dst, GR32:$src), "mov{l} {$src, $dst|$dst, $src}", []>; -let isReMaterializable = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (ops GR8 :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(set GR8:$dst, imm:$src)]>; @@ -627,7 +626,6 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (ops GR32:$dst, i32imm:$src), "mov{l} {$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; -} def MOV8mi : Ii8 <0xC6, MRM0m, (ops i8mem :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; Index: llvm/lib/Target/X86/X86InstrMMX.td diff -u llvm/lib/Target/X86/X86InstrMMX.td:1.32 llvm/lib/Target/X86/X86InstrMMX.td:1.33 --- llvm/lib/Target/X86/X86InstrMMX.td:1.32 Wed May 16 01:08:17 2007 +++ 
llvm/lib/Target/X86/X86InstrMMX.td Mon Jun 18 20:48:05 2007 @@ -503,14 +503,12 @@ // Alias instructions that map zero vector to pxor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -let isReMaterializable = 1 in { - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), - "pxor $dst, $dst", - [(set VR64:$dst, (v1i64 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), - "pcmpeqd $dst, $dst", - [(set VR64:$dst, (v1i64 immAllOnesV))]>; -} +def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), + "pxor $dst, $dst", + [(set VR64:$dst, (v1i64 immAllZerosV))]>; +def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), + "pcmpeqd $dst, $dst", + [(set VR64:$dst, (v1i64 immAllOnesV))]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns Index: llvm/lib/Target/X86/X86InstrSSE.td diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.183 llvm/lib/Target/X86/X86InstrSSE.td:1.184 --- llvm/lib/Target/X86/X86InstrSSE.td:1.183 Thu May 17 13:44:37 2007 +++ llvm/lib/Target/X86/X86InstrSSE.td Mon Jun 18 20:48:05 2007 @@ -759,7 +759,6 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -let isReMaterializable = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (ops VR128:$dst), "xorps $dst, $dst", [(set VR128:$dst, (v4f32 immAllZerosV))]>; @@ -1819,10 +1818,9 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -let isReMaterializable = 1 in - def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), - "pcmpeqd $dst, $dst", - [(set VR128:$dst, (v2f64 immAllOnesV))]>; +def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), + "pcmpeqd $dst, $dst", + [(set VR128:$dst, (v2f64 immAllOnesV))]>; // FR64 to 128-bit vector conversion. 
def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src), From djg at cray.com Mon Jun 18 20:48:42 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/Target.td Message-ID: <200706190148.l5J1mg8m004903@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target: Target.td updated: 1.104 -> 1.105 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. --- Diffs of the changes: (+0 -1) Target.td | 1 - 1 files changed, 1 deletion(-) Index: llvm/lib/Target/Target.td diff -u llvm/lib/Target/Target.td:1.104 llvm/lib/Target/Target.td:1.105 --- llvm/lib/Target/Target.td:1.104 Mon Jun 18 20:25:30 2007 +++ llvm/lib/Target/Target.td Mon Jun 18 20:48:04 2007 @@ -186,7 +186,6 @@ bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? - bit isReMaterializable = 0; // Is this instruction re-materializable? bit isPredicable = 0; // Is this instruction predicable? bit hasDelaySlot = 0; // Does this instruction have an delay slot? bit usesCustomDAGSchedInserter = 0; // Pseudo instr needing special help. 
From djg at cray.com Mon Jun 18 20:48:41 2007 From: djg at cray.com (Dan Gohman) Date: Mon, 18 Jun 2007 20:48:41 -0500 Subject: [llvm-commits] CVS: llvm/utils/TableGen/CodeGenInstruction.h CodeGenTarget.cpp InstrInfoEmitter.cpp Message-ID: <200706190148.l5J1mfEp004893@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: CodeGenInstruction.h updated: 1.29 -> 1.30 CodeGenTarget.cpp updated: 1.93 -> 1.94 InstrInfoEmitter.cpp updated: 1.61 -> 1.62 --- Log message: Replace M_REMATERIALIZIBLE and the newly-added isOtherReMaterializableLoad with a general target hook to identify rematerializable instructions. Some instructions are only rematerializable with specific operands, such as loads from constant pools, while others are always rematerializable. This hook allows both to be identified as being rematerializable with the same mechanism. --- Diffs of the changes: (+0 -3) CodeGenInstruction.h | 1 - CodeGenTarget.cpp | 1 - InstrInfoEmitter.cpp | 1 - 3 files changed, 3 deletions(-) Index: llvm/utils/TableGen/CodeGenInstruction.h diff -u llvm/utils/TableGen/CodeGenInstruction.h:1.29 llvm/utils/TableGen/CodeGenInstruction.h:1.30 --- llvm/utils/TableGen/CodeGenInstruction.h:1.29 Mon Jun 18 20:26:51 2007 +++ llvm/utils/TableGen/CodeGenInstruction.h Mon Jun 18 20:48:05 2007 @@ -91,7 +91,6 @@ bool isConvertibleToThreeAddress; bool isCommutable; bool isTerminator; - bool isReMaterializable; bool hasDelaySlot; bool usesCustomDAGSchedInserter; bool hasVariableNumberOfOperands; Index: llvm/utils/TableGen/CodeGenTarget.cpp diff -u llvm/utils/TableGen/CodeGenTarget.cpp:1.93 llvm/utils/TableGen/CodeGenTarget.cpp:1.94 --- llvm/utils/TableGen/CodeGenTarget.cpp:1.93 Mon Jun 18 20:26:51 2007 +++ llvm/utils/TableGen/CodeGenTarget.cpp Mon Jun 18 20:48:05 2007 @@ -365,7 +365,6 @@ isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress"); isCommutable = R->getValueAsBit("isCommutable"); isTerminator = R->getValueAsBit("isTerminator"); - isReMaterializable = 
R->getValueAsBit("isReMaterializable"); hasDelaySlot = R->getValueAsBit("hasDelaySlot"); usesCustomDAGSchedInserter = R->getValueAsBit("usesCustomDAGSchedInserter"); hasCtrlDep = R->getValueAsBit("hasCtrlDep"); Index: llvm/utils/TableGen/InstrInfoEmitter.cpp diff -u llvm/utils/TableGen/InstrInfoEmitter.cpp:1.61 llvm/utils/TableGen/InstrInfoEmitter.cpp:1.62 --- llvm/utils/TableGen/InstrInfoEmitter.cpp:1.61 Mon Jun 18 20:26:51 2007 +++ llvm/utils/TableGen/InstrInfoEmitter.cpp Mon Jun 18 20:48:05 2007 @@ -240,7 +240,6 @@ if (Inst.isConvertibleToThreeAddress) OS << "|M_CONVERTIBLE_TO_3_ADDR"; if (Inst.isCommutable) OS << "|M_COMMUTABLE"; if (Inst.isTerminator) OS << "|M_TERMINATOR_FLAG"; - if (Inst.isReMaterializable) OS << "|M_REMATERIALIZIBLE"; if (Inst.clobbersPred) OS << "|M_CLOBBERS_PRED"; if (Inst.isNotDuplicable) OS << "|M_NOT_DUPLICABLE"; if (Inst.usesCustomDAGSchedInserter) From resistor at mac.com Mon Jun 18 22:32:03 2007 From: resistor at mac.com (Owen Anderson) Date: Mon, 18 Jun 2007 22:32:03 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706190332.l5J3W3ud006963@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.37 -> 1.38 --- Log message: Refactor GVNPRE to use a much smarter method of uniquing value sets, and centralize a lot of the value numbering information. No functionality change. 
--- Diffs of the changes: (+451 -273) GVNPRE.cpp | 724 +++++++++++++++++++++++++++++++++++++------------------------ 1 files changed, 451 insertions(+), 273 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.37 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.38 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.37 Sun Jun 17 23:42:29 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Mon Jun 18 22:31:41 2007 @@ -37,54 +37,268 @@ #include using namespace llvm; -struct ExprLT { - bool operator()(Value* left, Value* right) { - if (BinaryOperator* leftBO = dyn_cast(left)) { - if (BinaryOperator* rightBO = dyn_cast(right)) - return cmpBinaryOperator(leftBO, rightBO); - else - if (isa(right)) { - return false; - } else { - return true; +//===----------------------------------------------------------------------===// +// ValueTable Class +//===----------------------------------------------------------------------===// + +/// This class holds the mapping between values and value numbers. 
+ +namespace { + class VISIBILITY_HIDDEN ValueTable { + public: + struct Expression { + enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FCMPULT, FCMPULE, FCMPUNE }; + + ExpressionOpcode opcode; + uint32_t leftVN; + uint32_t rightVN; + + bool operator< (const Expression& other) const { + if (opcode < other.opcode) + return true; + else if (opcode > other.opcode) + return false; + else if (leftVN < other.leftVN) + return true; + else if (leftVN > other.leftVN) + return false; + else if (rightVN < other.rightVN) + return true; + else if (rightVN > other.rightVN) + return false; + else + return false; } - } else if (CmpInst* leftCmp = dyn_cast(left)) { - if (CmpInst* rightCmp = dyn_cast(right)) - return cmpComparison(leftCmp, rightCmp); - else - return true; - } else { - if (isa(right) || isa(right)) - return false; - else - return left < right; + }; + + private: + std::map valueNumbering; + std::map expressionNumbering; + + std::set maximalExpressions; + std::set maximalValues; + + uint32_t nextValueNumber; + + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); + Expression::ExpressionOpcode getOpcode(CmpInst* C); + public: + ValueTable() { nextValueNumber = 1; } + uint32_t lookup_or_add(Value* V); + uint32_t lookup(Value* V); + void add(Value* V, uint32_t num); + void clear(); + std::set& getMaximalExpressions() { + return maximalExpressions; + + } + std::set& getMaximalValues() { return maximalValues; } + Expression create_expression(BinaryOperator* BO); + Expression create_expression(CmpInst* C); + }; +} + +ValueTable::Expression::ExpressionOpcode + ValueTable::getOpcode(BinaryOperator* BO) { + switch(BO->getOpcode()) { + case Instruction::Add: + return Expression::ADD; + case Instruction::Sub: + return 
Expression::SUB; + case Instruction::Mul: + return Expression::MUL; + case Instruction::UDiv: + return Expression::UDIV; + case Instruction::SDiv: + return Expression::SDIV; + case Instruction::FDiv: + return Expression::FDIV; + case Instruction::URem: + return Expression::UREM; + case Instruction::SRem: + return Expression::SREM; + case Instruction::FRem: + return Expression::FREM; + case Instruction::Shl: + return Expression::SHL; + case Instruction::LShr: + return Expression::LSHR; + case Instruction::AShr: + return Expression::ASHR; + case Instruction::And: + return Expression::AND; + case Instruction::Or: + return Expression::OR; + case Instruction::Xor: + return Expression::XOR; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Binary operator with unknown opcode?"); + return Expression::ADD; + } +} + +ValueTable::Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { + if (C->getOpcode() == Instruction::ICmp) { + switch (C->getPredicate()) { + case ICmpInst::ICMP_EQ: + return Expression::ICMPEQ; + case ICmpInst::ICMP_NE: + return Expression::ICMPNE; + case ICmpInst::ICMP_UGT: + return Expression::ICMPUGT; + case ICmpInst::ICMP_UGE: + return Expression::ICMPUGE; + case ICmpInst::ICMP_ULT: + return Expression::ICMPULT; + case ICmpInst::ICMP_ULE: + return Expression::ICMPULE; + case ICmpInst::ICMP_SGT: + return Expression::ICMPSGT; + case ICmpInst::ICMP_SGE: + return Expression::ICMPSGE; + case ICmpInst::ICMP_SLT: + return Expression::ICMPSLT; + case ICmpInst::ICMP_SLE: + return Expression::ICMPSLE; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Comparison with unknown predicate?"); + return Expression::ICMPEQ; + } + } else { + switch (C->getPredicate()) { + case FCmpInst::FCMP_OEQ: + return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: + return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: + return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: + return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: + return 
Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: + return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: + return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: + return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: + return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: + return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: + return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: + return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: + return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: + return Expression::FCMPUNE; + + // THIS SHOULD NEVER HAPPEN + default: + assert(0 && "Comparison with unknown predicate?"); + return Expression::FCMPOEQ; } } +} + +uint32_t ValueTable::lookup_or_add(Value* V) { + maximalValues.insert(V); + + std::map::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; - bool cmpBinaryOperator(BinaryOperator* left, BinaryOperator* right) { - if (left->getOpcode() != right->getOpcode()) - return left->getOpcode() < right->getOpcode(); - else if ((*this)(left->getOperand(0), right->getOperand(0))) - return true; - else if ((*this)(right->getOperand(0), left->getOperand(0))) - return false; - else - return (*this)(left->getOperand(1), right->getOperand(1)); - } - bool cmpComparison(CmpInst* left, CmpInst* right) { - if (left->getOpcode() != right->getOpcode()) - return left->getOpcode() < right->getOpcode(); - else if (left->getPredicate() != right->getPredicate()) - return left->getPredicate() < right->getPredicate(); - else if ((*this)(left->getOperand(0), right->getOperand(0))) - return true; - else if ((*this)(right->getOperand(0), left->getOperand(0))) - return false; - else - return (*this)(left->getOperand(1), right->getOperand(1)); + if (BinaryOperator* BO = dyn_cast(V)) { + Expression e = create_expression(BO); + + std::map::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } 
else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else if (CmpInst* C = dyn_cast(V)) { + Expression e = create_expression(C); + + std::map::iterator EI = expressionNumbering.find(e); + if (EI != expressionNumbering.end()) { + valueNumbering.insert(std::make_pair(V, EI->second)); + return EI->second; + } else { + expressionNumbering.insert(std::make_pair(e, nextValueNumber)); + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + + return nextValueNumber++; + } + } else { + valueNumbering.insert(std::make_pair(V, nextValueNumber)); + return nextValueNumber++; } -}; +} + +uint32_t ValueTable::lookup(Value* V) { + std::map::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; + else + assert(0 && "Value not numbered?"); + + return 0; +} + +void ValueTable::add(Value* V, uint32_t num) { + std::map::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + valueNumbering.erase(VI); + valueNumbering.insert(std::make_pair(V, num)); +} + +ValueTable::Expression ValueTable::create_expression(BinaryOperator* BO) { + Expression e; + + e.leftVN = lookup_or_add(BO->getOperand(0)); + e.rightVN = lookup_or_add(BO->getOperand(1)); + e.opcode = getOpcode(BO); + + maximalExpressions.insert(e); + + return e; +} + +ValueTable::Expression ValueTable::create_expression(CmpInst* C) { + Expression e; + + e.leftVN = lookup_or_add(C->getOperand(0)); + e.rightVN = lookup_or_add(C->getOperand(1)); + e.opcode = getOpcode(C); + + maximalExpressions.insert(e); + + return e; +} + +void ValueTable::clear() { + valueNumbering.clear(); + expressionNumbering.clear(); + nextValueNumber = 1; +} namespace { @@ -96,13 +310,11 @@ private: uint32_t nextValueNumber; - typedef std::map ValueTable; ValueTable VN; - std::set MS; std::vector createdExpressions; - std::map > availableOut; - std::map > anticipatedIn; + std::map > 
availableOut; + std::map > anticipatedIn; std::map invokeDep; virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -114,29 +326,24 @@ // Helper fuctions // FIXME: eliminate or document these better void dump(const std::set& s) const; - void dump_unique(const std::set& s) const; - void clean(std::set& set); - bool add(Value* V, uint32_t number); - Value* find_leader(std::set& vals, - Value* v); + void dump_unique(const std::set& s) const; + void clean(std::set& set); + Value* find_leader(std::set& vals, + uint32_t v); Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ); - void phi_translate_set(std::set& anticIn, BasicBlock* pred, - BasicBlock* succ, std::set& out); + void phi_translate_set(std::set& anticIn, BasicBlock* pred, + BasicBlock* succ, std::set& out); - void topo_sort(std::set& set, + void topo_sort(std::set& set, std::vector& vec); // For a given block, calculate the generated expressions, temporaries, // and the AVAIL_OUT set - void CalculateAvailOut(DomTreeNode* DI, - std::set& currExps, - std::set& currPhis, - std::set& currTemps, - std::set& currAvail, - std::map > availOut); void cleanup(); void elimination(); + void val_insert(std::set& s, Value* v); + void val_replace(std::set& s, Value* v); bool dependsOnInvoke(Value* V); }; @@ -155,27 +362,30 @@ STATISTIC(NumInsertedPhis, "Number of PHI nodes inserted"); STATISTIC(NumEliminated, "Number of redundant instructions eliminated"); +Value* GVNPRE::find_leader(std::set& vals, uint32_t v) { + for (std::set::iterator I = vals.begin(), E = vals.end(); + I != E; ++I) + if (v == VN.lookup(*I)) + return *I; + + return 0; +} -bool GVNPRE::add(Value* V, uint32_t number) { - std::pair ret = VN.insert(std::make_pair(V, number)); - if (isa(V) || isa(V) || isa(V)) - MS.insert(V); - return ret.second; +void GVNPRE::val_insert(std::set& s, Value* v) { + uint32_t num = VN.lookup(v); + Value* leader = find_leader(s, num); + if (leader == 0) + s.insert(v); } -Value* GVNPRE::find_leader(std::set& 
vals, Value* v) { - if (!isa(v)) - return v; - - for (std::set::iterator I = vals.begin(), E = vals.end(); - I != E; ++I) { - assert(VN.find(v) != VN.end() && "Value not numbered?"); - assert(VN.find(*I) != VN.end() && "Value not numbered?"); - if (VN[v] == VN[*I]) - return *I; +void GVNPRE::val_replace(std::set& s, Value* v) { + uint32_t num = VN.lookup(v); + Value* leader = find_leader(s, num); + while (leader != 0) { + s.erase(leader); + leader = find_leader(s, num); } - - return 0; + s.insert(v); } Value* GVNPRE::phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) { @@ -183,19 +393,25 @@ return 0; if (BinaryOperator* BO = dyn_cast(V)) { - Value* newOp1 = isa(BO->getOperand(0)) - ? phi_translate( - find_leader(anticipatedIn[succ], BO->getOperand(0)), - pred, succ) - : BO->getOperand(0); + Value* newOp1 = 0; + if (isa(BO->getOperand(0))) + newOp1 = phi_translate(find_leader(anticipatedIn[succ], + VN.lookup(BO->getOperand(0))), + pred, succ); + else + newOp1 = BO->getOperand(0); + if (newOp1 == 0) return 0; - Value* newOp2 = isa(BO->getOperand(1)) - ? 
phi_translate( - find_leader(anticipatedIn[succ], BO->getOperand(1)), - pred, succ) - : BO->getOperand(1); + Value* newOp2 = 0; + if (isa(BO->getOperand(1))) + newOp2 = phi_translate(find_leader(anticipatedIn[succ], + VN.lookup(BO->getOperand(1))), + pred, succ); + else + newOp2 = BO->getOperand(1); + if (newOp2 == 0) return 0; @@ -204,23 +420,13 @@ newOp1, newOp2, BO->getName()+".gvnpre"); - if (add(newVal, nextValueNumber)) - nextValueNumber++; + uint32_t v = VN.lookup_or_add(newVal); - Value* leader = find_leader(availableOut[pred], newVal); + Value* leader = find_leader(availableOut[pred], v); if (leader == 0) { - DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; createdExpressions.push_back(newVal); return newVal; } else { - ValueTable::iterator I = VN.find(newVal); - if (I->first == newVal) - VN.erase(newVal); - - std::set::iterator F = MS.find(newVal); - if (*F == newVal) - MS.erase(newVal); - delete newVal; return leader; } @@ -229,19 +435,25 @@ if (P->getParent() == succ) return P->getIncomingValueForBlock(pred); } else if (CmpInst* C = dyn_cast(V)) { - Value* newOp1 = isa(C->getOperand(0)) - ? phi_translate( - find_leader(anticipatedIn[succ], C->getOperand(0)), - pred, succ) - : C->getOperand(0); + Value* newOp1 = 0; + if (isa(C->getOperand(0))) + newOp1 = phi_translate(find_leader(anticipatedIn[succ], + VN.lookup(C->getOperand(0))), + pred, succ); + else + newOp1 = C->getOperand(0); + if (newOp1 == 0) return 0; - Value* newOp2 = isa(C->getOperand(1)) - ? 
phi_translate( - find_leader(anticipatedIn[succ], C->getOperand(1)), - pred, succ) - : C->getOperand(1); + Value* newOp2 = 0; + if (isa(C->getOperand(1))) + newOp2 = phi_translate(find_leader(anticipatedIn[succ], + VN.lookup(C->getOperand(1))), + pred, succ); + else + newOp2 = C->getOperand(1); + if (newOp2 == 0) return 0; @@ -251,23 +463,13 @@ newOp1, newOp2, C->getName()+".gvnpre"); - if (add(newVal, nextValueNumber)) - nextValueNumber++; + uint32_t v = VN.lookup_or_add(newVal); - Value* leader = find_leader(availableOut[pred], newVal); + Value* leader = find_leader(availableOut[pred], v); if (leader == 0) { - DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; createdExpressions.push_back(newVal); return newVal; } else { - ValueTable::iterator I = VN.find(newVal); - if (I->first == newVal) - VN.erase(newVal); - - std::set::iterator F = MS.find(newVal); - if (*F == newVal) - MS.erase(newVal); - delete newVal; return leader; } @@ -277,10 +479,10 @@ return V; } -void GVNPRE::phi_translate_set(std::set& anticIn, +void GVNPRE::phi_translate_set(std::set& anticIn, BasicBlock* pred, BasicBlock* succ, - std::set& out) { - for (std::set::iterator I = anticIn.begin(), + std::set& out) { + for (std::set::iterator I = anticIn.begin(), E = anticIn.end(); I != E; ++I) { Value* V = phi_translate(*I, pred, succ); if (V != 0) @@ -326,7 +528,7 @@ } // Remove all expressions whose operands are not themselves in the set -void GVNPRE::clean(std::set& set) { +void GVNPRE::clean(std::set& set) { std::vector worklist; topo_sort(set, worklist); @@ -336,9 +538,9 @@ if (BinaryOperator* BO = dyn_cast(v)) { bool lhsValid = !isa(BO->getOperand(0)); if (!lhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) - if (VN[*I] == VN[BO->getOperand(0)]) { + if (VN.lookup(*I) == VN.lookup(BO->getOperand(0))) { lhsValid = true; break; } @@ -351,9 +553,9 @@ bool rhsValid = !isa(BO->getOperand(1)); if 
(!rhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) - if (VN[*I] == VN[BO->getOperand(1)]) { + if (VN.lookup(*I) == VN.lookup(BO->getOperand(1))) { rhsValid = true; break; } @@ -369,9 +571,9 @@ } else if (CmpInst* C = dyn_cast(v)) { bool lhsValid = !isa(C->getOperand(0)); if (!lhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) - if (VN[*I] == VN[C->getOperand(0)]) { + if (VN.lookup(*I) == VN.lookup(C->getOperand(0))) { lhsValid = true; break; } @@ -379,9 +581,9 @@ bool rhsValid = !isa(C->getOperand(1)); if (!rhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) - if (VN[*I] == VN[C->getOperand(1)]) { + if (VN.lookup(*I) == VN.lookup(C->getOperand(1))) { rhsValid = true; break; } @@ -393,29 +595,29 @@ } } -void GVNPRE::topo_sort(std::set& set, +void GVNPRE::topo_sort(std::set& set, std::vector& vec) { - std::set toErase; - for (std::set::iterator I = set.begin(), E = set.end(); + std::set toErase; + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) { if (BinaryOperator* BO = dyn_cast(*I)) - for (std::set::iterator SI = set.begin(); SI != E; ++SI) { - if (VN[BO->getOperand(0)] == VN[*SI] || - VN[BO->getOperand(1)] == VN[*SI]) { + for (std::set::iterator SI = set.begin(); SI != E; ++SI) { + if (VN.lookup(BO->getOperand(0)) == VN.lookup(*SI) || + VN.lookup(BO->getOperand(1)) == VN.lookup(*SI)) { toErase.insert(*SI); } } else if (CmpInst* C = dyn_cast(*I)) - for (std::set::iterator SI = set.begin(); SI != E; ++SI) { - if (VN[C->getOperand(0)] == VN[*SI] || - VN[C->getOperand(1)] == VN[*SI]) { + for (std::set::iterator SI = set.begin(); SI != E; ++SI) { + if (VN.lookup(C->getOperand(0)) == VN.lookup(*SI) || + VN.lookup(C->getOperand(1)) == VN.lookup(*SI)) { toErase.insert(*SI); } } } std::vector 
Q; - for (std::set::iterator I = set.begin(), E = set.end(); + for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) { if (toErase.find(*I) == toErase.end()) Q.push_back(*I); @@ -426,8 +628,8 @@ Value* e = Q.back(); if (BinaryOperator* BO = dyn_cast(e)) { - Value* l = find_leader(set, BO->getOperand(0)); - Value* r = find_leader(set, BO->getOperand(1)); + Value* l = find_leader(set, VN.lookup(BO->getOperand(0))); + Value* r = find_leader(set, VN.lookup(BO->getOperand(1))); if (l != 0 && isa(l) && visited.find(l) == visited.end()) @@ -441,8 +643,8 @@ Q.pop_back(); } } else if (CmpInst* C = dyn_cast(e)) { - Value* l = find_leader(set, C->getOperand(0)); - Value* r = find_leader(set, C->getOperand(1)); + Value* l = find_leader(set, VN.lookup(C->getOperand(0))); + Value* r = find_leader(set, VN.lookup(C->getOperand(1))); if (l != 0 && isa(l) && visited.find(l) == visited.end()) @@ -473,7 +675,7 @@ DOUT << "}\n\n"; } -void GVNPRE::dump_unique(const std::set& s) const { +void GVNPRE::dump_unique(const std::set& s) const { DOUT << "{ "; for (std::set::iterator I = s.begin(), E = s.end(); I != E; ++I) { @@ -482,70 +684,6 @@ DOUT << "}\n\n"; } -void GVNPRE::CalculateAvailOut(DomTreeNode* DI, - std::set& currExps, - std::set& currPhis, - std::set& currTemps, - std::set& currAvail, - std::map > availOut) { - - BasicBlock* BB = DI->getBlock(); - - // A block inherits AVAIL_OUT from its dominator - if (DI->getIDom() != 0) - currAvail.insert(availOut[DI->getIDom()->getBlock()].begin(), - availOut[DI->getIDom()->getBlock()].end()); - - - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - - // Handle PHI nodes... - if (PHINode* p = dyn_cast(BI)) { - if (add(p, nextValueNumber)) - nextValueNumber++; - currPhis.insert(p); - - // Handle binary ops... 
- } else if (BinaryOperator* BO = dyn_cast(BI)) { - Value* leftValue = BO->getOperand(0); - Value* rightValue = BO->getOperand(1); - - if (add(BO, nextValueNumber)) - nextValueNumber++; - - if (isa(leftValue)) - currExps.insert(leftValue); - if (isa(rightValue)) - currExps.insert(rightValue); - currExps.insert(BO); - - // Handle cmp ops... - } else if (CmpInst* C = dyn_cast(BI)) { - Value* leftValue = C->getOperand(0); - Value* rightValue = C->getOperand(1); - - if (add(C, nextValueNumber)) - nextValueNumber++; - - if (isa(leftValue)) - currExps.insert(leftValue); - if (isa(rightValue)) - currExps.insert(rightValue); - currExps.insert(C); - - // Handle unsupported ops - } else if (!BI->isTerminator()){ - if (add(BI, nextValueNumber)) - nextValueNumber++; - currTemps.insert(BI); - } - - if (!BI->isTerminator()) - currAvail.insert(BI); - } -} - void GVNPRE::elimination() { DOUT << "\n\nPhase 3: Elimination\n\n"; @@ -566,7 +704,7 @@ BI != BE; ++BI) { if (isa(BI) || isa(BI)) { - Value *leader = find_leader(availableOut[BB], BI); + Value *leader = find_leader(availableOut[BB], VN.lookup(BI)); if (leader != 0) if (Instruction* Instr = dyn_cast(leader)) @@ -602,13 +740,12 @@ bool GVNPRE::runOnFunction(Function &F) { VN.clear(); - MS.clear(); createdExpressions.clear(); availableOut.clear(); anticipatedIn.clear(); invokeDep.clear(); - std::map > generatedExpressions; + std::map > generatedExpressions; std::map > generatedPhis; std::map > generatedTemporaries; @@ -624,17 +761,66 @@ E = df_end(DT.getRootNode()); DI != E; ++DI) { // Get the sets to update for this block - std::set& currExps = generatedExpressions[DI->getBlock()]; + std::set& currExps = generatedExpressions[DI->getBlock()]; std::set& currPhis = generatedPhis[DI->getBlock()]; std::set& currTemps = generatedTemporaries[DI->getBlock()]; - std::set& currAvail = availableOut[DI->getBlock()]; + std::set& currAvail = availableOut[DI->getBlock()]; + + BasicBlock* BB = DI->getBlock(); + + // A block inherits AVAIL_OUT 
from its dominator + if (DI->getIDom() != 0) + currAvail.insert(availableOut[DI->getIDom()->getBlock()].begin(), + availableOut[DI->getIDom()->getBlock()].end()); + - CalculateAvailOut(*DI, currExps, currPhis, - currTemps, currAvail, availableOut); + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + + // Handle PHI nodes... + if (PHINode* p = dyn_cast(BI)) { + VN.lookup_or_add(p); + currPhis.insert(p); + + // Handle binary ops... + } else if (BinaryOperator* BO = dyn_cast(BI)) { + Value* leftValue = BO->getOperand(0); + Value* rightValue = BO->getOperand(1); + + VN.lookup_or_add(BO); + + if (isa(leftValue)) + val_insert(currExps, leftValue); + if (isa(rightValue)) + val_insert(currExps, rightValue); + val_insert(currExps, BO); + + // Handle cmp ops... + } else if (CmpInst* C = dyn_cast(BI)) { + Value* leftValue = C->getOperand(0); + Value* rightValue = C->getOperand(1); + + VN.lookup_or_add(C); + + if (isa(leftValue)) + val_insert(currExps, leftValue); + if (isa(rightValue)) + val_insert(currExps, rightValue); + val_insert(currExps, C); + + // Handle unsupported ops + } else if (!BI->isTerminator()){ + VN.lookup_or_add(BI); + currTemps.insert(BI); + } + + if (!BI->isTerminator()) + val_insert(currAvail, BI); + } } DOUT << "Maximal Set: "; - dump_unique(MS); + dump_unique(VN.getMaximalValues()); DOUT << "\n"; // If function has no exit blocks, only perform GVN @@ -655,7 +841,7 @@ unsigned iterations = 0; while (changed) { changed = false; - std::set anticOut; + std::set anticOut; // Top-down walk of the postdominator tree for (df_iterator PDI = @@ -674,32 +860,33 @@ dump_unique(generatedExpressions[BB]); visited.insert(BB); - std::set& anticIn = anticipatedIn[BB]; - std::set old (anticIn.begin(), anticIn.end()); + std::set& anticIn = anticipatedIn[BB]; + std::set old (anticIn.begin(), anticIn.end()); if (BB->getTerminator()->getNumSuccessors() == 1) { if (visited.find(BB->getTerminator()->getSuccessor(0)) == visited.end()) - 
phi_translate_set(MS, BB, BB->getTerminator()->getSuccessor(0), + phi_translate_set(VN.getMaximalValues(), BB, + BB->getTerminator()->getSuccessor(0), anticOut); else - phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], - BB, BB->getTerminator()->getSuccessor(0), - anticOut); + phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], + BB, BB->getTerminator()->getSuccessor(0), + anticOut); } else if (BB->getTerminator()->getNumSuccessors() > 1) { BasicBlock* first = BB->getTerminator()->getSuccessor(0); anticOut.insert(anticipatedIn[first].begin(), anticipatedIn[first].end()); for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) { BasicBlock* currSucc = BB->getTerminator()->getSuccessor(i); - std::set& succAnticIn = anticipatedIn[currSucc]; + std::set& succAnticIn = anticipatedIn[currSucc]; - std::set temp; - std::insert_iterator > temp_ins(temp, - temp.begin()); + std::set temp; + std::insert_iterator > temp_ins(temp, + temp.begin()); std::set_intersection(anticOut.begin(), anticOut.end(), succAnticIn.begin(), succAnticIn.end(), - temp_ins, ExprLT()); + temp_ins); anticOut.clear(); anticOut.insert(temp.begin(), temp.end()); @@ -710,19 +897,25 @@ dump_unique(anticOut); DOUT << "\n"; - std::set S; - std::insert_iterator > s_ins(S, S.begin()); - std::set_union(anticOut.begin(), anticOut.end(), - generatedExpressions[BB].begin(), - generatedExpressions[BB].end(), - s_ins, ExprLT()); + std::set S; + std::insert_iterator > s_ins(S, S.begin()); + std::set_difference(anticOut.begin(), anticOut.end(), + generatedTemporaries[BB].begin(), + generatedTemporaries[BB].end(), + s_ins); anticIn.clear(); + std::insert_iterator > ai_ins(anticIn, anticIn.begin()); + std::set_difference(generatedExpressions[BB].begin(), + generatedExpressions[BB].end(), + generatedTemporaries[BB].begin(), + generatedTemporaries[BB].end(), + ai_ins); - for (std::set::iterator I = S.begin(), E = S.end(); + for (std::set::iterator I = S.begin(), E = 
S.end(); I != E; ++I) { - if (generatedTemporaries[BB].find(*I) == generatedTemporaries[BB].end()) - anticIn.insert(*I); + if (find_leader(anticIn, VN.lookup(*I)) == 0) + val_insert(anticIn, *I); } clean(anticIn); @@ -765,7 +958,7 @@ // Phase 2: Insert DOUT<< "\nPhase 2: Insertion\n"; - std::map > new_sets; + std::map > new_sets; unsigned i_iterations = 0; bool new_stuff = true; while (new_stuff) { @@ -778,25 +971,19 @@ if (BB == 0) continue; - std::set& new_set = new_sets[BB]; - std::set& availOut = availableOut[BB]; - std::set& anticIn = anticipatedIn[BB]; + std::set& new_set = new_sets[BB]; + std::set& availOut = availableOut[BB]; + std::set& anticIn = anticipatedIn[BB]; new_set.clear(); // Replace leaders with leaders inherited from dominator if (DI->getIDom() != 0) { - std::set& dom_set = new_sets[DI->getIDom()->getBlock()]; - for (std::set::iterator I = dom_set.begin(), + std::set& dom_set = new_sets[DI->getIDom()->getBlock()]; + for (std::set::iterator I = dom_set.begin(), E = dom_set.end(); I != E; ++I) { new_set.insert(*I); - - Value* val = find_leader(availOut, *I); - while (val != 0) { - availOut.erase(val); - val = find_leader(availOut, *I); - } - availOut.insert(*I); + val_replace(availOut, *I); } } @@ -814,7 +1001,7 @@ Value* e = workList[i]; if (isa(e) || isa(e)) { - if (find_leader(availableOut[DI->getIDom()->getBlock()], e) != 0) + if (find_leader(availableOut[DI->getIDom()->getBlock()], VN.lookup(e)) != 0) continue; std::map avail; @@ -824,7 +1011,7 @@ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { Value *e2 = phi_translate(e, *PI, BB); - Value *e3 = find_leader(availableOut[*PI], e2); + Value *e3 = find_leader(availableOut[*PI], VN.lookup(e2)); if (e3 == 0) { std::map::iterator av = avail.find(*PI); @@ -851,20 +1038,20 @@ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { Value* e2 = avail[*PI]; - if (!find_leader(availableOut[*PI], e2)) { + if (!find_leader(availableOut[*PI], 
VN.lookup(e2))) { User* U = cast(e2); Value* s1 = 0; if (isa(U->getOperand(0)) || isa(U->getOperand(0))) - s1 = find_leader(availableOut[*PI], U->getOperand(0)); + s1 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(0))); else s1 = U->getOperand(0); Value* s2 = 0; if (isa(U->getOperand(1)) || isa(U->getOperand(1))) - s2 = find_leader(availableOut[*PI], U->getOperand(1)); + s2 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(1))); else s2 = U->getOperand(1); @@ -881,15 +1068,10 @@ C->getName()+".gvnpre", (*PI)->getTerminator()); - add(newVal, VN[U]); + VN.add(newVal, VN.lookup(U)); - std::set& predAvail = availableOut[*PI]; - Value* val = find_leader(predAvail, newVal); - while (val != 0) { - predAvail.erase(val); - val = find_leader(predAvail, newVal); - } - predAvail.insert(newVal); + std::set& predAvail = availableOut[*PI]; + val_replace(predAvail, newVal); DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; @@ -913,14 +1095,10 @@ p->addIncoming(avail[*PI], *PI); } - add(p, VN[e]); + VN.add(p, VN.lookup(e)); DOUT << "Creating value: " << std::hex << p << std::dec << "\n"; - Value* val = find_leader(availOut, p); - while (val != 0) { - availOut.erase(val); - val = find_leader(availOut, p); - } + val_replace(availOut, p); availOut.insert(p); new_stuff = true; From resistor at mac.com Mon Jun 18 23:33:18 2007 From: resistor at mac.com (Owen Anderson) Date: Mon, 18 Jun 2007 23:33:18 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706190433.l5J4XI4B008001@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.38 -> 1.39 --- Log message: Remember to clear the maximal sets between functions. Thanks to Nicholas for valgrinding this. 
--- Diffs of the changes: (+3 -2) GVNPRE.cpp | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.38 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.39 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.38 Mon Jun 18 22:31:41 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Mon Jun 18 23:32:55 2007 @@ -297,6 +297,8 @@ void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); + maximalExpressions.clear(); + maximalValues.clear(); nextValueNumber = 1; } @@ -306,10 +308,9 @@ bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - GVNPRE() : FunctionPass((intptr_t)&ID) { nextValueNumber = 1; } + GVNPRE() : FunctionPass((intptr_t)&ID) { } private: - uint32_t nextValueNumber; ValueTable VN; std::vector createdExpressions; From clattner at apple.com Tue Jun 19 00:10:42 2007 From: clattner at apple.com (clattner at apple.com) Date: Mon, 18 Jun 2007 22:10:42 -0700 (PDT) Subject: [llvm-commits] [128592] Fix bugs handling the ldmxcsr and stmxcsr builtins Message-ID: <20070619051042.357CF964A49A@src> Revision: 128592 Author: clattner Date: 2007-06-18 22:10:41 -0700 (Mon, 18 Jun 2007) Log Message: ----------- Fix bugs handling the ldmxcsr and stmxcsr builtins Modified Paths: -------------- apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp Modified: apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp =================================================================== --- apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp 2007-06-19 02:22:47 UTC (rev 128591) +++ apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp 2007-06-19 05:10:41 UTC (rev 128592) @@ -590,6 +590,7 @@ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_ldmxcsr); Value *Ptr = CreateTemporary(Type::Int32Ty); Builder.CreateStore(Ops[0], Ptr); + Ptr = Builder.CreateBitCast(Ptr, PointerType::get(Type::Int8Ty), "tmp"); Result = 
Builder.CreateCall(ldmxcsr, Ptr); return true; } @@ -597,6 +598,7 @@ Function *stmxcsr = Intrinsic::getDeclaration(TheModule, Intrinsic::x86_sse_stmxcsr); Value *Ptr = CreateTemporary(Type::Int32Ty); + Ptr = Builder.CreateBitCast(Ptr, PointerType::get(Type::Int8Ty), "tmp"); Builder.CreateCall(stmxcsr, Ptr); Result = Builder.CreateLoad(Ptr, "tmp"); return true; From clattner at apple.com Tue Jun 19 00:18:26 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 22:18:26 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMInstrInfo.cpp ARMInstrInfo.h ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: <200706190125.l5J1Prpx004387@zion.cs.uiuc.edu> References: <200706190125.l5J1Prpx004387@zion.cs.uiuc.edu> Message-ID: <752395EB-528C-43E2-99C5-37130575555C@apple.com> On Jun 18, 2007, at 6:25 PM, Evan Cheng wrote: > Log message: > > Replace TargetInstrInfo::CanBeDuplicated() with a M_NOT_DUPLICABLE > bit. > Thanks Evan! -Chris From clattner at apple.com Tue Jun 19 00:29:09 2007 From: clattner at apple.com (Chris Lattner) Date: Mon, 18 Jun 2007 22:29:09 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> Message-ID: > Replace M_REMATERIALIZIBLE and the newly-added > isOtherReMaterializableLoad > with a general target hook to identify rematerializable > instructions. Some > instructions are only rematerializable with specific operands, such > as loads ... > // Floating point constant loads. 
> -let isReMaterializable = 1 in { > def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, > [(set RFP:$dst, fp64imm0)]>; > def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, > [(set RFP:$dst, fp64imm1)]>; > -} > +bool X86InstrInfo::isTriviallyReMaterializable(MachineInstr *MI) > const { > switch (MI->getOpcode()) { > default: break; > + case X86::FpLD0: > + case X86::FpLD1: > + case X86::MOV8ri: > + case X86::MOV16ri: > + case X86::MOV32ri: > + case X86::MMX_V_SET0: > + case X86::MMX_V_SETALLONES: > + case X86::V_SET0: > + case X86::V_SETALLONES: > + // These instructions are always trivially rematerializable. > + return true; > case X86::MOV8rm: > case X86::MOV16rm: > case X86::MOV16_rm: > @@ -128,6 +139,7 @@ > case X86::MOVAPDrm: > case X86::MMX_MOVD64rm: > case X86::MMX_MOVQ64rm: > + // Loads from constant pools are trivially rematerializable. > return MI->getOperand(1).isRegister() && MI->getOperand > (2).isImmediate() && > MI->getOperand(3).isRegister() && MI->getOperand > (4).isConstantPoolIndex() && > MI->getOperand(1).getReg() == 0 && Hi Dan, I'm sorry to be such a pain, but this seems like a step backward. We've gone from having an explicit flag in the .td files to having another magic table in the .cpp file (somewhat amusing because 'duplicable' just made the opposite transition). How about this proposal (Obviously feel free to pick better names for these things): 1. Reintroduce the 'isremat-able' flag, set it to true for all the instructions that are *potentially* rematerializable. 2. Add a virtual target hook that can override the flag: "TII::isReallyRematerializable(MachineInstr*)". 3. Introduce a new non-virtual method: bool TII::isRematerializable(MachineInstr *MI) { return MI->flags->isrematable && isReallyRematerializable(MI); } This achieves three things: 1. Just looking at the .td file, you can tell which instructions are candidates for remat. 2. The isRematerializable predicate is faster for instructions that are not remat-able. 3.
The isReallyRematerializable only needs to be implemented by targets with instructions that are remat-able only in some cases (like the x86 instructions). To me, #1 is the killer feature. In general, I'd like to move away from having tables (either explicit, like the one in X86RegisterInfo::foldMemoryOperand, or just big switch stmts) to having properties on .td file entries. That makes it much more clear what is going on when inspecting the .td files. I'm sorry I didn't look at your patch when you asked for comments, but does this proposal sound sane? -Chris From resistor at mac.com Tue Jun 19 00:37:55 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 00:37:55 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706190537.l5J5btd9009398@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.39 -> 1.40 --- Log message: Be careful to erase values from all of the appropriate sets when they're not needed anymore. This fixes a few more memory-related issues. 
--- Diffs of the changes: (+14 -2) GVNPRE.cpp | 16 ++++++++++++++-- 1 files changed, 14 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.39 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.40 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.39 Mon Jun 18 23:32:55 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Tue Jun 19 00:37:32 2007 @@ -101,6 +101,7 @@ std::set& getMaximalValues() { return maximalValues; } Expression create_expression(BinaryOperator* BO); Expression create_expression(CmpInst* C); + void erase(Value* v); }; } @@ -302,6 +303,15 @@ nextValueNumber = 1; } +void ValueTable::erase(Value* V) { + maximalValues.erase(V); + valueNumbering.erase(V); + if (BinaryOperator* BO = dyn_cast(V)) + maximalExpressions.erase(create_expression(BO)); + else if (CmpInst* C = dyn_cast(V)) + maximalExpressions.erase(create_expression(C)); +} + namespace { class VISIBILITY_HIDDEN GVNPRE : public FunctionPass { @@ -419,7 +429,7 @@ if (newOp1 != BO->getOperand(0) || newOp2 != BO->getOperand(1)) { Instruction* newVal = BinaryOperator::create(BO->getOpcode(), newOp1, newOp2, - BO->getName()+".gvnpre"); + BO->getName()+".expr"); uint32_t v = VN.lookup_or_add(newVal); @@ -428,6 +438,7 @@ createdExpressions.push_back(newVal); return newVal; } else { + VN.erase(newVal); delete newVal; return leader; } @@ -462,7 +473,7 @@ Instruction* newVal = CmpInst::create(C->getOpcode(), C->getPredicate(), newOp1, newOp2, - C->getName()+".gvnpre"); + C->getName()+".expr"); uint32_t v = VN.lookup_or_add(newVal); @@ -471,6 +482,7 @@ createdExpressions.push_back(newVal); return newVal; } else { + VN.erase(newVal); delete newVal; return leader; } From resistor at mac.com Tue Jun 19 00:41:44 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 00:41:44 -0500 Subject: [llvm-commits] CVS: llvm/test/Transforms/GVNPRE/2007-06-18-EraseNumbering.ll Message-ID: <200706190541.l5J5fibn009482@zion.cs.uiuc.edu> Changes in directory 
llvm/test/Transforms/GVNPRE: 2007-06-18-EraseNumbering.ll added (r1.1) --- Log message: Add a new testcase for memory corruption issues. --- Diffs of the changes: (+26 -0) 2007-06-18-EraseNumbering.ll | 26 ++++++++++++++++++++++++++ 1 files changed, 26 insertions(+) Index: llvm/test/Transforms/GVNPRE/2007-06-18-EraseNumbering.ll diff -c /dev/null llvm/test/Transforms/GVNPRE/2007-06-18-EraseNumbering.ll:1.1 *** /dev/null Tue Jun 19 00:41:32 2007 --- llvm/test/Transforms/GVNPRE/2007-06-18-EraseNumbering.ll Tue Jun 19 00:41:22 2007 *************** *** 0 **** --- 1,26 ---- + ; RUN: llvm-as < %s | opt -gvnpre | llvm-dis + + define i32 @TreeCCStreamFlush(i8* %stream) { + entry: + br i1 false, label %bb55.preheader, label %cond_true + + cond_true: ; preds = %entry + ret i32 0 + + bb55.preheader: ; preds = %entry + %tmp57 = icmp eq i8* null, null ; [#uses=0] + br i1 false, label %cond_next106, label %bb124 + + cond_next106: ; preds = %bb55.preheader + %tmp109 = load i8** null + br i1 false, label %bb124, label %bb116 + + bb116: ; preds = %cond_next106 + ret i32 0 + + bb124: ; preds = %cond_next106, %bb55.preheader + %buffer.4 = phi i8* [ null, %bb55.preheader ], [ %tmp109, %cond_next106 ] + %tmp131 = icmp eq i8* %buffer.4, null + %bothcond = or i1 %tmp131, false + ret i32 0 + } From sabre at nondot.org Tue Jun 19 00:44:11 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 00:44:11 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200706190544.l5J5iB5Q009551@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.772 -> 1.773 --- Log message: silence a bogus warning Duraid ran into. 
--- Diffs of the changes: (+1 -1) InstructionCombining.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.772 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.773 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.772 Fri Jun 15 01:23:19 2007 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Tue Jun 19 00:43:49 2007 @@ -3808,7 +3808,7 @@ } // (A & C)|(B & D) - Value *C, *D; + Value *C = 0, *D = 0; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { Value *V1 = 0, *V2 = 0, *V3 = 0; From sabre at nondot.org Tue Jun 19 00:46:28 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 00:46:28 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Message-ID: <200706190546.l5J5kSo2009637@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelLowering.cpp updated: 1.273 -> 1.274 --- Log message: describe an argument, hide it. 
--- Diffs of the changes: (+3 -1) PPCISelLowering.cpp | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.273 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.274 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.273 Mon Jun 18 19:13:10 2007 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jun 19 00:46:06 2007 @@ -32,7 +32,9 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt EnablePPCPreinc("enable-ppc-preinc"); +static cl::opt EnablePPCPreinc("enable-ppc-preinc", +cl::desc("enable preincrement load/store generation on PPC (experimental)"), + cl::Hidden); PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) { From sabre at nondot.org Tue Jun 19 00:48:11 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 00:48:11 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/LLVMTargetMachine.cpp Message-ID: <200706190548.l5J5mBmF009703@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: LLVMTargetMachine.cpp updated: 1.14 -> 1.15 --- Log message: document and hide two options. 
--- Diffs of the changes: (+5 -2) LLVMTargetMachine.cpp | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) Index: llvm/lib/CodeGen/LLVMTargetMachine.cpp diff -u llvm/lib/CodeGen/LLVMTargetMachine.cpp:1.14 llvm/lib/CodeGen/LLVMTargetMachine.cpp:1.15 --- llvm/lib/CodeGen/LLVMTargetMachine.cpp:1.14 Tue May 22 13:31:04 2007 +++ llvm/lib/CodeGen/LLVMTargetMachine.cpp Tue Jun 19 00:47:49 2007 @@ -22,8 +22,11 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt PrintLSR("print-lsr-output"); -static cl::opt PrintISelInput("print-isel-input"); +static cl::opt PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); + FileModel::Model LLVMTargetMachine::addPassesToEmitFile(FunctionPassManager &PM, std::ostream &Out, From sabre at nondot.org Tue Jun 19 00:52:58 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 00:52:58 -0500 Subject: [llvm-commits] CVS: llvm/docs/GettingStarted.html Message-ID: <200706190552.l5J5qwCA009846@zion.cs.uiuc.edu> Changes in directory llvm/docs: GettingStarted.html updated: 1.162 -> 1.163 --- Log message: don't use binutils 2.17 --- Diffs of the changes: (+9 -3) GettingStarted.html | 12 +++++++++--- 1 files changed, 9 insertions(+), 3 deletions(-) Index: llvm/docs/GettingStarted.html diff -u llvm/docs/GettingStarted.html:1.162 llvm/docs/GettingStarted.html:1.163 --- llvm/docs/GettingStarted.html:1.162 Tue May 22 01:12:51 2007 +++ llvm/docs/GettingStarted.html Tue Jun 19 00:52:36 2007 @@ -19,7 +19,7 @@
  1. Hardware
  2. Software -
  3. Broken versions of GCC +
  4. Broken versions of GCC and other tools
  • Getting Started with LLVM @@ -491,7 +491,7 @@
    @@ -553,6 +553,12 @@ defined in a discarded section. You can safely ignore these messages as they are erroneous and the linkage is correct. These messages disappear using ld 2.17.

    + +

    GNU binutils 2.17: Binutils 2.17 contains a bug which +causes huge link times (minutes instead of seconds) when building LLVM. We +recommend upgrading to a newer version (2.17.50.0.4 or later).

    +
    @@ -1623,7 +1629,7 @@ Chris Lattner
    Reid Spencer
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2007/05/22 06:12:51 $ + Last modified: $Date: 2007/06/19 05:52:36 $ From resistor at mac.com Tue Jun 19 00:55:23 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 00:55:23 -0500 Subject: [llvm-commits] CVS: llvm/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll Message-ID: <200706190555.l5J5tNBS009906@zion.cs.uiuc.edu> Changes in directory llvm/test/Transforms/GVNPRE: 2007-06-18-ConstantInPhi.ll added (r1.1) --- Log message: Testcase for instances where a constant only occurs as an operand to a phi node. --- Diffs of the changes: (+22 -0) 2007-06-18-ConstantInPhi.ll | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+) Index: llvm/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll diff -c /dev/null llvm/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll:1.1 *** /dev/null Tue Jun 19 00:55:11 2007 --- llvm/test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll Tue Jun 19 00:55:01 2007 *************** *** 0 **** --- 1,22 ---- + ; RUN: llvm-as < %s | opt -gvnpre | llvm-dis + + define void @vorbis_lsp_to_curve() { + entry: + br i1 false, label %bb.nph, label %return + + bb.nph: ; preds = %entry + br label %bb34 + + bb34: ; preds = %bb34, %bb.nph + %p.1 = phi float [ 0x3FE6A09E60000000, %bb.nph ], [ %tmp48, %bb34 ] ; [#uses=1] + %tmp44 = load float* null ; [#uses=1] + %tmp46 = sub float %tmp44, 0.000000e+00 ; [#uses=1] + %tmp48 = mul float %tmp46, %p.1 ; [#uses=1] + br i1 false, label %bb57, label %bb34 + + bb57: ; preds = %bb34 + ret void + + return: ; preds = %entry + ret void + } From sabre at nondot.org Tue Jun 19 01:41:08 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 01:41:08 -0500 Subject: [llvm-commits] CVS: llvm/utils/TableGen/DAGISelEmitter.cpp Message-ID: <200706190641.l5J6f8ww010966@zion.cs.uiuc.edu> Changes in directory llvm/utils/TableGen: DAGISelEmitter.cpp updated: 1.297 -> 1.298 --- Log message: silence warning when assertions are disabled. 
--- Diffs of the changes: (+1 -2) DAGISelEmitter.cpp | 3 +-- 1 files changed, 1 insertion(+), 2 deletions(-) Index: llvm/utils/TableGen/DAGISelEmitter.cpp diff -u llvm/utils/TableGen/DAGISelEmitter.cpp:1.297 llvm/utils/TableGen/DAGISelEmitter.cpp:1.298 --- llvm/utils/TableGen/DAGISelEmitter.cpp:1.297 Mon May 14 20:36:44 2007 +++ llvm/utils/TableGen/DAGISelEmitter.cpp Tue Jun 19 01:40:46 2007 @@ -1502,8 +1502,7 @@ unsigned NumResults = InstResults.size(); // Parse the operands list from the (ops) list, validating it. - std::vector &Args = I->getArgList(); - assert(Args.empty() && "Args list should still be empty here!"); + assert(I->getArgList().empty() && "Args list should still be empty here!"); CodeGenInstruction &CGI = Target.getInstruction(Instrs[i]->getName()); // Check that all of the results occur first in the list. From evan.cheng at apple.com Tue Jun 19 01:59:44 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 18 Jun 2007 23:59:44 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> Message-ID: <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> On Jun 18, 2007, at 10:29 PM, Chris Lattner wrote: > > Hi Dan, > > I'm sorry to be such a pain, but this seems like a step backward. > We've gone from having an explicit flag in the .td files to having > another magic table in the .cpp file (somewhat amusing because > 'duplicable' just made the opposite transition). > > How about this proposal (Obviously feel free to pick better names for > these things): > > 1. Reintroduce the 'isremat-able' flag, set it to true for all the > instructions that are *potentially* rematerializable. > 2. Add a virtual target hook that can override the flag: > "TII::isReallyRematerializable(Machineinstr*)". > 3. 
Introduce a new non-virtual method: > bool TII::isRematerializable(Machineinstr *MI) { > return MI->flags->isrematable && isReallyRematerializable(MI); > } > > This achieves three things: > > 1. Just looking at the .td file, you can tell which instructions are > candidates for remat. > 2. The isRematerializable predicate is faster for instructions that > are not remat-able. > 3. The isReallyRematerializable only needs to be implemented by > targets with instructions that are remat-able only in some cases > (like the x86 instructions). I okay'd Dan's patch after considering the trade-offs. To me this gets rid of the duplicate instructions so it's worth it. Not to mention I had already considered the "trivial rematerialization" scheme to be temporary. If we are really concerned about the speed, then I agree the hybrid approach is the best. Sorry about the confusion. Evan > > To me, #1 is the killer feature. In general, I'd like to move away > from having tables (either explicit, like the one in > X86RegisterInfo::foldMemoryOperand, or just big switch stmts) to > having properties on .td file entries. That makes it much more clear > what is going on when inspecting the .td files. > > I'm sorry I didn't look at your patch when you asked for comments, > but does this proposal sound sane? > > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From resistor at mac.com Tue Jun 19 02:36:46 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 02:36:46 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706190736.l5J7akAZ021331@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.40 -> 1.41 --- Log message: Handle constants in phi nodes properly.
This fixes test/Transforms/GVNPRE/2007-06-18-ConstantInPhi.ll --- Diffs of the changes: (+10 -1) GVNPRE.cpp | 11 ++++++++++- 1 files changed, 10 insertions(+), 1 deletion(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.40 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.41 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.40 Tue Jun 19 00:37:32 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Tue Jun 19 02:35:36 2007 @@ -927,7 +927,16 @@ for (std::set::iterator I = S.begin(), E = S.end(); I != E; ++I) { - if (find_leader(anticIn, VN.lookup(*I)) == 0) + // For non-opaque values, we should already have a value numbering. + // However, for opaques, such as constants within PHI nodes, it is + // possible that they have not yet received a number. Make sure they do + // so now. + uint32_t valNum = 0; + if (isa(*I) || isa(*I)) + valNum = VN.lookup(*I); + else + valNum = VN.lookup_or_add(*I); + if (find_leader(anticIn, valNum) == 0) val_insert(anticIn, *I); } From djg at cray.com Tue Jun 19 09:14:19 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:14:19 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ScheduleDAG.cpp SelectionDAG.cpp Message-ID: <200706191414.l5JEEJcQ030298@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: DAGCombiner.cpp updated: 1.312 -> 1.313 ScheduleDAG.cpp updated: 1.125 -> 1.126 SelectionDAG.cpp updated: 1.407 -> 1.408 --- Log message: Pass a SelectionDAG into SDNode::dump everywhere it's used, in preparation for needing the DAG node to print pre-legalize extended value types, and to get better debug messages with target-specific nodes.
--- Diffs of the changes: (+7 -7) DAGCombiner.cpp | 10 +++++----- ScheduleDAG.cpp | 2 +- SelectionDAG.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.312 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.313 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.312 Thu Jun 14 17:58:02 2007 +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jun 19 09:13:56 2007 @@ -113,7 +113,7 @@ bool AddTo = true) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump()); + DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); DOUT << "\nWith: "; DEBUG(To[0].Val->dump(&DAG)); DOUT << " and " << NumTo-1 << " other values\n"; std::vector NowDead; @@ -164,7 +164,7 @@ // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump()); + DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.Val->dump(&DAG)); DOUT << "\nWith: "; DEBUG(TLO.New.Val->dump(&DAG)); DOUT << '\n'; @@ -592,7 +592,7 @@ RV.Val->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump()); + DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); DOUT << "\nWith: "; DEBUG(RV.Val->dump(&DAG)); DOUT << '\n'; std::vector NowDead; @@ -3314,7 +3314,7 @@ Result = DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump()); + DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG)); DOUT << '\n'; std::vector NowDead; @@ -3445,7 +3445,7 @@ : DAG.getIndexedStore(SDOperand(N,0), BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump()); + DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); DOUT << "\nWith: "; DEBUG(Result.Val->dump(&DAG)); DOUT << '\n'; std::vector NowDead; Index: 
llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.125 llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.126 --- llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp:1.125 Tue Mar 20 15:43:18 2007 +++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp Tue Jun 19 09:13:56 2007 @@ -476,7 +476,7 @@ switch (Node->getOpcode()) { default: #ifndef NDEBUG - Node->dump(); + Node->dump(&DAG); #endif assert(0 && "This target-independent node should have been selected!"); case ISD::EntryToken: // fall thru Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.407 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.408 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.407 Thu Jun 14 17:58:02 2007 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Jun 19 09:13:56 2007 @@ -512,7 +512,7 @@ // not subject to CSE. if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isTargetOpcode()) { - N->dump(); + N->dump(this); cerr << "\n"; assert(0 && "Node is not in map!"); } From djg at cray.com Tue Jun 19 09:14:25 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:14:25 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Message-ID: <200706191414.l5JEEPGg030304@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Alpha: AlphaISelDAGToDAG.cpp updated: 1.69 -> 1.70 --- Log message: Pass a SelectionDAG into SDNode::dump everywhere it's used, in preparation for needing the DAG node to print pre-legalize extended value types, and to get better debug messages with target-specific nodes.
--- Diffs of the changes: (+1 -1) AlphaISelDAGToDAG.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp diff -u llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.69 llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.70 --- llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.69 Mon Apr 16 13:10:22 2007 +++ llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Tue Jun 19 09:13:56 2007 @@ -356,7 +356,7 @@ bool rev = false; bool inv = false; switch(CC) { - default: DEBUG(N->dump()); assert(0 && "Unknown FP comparison!"); + default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!"); case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ: Opc = Alpha::CMPTEQ; break; case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT: From djg at cray.com Tue Jun 19 09:14:25 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:14:25 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Message-ID: <200706191414.l5JEEPwJ030309@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/IA64: IA64ISelDAGToDAG.cpp updated: 1.65 -> 1.66 --- Log message: Pass a SelectionDAG into SDNode::dump everywhere it's used, in preparation for needing the DAG node to print pre-legalize extended value types, and to get better debug messages with target-specific nodes.
--- Diffs of the changes: (+1 -1) IA64ISelDAGToDAG.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp diff -u llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.65 llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.66 --- llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp:1.65 Mon Apr 16 13:10:23 2007 +++ llvm/lib/Target/IA64/IA64ISelDAGToDAG.cpp Tue Jun 19 09:13:56 2007 @@ -470,7 +470,7 @@ switch (TypeBeingLoaded) { default: #ifndef NDEBUG - N->dump(); + N->dump(CurDAG); #endif assert(0 && "Cannot load this type!"); case MVT::i1: { // this is a bool From djg at cray.com Tue Jun 19 09:28:53 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:28:53 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/ScalarEvolution.cpp Message-ID: <200706191428.l5JESrCv030820@zion.cs.uiuc.edu> Changes in directory llvm/lib/Analysis: ScalarEvolution.cpp updated: 1.120 -> 1.121 --- Log message: Rename ScalarEvolution::deleteInstructionFromRecords to deleteValueFromRecords and loosen the types to allow it to accept Value* instead of just Instruction*, since this is what ScalarEvolution uses internally anyway. This allows more flexibility for future uses. --- Diffs of the changes: (+14 -14) ScalarEvolution.cpp | 28 ++++++++++++++-------------- 1 files changed, 14 insertions(+), 14 deletions(-) Index: llvm/lib/Analysis/ScalarEvolution.cpp diff -u llvm/lib/Analysis/ScalarEvolution.cpp:1.120 llvm/lib/Analysis/ScalarEvolution.cpp:1.121 --- llvm/lib/Analysis/ScalarEvolution.cpp:1.120 Mon Jun 18 14:30:09 2007 +++ llvm/lib/Analysis/ScalarEvolution.cpp Tue Jun 19 09:28:31 2007 @@ -1167,10 +1167,10 @@ /// loop without a loop-invariant iteration count.
SCEVHandle getIterationCount(const Loop *L); - /// deleteInstructionFromRecords - This method should be called by the - /// client before it removes an instruction from the program, to make sure + /// deleteValueFromRecords - This method should be called by the + /// client before it removes a value from the program, to make sure /// that no dangling references are left around. - void deleteInstructionFromRecords(Instruction *I); + void deleteValueFromRecords(Value *V); private: /// createSCEV - We know that there is no SCEV for the specified value. @@ -1236,27 +1236,27 @@ // Basic SCEV Analysis and PHI Idiom Recognition Code // -/// deleteInstructionFromRecords - This method should be called by the +/// deleteValueFromRecords - This method should be called by the /// client before it removes an instruction from the program, to make sure /// that no dangling references are left around. -void ScalarEvolutionsImpl::deleteInstructionFromRecords(Instruction *I) { - SmallVector Worklist; +void ScalarEvolutionsImpl::deleteValueFromRecords(Value *V) { + SmallVector Worklist; - if (Scalars.erase(I)) { - if (PHINode *PN = dyn_cast(I)) + if (Scalars.erase(V)) { + if (PHINode *PN = dyn_cast(V)) ConstantEvolutionLoopExitValue.erase(PN); - Worklist.push_back(I); + Worklist.push_back(V); } while (!Worklist.empty()) { - Instruction *II = Worklist.back(); + Value *VV = Worklist.back(); Worklist.pop_back(); - for (Instruction::use_iterator UI = II->use_begin(), UE = II->use_end(); + for (Instruction::use_iterator UI = VV->use_begin(), UE = VV->use_end(); UI != UE; ++UI) { Instruction *Inst = cast(*UI); if (Scalars.erase(Inst)) { - if (PHINode *PN = dyn_cast(II)) + if (PHINode *PN = dyn_cast(VV)) ConstantEvolutionLoopExitValue.erase(PN); Worklist.push_back(Inst); } @@ -2593,8 +2593,8 @@ return ((ScalarEvolutionsImpl*)Impl)->getSCEVAtScope(getSCEV(V), L); } -void ScalarEvolution::deleteInstructionFromRecords(Instruction *I) const { - return 
((ScalarEvolutionsImpl*)Impl)->deleteInstructionFromRecords(I); +void ScalarEvolution::deleteValueFromRecords(Value *V) const { + return ((ScalarEvolutionsImpl*)Impl)->deleteValueFromRecords(V); } static void PrintLoopInfo(std::ostream &OS, const ScalarEvolution *SE, From djg at cray.com Tue Jun 19 09:28:58 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:28:58 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Analysis/ScalarEvolution.h Message-ID: <200706191428.l5JESwkL030827@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Analysis: ScalarEvolution.h updated: 1.21 -> 1.22 --- Log message: Rename ScalarEvolution::deleteInstructionFromRecords to deleteValueFromRecords and loosen the types to allow it to accept Value* instead of just Instruction*, since this is what ScalarEvolution uses internally anyway. This allows more flexibility for future uses. --- Diffs of the changes: (+3 -3) ScalarEvolution.h | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) Index: llvm/include/llvm/Analysis/ScalarEvolution.h diff -u llvm/include/llvm/Analysis/ScalarEvolution.h:1.21 llvm/include/llvm/Analysis/ScalarEvolution.h:1.22 --- llvm/include/llvm/Analysis/ScalarEvolution.h:1.21 Sun May 6 08:37:15 2007 +++ llvm/include/llvm/Analysis/ScalarEvolution.h Tue Jun 19 09:28:30 2007 @@ -232,10 +232,10 @@ /// an analyzable loop-invariant iteration count. bool hasLoopInvariantIterationCount(const Loop *L) const; - /// deleteInstructionFromRecords - This method should be called by the - /// client before it removes an instruction from the program, to make sure + /// deleteValueFromRecords - This method should be called by the + /// client before it removes a Value from the program, to make sure /// that no dangling references are left around.
- void deleteInstructionFromRecords(Instruction *I) const; + void deleteValueFromRecords(Value *V) const; virtual bool runOnFunction(Function &F); virtual void releaseMemory(); From djg at cray.com Tue Jun 19 09:28:59 2007 From: djg at cray.com (Dan Gohman) Date: Tue, 19 Jun 2007 09:28:59 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp LoopStrengthReduce.cpp Message-ID: <200706191428.l5JESx2b030834@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: IndVarSimplify.cpp updated: 1.122 -> 1.123 LoopStrengthReduce.cpp updated: 1.141 -> 1.142 --- Log message: Rename ScalarEvolution::deleteInstructionFromRecords to deleteValueFromRecords and loosen the types to allow it to accept Value* instead of just Instruction*, since this is what ScalarEvolution uses internally anyway. This allows more flexibility for future uses. --- Diffs of the changes: (+5 -5) IndVarSimplify.cpp | 6 +++--- LoopStrengthReduce.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp diff -u llvm/lib/Transforms/Scalar/IndVarSimplify.cpp:1.122 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp:1.123 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp:1.122 Fri Jun 15 13:00:55 2007 +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp Tue Jun 19 09:28:31 2007 @@ -115,7 +115,7 @@ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Instruction *U = dyn_cast(I->getOperand(i))) Insts.insert(U); - SE->deleteInstructionFromRecords(I); + SE->deleteValueFromRecords(I); DOUT << "INDVARS: Deleting: " << *I; I->eraseFromParent(); Changed = true; @@ -181,7 +181,7 @@ GetElementPtrInst *NGEPI = new GetElementPtrInst( NCE, Constant::getNullValue(Type::Int32Ty), NewAdd, GEPI->getName(), GEPI); - SE->deleteInstructionFromRecords(GEPI); + SE->deleteValueFromRecords(GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); GEPI = NGEPI; @@ -398,7 +398,7 @@ // the PHI entirely.
This is safe, because the NewVal won't be variant // in the loop, so we don't need an LCSSA phi node anymore. if (NumPreds == 1) { - SE->deleteInstructionFromRecords(PN); + SE->deleteValueFromRecords(PN); PN->replaceAllUsesWith(ExitVal); PN->eraseFromParent(); break; Index: llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp diff -u llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.141 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.142 --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp:1.141 Fri Jun 15 09:38:12 2007 +++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Jun 19 09:28:31 2007 @@ -226,7 +226,7 @@ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Instruction *U = dyn_cast(I->getOperand(i))) Insts.insert(U); - SE->deleteInstructionFromRecords(I); + SE->deleteValueFromRecords(I); I->eraseFromParent(); Changed = true; } @@ -1488,7 +1488,7 @@ DeadInsts.insert(BO); // Break the cycle, then delete the PHI. PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - SE->deleteInstructionFromRecords(PN); + SE->deleteValueFromRecords(PN); PN->eraseFromParent(); } } From clattner at apple.com Tue Jun 19 10:06:41 2007 From: clattner at apple.com (Chris Lattner) Date: Tue, 19 Jun 2007 08:06:41 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> Message-ID: <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> >> This achieves two things: >> >> 1. Just looking at the .td file, you can tell which instructions are >> candidates for remat. >> 2. The isRematerializable predicate is faster for instructions that >> are not remat-able. >> 3. 
The isReallyRematerializable only needs to be implemented by >> targets with instructions that are remat-able only in some cases >> (like the x86 instructions). > > I okay'd Dan patch after considering the trade-offs. To me this gets > rid of the duplicate instructions so it's worth it. I think both approaches get rid of the duplicate instructions. > If we are really concerned about the speed, then I agree the hybrid > approach is the best. Sorry about the confusion. Speed is something to consider, but I don't think it should override maintainability. > Not to mention I had already considered the "trivial > rematerialization" > scheme to be temporary. Okay, how do you think this should work going forward? -Chris From evan.cheng at apple.com Tue Jun 19 11:38:15 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 09:38:15 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> Message-ID: <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> On Jun 19, 2007, at 8:06 AM, Chris Lattner wrote: >>> This achieves two things: >>> >>> 1. Just looking at the .td file, you can tell which instructions are >>> candidates for remat. >>> 2. The isRematerializable predicate is faster for instructions that >>> are not remat-able. >>> 3. The isReallyRematerializable only needs to be implemented by >>> targets with instructions that are remat-able only in some cases >>> (like the x86 instructions). >> >> I okay'd Dan patch after considering the trade-offs. To me this gets >> rid of the duplicate instructions so it's worth it. > > I think both approaches get rid of the duplicate instructions. Right, not disputing that. 
> >> If we are really concerned about the speed, then I agree the hybrid >> approach is the best. Sorry about the confusion. > > Speed is something to consider, but I don't think it should override > maintainability. > >> Not to mention I had already considered the "trivial >> rematerialization" >> scheme to be temporary. > > Okay, how do you think this should work going forward? Trivial remat will go away when proper remat is implemented. All instructions without side-effect will be rematerializable if their operands are available so all these will go away. Not that proper remat is coming anytime soon though. Evan > > -Chris > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Jun 19 11:40:32 2007 From: clattner at apple.com (Chris Lattner) Date: Tue, 19 Jun 2007 09:40:32 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> Message-ID: On Jun 19, 2007, at 9:38 AM, Evan Cheng wrote: >>> If we are really concerned about the speed, then I agree the hybrid >>> approach is the best. Sorry about the confusion. >> >> Speed is something to consider, but I don't think it should override >> maintainability. >> >>> Not to mention I had already considered the "trivial >>> rematerialization" >>> scheme to be temporary. >> >> Okay, how do you think this should work going forward? > > Trivial remat will go away when proper remat is implemented. All > instructions without side-effect will be rematerializable if their > operands are available so all these will go away. 
Not that proper > remat is coming anytime soon though. Ok. How do you intend to capture the "without side-effects" part? It seems that simple remat works for things that: 1. have no side effects 2. have no register inputs Is there another condition? If so, simple remat can check #2 today, just by itself looking at the machineinstr. Given that, it comes down to how we want to represent #1. -Chris From sabre at nondot.org Tue Jun 19 11:47:10 2007 From: sabre at nondot.org (Chris Lattner) Date: Tue, 19 Jun 2007 11:47:10 -0500 Subject: [llvm-commits] CVS: llvm/tools/llvm-ld/llvm-ld.cpp Message-ID: <200706191647.l5JGlAWe001270@zion.cs.uiuc.edu> Changes in directory llvm/tools/llvm-ld: llvm-ld.cpp updated: 1.53 -> 1.54 --- Log message: Fix pr1448: http://llvm.org/PR1448 --- Diffs of the changes: (+20 -14) llvm-ld.cpp | 34 ++++++++++++++++++++-------------- 1 files changed, 20 insertions(+), 14 deletions(-) Index: llvm/tools/llvm-ld/llvm-ld.cpp diff -u llvm/tools/llvm-ld/llvm-ld.cpp:1.53 llvm/tools/llvm-ld/llvm-ld.cpp:1.54 --- llvm/tools/llvm-ld/llvm-ld.cpp:1.53 Sun May 6 04:29:57 2007 +++ llvm/tools/llvm-ld/llvm-ld.cpp Tue Jun 19 11:46:48 2007 @@ -326,46 +326,52 @@ // We can't just assemble and link the file with the system assembler // and linker because we don't know where to put the _start symbol. // GCC mysteriously knows how to do it. 
- std::vector args; + std::vector args; args.push_back(gcc.c_str()); args.push_back("-fno-strict-aliasing"); args.push_back("-O3"); args.push_back("-o"); - args.push_back(OutputFilename.c_str()); - args.push_back(InputFilename.c_str()); + args.push_back(OutputFilename); + args.push_back(InputFilename); // Add in the library paths for (unsigned index = 0; index < LibPaths.size(); index++) { args.push_back("-L"); - args.push_back(LibPaths[index].c_str()); + args.push_back(LibPaths[index]); } // Add the requested options for (unsigned index = 0; index < XLinker.size(); index++) { - args.push_back(XLinker[index].c_str()); - args.push_back(Libraries[index].c_str()); + args.push_back(XLinker[index]); + args.push_back(Libraries[index]); } // Add in the libraries to link. for (unsigned index = 0; index < LinkItems.size(); index++) if (LinkItems[index].first != "crtend") { - if (LinkItems[index].second) { - std::string lib_name = "-l" + LinkItems[index].first; - args.push_back(lib_name.c_str()); - } else - args.push_back(LinkItems[index].first.c_str()); + if (LinkItems[index].second) + args.push_back("-l" + LinkItems[index].first); + else + args.push_back(LinkItems[index].first); } - args.push_back(0); + + // Now that "args" owns all the std::strings for the arguments, call the c_str + // method to get the underlying string array. We do this game so that the + // std::string array is guaranteed to outlive the const char* array. + std::vector Args; + for (unsigned i = 0, e = args.size(); i != e; ++i) + Args.push_back(args[i].c_str()); + Args.push_back(0); if (Verbose) { cout << "Generating Native Executable With:\n"; - PrintCommand(args); + PrintCommand(Args); } // Run the compiler to assembly and link together the program. 
int R = sys::Program::ExecuteAndWait( - gcc, &args[0], (const char**)clean_env, 0, 0, 0, &ErrMsg); + gcc, &Args[0], (const char**)clean_env, 0, 0, 0, &ErrMsg); delete [] clean_env; return R; } From evan.cheng at apple.com Tue Jun 19 12:22:28 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 10:22:28 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> Message-ID: On Jun 19, 2007, at 9:40 AM, Chris Lattner wrote: > > On Jun 19, 2007, at 9:38 AM, Evan Cheng wrote: > >>>> If we are really concerned about the speed, then I agree the hybrid >>>> approach is the best. Sorry about the confusion. >>> >>> Speed is something to consider, but I don't think it should override >>> maintainability. >>> >>>> Not to mention I had already considered the "trivial >>>> rematerialization" >>>> scheme to be temporary. >>> >>> Okay, how do you think this should work going forward? >> >> Trivial remat will go away when proper remat is implemented. All >> instructions without side-effect will be rematerializable if their >> operands are available so all these will go away. Not that proper >> remat is coming anytime soon though. > > Ok. How do you intend to capture the "without side-effects" part? > > It seems that simple remat works for things that: > > 1. have no side effects > 2. have no register inputs > > Is there another condition? Can't think of any. > > If so, simple remat can check #2 today, just by itself looking at the > machineinstr. Given that, it comes down to how we want to > represent #1. I don't see a better way so I guess this will be a targetinstrinfo bit (true for those with side-effects). 
Evan > > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Jun 19 12:31:10 2007 From: clattner at apple.com (Chris Lattner) Date: Tue, 19 Jun 2007 10:31:10 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> Message-ID: <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> >> It seems that simple remat works for things that: >> >> 1. have no side effects >> 2. have no register inputs >> >> Is there another condition? > > Can't think of any. > >> >> If so, simple remat can check #2 today, just by itself looking at the >> machineinstr. Given that, it comes down to how we want to >> represent #1. > > I don't see a better way so I guess this will be a targetinstrinfo > bit (true for those with side-effects). Okay, the tricky thing here is instructions that have "conditional side effects". For example, all instructions marked isload/isstore/ iscall etc should be considered to have side effects (as would anything with implicit definitions), but loads from constant pools and other special cases should not be considered to have side effects. 
This sounds like the exact same class of problems we already have with trivial remat :) -Chris From evan.cheng at apple.com Tue Jun 19 12:49:56 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 10:49:56 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> Message-ID: <54126BF5-9E48-4077-8174-626F170D6C81@apple.com> On Jun 19, 2007, at 10:31 AM, Chris Lattner wrote: >>> It seems that simple remat works for things that: >>> >>> 1. have no side effects >>> 2. have no register inputs >>> >>> Is there another condition? >> >> Can't think of any. >> >>> >>> If so, simple remat can check #2 today, just by itself looking at >>> the >>> machineinstr. Given that, it comes down to how we want to >>> represent #1. >> >> I don't see a better way so I guess this will be a targetinstrinfo >> bit (true for those with side-effects). > > Okay, the tricky thing here is instructions that have "conditional > side effects". For example, all instructions marked isload/isstore/ > iscall etc should be considered to have side effects (as would > anything with implicit definitions), but loads from constant pools > and other special cases should not be considered to have side effects. Calls should definitely be marked to have side-effects. To me the tricky cases are loads / stores. But can't we determine these from the operands? So, add one more condition: 3. if memory operation, no external symbol or global address inputs. Then we don't need to mark loads / stores as having side effects. 
Evan > > This sounds like the exact same class of problems we already have > with trivial remat :) > > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Jun 19 12:49:10 2007 From: clattner at apple.com (clattner at apple.com) Date: Tue, 19 Jun 2007 10:49:10 -0700 (PDT) Subject: [llvm-commits] [128613] add a new -mllvm -disable-llvm-optzns option, which can be used Message-ID: <20070619174910.227E69837910@src> Revision: 128613 Author: clattner Date: 2007-06-19 10:49:09 -0700 (Tue, 19 Jun 2007) Log Message: ----------- add a new -mllvm -disable-llvm-optzns option, which can be used to isolate changes in the code made by llvm optimizers from changes made by GCC FE optimizations. Modified Paths: -------------- apple-local/branches/llvm/gcc/llvm-backend.cpp Modified: apple-local/branches/llvm/gcc/llvm-backend.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-backend.cpp 2007-06-19 16:30:52 UTC (rev 128612) +++ apple-local/branches/llvm/gcc/llvm-backend.cpp 2007-06-19 17:49:09 UTC (rev 128613) @@ -78,6 +78,11 @@ TypeConverter *TheTypeConverter = 0; llvm::OStream *AsmOutFile = 0; +/// DisableLLVMOptimizations - Allow the user to specify: +/// "-mllvm -disable-llvm-optzns" on the llvm-gcc command line to force llvm +/// optimizations off. 
+static cl::opt DisableLLVMOptimizations("disable-llvm-optzns"); + std::vector > StaticCtors, StaticDtors; std::vector AttributeUsedGlobals; std::vector AttributeNoinlineFunctions; @@ -271,7 +276,7 @@ HasPerFunctionPasses = true; #endif - if (optimize > 0) { + if (optimize > 0 && !DisableLLVMOptimizations) { HasPerFunctionPasses = true; PerFunctionPasses->add(createCFGSimplificationPass()); if (optimize == 1) @@ -287,7 +292,7 @@ PerModulePasses->add(new TargetData(*TheTarget->getTargetData())); bool HasPerModulePasses = false; - if (optimize > 0) { + if (optimize > 0 && !DisableLLVMOptimizations) { HasPerModulePasses = true; PassManager *PM = PerModulePasses; if (flag_unit_at_a_time) From clattner at apple.com Tue Jun 19 12:54:58 2007 From: clattner at apple.com (Chris Lattner) Date: Tue, 19 Jun 2007 10:54:58 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <54126BF5-9E48-4077-8174-626F170D6C81@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> <54126BF5-9E48-4077-8174-626F170D6C81@apple.com> Message-ID: <80A316E1-937A-416D-85F5-68FE43C66575@apple.com> >>> I don't see a better way so I guess this will be a targetinstrinfo >>> bit (true for those with side-effects). >> >> Okay, the tricky thing here is instructions that have "conditional >> side effects". For example, all instructions marked isload/isstore/ >> iscall etc should be considered to have side effects (as would >> anything with implicit definitions), but loads from constant pools >> and other special cases should not be considered to have side >> effects. > > Calls should definitely be marked to have side-effects. To me the > tricky cases are loads / stores. 
But can't we determine these from > the operands? So, add one more condition: > 3. if memory operation, no external symbol or global address inputs. > > Then we don't need to mark loads / stores as having side effects. Ah right, even better. So you're saying we don't need a new flag at all? I guess this won't work right now though, because we don't model things that clobber the condcodes. Wouldn't it be nice if we did? :) -Chris From evan.cheng at apple.com Tue Jun 19 13:09:45 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 11:09:45 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <80A316E1-937A-416D-85F5-68FE43C66575@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> <54126BF5-9E48-4077-8174-626F170D6C81@apple.com> <80A316E1-937A-416D-85F5-68FE43C66575@apple.com> Message-ID: <3087CCA6-2DCB-41AD-B7BE-7998C49721E1@apple.com> On Jun 19, 2007, at 10:54 AM, Chris Lattner wrote: >>>> I don't see a better way so I guess this will be a targetinstrinfo >>>> bit (true for those with side-effects). >>> >>> Okay, the tricky thing here is instructions that have "conditional >>> side effects". For example, all instructions marked isload/isstore/ >>> iscall etc should be considered to have side effects (as would >>> anything with implicit definitions), but loads from constant pools >>> and other special cases should not be considered to have side >>> effects. >> >> Calls should definitely be marked to have side-effects. To me the >> tricky cases are loads / stores. But can't we determine these from >> the operands? So, add one more condition: >> 3. if memory operation, no external symbol or global address inputs. 
>> >> Then we don't need to mark loads / stores as having side effects. > > Ah right, even better. So you're saying we don't need a new flag at > all? We do need a "hasSideEffect" flag. There are some non-branch, non- load/store instructions that have side effects, no? > > I guess this won't work right now though, because we don't model > things that clobber the condcodes. Wouldn't it be nice if we did? :) Yeah, I hear you. It's coming. Those flag clobbering instructions just have to be marked "hasSideEffect" for now. Evan > > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Jun 19 13:40:06 2007 From: clattner at apple.com (clattner at apple.com) Date: Tue, 19 Jun 2007 11:40:06 -0700 (PDT) Subject: [llvm-commits] [128616] Fix a recent regression that broke the logical __builtin_*pd ops. Message-ID: <20070619184006.977D49855D07@src> Revision: 128616 Author: clattner Date: 2007-06-19 11:40:06 -0700 (Tue, 19 Jun 2007) Log Message: ----------- Fix a recent regression that broke the logical __builtin_*pd ops. 
Modified Paths: -------------- apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp Modified: apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp =================================================================== --- apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp 2007-06-19 18:06:58 UTC (rev 128615) +++ apple-local/branches/llvm/gcc/config/i386/llvm-i386.cpp 2007-06-19 18:40:06 UTC (rev 128616) @@ -250,15 +250,19 @@ Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "tmp"); switch (FnCode) { case IX86_BUILTIN_ANDPS: + case IX86_BUILTIN_ANDPD: Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); break; case IX86_BUILTIN_ORPS: + case IX86_BUILTIN_ORPD: Result = Builder.CreateOr (Ops[0], Ops[1], "tmp"); break; case IX86_BUILTIN_XORPS: + case IX86_BUILTIN_XORPD: Result = Builder.CreateXor(Ops[0], Ops[1], "tmp"); break; case IX86_BUILTIN_ANDNPS: + case IX86_BUILTIN_ANDNPD: Ops[0] = Builder.CreateNot(Ops[0], "tmp"); Result = Builder.CreateAnd(Ops[0], Ops[1], "tmp"); break; From clattner at apple.com Tue Jun 19 15:17:40 2007 From: clattner at apple.com (clattner at apple.com) Date: Tue, 19 Jun 2007 13:17:40 -0700 (PDT) Subject: [llvm-commits] [128618] Fix some problems handling function pointers where the default Message-ID: <20070619201740.5A8E198B6884@src> Revision: 128618 Author: clattner Date: 2007-06-19 13:17:40 -0700 (Tue, 19 Jun 2007) Log Message: ----------- Fix some problems handling function pointers where the default unary conversions were not applied. Many thanks to Fariborz Jahanian for writing this patch. 
Modified Paths: -------------- apple-local/branches/llvm/gcc/objc/objc-act.c Modified: apple-local/branches/llvm/gcc/objc/objc-act.c =================================================================== --- apple-local/branches/llvm/gcc/objc/objc-act.c 2007-06-19 18:43:11 UTC (rev 128617) +++ apple-local/branches/llvm/gcc/objc/objc-act.c 2007-06-19 20:17:40 UTC (rev 128618) @@ -1855,8 +1855,10 @@ /* To allow for correct property assignment semantics and in accordance with C99 rules we generate: type temp; (temp = rhs, [lhs Setter:temp], temp) */ - temp = objc_create_named_tmp_var ( - objc_decay_parm_type (TREE_TYPE (rhs)), "prop"); + /* APPLE LOCAL begin radar 5279122 */ + rhs = default_conversion(rhs); + temp = objc_create_named_tmp_var (TREE_TYPE (rhs), "prop"); + /* APPLE LOCAL end radar 5279122 */ bind = build (BIND_EXPR, void_type_node, temp, NULL, NULL); TREE_SIDE_EFFECTS (bind) = 1; add_stmt (bind); @@ -1867,6 +1869,8 @@ else { comma_exp = objc_setter_func_call (receiver, prop_ident, rhs); + /* APPLE LOCAL radar 5279122 */ + rhs = default_conversion(rhs); /* APPLE LOCAL 5140757 */ temp = save_expr (rhs); if (TREE_CODE (temp) == VAR_DECL || TREE_CODE (temp) == PARM_DECL) From evan.cheng at apple.com Tue Jun 19 16:05:32 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 16:05:32 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMISelLowering.cpp ARMISelLowering.h ARMInstrInfo.td Message-ID: <200706192105.l5JL5Wu4008243@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMISelLowering.cpp updated: 1.56 -> 1.57 ARMISelLowering.h updated: 1.16 -> 1.17 ARMInstrInfo.td updated: 1.112 -> 1.113 --- Log message: Allow predicated immediate ARM to ARM calls. 
--- Diffs of the changes: (+14 -1) ARMISelLowering.cpp | 7 ++++++- ARMISelLowering.h | 1 + ARMInstrInfo.td | 7 +++++++ 3 files changed, 14 insertions(+), 1 deletion(-) Index: llvm/lib/Target/ARM/ARMISelLowering.cpp diff -u llvm/lib/Target/ARM/ARMISelLowering.cpp:1.56 llvm/lib/Target/ARM/ARMISelLowering.cpp:1.57 --- llvm/lib/Target/ARM/ARMISelLowering.cpp:1.56 Fri Jun 1 03:28:59 2007 +++ llvm/lib/Target/ARM/ARMISelLowering.cpp Tue Jun 19 16:05:09 2007 @@ -267,6 +267,7 @@ case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; @@ -517,6 +518,7 @@ // node so that legalize doesn't hack it. bool isDirect = false; bool isARMFunc = false; + bool isLocalARMFunc = false; if (GlobalAddressSDNode *G = dyn_cast(Callee)) { GlobalValue *GV = G->getGlobal(); isDirect = true; @@ -525,6 +527,8 @@ bool isStub = (isExt && Subtarget->isTargetDarwin()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, @@ -564,7 +568,8 @@ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { CallOpc = (isDirect || Subtarget->hasV5TOps()) - ? ARMISD::CALL : ARMISD::CALL_NOLINK; + ? (isLocalARMFunc ? 
ARMISD::CALL_PRED : ARMISD::CALL) + : ARMISD::CALL_NOLINK; } if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) { // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK Index: llvm/lib/Target/ARM/ARMISelLowering.h diff -u llvm/lib/Target/ARM/ARMISelLowering.h:1.16 llvm/lib/Target/ARM/ARMISelLowering.h:1.17 --- llvm/lib/Target/ARM/ARMISelLowering.h:1.16 Thu May 17 16:31:21 2007 +++ llvm/lib/Target/ARM/ARMISelLowering.h Tue Jun 19 16:05:09 2007 @@ -34,6 +34,7 @@ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable CALL, // Function call. + CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. tCALL, // Thumb function call. BRCOND, // Conditional branch. Index: llvm/lib/Target/ARM/ARMInstrInfo.td diff -u llvm/lib/Target/ARM/ARMInstrInfo.td:1.112 llvm/lib/Target/ARM/ARMInstrInfo.td:1.113 --- llvm/lib/Target/ARM/ARMInstrInfo.td:1.112 Mon Jun 18 20:48:04 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.td Tue Jun 19 16:05:09 2007 @@ -52,6 +52,8 @@ def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; @@ -621,6 +623,11 @@ def BL : AXI<(ops i32imm:$func, variable_ops), "bl ${func:call}", [(ARMcall tglobaladdr:$func)]>; + + def BL_pred : AI<(ops i32imm:$func, variable_ops), + "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>; + // ARMv5T and above def BLX : AXI<(ops GPR:$dst, variable_ops), "blx $dst", From christopher.lamb at gmail.com Tue Jun 19 16:39:01 2007 From: christopher.lamb at gmail.com (Christopher Lamb) Date: Tue, 19 Jun 2007 14:39:01 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: 
<35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> Message-ID: On Jun 19, 2007, at 10:31 AM, Chris Lattner wrote: >>> It seems that simple remat works for things that: >>> >>> 1. have no side effects >>> 2. have no register inputs >>> >>> Is there another condition? >> >> Can't think of any. >> >>> >>> If so, simple remat can check #2 today, just by itself looking at >>> the >>> machineinstr. Given that, it comes down to how we want to >>> represent #1. >> >> I don't see a better way so I guess this will be a targetinstrinfo >> bit (true for those with side-effects). > > Okay, the tricky thing here is instructions that have "conditional > side effects". For example, all instructions marked isload/isstore/ > iscall etc should be considered to have side effects (as would > anything with implicit definitions), but loads from constant pools > and other special cases should not be considered to have side effects. And on some specialized architectures there are other types of loads/stores that are not side-effecting as well. It would be good to have this be open to customization. -- Christopher Lamb -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20070619/17d69b74/attachment.html From evan.cheng at apple.com Tue Jun 19 16:45:36 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 16:45:36 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/IfConversion.cpp Message-ID: <200706192145.l5JLjaCu009087@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: IfConversion.cpp updated: 1.58 -> 1.59 --- Log message: Avoid if-converting simple block that ends with unconditional branch or fallthrough unless it branches / falls to the 'false' block. Not profitable, may end up increasing code size. --- Diffs of the changes: (+4 -1) IfConversion.cpp | 5 ++++- 1 files changed, 4 insertions(+), 1 deletion(-) Index: llvm/lib/CodeGen/IfConversion.cpp diff -u llvm/lib/CodeGen/IfConversion.cpp:1.58 llvm/lib/CodeGen/IfConversion.cpp:1.59 --- llvm/lib/CodeGen/IfConversion.cpp:1.58 Mon Jun 18 20:26:12 2007 +++ llvm/lib/CodeGen/IfConversion.cpp Tue Jun 19 16:45:13 2007 @@ -399,6 +399,9 @@ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; + if (TrueBBI.IsBrAnalyzable) + return false; + if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) @@ -406,7 +409,7 @@ Dups = TrueBBI.NonPredSize; } - return !blockAlwaysFallThrough(TrueBBI) && TrueBBI.BrCond.size() == 0; + return true; } /// ValidTriangle - Returns true if the 'true' and 'false' blocks (along From evan.cheng at apple.com Tue Jun 19 16:51:18 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 16:51:18 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile Message-ID: <200706192151.l5JLpI6j009185@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Olden/tsp: Makefile updated: 1.8 -> 1.9 --- Log message: Increase test size to something more reasonable. 
--- Diffs of the changes: (+1 -1) Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile diff -u llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile:1.8 llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile:1.9 --- llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile:1.8 Thu May 3 15:45:19 2007 +++ llvm-test/MultiSource/Benchmarks/Olden/tsp/Makefile Tue Jun 19 16:50:56 2007 @@ -7,7 +7,7 @@ RUN_OPTIONS = 2048000 else ifdef SMALL_PROBLEM_SIZE -RUN_OPTIONS = 10240 +RUN_OPTIONS = 102400 else RUN_OPTIONS = 1024000 endif From evan.cheng at apple.com Tue Jun 19 16:54:23 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 16:54:23 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Olden/power/power.h Message-ID: <200706192154.l5JLsNiU009285@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Olden/power: power.h updated: 1.5 -> 1.6 --- Log message: Increase test size to something more reasonable. 
--- Diffs of the changes: (+4 -4) power.h | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) Index: llvm-test/MultiSource/Benchmarks/Olden/power/power.h diff -u llvm-test/MultiSource/Benchmarks/Olden/power/power.h:1.5 llvm-test/MultiSource/Benchmarks/Olden/power/power.h:1.6 --- llvm-test/MultiSource/Benchmarks/Olden/power/power.h:1.5 Thu May 3 15:09:45 2007 +++ llvm-test/MultiSource/Benchmarks/Olden/power/power.h Tue Jun 19 16:54:00 2007 @@ -24,10 +24,10 @@ #define NULL 0 #ifdef SMALL_PROBLEM_SIZE /* __llvm__ SCALED BACK SETTINGS */ -#define NUM_FEEDERS 6 -#define LATERALS_PER_FEEDER 6 -#define BRANCHES_PER_LATERAL 4 -#define LEAVES_PER_BRANCH 6 +#define NUM_FEEDERS 8 +#define LATERALS_PER_FEEDER 16 +#define BRANCHES_PER_LATERAL 5 +#define LEAVES_PER_BRANCH 10 #else #if 0 /* DEFAULT SETTINGS */ #define NUM_FEEDERS 10 From evan.cheng at apple.com Tue Jun 19 16:55:47 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 16:55:47 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile Message-ID: <200706192155.l5JLtlQE009317@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Olden/mst: Makefile updated: 1.14 -> 1.15 --- Log message: Increase test size to something more reasonable. 
--- Diffs of the changes: (+1 -1) Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile diff -u llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile:1.14 llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile:1.15 --- llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile:1.14 Wed Sep 1 09:33:25 2004 +++ llvm-test/MultiSource/Benchmarks/Olden/mst/Makefile Tue Jun 19 16:55:24 2007 @@ -6,7 +6,7 @@ ifdef LARGE_PROBLEM_SIZE RUN_OPTIONS = 4000 else -RUN_OPTIONS = 500 +RUN_OPTIONS = 1000 endif include $(LEVEL)/MultiSource/Makefile.multisrc From evan.cheng at apple.com Tue Jun 19 17:02:33 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 17:02:33 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile Message-ID: <200706192202.l5JM2X0r009439@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Olden/em3d: Makefile updated: 1.7 -> 1.8 --- Log message: Increase test size to something more reasonable. 
--- Diffs of the changes: (+3 -1) Makefile | 4 +++- 1 files changed, 3 insertions(+), 1 deletion(-) Index: llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile diff -u llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile:1.7 llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile:1.8 --- llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile:1.7 Wed Sep 1 09:33:25 2004 +++ llvm-test/MultiSource/Benchmarks/Olden/em3d/Makefile Tue Jun 19 17:02:11 2007 @@ -4,7 +4,9 @@ CPPFLAGS = -DTORONTO LDFLAGS = -ifdef LARGE_PROBLEM_SIZE +ifdef SMALL_PROBLEM_SIZE +RUN_OPTIONS = 256 250 35 +else RUN_OPTIONS = 1024 1000 125 endif From clattner at apple.com Tue Jun 19 17:12:03 2007 From: clattner at apple.com (Chris Lattner) Date: Tue, 19 Jun 2007 15:12:03 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: References: <200706190148.l5J1mdkK004876@zion.cs.uiuc.edu> <80053877-EE88-4786-AB58-DF5C70D971A6@apple.com> <91E60045-9875-4DB0-8040-A07E23F7E7D1@apple.com> <81A7D3AF-3380-400B-99FA-798E1622D93D@apple.com> <35CB5F27-E516-4F8C-BC13-10DC959A7DF1@apple.com> Message-ID: <825C8FF8-6131-45FF-929D-B1151D23A759@apple.com> >> Okay, the tricky thing here is instructions that have "conditional >> side effects". For example, all instructions marked isload/isstore/ >> iscall etc should be considered to have side effects (as would >> anything with implicit definitions), but loads from constant pools >> and other special cases should not be considered to have side >> effects. > > And on some specialized architectures there are other types of > loads/stores that are not side affecting as well. It would be good > to have this be open to customization. The issues with loads and stores isn't one of side effects, it's one of aliasing. You can't sink a load past a potentially modifying store. 
The remat pass could track this and do smart things, but in the short term it would be better to just stop all stores and loads from moving... unless they are loads from the constant pool etc. -Chris From evan.cheng at apple.com Tue Jun 19 17:12:32 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 17:12:32 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile Message-ID: <200706192212.l5JMCW1l009703@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/Olden/bh: Makefile updated: 1.12 -> 1.13 --- Log message: Increase test size to something more reasonable. --- Diffs of the changes: (+6 -0) Makefile | 6 ++++++ 1 files changed, 6 insertions(+) Index: llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile diff -u llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile:1.12 llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile:1.13 --- llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile:1.12 Wed Sep 1 09:33:25 2004 +++ llvm-test/MultiSource/Benchmarks/Olden/bh/Makefile Tue Jun 19 17:12:09 2007 @@ -5,6 +5,12 @@ LDFLAGS = -lm ifdef LARGE_PROBLEM_SIZE RUN_OPTIONS = 40000 30 +else +ifdef SMALL_PROBLEM_SIZE +RUN_OPTIONS = 2000 5 +else +RUN_OPTIONS = 20000 20 +endif endif include $(LEVEL)/MultiSource/Makefile.multisrc From tonic at nondot.org Tue Jun 19 17:29:24 2007 From: tonic at nondot.org (Tanya Lattner) Date: Tue, 19 Jun 2007 17:29:24 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Transforms/IPO/InlinerPass.h Message-ID: <200706192229.l5JMTOnA010115@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Transforms/IPO: InlinerPass.h added (r1.1) --- Log message: Move inliner pass header file. 
--- Diffs of the changes: (+64 -0) InlinerPass.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 64 insertions(+) Index: llvm/include/llvm/Transforms/IPO/InlinerPass.h diff -c /dev/null llvm/include/llvm/Transforms/IPO/InlinerPass.h:1.1 *** /dev/null Tue Jun 19 17:29:12 2007 --- llvm/include/llvm/Transforms/IPO/InlinerPass.h Tue Jun 19 17:29:02 2007 *************** *** 0 **** --- 1,64 ---- + //===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===// + // + // The LLVM Compiler Infrastructure + // + // This file was developed by the LLVM research group and is distributed under + // the University of Illinois Open Source License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This file defines a simple policy-based bottom-up inliner. This file + // implements all of the boring mechanics of the bottom-up inlining, while the + // subclass determines WHAT to inline, which is the much more interesting + // component. + // + //===----------------------------------------------------------------------===// + + #ifndef INLINER_H + #define INLINER_H + + #include "llvm/CallGraphSCCPass.h" + + namespace llvm { + class CallSite; + + /// Inliner - This class contains all of the helper code which is used to + /// perform the inlining operations that does not depend on the policy. + /// + struct Inliner : public CallGraphSCCPass { + Inliner(const void *ID); + + /// getAnalysisUsage - For this class, we declare that we require and preserve + /// the call graph. If the derived class implements this method, it should + /// always explicitly call the implementation here. + virtual void getAnalysisUsage(AnalysisUsage &Info) const; + + // Main run interface method, this implements the interface required by the + // Pass class. 
+ virtual bool runOnSCC(const std::vector &SCC); + + // doFinalization - Remove now-dead linkonce functions at the end of + // processing to avoid breaking the SCC traversal. + virtual bool doFinalization(CallGraph &CG); + + + /// This method returns the value specified by the -inline-threshold value, + /// specified on the command line. This is typically not directly needed. + /// + unsigned getInlineThreshold() const { return InlineThreshold; } + + /// getInlineCost - This method must be implemented by the subclass to + /// determine the cost of inlining the specified call site. If the cost + /// returned is greater than the current inline threshold, the call site is + /// not inlined. + /// + virtual int getInlineCost(CallSite CS) = 0; + + private: + // InlineThreshold - Cache the value here for easy access. + unsigned InlineThreshold; + }; + + } // End llvm namespace + + #endif From tonic at nondot.org Tue Jun 19 17:30:12 2007 From: tonic at nondot.org (Tanya Lattner) Date: Tue, 19 Jun 2007 17:30:12 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/IPO/InlineSimple.cpp Inliner.cpp Message-ID: <200706192230.l5JMUCgI010164@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/IPO: InlineSimple.cpp updated: 1.85 -> 1.86 Inliner.cpp updated: 1.41 -> 1.42 --- Log message: Inliner pass header file was moved. 
--- Diffs of the changes: (+3 -2) InlineSimple.cpp | 3 ++- Inliner.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/IPO/InlineSimple.cpp diff -u llvm/lib/Transforms/IPO/InlineSimple.cpp:1.85 llvm/lib/Transforms/IPO/InlineSimple.cpp:1.86 --- llvm/lib/Transforms/IPO/InlineSimple.cpp:1.85 Thu Jun 7 12:12:16 2007 +++ llvm/lib/Transforms/IPO/InlineSimple.cpp Tue Jun 19 17:29:50 2007 @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "Inliner.h" +#define DEBUG_TYPE "inline" #include "llvm/CallingConv.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -21,6 +21,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/InlinerPass.h" #include using namespace llvm; Index: llvm/lib/Transforms/IPO/Inliner.cpp diff -u llvm/lib/Transforms/IPO/Inliner.cpp:1.41 llvm/lib/Transforms/IPO/Inliner.cpp:1.42 --- llvm/lib/Transforms/IPO/Inliner.cpp:1.41 Sun May 6 18:13:56 2007 +++ llvm/lib/Transforms/IPO/Inliner.cpp Tue Jun 19 17:29:50 2007 @@ -14,12 +14,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "inline" -#include "Inliner.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" From tbrethou at cs.uiuc.edu Tue Jun 19 17:32:14 2007 From: tbrethou at cs.uiuc.edu (Tanya Lattner) Date: Tue, 19 Jun 2007 17:32:14 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/IPO/Inliner.h Message-ID: <200706192232.l5JMWEok010301@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/IPO: Inliner.h (r1.13) removed --- Log message: Moved Inliner.h to 
include/llvm/Transforms/IPO/InlinerPass.h --- Diffs of the changes: (+0 -0) 0 files changed From resistor at mac.com Tue Jun 19 18:07:38 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 18:07:38 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706192307.l5JN7cPg011089@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.41 -> 1.42 --- Log message: Make dependsOnInvoke much more specific in what it tests, which in turn makes it much faster to run. This reduces the time to optimize lencod with a debug build on PPC from ~450s to ~300s. --- Diffs of the changes: (+10 -42) GVNPRE.cpp | 52 ++++++++++------------------------------------------ 1 files changed, 10 insertions(+), 42 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.41 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.42 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.41 Tue Jun 19 02:35:36 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Tue Jun 19 18:07:16 2007 @@ -504,40 +504,14 @@ } bool GVNPRE::dependsOnInvoke(Value* V) { - if (!isa(V)) - return false; - - User* U = cast(V); - std::map::iterator I = invokeDep.find(U); - if (I != invokeDep.end()) - return I->second; - - std::vector worklist(U->op_begin(), U->op_end()); - std::set visited; - - while (!worklist.empty()) { - Value* current = worklist.back(); - worklist.pop_back(); - visited.insert(current); - - if (!isa(current)) - continue; - else if (isa(current)) - return true; - - User* curr = cast(current); - std::map::iterator CI = invokeDep.find(curr); - if (CI != invokeDep.end()) { - if (CI->second) + if (PHINode* p = dyn_cast(V)) { + for (PHINode::op_iterator I = p->op_begin(), E = p->op_end(); I != E; ++I) + if (isa(*I)) return true; - } else { - for (unsigned i = 0; i < curr->getNumOperands(); ++i) - if (visited.find(curr->getOperand(i)) == visited.end()) - worklist.push_back(curr->getOperand(i)); - } + return 
false; + } else { + return false; } - - return false; } // Remove all expressions whose operands are not themselves in the set @@ -557,10 +531,6 @@ lhsValid = true; break; } - - // Check for dependency on invoke insts - // NOTE: This check is expensive, so don't do it if we - // don't have to if (lhsValid) lhsValid = !dependsOnInvoke(BO->getOperand(0)); @@ -572,10 +542,6 @@ rhsValid = true; break; } - - // Check for dependency on invoke insts - // NOTE: This check is expensive, so don't do it if we - // don't have to if (rhsValid) rhsValid = !dependsOnInvoke(BO->getOperand(1)); @@ -590,7 +556,8 @@ lhsValid = true; break; } - lhsValid &= !dependsOnInvoke(C->getOperand(0)); + if (lhsValid) + lhsValid = !dependsOnInvoke(C->getOperand(0)); bool rhsValid = !isa(C->getOperand(1)); if (!rhsValid) @@ -600,7 +567,8 @@ rhsValid = true; break; } - rhsValid &= !dependsOnInvoke(C->getOperand(1)); + if (rhsValid) + rhsValid = !dependsOnInvoke(C->getOperand(1)); if (!lhsValid || !rhsValid) set.erase(C); From resistor at mac.com Tue Jun 19 18:24:17 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 18:24:17 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706192324.l5JNOH9V011444@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.42 -> 1.43 --- Log message: Use a DenseMap instead of an std::map for the value numbering. This reduces the time to optimize lencod on a PPC Debug build from ~300s to ~140s. 
--- Diffs of the changes: (+5 -4) GVNPRE.cpp | 9 +++++---- 1 files changed, 5 insertions(+), 4 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.42 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.43 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.42 Tue Jun 19 18:07:16 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Tue Jun 19 18:23:54 2007 @@ -25,6 +25,7 @@ #include "llvm/Function.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" @@ -78,7 +79,7 @@ }; private: - std::map valueNumbering; + DenseMap valueNumbering; std::map expressionNumbering; std::set maximalExpressions; @@ -217,7 +218,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { maximalValues.insert(V); - std::map::iterator VI = valueNumbering.find(V); + DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; @@ -255,7 +256,7 @@ } uint32_t ValueTable::lookup(Value* V) { - std::map::iterator VI = valueNumbering.find(V); + DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; else @@ -265,7 +266,7 @@ } void ValueTable::add(Value* V, uint32_t num) { - std::map::iterator VI = valueNumbering.find(V); + DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) valueNumbering.erase(VI); valueNumbering.insert(std::make_pair(V, num)); From evan.cheng at apple.com Tue Jun 19 18:55:25 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 19 Jun 2007 18:55:25 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <200706192355.l5JNtPAk013297@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMISelLowering.cpp updated: 1.57 -> 1.58 --- Log message: Be more conservative of duplicating blocks. 
--- Diffs of the changes: (+1 -1) ARMISelLowering.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/ARM/ARMISelLowering.cpp diff -u llvm/lib/Target/ARM/ARMISelLowering.cpp:1.57 llvm/lib/Target/ARM/ARMISelLowering.cpp:1.58 --- llvm/lib/Target/ARM/ARMISelLowering.cpp:1.57 Tue Jun 19 16:05:09 2007 +++ llvm/lib/Target/ARM/ARMISelLowering.cpp Tue Jun 19 18:55:02 2007 @@ -255,7 +255,7 @@ setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); - setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 3); + setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type } From resistor at mac.com Tue Jun 19 19:43:55 2007 From: resistor at mac.com (Owen Anderson) Date: Tue, 19 Jun 2007 19:43:55 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706200043.l5K0htA3015418@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.43 -> 1.44 --- Log message: Get rid of an unneeded helper function. 
--- Diffs of the changes: (+9 -19) GVNPRE.cpp | 28 +++++++++------------------- 1 files changed, 9 insertions(+), 19 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.43 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.44 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.43 Tue Jun 19 18:23:54 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Tue Jun 19 19:43:33 2007 @@ -338,7 +338,6 @@ // Helper fuctions // FIXME: eliminate or document these better void dump(const std::set& s) const; - void dump_unique(const std::set& s) const; void clean(std::set& set); Value* find_leader(std::set& vals, uint32_t v); @@ -657,15 +656,6 @@ DOUT << "}\n\n"; } -void GVNPRE::dump_unique(const std::set& s) const { - DOUT << "{ "; - for (std::set::iterator I = s.begin(), E = s.end(); - I != E; ++I) { - DEBUG((*I)->dump()); - } - DOUT << "}\n\n"; -} - void GVNPRE::elimination() { DOUT << "\n\nPhase 3: Elimination\n\n"; @@ -679,7 +669,7 @@ BasicBlock* BB = DI->getBlock(); DOUT << "Block: " << BB->getName() << "\n"; - dump_unique(availableOut[BB]); + dump(availableOut[BB]); DOUT << "\n\n"; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -802,7 +792,7 @@ } DOUT << "Maximal Set: "; - dump_unique(VN.getMaximalValues()); + dump(VN.getMaximalValues()); DOUT << "\n"; // If function has no exit blocks, only perform GVN @@ -839,7 +829,7 @@ DOUT << "\n"; DOUT << "EXP_GEN: "; - dump_unique(generatedExpressions[BB]); + dump(generatedExpressions[BB]); visited.insert(BB); std::set& anticIn = anticipatedIn[BB]; @@ -876,7 +866,7 @@ } DOUT << "ANTIC_OUT: "; - dump_unique(anticOut); + dump(anticOut); DOUT << "\n"; std::set S; @@ -912,7 +902,7 @@ clean(anticIn); DOUT << "ANTIC_IN: "; - dump_unique(anticIn); + dump(anticIn); DOUT << "\n"; if (old.size() != anticIn.size()) @@ -934,15 +924,15 @@ DOUT << "\n"; DOUT << "EXP_GEN: "; - dump_unique(generatedExpressions[I]); + dump(generatedExpressions[I]); DOUT << "\n"; DOUT << "ANTIC_IN: "; - 
dump_unique(anticipatedIn[I]); + dump(anticipatedIn[I]); DOUT << "\n"; DOUT << "AVAIL_OUT: "; - dump_unique(availableOut[I]); + dump(availableOut[I]); DOUT << "\n"; } @@ -985,7 +975,7 @@ DOUT << "Merge Block: " << BB->getName() << "\n"; DOUT << "ANTIC_IN: "; - dump_unique(anticIn); + dump(anticIn); DOUT << "\n"; for (unsigned i = 0; i < workList.size(); ++i) { From resistor at mac.com Wed Jun 20 13:07:20 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 13:07:20 -0500 Subject: [llvm-commits] CVS: llvm-test/TEST.nightly.Makefile Message-ID: <200706201807.l5KI7KcM016624@zion.cs.uiuc.edu> Changes in directory llvm-test: TEST.nightly.Makefile updated: 1.46 -> 1.47 --- Log message: 1) Measure the actual time to optimize a file, rather than just the time to pass it through opt 2) Measure the optimization time of the raw bytecode, rather than the time to reoptimize the already optimized bytecode. NOTE: This will SIGNIFICANTLY increase GCCAS times on the nightly testers. This is not an error. The previously reported times were not an accurate reflection of real optimization times. 
--- Diffs of the changes: (+2 -2) TEST.nightly.Makefile | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm-test/TEST.nightly.Makefile diff -u llvm-test/TEST.nightly.Makefile:1.46 llvm-test/TEST.nightly.Makefile:1.47 --- llvm-test/TEST.nightly.Makefile:1.46 Wed Mar 21 14:58:59 2007 +++ llvm-test/TEST.nightly.Makefile Wed Jun 20 13:06:48 2007 @@ -33,8 +33,8 @@ $(PROGRAMS_TO_TEST:%=Output/%.nightly.compile.report.txt): \ Output/%.nightly.compile.report.txt: Output/%.llvm.bc $(LOPT) @echo > $@ - @echo '$(LOPT) Output/$*.linked.bc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1' - @-if ($(LOPT) Output/$*.linked.bc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1)\ + @echo '$(LOPT) -std-compile-opts Output/$*.linked.rbc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1' + @-if ($(LOPT) -std-compile-opts Output/$*.linked.rbc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1)\ ;then \ echo "TEST-PASS: compile $(RELDIR)/$*" >> $@;\ printf "TEST-RESULT-compile: " >> $@;\ From evan.cheng at apple.com Wed Jun 20 13:26:37 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 20 Jun 2007 13:26:37 -0500 Subject: [llvm-commits] CVS: llvm/test/CodeGen/ARM/ifcvt1.ll ifcvt2.ll ifcvt3.ll ifcvt4.ll ifcvt5.ll ifcvt6.ll ifcvt7.ll ifcvt8.ll Message-ID: <200706201826.l5KIQbO5017428@zion.cs.uiuc.edu> Changes in directory llvm/test/CodeGen/ARM: ifcvt1.ll added (r1.1) ifcvt2.ll added (r1.1) ifcvt3.ll added (r1.1) ifcvt4.ll added (r1.1) ifcvt5.ll added (r1.1) ifcvt6.ll added (r1.1) ifcvt7.ll added (r1.1) ifcvt8.ll added (r1.1) --- Log message: Added some if-conversion tests. 
--- Diffs of the changes: (+201 -0) ifcvt1.ll | 15 +++++++++++++++ ifcvt2.ll | 36 ++++++++++++++++++++++++++++++++++++ ifcvt3.ll | 19 +++++++++++++++++++ ifcvt4.ll | 38 ++++++++++++++++++++++++++++++++++++++ ifcvt5.ll | 24 ++++++++++++++++++++++++ ifcvt6.ll | 18 ++++++++++++++++++ ifcvt7.ll | 32 ++++++++++++++++++++++++++++++++ ifcvt8.ll | 19 +++++++++++++++++++ 8 files changed, 201 insertions(+) Index: llvm/test/CodeGen/ARM/ifcvt1.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt1.ll:1.1 *** /dev/null Wed Jun 20 13:26:25 2007 --- llvm/test/CodeGen/ARM/ifcvt1.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,15 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bx | wc -l | grep 1 + + define i32 @t1(i32 %a, i32 %b) { + %tmp2 = icmp eq i32 %a, 0 + br i1 %tmp2, label %cond_false, label %cond_true + + cond_true: + %tmp5 = add i32 %b, 1 + ret i32 %tmp5 + + cond_false: + %tmp7 = add i32 %b, -1 + ret i32 %tmp7 + } Index: llvm/test/CodeGen/ARM/ifcvt2.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt2.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt2.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,36 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxlt | wc -l | grep 2 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxgt | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxge | wc -l | grep 1 + + define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { + %tmp2 = icmp sgt i32 %c, 10 + %tmp5 = icmp slt i32 %d, 4 + %tmp8 = or i1 %tmp5, %tmp2 + %tmp13 = add i32 %b, %a + br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock + + cond_true: + %tmp15 = add i32 %tmp13, %c + %tmp1821 = sub i32 %tmp15, %d + ret i32 %tmp1821 + + UnifiedReturnBlock: + ret i32 %tmp13 + } + + define i32 @t2(i32 %a, 
i32 %b, i32 %c, i32 %d) { + %tmp2 = icmp sgt i32 %c, 10 + %tmp5 = icmp slt i32 %d, 4 + %tmp8 = and i1 %tmp5, %tmp2 + %tmp13 = add i32 %b, %a + br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock + + cond_true: + %tmp15 = add i32 %tmp13, %c + %tmp1821 = sub i32 %tmp15, %d + ret i32 %tmp1821 + + UnifiedReturnBlock: + ret i32 %tmp13 + } Index: llvm/test/CodeGen/ARM/ifcvt3.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt3.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt3.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,19 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep cmpne | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bx | wc -l | grep 2 + + define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { + switch i32 %c, label %cond_next [ + i32 1, label %cond_true + i32 7, label %cond_true + ] + + cond_true: + %tmp12 = add i32 %a, 1 + %tmp1518 = add i32 %tmp12, %b + ret i32 %tmp1518 + + cond_next: + %tmp15 = add i32 %b, %a + ret i32 %tmp15 + } Index: llvm/test/CodeGen/ARM/ifcvt4.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt4.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt4.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,38 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep subgt | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep suble | wc -l | grep 1 + ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. 
+ + define i32 @t(i32 %a, i32 %b) { + entry: + %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp1434, label %bb17, label %bb.outer + + bb.outer: ; preds = %cond_false, %entry + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; [#uses=5] + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + br label %bb + + bb: ; preds = %cond_true, %bb.outer + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; [#uses=2] + %tmp. = sub i32 0, %b_addr.021.0.ph ; [#uses=1] + %tmp.40 = mul i32 %indvar, %tmp. ; [#uses=1] + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; [#uses=6] + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=1] + br i1 %tmp3, label %cond_true, label %cond_false + + cond_true: ; preds = %bb + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=2] + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp1437, label %bb17, label %bb + + cond_false: ; preds = %bb + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; [#uses=2] + %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; [#uses=1] + br i1 %tmp14, label %bb17, label %bb.outer + + bb17: ; preds = %cond_false, %cond_true, %entry + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; [#uses=1] + ret i32 %a_addr.026.1 + } Index: llvm/test/CodeGen/ARM/ifcvt5.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt5.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt5.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,24 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep blge | wc -l | grep 1 + + @x = external global i32* ; [#uses=1] + + define void @foo(i32 %a) { + entry: + %tmp = load i32** @x ; [#uses=1] + store i32 %a, i32* %tmp + ret void + } + + define void @t1(i32 %a, i32 %b) { + entry: + %tmp1 = icmp 
sgt i32 %a, 10 ; [#uses=1] + br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock + + cond_true: ; preds = %entry + tail call void @foo( i32 %b ) + ret void + + UnifiedReturnBlock: ; preds = %entry + ret void + } Index: llvm/test/CodeGen/ARM/ifcvt6.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt6.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt6.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,18 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep cmpne | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep ldmhi | wc -l | grep 1 + + define void @foo(i32 %X, i32 %Y) { + entry: + %tmp1 = icmp ult i32 %X, 4 ; [#uses=1] + %tmp4 = icmp eq i32 %Y, 0 ; [#uses=1] + %tmp7 = or i1 %tmp4, %tmp1 ; [#uses=1] + br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock + + cond_true: ; preds = %entry + %tmp10 = tail call i32 (...)* @bar( ) ; [#uses=0] + ret void + + UnifiedReturnBlock: ; preds = %entry + ret void + } Index: llvm/test/CodeGen/ARM/ifcvt7.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt7.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt7.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,32 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep cmpeq | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep moveq | wc -l | grep 1 + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep ldmeq | wc -l | grep 1 + ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1. 
+ + %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } + + define fastcc i32 @CountTree(%struct.quad_struct* %tree) { + entry: + br label %tailrecurse + + tailrecurse: ; preds = %bb, %entry + %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2] + %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1] + %tmp14 = icmp eq %struct.quad_struct* null, null ; [#uses=1] + %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; [#uses=1] + %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; [#uses=1] + %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; [#uses=1] + %bothcond = and i1 %tmp17, %tmp14 ; [#uses=1] + %bothcond1 = and i1 %bothcond, %tmp23 ; [#uses=1] + %bothcond2 = and i1 %bothcond1, %tmp29 ; [#uses=1] + br i1 %bothcond2, label %return, label %bb + + bb: ; preds = %tailrecurse + %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; [#uses=0] + br label %tailrecurse + + return: ; preds = %tailrecurse + ret i32 0 + } Index: llvm/test/CodeGen/ARM/ifcvt8.ll diff -c /dev/null llvm/test/CodeGen/ARM/ifcvt8.ll:1.1 *** /dev/null Wed Jun 20 13:26:37 2007 --- llvm/test/CodeGen/ARM/ifcvt8.ll Wed Jun 20 13:26:15 2007 *************** *** 0 **** --- 1,19 ---- + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion + ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep ldmne | wc -l | grep 1 + + %struct.SString = type { i8*, i32, i32 } + + declare void @abort() + + define fastcc void @t(%struct.SString* %word, i8 sext %c) { + entry: + %tmp1 = icmp eq %struct.SString* %word, null ; [#uses=1] + br i1 %tmp1, label %cond_true, label %cond_false + + cond_true: ; preds = %entry + tail call void @abort( ) + unreachable + + cond_false: ; preds = %entry + ret void + } From resistor at mac.com Wed Jun 20 13:30:42 2007 From: resistor 
at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 13:30:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706201830.l5KIUg7f017653@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.44 -> 1.45 --- Log message: Make GVNPRE accurately report whether it modified the function or not. --- Diffs of the changes: (+12 -5) GVNPRE.cpp | 17 ++++++++++++----- 1 files changed, 12 insertions(+), 5 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.44 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.45 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.44 Tue Jun 19 19:43:33 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Wed Jun 20 13:30:20 2007 @@ -351,7 +351,7 @@ // For a given block, calculate the generated expressions, temporaries, // and the AVAIL_OUT set void cleanup(); - void elimination(); + void elimination(bool& changed_function); void val_insert(std::set& s, Value* v); void val_replace(std::set& s, Value* v); @@ -656,7 +656,7 @@ DOUT << "}\n\n"; } -void GVNPRE::elimination() { +void GVNPRE::elimination(bool& changed_function) { DOUT << "\n\nPhase 3: Elimination\n\n"; std::vector > replace; @@ -693,6 +693,7 @@ std::pair rep = replace.back(); replace.pop_back(); rep.first->replaceAllUsesWith(rep.second); + changed_function = true; } for (std::vector::iterator I = erase.begin(), E = erase.end(); @@ -716,6 +717,8 @@ availableOut.clear(); anticipatedIn.clear(); invokeDep.clear(); + + bool changed_function = false; std::map > generatedExpressions; std::map > generatedPhis; @@ -798,7 +801,7 @@ // If function has no exit blocks, only perform GVN PostDominatorTree &PDT = getAnalysis(); if (PDT[&F.getEntryBlock()] == 0) { - elimination(); + elimination(changed_function); cleanup(); return true; @@ -1049,6 +1052,8 @@ C->getName()+".gvnpre", (*PI)->getTerminator()); + changed_function = true; + VN.add(newVal, VN.lookup(U)); std::set& predAvail = availableOut[*PI]; 
@@ -1076,6 +1081,8 @@ p->addIncoming(avail[*PI], *PI); } + changed_function = true; + VN.add(p, VN.lookup(e)); DOUT << "Creating value: " << std::hex << p << std::dec << "\n"; @@ -1106,10 +1113,10 @@ } // Phase 3: Eliminate - elimination(); + elimination(changed_function); // Phase 4: Cleanup cleanup(); - return true; + return changed_function; } From tonic at nondot.org Wed Jun 20 13:33:33 2007 From: tonic at nondot.org (Tanya Lattner) Date: Wed, 20 Jun 2007 13:33:33 -0500 Subject: [llvm-commits] CVS: llvm/docs/ProgrammersManual.html Message-ID: <200706201833.l5KIXX9b017957@zion.cs.uiuc.edu> Changes in directory llvm/docs: ProgrammersManual.html updated: 1.131 -> 1.132 --- Log message: Add blurb on deleting global variables. --- Diffs of the changes: (+27 -1) ProgrammersManual.html | 28 +++++++++++++++++++++++++++- 1 files changed, 27 insertions(+), 1 deletion(-) Index: llvm/docs/ProgrammersManual.html diff -u llvm/docs/ProgrammersManual.html:1.131 llvm/docs/ProgrammersManual.html:1.132 --- llvm/docs/ProgrammersManual.html:1.131 Fri Mar 30 07:22:09 2007 +++ llvm/docs/ProgrammersManual.html Wed Jun 20 13:33:15 2007 @@ -106,6 +106,7 @@
  • Deleting Instructions
  • Replacing an Instruction with another Value
  • +
  • Deleting GlobalVariables
  • + + +
    + +

    Deleting a global variable from a module is similar to deleting an +instruction. First, you must have a pointer to the global variable that you wish + to delete. Second, you must have a pointer to the module the global variable + belongs to. You use the pointer to the module to get its list of global + variables and then use the erase function to remove your global variable. + For example:

    + +
    +
    +GlobalVariable *GV = .. ;
    +Module *M = GV->getParent();
    +
    +M->getGlobaleList().erase(GV);
    +
    +
    + +
    +
    Advanced Topics @@ -3060,7 +3086,7 @@ Dinakar Dhurjati and Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2007/03/30 12:22:09 $ + Last modified: $Date: 2007/06/20 18:33:15 $ From dpatel at apple.com Wed Jun 20 13:51:37 2007 From: dpatel at apple.com (Devang Patel) Date: Wed, 20 Jun 2007 13:51:37 -0500 Subject: [llvm-commits] CVS: llvm/docs/WritingAnLLVMPass.html Message-ID: <200706201851.l5KIpbX7018932@zion.cs.uiuc.edu> Changes in directory llvm/docs: WritingAnLLVMPass.html updated: 1.62 -> 1.63 --- Log message: Update AnalysisGroup documentation to document restriction that allows only one ImmutablePass in a group. --- Diffs of the changes: (+4 -3) WritingAnLLVMPass.html | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-) Index: llvm/docs/WritingAnLLVMPass.html diff -u llvm/docs/WritingAnLLVMPass.html:1.62 llvm/docs/WritingAnLLVMPass.html:1.63 --- llvm/docs/WritingAnLLVMPass.html:1.62 Thu Jun 7 11:44:52 2007 +++ llvm/docs/WritingAnLLVMPass.html Wed Jun 20 13:51:14 2007 @@ -1308,8 +1308,9 @@

    Here we show how the default implementation is specified (using the extra argument to the RegisterAnalysisGroup template). There must be exactly one default implementation available at all times for an Analysis Group to be -used. Here we declare that the BasicAliasAnalysis +used. Only the default implementation can derive from ImmutablePass. +Here we declare that the + BasicAliasAnalysis pass is the default implementation for the interface.

    @@ -1809,7 +1810,7 @@ Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2007/06/07 16:44:52 $ + Last modified: $Date: 2007/06/20 18:51:14 $ From tonic at nondot.org Wed Jun 20 15:46:59 2007 From: tonic at nondot.org (Tanya Lattner) Date: Wed, 20 Jun 2007 15:46:59 -0500 Subject: [llvm-commits] CVS: llvm/docs/ProgrammersManual.html Message-ID: <200706202046.l5KKkx59021461@zion.cs.uiuc.edu> Changes in directory llvm/docs: ProgrammersManual.html updated: 1.132 -> 1.133 --- Log message: Modify deleting global variable with an even easier way. --- Diffs of the changes: (+5 -8) ProgrammersManual.html | 13 +++++-------- 1 files changed, 5 insertions(+), 8 deletions(-) Index: llvm/docs/ProgrammersManual.html diff -u llvm/docs/ProgrammersManual.html:1.132 llvm/docs/ProgrammersManual.html:1.133 --- llvm/docs/ProgrammersManual.html:1.132 Wed Jun 20 13:33:15 2007 +++ llvm/docs/ProgrammersManual.html Wed Jun 20 15:46:37 2007 @@ -1886,19 +1886,16 @@
    -

    Deleting a global variable from a module is similar to deleting an -instruction. First, you must have a pointer to the global variable that you wish - to delete. Second, you must have a pointer to the module the global variable - belongs to. You use the pointer to the module to get its list of global - variables and then use the erase function to remove your global variable. +

    Deleting a global variable from a module is just as easy as deleting an +Instruction. First, you must have a pointer to the global variable that you wish + to delete. You use this pointer to erase it from its parent, the module. For example:

     GlobalVariable *GV = .. ;
    -Module *M = GV->getParent();
     
    -M->getGlobaleList().erase(GV);
    +GV->eraseFromParent();
     
    @@ -3086,7 +3083,7 @@ Dinakar Dhurjati and Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2007/06/20 18:33:15 $ + Last modified: $Date: 2007/06/20 20:46:37 $ From evan.cheng at apple.com Wed Jun 20 15:50:09 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 20 Jun 2007 15:50:09 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Applications/spiff/Makefile spiff.c Message-ID: <200706202050.l5KKo9ou021547@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Applications/spiff: Makefile updated: 1.3 -> 1.4 spiff.c updated: 1.2 -> 1.3 --- Log message: Increase test size to something reasonable. --- Diffs of the changes: (+18 -8) Makefile | 3 ++- spiff.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) Index: llvm-test/MultiSource/Applications/spiff/Makefile diff -u llvm-test/MultiSource/Applications/spiff/Makefile:1.3 llvm-test/MultiSource/Applications/spiff/Makefile:1.4 --- llvm-test/MultiSource/Applications/spiff/Makefile:1.3 Fri Jul 29 13:08:48 2005 +++ llvm-test/MultiSource/Applications/spiff/Makefile Wed Jun 20 15:49:47 2007 @@ -4,7 +4,8 @@ CFLAGS = LDFLAGS = -RUN_OPTIONS = $(PROJ_SRC_DIR)/Sample.1 $(PROJ_SRC_DIR)/Sample.2 +#RUN_OPTIONS = $(PROJ_SRC_DIR)/Sample.1 $(PROJ_SRC_DIR)/Sample.2 +RUN_OPTIONS = $(PROJ_SRC_DIR)/Sample.3 $(PROJ_SRC_DIR)/Sample.4 #RUN_OPTIONS = ./Sample.3 ./Sample.4 #RUN_OPTIONS = ./one ./two # you get the idea... 
Index: llvm-test/MultiSource/Applications/spiff/spiff.c diff -u llvm-test/MultiSource/Applications/spiff/spiff.c:1.2 llvm-test/MultiSource/Applications/spiff/spiff.c:1.3 --- llvm-test/MultiSource/Applications/spiff/spiff.c:1.2 Wed Sep 1 09:33:21 2004 +++ llvm-test/MultiSource/Applications/spiff/spiff.c Wed Jun 20 15:49:47 2007 @@ -8,7 +8,7 @@ #ifndef lint -static char rcsid[]= "$Header: /var/cvs/llvm/llvm-test/MultiSource/Applications/spiff/spiff.c,v 1.2 2004/09/01 14:33:21 criswell Exp $"; +static char rcsid[]= "$Header: /var/cvs/llvm/llvm-test/MultiSource/Applications/spiff/spiff.c,v 1.3 2007/06/20 20:49:47 evancheng Exp $"; #endif @@ -38,6 +38,12 @@ */ static int _Y_flags; +#ifdef SMALL_PROBLEM_SIZE +#define SPIFF_ITER 1000 +#else +#define SPIFF_ITER 30000 +#endif + int main(argc,argv) int argc; @@ -107,16 +113,19 @@ if (-1 == max_d) max_d = K_gettmax(0) + K_gettmax(1); - if (_Y_eflag) - { + for(i=0;i<=SPIFF_ITER;i++) + { + if (_Y_eflag) + { edit_end = Q_do_exact(K_gettmax(0),K_gettmax(1), max_d,_Y_flags); - } - else - { + } + else + { edit_end = G_do_miller(K_gettmax(0), K_gettmax(1), max_d,_Y_flags); - } + } + } if (E_NULL != edit_end) { From resistor at mac.com Wed Jun 20 17:10:25 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 17:10:25 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706202210.l5KMAPMv023106@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.45 -> 1.46 --- Log message: Split runOnFunction into many smaller functions. This make it easier to get accurate performance analysis of GVNPRE. 
--- Diffs of the changes: (+329 -279) GVNPRE.cpp | 608 +++++++++++++++++++++++++++++++++---------------------------- 1 files changed, 329 insertions(+), 279 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.45 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.46 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.45 Wed Jun 20 13:30:20 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Wed Jun 20 17:10:02 2007 @@ -327,7 +327,6 @@ std::map > availableOut; std::map > anticipatedIn; - std::map invokeDep; virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -348,14 +347,34 @@ void topo_sort(std::set& set, std::vector& vec); - // For a given block, calculate the generated expressions, temporaries, - // and the AVAIL_OUT set void cleanup(); - void elimination(bool& changed_function); + bool elimination(); void val_insert(std::set& s, Value* v); void val_replace(std::set& s, Value* v); bool dependsOnInvoke(Value* V); + void buildsets_availout(BasicBlock::iterator I, + std::set& currAvail, + std::set& currPhis, + std::set& currExps, + std::set& currTemps); + void buildsets_anticout(BasicBlock* BB, + std::set& anticOut, + std::set& visited); + bool buildsets_anticin(BasicBlock* BB, + std::set& anticOut, + std::set& currExps, + std::set& currTemps, + std::set& visited); + unsigned buildsets(Function& F); + + void insertion_pre(Value* e, BasicBlock* BB, + std::map& avail, + std::set& new_set); + unsigned insertion_mergepoint(std::vector& workList, + df_iterator D, + std::set& new_set); + bool insertion(Function& F); }; @@ -656,9 +675,11 @@ DOUT << "}\n\n"; } -void GVNPRE::elimination(bool& changed_function) { +bool GVNPRE::elimination() { DOUT << "\n\nPhase 3: Elimination\n\n"; + bool changed_function = false; + std::vector > replace; std::vector erase; @@ -699,6 +720,8 @@ for (std::vector::iterator I = erase.begin(), E = erase.end(); I != E; ++I) (*I)->eraseFromParent(); + + return changed_function; } @@ -711,24 
+734,132 @@ } } -bool GVNPRE::runOnFunction(Function &F) { - VN.clear(); - createdExpressions.clear(); - availableOut.clear(); - anticipatedIn.clear(); - invokeDep.clear(); +void GVNPRE::buildsets_availout(BasicBlock::iterator I, + std::set& currAvail, + std::set& currPhis, + std::set& currExps, + std::set& currTemps) { + // Handle PHI nodes... + if (PHINode* p = dyn_cast(I)) { + VN.lookup_or_add(p); + currPhis.insert(p); + + // Handle binary ops... + } else if (BinaryOperator* BO = dyn_cast(I)) { + Value* leftValue = BO->getOperand(0); + Value* rightValue = BO->getOperand(1); + + VN.lookup_or_add(BO); + + if (isa(leftValue)) + val_insert(currExps, leftValue); + if (isa(rightValue)) + val_insert(currExps, rightValue); + val_insert(currExps, BO); + + // Handle cmp ops... + } else if (CmpInst* C = dyn_cast(I)) { + Value* leftValue = C->getOperand(0); + Value* rightValue = C->getOperand(1); + + VN.lookup_or_add(C); + + if (isa(leftValue)) + val_insert(currExps, leftValue); + if (isa(rightValue)) + val_insert(currExps, rightValue); + val_insert(currExps, C); + + // Handle unsupported ops + } else if (!I->isTerminator()){ + VN.lookup_or_add(I); + currTemps.insert(I); + } + + if (!I->isTerminator()) + val_insert(currAvail, I); +} + +void GVNPRE::buildsets_anticout(BasicBlock* BB, + std::set& anticOut, + std::set& visited) { + if (BB->getTerminator()->getNumSuccessors() == 1) { + if (visited.find(BB->getTerminator()->getSuccessor(0)) == visited.end()) + phi_translate_set(VN.getMaximalValues(), BB, + BB->getTerminator()->getSuccessor(0), anticOut); + else + phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], + BB, BB->getTerminator()->getSuccessor(0), anticOut); + } else if (BB->getTerminator()->getNumSuccessors() > 1) { + BasicBlock* first = BB->getTerminator()->getSuccessor(0); + anticOut.insert(anticipatedIn[first].begin(), anticipatedIn[first].end()); + + for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) { + BasicBlock* currSucc 
= BB->getTerminator()->getSuccessor(i); + std::set& succAnticIn = anticipatedIn[currSucc]; + + std::set temp; + std::insert_iterator > temp_ins(temp, temp.begin()); + std::set_intersection(anticOut.begin(), anticOut.end(), + succAnticIn.begin(), succAnticIn.end(), temp_ins); + + anticOut.clear(); + anticOut.insert(temp.begin(), temp.end()); + } + } +} + +bool GVNPRE::buildsets_anticin(BasicBlock* BB, + std::set& anticOut, + std::set& currExps, + std::set& currTemps, + std::set& visited) { + std::set& anticIn = anticipatedIn[BB]; + std::set old (anticIn.begin(), anticIn.end()); + + buildsets_anticout(BB, anticOut, visited); + + std::set S; + std::insert_iterator > s_ins(S, S.begin()); + std::set_difference(anticOut.begin(), anticOut.end(), + currTemps.begin(), currTemps.end(), s_ins); + + anticIn.clear(); + std::insert_iterator > ai_ins(anticIn, anticIn.begin()); + std::set_difference(currExps.begin(), currExps.end(), + currTemps.begin(), currTemps.end(), ai_ins); + + for (std::set::iterator I = S.begin(), E = S.end(); + I != E; ++I) { + // For non-opaque values, we should already have a value numbering. + // However, for opaques, such as constants within PHI nodes, it is + // possible that they have not yet received a number. Make sure they do + // so now. 
+ uint32_t valNum = 0; + if (isa(*I) || isa(*I)) + valNum = VN.lookup(*I); + else + valNum = VN.lookup_or_add(*I); + if (find_leader(anticIn, valNum) == 0) + val_insert(anticIn, *I); + } + + clean(anticIn); + anticOut.clear(); - bool changed_function = false; + if (old.size() != anticIn.size()) + return true; + else + return false; +} +unsigned GVNPRE::buildsets(Function& F) { std::map > generatedExpressions; std::map > generatedPhis; std::map > generatedTemporaries; - - + DominatorTree &DT = getAnalysis(); - // Phase 1: BuildSets - // Phase 1, Part 1: calculate AVAIL_OUT // Top-down walk of the dominator tree @@ -750,61 +881,21 @@ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - - // Handle PHI nodes... - if (PHINode* p = dyn_cast(BI)) { - VN.lookup_or_add(p); - currPhis.insert(p); - - // Handle binary ops... - } else if (BinaryOperator* BO = dyn_cast(BI)) { - Value* leftValue = BO->getOperand(0); - Value* rightValue = BO->getOperand(1); - - VN.lookup_or_add(BO); - - if (isa(leftValue)) - val_insert(currExps, leftValue); - if (isa(rightValue)) - val_insert(currExps, rightValue); - val_insert(currExps, BO); - - // Handle cmp ops... 
- } else if (CmpInst* C = dyn_cast(BI)) { - Value* leftValue = C->getOperand(0); - Value* rightValue = C->getOperand(1); - - VN.lookup_or_add(C); - - if (isa(leftValue)) - val_insert(currExps, leftValue); - if (isa(rightValue)) - val_insert(currExps, rightValue); - val_insert(currExps, C); + BI != BE; ++BI) + buildsets_availout(BI, currAvail, currPhis, currExps, currTemps); - // Handle unsupported ops - } else if (!BI->isTerminator()){ - VN.lookup_or_add(BI); - currTemps.insert(BI); - } - - if (!BI->isTerminator()) - val_insert(currAvail, BI); - } } - DOUT << "Maximal Set: "; - dump(VN.getMaximalValues()); - DOUT << "\n"; - // If function has no exit blocks, only perform GVN PostDominatorTree &PDT = getAnalysis(); if (PDT[&F.getEntryBlock()] == 0) { - elimination(changed_function); + bool changed_function = elimination(); cleanup(); - return true; + if (changed_function) + return 2; // Bailed early, made changes + else + return 1; // Bailed early, no changes } @@ -826,92 +917,10 @@ if (BB == 0) continue; - DOUT << "Block: " << BB->getName() << "\n"; - DOUT << "TMP_GEN: "; - dump(generatedTemporaries[BB]); - DOUT << "\n"; - - DOUT << "EXP_GEN: "; - dump(generatedExpressions[BB]); visited.insert(BB); - std::set& anticIn = anticipatedIn[BB]; - std::set old (anticIn.begin(), anticIn.end()); - - if (BB->getTerminator()->getNumSuccessors() == 1) { - if (visited.find(BB->getTerminator()->getSuccessor(0)) == - visited.end()) - phi_translate_set(VN.getMaximalValues(), BB, - BB->getTerminator()->getSuccessor(0), - anticOut); - else - phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], - BB, BB->getTerminator()->getSuccessor(0), - anticOut); - } else if (BB->getTerminator()->getNumSuccessors() > 1) { - BasicBlock* first = BB->getTerminator()->getSuccessor(0); - anticOut.insert(anticipatedIn[first].begin(), - anticipatedIn[first].end()); - for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) { - BasicBlock* currSucc = 
BB->getTerminator()->getSuccessor(i); - std::set& succAnticIn = anticipatedIn[currSucc]; - - std::set temp; - std::insert_iterator > temp_ins(temp, - temp.begin()); - std::set_intersection(anticOut.begin(), anticOut.end(), - succAnticIn.begin(), succAnticIn.end(), - temp_ins); - - anticOut.clear(); - anticOut.insert(temp.begin(), temp.end()); - } - } - - DOUT << "ANTIC_OUT: "; - dump(anticOut); - DOUT << "\n"; - - std::set S; - std::insert_iterator > s_ins(S, S.begin()); - std::set_difference(anticOut.begin(), anticOut.end(), - generatedTemporaries[BB].begin(), - generatedTemporaries[BB].end(), - s_ins); - - anticIn.clear(); - std::insert_iterator > ai_ins(anticIn, anticIn.begin()); - std::set_difference(generatedExpressions[BB].begin(), - generatedExpressions[BB].end(), - generatedTemporaries[BB].begin(), - generatedTemporaries[BB].end(), - ai_ins); - - for (std::set::iterator I = S.begin(), E = S.end(); - I != E; ++I) { - // For non-opaque values, we should already have a value numbering. - // However, for opaques, such as constants within PHI nodes, it is - // possible that they have not yet received a number. Make sure they do - // so now. 
- uint32_t valNum = 0; - if (isa(*I) || isa(*I)) - valNum = VN.lookup(*I); - else - valNum = VN.lookup_or_add(*I); - if (find_leader(anticIn, valNum) == 0) - val_insert(anticIn, *I); - } - - clean(anticIn); - - DOUT << "ANTIC_IN: "; - dump(anticIn); - DOUT << "\n"; - - if (old.size() != anticIn.size()) - changed = true; - - anticOut.clear(); + changed |= buildsets_anticin(BB, anticOut, generatedTemporaries[BB], + generatedExpressions[BB], visited); } iterations++; @@ -939,15 +948,156 @@ DOUT << "\n"; } - // Phase 2: Insert - DOUT<< "\nPhase 2: Insertion\n"; + return 0; // No bail, no changes +} + +void GVNPRE::insertion_pre(Value* e, BasicBlock* BB, + std::map& avail, + std::set& new_set) { + DOUT << "Processing Value: "; + DEBUG(e->dump()); + DOUT << "\n\n"; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + Value* e2 = avail[*PI]; + if (!find_leader(availableOut[*PI], VN.lookup(e2))) { + User* U = cast(e2); + + Value* s1 = 0; + if (isa(U->getOperand(0)) || + isa(U->getOperand(0))) + s1 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(0))); + else + s1 = U->getOperand(0); + + Value* s2 = 0; + if (isa(U->getOperand(1)) || + isa(U->getOperand(1))) + s2 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(1))); + else + s2 = U->getOperand(1); + + Value* newVal = 0; + if (BinaryOperator* BO = dyn_cast(U)) + newVal = BinaryOperator::create(BO->getOpcode(), s1, s2, + BO->getName()+".gvnpre", + (*PI)->getTerminator()); + else if (CmpInst* C = dyn_cast(U)) + newVal = CmpInst::create(C->getOpcode(), C->getPredicate(), s1, s2, + C->getName()+".gvnpre", + (*PI)->getTerminator()); + + VN.add(newVal, VN.lookup(U)); + + std::set& predAvail = availableOut[*PI]; + val_replace(predAvail, newVal); + + DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; + + std::map::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, newVal)); + + ++NumInsertedVals; + } + } + + 
PHINode* p = 0; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (p == 0) + p = new PHINode(avail[*PI]->getType(), "gvnpre-join", BB->begin()); + + p->addIncoming(avail[*PI], *PI); + } + + VN.add(p, VN.lookup(e)); + DOUT << "Creating value: " << std::hex << p << std::dec << "\n"; + + val_replace(availableOut[BB], p); + + DOUT << "Preds After Processing: "; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) + DEBUG((*PI)->dump()); + DOUT << "\n\n"; + + DOUT << "Merge Block After Processing: "; + DEBUG(BB->dump()); + DOUT << "\n\n"; + + new_set.insert(p); + + ++NumInsertedPhis; +} + +unsigned GVNPRE::insertion_mergepoint(std::vector& workList, + df_iterator D, + std::set& new_set) { + bool changed_function = false; + bool new_stuff = false; + + BasicBlock* BB = D->getBlock(); + for (unsigned i = 0; i < workList.size(); ++i) { + Value* e = workList[i]; + + if (isa(e) || isa(e)) { + if (find_leader(availableOut[D->getIDom()->getBlock()], + VN.lookup(e)) != 0) + continue; + + std::map avail; + bool by_some = false; + int num_avail = 0; + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; + ++PI) { + Value *e2 = phi_translate(e, *PI, BB); + Value *e3 = find_leader(availableOut[*PI], VN.lookup(e2)); + + if (e3 == 0) { + std::map::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, e2)); + } else { + std::map::iterator av = avail.find(*PI); + if (av != avail.end()) + avail.erase(av); + avail.insert(std::make_pair(*PI, e3)); + + by_some = true; + num_avail++; + } + } + + if (by_some && num_avail < std::distance(pred_begin(BB), pred_end(BB))) { + insertion_pre(e, BB, avail, new_set); + + changed_function = true; + new_stuff = true; + } + } + } + + unsigned retval = 0; + if (changed_function) + retval += 1; + if (new_stuff) + retval += 2; + + return retval; +} + +bool GVNPRE::insertion(Function& F) { + bool changed_function = false; + + 
DominatorTree &DT = getAnalysis(); std::map > new_sets; - unsigned i_iterations = 0; bool new_stuff = true; while (new_stuff) { new_stuff = false; - DOUT << "Iteration: " << i_iterations << "\n\n"; for (df_iterator DI = df_begin(DT.getRootNode()), E = df_end(DT.getRootNode()); DI != E; ++DI) { BasicBlock* BB = DI->getBlock(); @@ -981,139 +1131,39 @@ dump(anticIn); DOUT << "\n"; - for (unsigned i = 0; i < workList.size(); ++i) { - Value* e = workList[i]; - - if (isa(e) || isa(e)) { - if (find_leader(availableOut[DI->getIDom()->getBlock()], VN.lookup(e)) != 0) - continue; - - std::map avail; - bool by_some = false; - int num_avail = 0; - - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; - ++PI) { - Value *e2 = phi_translate(e, *PI, BB); - Value *e3 = find_leader(availableOut[*PI], VN.lookup(e2)); - - if (e3 == 0) { - std::map::iterator av = avail.find(*PI); - if (av != avail.end()) - avail.erase(av); - avail.insert(std::make_pair(*PI, e2)); - } else { - std::map::iterator av = avail.find(*PI); - if (av != avail.end()) - avail.erase(av); - avail.insert(std::make_pair(*PI, e3)); - - by_some = true; - num_avail++; - } - } - - if (by_some && - num_avail < std::distance(pred_begin(BB), pred_end(BB))) { - DOUT << "Processing Value: "; - DEBUG(e->dump()); - DOUT << "\n\n"; - - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - Value* e2 = avail[*PI]; - if (!find_leader(availableOut[*PI], VN.lookup(e2))) { - User* U = cast(e2); - - Value* s1 = 0; - if (isa(U->getOperand(0)) || - isa(U->getOperand(0))) - s1 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(0))); - else - s1 = U->getOperand(0); - - Value* s2 = 0; - if (isa(U->getOperand(1)) || - isa(U->getOperand(1))) - s2 = find_leader(availableOut[*PI], VN.lookup(U->getOperand(1))); - else - s2 = U->getOperand(1); - - Value* newVal = 0; - if (BinaryOperator* BO = dyn_cast(U)) - newVal = BinaryOperator::create(BO->getOpcode(), - s1, s2, - BO->getName()+".gvnpre", - 
(*PI)->getTerminator()); - else if (CmpInst* C = dyn_cast(U)) - newVal = CmpInst::create(C->getOpcode(), - C->getPredicate(), - s1, s2, - C->getName()+".gvnpre", - (*PI)->getTerminator()); - - changed_function = true; - - VN.add(newVal, VN.lookup(U)); - - std::set& predAvail = availableOut[*PI]; - val_replace(predAvail, newVal); - - DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; - - std::map::iterator av = avail.find(*PI); - if (av != avail.end()) - avail.erase(av); - avail.insert(std::make_pair(*PI, newVal)); - - ++NumInsertedVals; - } - } - - PHINode* p = 0; - - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - if (p == 0) - p = new PHINode(avail[*PI]->getType(), "gvnpre-join", - BB->begin()); - - p->addIncoming(avail[*PI], *PI); - } - - changed_function = true; - - VN.add(p, VN.lookup(e)); - DOUT << "Creating value: " << std::hex << p << std::dec << "\n"; - - val_replace(availOut, p); - availOut.insert(p); - - new_stuff = true; - - DOUT << "Preds After Processing: "; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) - DEBUG((*PI)->dump()); - DOUT << "\n\n"; - - DOUT << "Merge Block After Processing: "; - DEBUG(BB->dump()); - DOUT << "\n\n"; - - new_set.insert(p); - - ++NumInsertedPhis; - } - } - } + unsigned result = insertion_mergepoint(workList, DI, new_set); + if (result & 1) + changed_function = true; + if (result & 2) + new_stuff = true; } } - i_iterations++; } + return changed_function; +} + +bool GVNPRE::runOnFunction(Function &F) { + VN.clear(); + createdExpressions.clear(); + availableOut.clear(); + anticipatedIn.clear(); + + bool changed_function = false; + + // Phase 1: BuildSets + unsigned bail = buildsets(F); + //If a bail occurred, terminate early + if (bail != 0) + return (bail == 2); + + // Phase 2: Insert + DOUT<< "\nPhase 2: Insertion\n"; + + changed_function |= insertion(F); + // Phase 3: Eliminate - elimination(changed_function); + changed_function |= 
elimination(); // Phase 4: Cleanup cleanup(); From sabre at nondot.org Wed Jun 20 18:46:48 2007 From: sabre at nondot.org (Chris Lattner) Date: Wed, 20 Jun 2007 18:46:48 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200706202346.l5KNkmtj025054@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.773 -> 1.774 --- Log message: refactor a bunch of code out of visitICmpInstWithInstAndIntCst into its own routine. --- Diffs of the changes: (+134 -123) InstructionCombining.cpp | 257 ++++++++++++++++++++++++----------------------- 1 files changed, 134 insertions(+), 123 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.773 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.774 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.773 Tue Jun 19 00:43:49 2007 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Wed Jun 20 18:46:26 2007 @@ -189,6 +189,8 @@ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, ConstantInt *RHS); + Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS); Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); @@ -5109,6 +5111,134 @@ return Changed ? &I : 0; } + +/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS +/// and CmpRHS are both known to be integer constants. +Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, + ConstantInt *DivRHS) { + ConstantInt *CmpRHS = cast(ICI.getOperand(1)); + const APInt &CmpRHSV = CmpRHS->getValue(); + + // FIXME: If the operand types don't match the type of the divide + // then don't attempt this transform. The code below doesn't have the + // logic to deal with a signed divide and an unsigned compare (and + // vice versa). 
This is because (x /s C1) getOpcode() == Instruction::SDiv; + if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) + return 0; + if (DivRHS->isZero()) + return 0; // Don't hack on div by zero + + // Initialize the variables that will indicate the nature of the + // range check. + bool LoOverflow = false, HiOverflow = false; + ConstantInt *LoBound = 0, *HiBound = 0; + + // Compute Prod = CI * DivRHS. We are essentially solving an equation + // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and + // C2 (CI). By solving for X we can turn this into a range check + // instead of computing a divide. + ConstantInt *Prod = Multiply(CmpRHS, DivRHS); + + // Determine if the product overflows by seeing if the product is + // not equal to the divide. Make sure we do the same kind of divide + // as in the LHS instruction that we're folding. + bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + + // Get the ICmp opcode + ICmpInst::Predicate predicate = ICI.getPredicate(); + + if (!DivIsSigned) { // udiv + LoBound = Prod; + LoOverflow = ProdOV; + HiOverflow = ProdOV || AddWithOverflow(HiBound, LoBound, DivRHS, false); + } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. + if (CmpRHSV == 0) { // (X / pos) op 0 + // Can't overflow. + LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); + HiBound = DivRHS; + } else if (CmpRHSV.isPositive()) { // (X / pos) op pos + LoBound = Prod; + LoOverflow = ProdOV; + HiOverflow = ProdOV || AddWithOverflow(HiBound, Prod, DivRHS, true); + } else { // (X / pos) op neg + Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); + LoOverflow = AddWithOverflow(LoBound, Prod, + cast(DivRHSH), true); + HiBound = AddOne(Prod); + HiOverflow = ProdOV; + } + } else { // Divisor is < 0. 
+ if (CmpRHSV == 0) { // (X / neg) op 0 + LoBound = AddOne(DivRHS); + HiBound = cast(ConstantExpr::getNeg(DivRHS)); + if (HiBound == DivRHS) + return 0; // - INTMIN = INTMIN + } else if (CmpRHSV.isPositive()) { // (X / neg) op pos + HiOverflow = LoOverflow = ProdOV; + if (!LoOverflow) + LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), + true); + HiBound = AddOne(Prod); + } else { // (X / neg) op neg + LoBound = Prod; + LoOverflow = HiOverflow = ProdOV; + HiBound = Subtract(Prod, DivRHS); + } + + // Dividing by a negate swaps the condition. + predicate = ICmpInst::getSwappedPredicate(predicate); + } + + Value *X = DivI->getOperand(0); + switch (predicate) { + default: assert(0 && "Unhandled icmp opcode!"); + case ICmpInst::ICMP_EQ: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, + true, ICI); + case ICmpInst::ICMP_NE: + if (LoOverflow && HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + else if (HiOverflow) + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, X, LoBound); + else if (LoOverflow) + return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : + ICmpInst::ICMP_UGE, X, HiBound); + else + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, + false, ICI); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + if (LoOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + return new ICmpInst(predicate, X, LoBound); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + if (HiOverflow) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); + if (predicate == ICmpInst::ICMP_UGT) + return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); + else + return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + } +} + + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". /// Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, @@ -5357,129 +5487,10 @@ // checked. If there is an overflow on the low or high side, remember // it, otherwise compute the range [low, hi) bounding the new value. // See: InsertRangeTest above for the kinds of replacements possible. - if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) { - // FIXME: If the operand types don't match the type of the divide - // then don't attempt this transform. The code below doesn't have the - // logic to deal with a signed divide and an unsigned compare (and - // vice versa). This is because (x /s C1) getOpcode() == Instruction::SDiv; - if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) - break; - if (DivRHS->isZero()) - break; // Don't hack on div by zero - - // Initialize the variables that will indicate the nature of the - // range check. - bool LoOverflow = false, HiOverflow = false; - ConstantInt *LoBound = 0, *HiBound = 0; - - // Compute Prod = CI * DivRHS. We are essentially solving an equation - // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and - // C2 (CI). By solving for X we can turn this into a range check - // instead of computing a divide. 
- ConstantInt *Prod = Multiply(RHS, DivRHS); - - // Determine if the product overflows by seeing if the product is - // not equal to the divide. Make sure we do the same kind of divide - // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : - ConstantExpr::getUDiv(Prod, DivRHS)) != RHS; - - // Get the ICmp opcode - ICmpInst::Predicate predicate = ICI.getPredicate(); - - if (!DivIsSigned) { // udiv - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || - AddWithOverflow(HiBound, LoBound, DivRHS, false); - } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. - if (RHSV == 0) { // (X / pos) op 0 - // Can't overflow. - LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); - HiBound = DivRHS; - } else if (RHSV.isPositive()) { // (X / pos) op pos - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || - AddWithOverflow(HiBound, Prod, DivRHS, true); - } else { // (X / pos) op neg - Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); - LoOverflow = AddWithOverflow(LoBound, Prod, - cast(DivRHSH), true); - HiBound = AddOne(Prod); - HiOverflow = ProdOV; - } - } else { // Divisor is < 0. - if (RHSV == 0) { // (X / neg) op 0 - LoBound = AddOne(DivRHS); - HiBound = cast(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) - LoBound = 0; // - INTMIN = INTMIN - } else if (RHSV.isPositive()) { // (X / neg) op pos - HiOverflow = LoOverflow = ProdOV; - if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), - true); - HiBound = AddOne(Prod); - } else { // (X / neg) op neg - LoBound = Prod; - LoOverflow = HiOverflow = ProdOV; - HiBound = Subtract(Prod, DivRHS); - } - - // Dividing by a negate swaps the condition. 
- predicate = ICmpInst::getSwappedPredicate(predicate); - } - - if (LoBound) { - Value *X = LHSI->getOperand(0); - switch (predicate) { - default: assert(0 && "Unhandled icmp opcode!"); - case ICmpInst::ICMP_EQ: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true, ICI); - case ICmpInst::ICMP_NE: - if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); - else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - false, ICI); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - if (LoOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - return new ICmpInst(predicate, X, LoBound); - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - if (HiOverflow) - return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - if (predicate == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - else - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); - } - } - } + if (ConstantInt *DivRHS = dyn_cast(LHSI->getOperand(1))) + if (Instruction *R = FoldICmpDivCst(ICI, cast(LHSI), + DivRHS)) + return R; break; } From resistor at mac.com Wed Jun 20 18:48:24 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 18:48:24 -0500 Subject: [llvm-commits] CVS: llvm-test/Makefile.programs TEST.nightly.Makefile Message-ID: <200706202348.l5KNmOMC025115@zion.cs.uiuc.edu> Changes in directory llvm-test: Makefile.programs updated: 1.268 
-> 1.269 TEST.nightly.Makefile updated: 1.47 -> 1.48 --- Log message: Don't run the optimizations twice when performing a nightly build. Just record the results the first time. --- Diffs of the changes: (+2 -4) Makefile.programs | 2 +- TEST.nightly.Makefile | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) Index: llvm-test/Makefile.programs diff -u llvm-test/Makefile.programs:1.268 llvm-test/Makefile.programs:1.269 --- llvm-test/Makefile.programs:1.268 Wed May 30 12:49:53 2007 +++ llvm-test/Makefile.programs Wed Jun 20 18:48:01 2007 @@ -256,7 +256,7 @@ # standard compilation optimizations. $(PROGRAMS_TO_TEST:%=Output/%.linked.bc): \ Output/%.linked.bc: Output/%.linked.rbc $(LOPT) - -$(LOPT) -std-compile-opts $(STATS) $(EXTRA_LOPT_OPTIONS) $< -o $@ -f + -$(LOPT) -std-compile-opts -time-passes -info-output-file=$(CURDIR)/$@.info $(STATS) $(EXTRA_LOPT_OPTIONS) $< -o $@ -f $(PROGRAMS_TO_TEST:%=Output/%.llvm.stripped.bc): \ Output/%.llvm.stripped.bc: Output/%.llvm.bc $(LOPT) Index: llvm-test/TEST.nightly.Makefile diff -u llvm-test/TEST.nightly.Makefile:1.47 llvm-test/TEST.nightly.Makefile:1.48 --- llvm-test/TEST.nightly.Makefile:1.47 Wed Jun 20 13:06:48 2007 +++ llvm-test/TEST.nightly.Makefile Wed Jun 20 18:48:01 2007 @@ -33,9 +33,7 @@ $(PROGRAMS_TO_TEST:%=Output/%.nightly.compile.report.txt): \ Output/%.nightly.compile.report.txt: Output/%.llvm.bc $(LOPT) @echo > $@ - @echo '$(LOPT) -std-compile-opts Output/$*.linked.rbc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1' - @-if ($(LOPT) -std-compile-opts Output/$*.linked.rbc -f -o /dev/null $(TIMEOPT) $(EXTRA_LOPT_OPTIONS) >>$@ 2>&1)\ - ;then \ + @-if test -f Output/$@.info; then \ echo "TEST-PASS: compile $(RELDIR)/$*" >> $@;\ printf "TEST-RESULT-compile: " >> $@;\ grep "Total Execution Time" $@.info >> $@;\ From resistor at mac.com Wed Jun 20 19:19:27 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 19:19:27 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp 
Message-ID: <200706210019.l5L0JR0o025767@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.46 -> 1.47 --- Log message: Comment-ize the functions in GVNPRE. --- Diffs of the changes: (+108 -76) GVNPRE.cpp | 184 +++++++++++++++++++++++++++++++++++-------------------------- 1 files changed, 108 insertions(+), 76 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.46 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.47 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.46 Wed Jun 20 17:10:02 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Wed Jun 20 19:19:05 2007 @@ -42,7 +42,9 @@ // ValueTable Class //===----------------------------------------------------------------------===// -/// This class holds the mapping between values and value numbers. +/// This class holds the mapping between values and value numbers. It is used +/// as an efficient mechanism to determine the expression-wise equivalence of +/// two values. 
namespace { class VISIBILITY_HIDDEN ValueTable { @@ -89,6 +91,8 @@ Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); Expression::ExpressionOpcode getOpcode(CmpInst* C); + Expression create_expression(BinaryOperator* BO); + Expression create_expression(CmpInst* C); public: ValueTable() { nextValueNumber = 1; } uint32_t lookup_or_add(Value* V); @@ -100,14 +104,15 @@ } std::set& getMaximalValues() { return maximalValues; } - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); void erase(Value* v); }; } +//===----------------------------------------------------------------------===// +// ValueTable Internal Functions +//===----------------------------------------------------------------------===// ValueTable::Expression::ExpressionOpcode - ValueTable::getOpcode(BinaryOperator* BO) { + ValueTable::getOpcode(BinaryOperator* BO) { switch(BO->getOpcode()) { case Instruction::Add: return Expression::ADD; @@ -215,6 +220,36 @@ } } +ValueTable::Expression ValueTable::create_expression(BinaryOperator* BO) { + Expression e; + + e.leftVN = lookup_or_add(BO->getOperand(0)); + e.rightVN = lookup_or_add(BO->getOperand(1)); + e.opcode = getOpcode(BO); + + maximalExpressions.insert(e); + + return e; +} + +ValueTable::Expression ValueTable::create_expression(CmpInst* C) { + Expression e; + + e.leftVN = lookup_or_add(C->getOperand(0)); + e.rightVN = lookup_or_add(C->getOperand(1)); + e.opcode = getOpcode(C); + + maximalExpressions.insert(e); + + return e; +} + +//===----------------------------------------------------------------------===// +// ValueTable External Functions +//===----------------------------------------------------------------------===// + +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. 
uint32_t ValueTable::lookup_or_add(Value* V) { maximalValues.insert(V); @@ -255,6 +290,8 @@ } } +/// lookup - Returns the value number of the specified value. Fails if +/// the value has not yet been numbered. uint32_t ValueTable::lookup(Value* V) { DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) @@ -265,6 +302,8 @@ return 0; } +/// add - Add the specified value with the given value number, removing +/// its old number, if any void ValueTable::add(Value* V, uint32_t num) { DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) @@ -272,30 +311,7 @@ valueNumbering.insert(std::make_pair(V, num)); } -ValueTable::Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - - e.leftVN = lookup_or_add(BO->getOperand(0)); - e.rightVN = lookup_or_add(BO->getOperand(1)); - e.opcode = getOpcode(BO); - - maximalExpressions.insert(e); - - return e; -} - -ValueTable::Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.leftVN = lookup_or_add(C->getOperand(0)); - e.rightVN = lookup_or_add(C->getOperand(1)); - e.opcode = getOpcode(C); - - maximalExpressions.insert(e); - - return e; -} - +/// clear - Remove all entries from the ValueTable and the maximal sets void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); @@ -304,6 +320,7 @@ nextValueNumber = 1; } +/// erase - Remove a value from the value numbering and maximal sets void ValueTable::erase(Value* V) { maximalValues.erase(V); valueNumbering.erase(V); @@ -313,6 +330,10 @@ maximalExpressions.erase(create_expression(C)); } +//===----------------------------------------------------------------------===// +// GVNPRE Pass +//===----------------------------------------------------------------------===// + namespace { class VISIBILITY_HIDDEN GVNPRE : public FunctionPass { @@ -328,6 +349,7 @@ std::map > availableOut; std::map > anticipatedIn; + // This transformation requires dominator postdominator info 
virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); @@ -382,6 +404,7 @@ } +// createGVNPREPass - The public interface to this file... FunctionPass *llvm::createGVNPREPass() { return new GVNPRE(); } RegisterPass X("gvnpre", @@ -392,6 +415,9 @@ STATISTIC(NumInsertedPhis, "Number of PHI nodes inserted"); STATISTIC(NumEliminated, "Number of redundant instructions eliminated"); +/// find_leader - Given a set and a value number, return the first +/// element of the set with that value number, or 0 if no such element +/// is present Value* GVNPRE::find_leader(std::set& vals, uint32_t v) { for (std::set::iterator I = vals.begin(), E = vals.end(); I != E; ++I) @@ -401,6 +427,8 @@ return 0; } +/// val_insert - Insert a value into a set only if there is not a value +/// with the same value number already in the set void GVNPRE::val_insert(std::set& s, Value* v) { uint32_t num = VN.lookup(v); Value* leader = find_leader(s, num); @@ -408,6 +436,8 @@ s.insert(v); } +/// val_replace - Insert a value into a set, replacing any values already in +/// the set that have the same value number void GVNPRE::val_replace(std::set& s, Value* v) { uint32_t num = VN.lookup(v); Value* leader = find_leader(s, num); @@ -418,6 +448,10 @@ s.insert(v); } +/// phi_translate - Given a value, its parent block, and a predecessor of its +/// parent, translate the value into a form legal for the predecessor block. 
This +/// means translating its operands (and recursively, their operands) through +/// any phi nodes in the parent into values available in the predecessor Value* GVNPRE::phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) { if (V == 0) return 0; @@ -511,6 +545,7 @@ return V; } +/// phi_translate_set - Perform phi translation on every element of a set void GVNPRE::phi_translate_set(std::set& anticIn, BasicBlock* pred, BasicBlock* succ, std::set& out) { @@ -522,6 +557,9 @@ } } +/// dependsOnInvoke - Test if a value has a phi node as an operand, any of +/// whose inputs is an invoke instruction. If this is true, we cannot safely +/// PRE the instruction or anything that depends on it. bool GVNPRE::dependsOnInvoke(Value* V) { if (PHINode* p = dyn_cast(V)) { for (PHINode::op_iterator I = p->op_begin(), E = p->op_end(); I != E; ++I) @@ -533,7 +571,9 @@ } } -// Remove all expressions whose operands are not themselves in the set +/// clean - Remove all non-opaque values from the set whose operands are not +/// themselves in the set, as well as all values that depend on invokes (see +/// above) void GVNPRE::clean(std::set& set) { std::vector worklist; topo_sort(set, worklist); @@ -595,8 +635,9 @@ } } -void GVNPRE::topo_sort(std::set& set, - std::vector& vec) { +/// topo_sort - Given a set of values, sort them by topological +/// order into the provided vector. +void GVNPRE::topo_sort(std::set& set, std::vector& vec) { std::set toErase; for (std::set::iterator I = set.begin(), E = set.end(); I != E; ++I) { @@ -665,7 +706,7 @@ } } - +/// dump - Dump a set of values to standard error void GVNPRE::dump(const std::set& s) const { DOUT << "{ "; for (std::set::iterator I = s.begin(), E = s.end(); @@ -675,6 +716,9 @@ DOUT << "}\n\n"; } +/// elimination - Phase 3 of the main algorithm. Perform full redundancy +/// elimination by walking the dominator tree and removing any instruction that +/// is dominated by another instruction with the same value number. 
bool GVNPRE::elimination() { DOUT << "\n\nPhase 3: Elimination\n\n"; @@ -724,7 +768,8 @@ return changed_function; } - +/// cleanup - Delete any extraneous values that were created to represent +/// expressions without leaders. void GVNPRE::cleanup() { while (!createdExpressions.empty()) { Instruction* I = createdExpressions.back(); @@ -734,6 +779,8 @@ } } +/// buildsets_availout - When calculating availability, handle an instruction +/// by inserting it into the appropriate sets void GVNPRE::buildsets_availout(BasicBlock::iterator I, std::set& currAvail, std::set& currPhis, @@ -780,6 +827,8 @@ val_insert(currAvail, I); } +/// buildsets_anticout - When walking the postdom tree, calculate the ANTIC_OUT +/// set as a function of the ANTIC_IN set of the block's predecessors void GVNPRE::buildsets_anticout(BasicBlock* BB, std::set& anticOut, std::set& visited) { @@ -809,6 +858,9 @@ } } +/// buildsets_anticin - Walk the postdom tree, calculating ANTIC_OUT for +/// each block. ANTIC_IN is then a function of ANTIC_OUT and the GEN +/// sets populated in buildsets_availout bool GVNPRE::buildsets_anticin(BasicBlock* BB, std::set& anticOut, std::set& currExps, @@ -853,6 +905,8 @@ return false; } +/// buildsets - Phase 1 of the main algorithm. Construct the AVAIL_OUT +/// and the ANTIC_IN sets. 
unsigned GVNPRE::buildsets(Function& F) { std::map > generatedExpressions; std::map > generatedPhis; @@ -926,38 +980,15 @@ iterations++; } - DOUT << "Iterations: " << iterations << "\n"; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - DOUT << "Name: " << I->getName().c_str() << "\n"; - - DOUT << "TMP_GEN: "; - dump(generatedTemporaries[I]); - DOUT << "\n"; - - DOUT << "EXP_GEN: "; - dump(generatedExpressions[I]); - DOUT << "\n"; - - DOUT << "ANTIC_IN: "; - dump(anticipatedIn[I]); - DOUT << "\n"; - - DOUT << "AVAIL_OUT: "; - dump(availableOut[I]); - DOUT << "\n"; - } - return 0; // No bail, no changes } +/// insertion_pre - When a partial redundancy has been identified, eliminate it +/// by inserting appropriate values into the predecessors and a phi node in +/// the main block void GVNPRE::insertion_pre(Value* e, BasicBlock* BB, std::map& avail, std::set& new_set) { - DOUT << "Processing Value: "; - DEBUG(e->dump()); - DOUT << "\n\n"; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { Value* e2 = avail[*PI]; if (!find_leader(availableOut[*PI], VN.lookup(e2))) { @@ -991,9 +1022,7 @@ std::set& predAvail = availableOut[*PI]; val_replace(predAvail, newVal); - - DOUT << "Creating value: " << std::hex << newVal << std::dec << "\n"; - + std::map::iterator av = avail.find(*PI); if (av != avail.end()) avail.erase(av); @@ -1013,24 +1042,14 @@ } VN.add(p, VN.lookup(e)); - DOUT << "Creating value: " << std::hex << p << std::dec << "\n"; - val_replace(availableOut[BB], p); - - DOUT << "Preds After Processing: "; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) - DEBUG((*PI)->dump()); - DOUT << "\n\n"; - - DOUT << "Merge Block After Processing: "; - DEBUG(BB->dump()); - DOUT << "\n\n"; - new_set.insert(p); ++NumInsertedPhis; } +/// insertion_mergepoint - When walking the dom tree, check at each merge +/// block for the possibility of a partial redundancy. 
If present, eliminate it unsigned GVNPRE::insertion_mergepoint(std::vector& workList, df_iterator D, std::set& new_set) { @@ -1089,6 +1108,9 @@ return retval; } +/// insertion - Phase 2 of the main algorithm. Walk the dominator tree looking for +/// merge points. When one is found, check for a partial redundancy. If one is +/// present, eliminate it. Repeat this walk until no changes are made. bool GVNPRE::insertion(Function& F) { bool changed_function = false; @@ -1143,7 +1165,11 @@ return changed_function; } +// GVNPRE::runOnFunction - This is the main transformation entry point for a +// function. +// bool GVNPRE::runOnFunction(Function &F) { + // Clean out global sets from any previous functions VN.clear(); createdExpressions.clear(); availableOut.clear(); @@ -1152,20 +1178,26 @@ bool changed_function = false; // Phase 1: BuildSets + // This phase calculates the AVAIL_OUT and ANTIC_IN sets + // NOTE: If full postdom information is not available, this will bail + // early, performing GVN but not PRE unsigned bail = buildsets(F); //If a bail occurred, terminate early if (bail != 0) return (bail == 2); // Phase 2: Insert - DOUT<< "\nPhase 2: Insertion\n"; - + // This phase inserts values to make partially redundant values + // fully redundant changed_function |= insertion(F); // Phase 3: Eliminate + // This phase performs trivial full redundancy elimination changed_function |= elimination(); // Phase 4: Cleanup + // This phase cleans up values that were created solely + // as leaders for expressions cleanup(); return changed_function; From evan.cheng at apple.com Wed Jun 20 19:48:02 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 20 Jun 2007 19:48:02 -0500 Subject: [llvm-commits] CVS: llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile Message-ID: <200706210048.l5L0m2TC026394@zion.cs.uiuc.edu> Changes in directory llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset: Makefile updated: 1.3 -> 1.4 --- Log message: Fix test. 
--- Diffs of the changes: (+1 -1) Makefile | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile diff -u llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile:1.3 llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile:1.4 --- llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile:1.3 Thu Mar 29 19:56:51 2007 +++ llvm-test/MultiSource/Benchmarks/MiBench/consumer-typeset/Makefile Wed Jun 20 19:47:39 2007 @@ -3,5 +3,5 @@ PROG = consumer-typeset CPPFLAGS = -DOS_UNIX=1 -DOS_DOS=0 -DOS_MAC=0 -DDB_FIX=0 -DUSE_STAT=1 -DSAFE_DFT=0 -DCOLLATE=1 -DLIB_DIR=\"lout.lib\" -DFONT_DIR=\"font\" -DMAPS_DIR=\"maps\" -DINCL_DIR=\"include\" -DDATA_DIR=\"data\" -DHYPH_DIR=\"hyph\" -DLOCALE_DIR=\"locale\" -DCHAR_IN=1 -DCHAR_OUT=0 -DLOCALE_ON=1 -DASSERT_ON=1 -DDEBUG_ON=0 -DPDF_COMPRESSION=0 LDFLAGS = -lm -RUN_OPTIONS = -I $(PROJ_SRC_DIR)/data/include -D $(PROJ_SRC_DIR)/data/data -F $(PROJ_SRC_DIR)/data/font -C $(PROJ_SRC_DIR)/data/maps -H $(PROJ_SRC_DIR)/data/hyph $(PROJ_SRC_DIR)/large.lout +RUN_OPTIONS = -x -I $(PROJ_SRC_DIR)/data/include -D $(PROJ_SRC_DIR)/data/data -F $(PROJ_SRC_DIR)/data/font -C $(PROJ_SRC_DIR)/data/maps -H $(PROJ_SRC_DIR)/data/hyph $(PROJ_SRC_DIR)/large.lout include $(LEVEL)/MultiSource/Makefile.multisrc From resistor at mac.com Wed Jun 20 20:59:27 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 20:59:27 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706210159.l5L1xRia027464@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.47 -> 1.48 --- Log message: Eliminate a redundant check. This speeds up optimization of 253.perlbmk from 13.5 seconds to 10.9 seconds. 
--- Diffs of the changes: (+3 -7) GVNPRE.cpp | 10 +++------- 1 files changed, 3 insertions(+), 7 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.47 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.48 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.47 Wed Jun 20 19:19:05 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Wed Jun 20 20:59:05 2007 @@ -887,13 +887,9 @@ // However, for opaques, such as constants within PHI nodes, it is // possible that they have not yet received a number. Make sure they do // so now. - uint32_t valNum = 0; - if (isa(*I) || isa(*I)) - valNum = VN.lookup(*I); - else - valNum = VN.lookup_or_add(*I); - if (find_leader(anticIn, valNum) == 0) - val_insert(anticIn, *I); + if (!isa(*I) && !isa(*I)) + VN.lookup_or_add(*I); + val_insert(anticIn, *I); } clean(anticIn); From rspencer at reidspencer.com Wed Jun 20 21:30:01 2007 From: rspencer at reidspencer.com (Reid Spencer) Date: Wed, 20 Jun 2007 22:30:01 -0400 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp In-Reply-To: <200706210159.l5L1xRia027464@zion.cs.uiuc.edu> References: <200706210159.l5L1xRia027464@zion.cs.uiuc.edu> Message-ID: <4679E2A9.3010603@reidspencer.com> Owen Anderson wrote: >Changes in directory llvm/lib/Transforms/Scalar: > >GVNPRE.cpp updated: 1.47 -> 1.48 >--- >Log message: > >Eliminate a redundant check. This speeds up optimization of 253.perlbmk from 13.5 seconds to 10.9 seconds. > > This is speeding up run time of GVNPRE not run time of 253.perlbmk, right? 
Reid > >--- >Diffs of the changes: (+3 -7) > > GVNPRE.cpp | 10 +++------- > 1 files changed, 3 insertions(+), 7 deletions(-) > > >Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp >diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.47 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.48 >--- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.47 Wed Jun 20 19:19:05 2007 >+++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Wed Jun 20 20:59:05 2007 >@@ -887,13 +887,9 @@ > // However, for opaques, such as constants within PHI nodes, it is > // possible that they have not yet received a number. Make sure they do > // so now. >- uint32_t valNum = 0; >- if (isa(*I) || isa(*I)) >- valNum = VN.lookup(*I); >- else >- valNum = VN.lookup_or_add(*I); >- if (find_leader(anticIn, valNum) == 0) >- val_insert(anticIn, *I); >+ if (!isa(*I) && !isa(*I)) >+ VN.lookup_or_add(*I); >+ val_insert(anticIn, *I); > } > > clean(anticIn); > > > >_______________________________________________ >llvm-commits mailing list >llvm-commits at cs.uiuc.edu >http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > From resistor at mac.com Wed Jun 20 23:55:00 2007 From: resistor at mac.com (Owen Anderson) Date: Wed, 20 Jun 2007 23:55:00 -0500 Subject: [llvm-commits] CVS: llvm-test/TEST.nightly.Makefile Message-ID: <200706210455.l5L4t08t030485@zion.cs.uiuc.edu> Changes in directory llvm-test: TEST.nightly.Makefile updated: 1.48 -> 1.49 --- Log message: Try again at getting these patterns right. 
--- Diffs of the changes: (+2 -2) TEST.nightly.Makefile | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm-test/TEST.nightly.Makefile diff -u llvm-test/TEST.nightly.Makefile:1.48 llvm-test/TEST.nightly.Makefile:1.49 --- llvm-test/TEST.nightly.Makefile:1.48 Wed Jun 20 18:48:01 2007 +++ llvm-test/TEST.nightly.Makefile Wed Jun 20 23:54:38 2007 @@ -33,10 +33,10 @@ $(PROGRAMS_TO_TEST:%=Output/%.nightly.compile.report.txt): \ Output/%.nightly.compile.report.txt: Output/%.llvm.bc $(LOPT) @echo > $@ - @-if test -f Output/$@.info; then \ + @-if test -f Output/$*.linked.bc.info; then \ echo "TEST-PASS: compile $(RELDIR)/$*" >> $@;\ printf "TEST-RESULT-compile: " >> $@;\ - grep "Total Execution Time" $@.info >> $@;\ + grep "Total Execution Time" Output/$*.linked.bc.info >> $@;\ echo >> $@;\ printf "TEST-RESULT-compile: " >> $@;\ wc -c $< >> $@;\ From evan.cheng at apple.com Thu Jun 21 02:39:48 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 21 Jun 2007 02:39:48 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <200706210739.l5L7dmDW010446@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: DAGCombiner.cpp updated: 1.313 -> 1.314 --- Log message: Xforms: (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) --- Diffs of the changes: (+64 -0) DAGCombiner.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 64 insertions(+) Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.313 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.314 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.313 Tue Jun 19 09:13:56 2007 +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun 21 02:39:16 2007 @@ -801,6 +801,55 @@ return SDOperand(); } +static +SDOperand combineSelectAndUse(SDNode *N, SDOperand Slct, SDOperand OtherOp, + SelectionDAG &DAG) { + 
MVT::ValueType VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDOperand LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDOperand RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + if (isSlctCC) + CC = cast(Slct.getOperand(4))->get(); + else { + SDOperand CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + if (LHS.getOpcode() == ISD::Constant && + cast(LHS)->isNullValue()) + DoXform = true; + else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast(RHS)->isNullValue()) { + std::swap(LHS, RHS); + bool isInt = MVT::isInteger(isSlctCC ? Slct.getOperand(0).getValueType() + : Slct.getOperand(0).getOperand(0).getValueType()); + CC = ISD::getSetCCInverse(CC, isInt); + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDOperand Result = DAG.getNode(Opc, VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDOperand CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(CCOp.getValueType(), CCOp.getOperand(0), + CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, VT, CCOp, OtherOp, Result); + } + return SDOperand(); +} + SDOperand DAGCombiner::visitADD(SDNode *N) { SDOperand N0 = N->getOperand(0); SDOperand N1 = N->getOperand(1); @@ -869,6 +918,16 @@ if (Result.Val) return Result; } + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N0, N1, DAG); + if (Result.Val) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N1, N0, DAG); + if 
(Result.Val) return Result; + } + return SDOperand(); } @@ -960,6 +1019,11 @@ // fold (A+B)-B -> A if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.Val->hasOneUse()) { + SDOperand Result = combineSelectAndUse(N, N1, N0, DAG); + if (Result.Val) return Result; + } return SDOperand(); } From evan.cheng at apple.com Thu Jun 21 02:40:23 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 21 Jun 2007 02:40:23 -0500 Subject: [llvm-commits] CVS: llvm/test/CodeGen/ARM/select_xform.ll Message-ID: <200706210740.l5L7eNKd010472@zion.cs.uiuc.edu> Changes in directory llvm/test/CodeGen/ARM: select_xform.ll added (r1.1) --- Log message: New tests. --- Diffs of the changes: (+16 -0) select_xform.ll | 16 ++++++++++++++++ 1 files changed, 16 insertions(+) Index: llvm/test/CodeGen/ARM/select_xform.ll diff -c /dev/null llvm/test/CodeGen/ARM/select_xform.ll:1.1 *** /dev/null Thu Jun 21 02:40:10 2007 --- llvm/test/CodeGen/ARM/select_xform.ll Thu Jun 21 02:40:00 2007 *************** *** 0 **** --- 1,16 ---- + ; RUN: llvm-as < %s | llc -march=arm + ; RUN: llvm-as < %s | llc -march=arm | grep mov | wc -l | grep 2 + + define i32 @t1(i32 %a, i32 %b, i32 %c) { + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 + %tmp3 = add i32 %tmp2, %b + ret i32 %tmp3 + } + + define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) { + %tmp1 = icmp sgt i32 %c, 10 + %tmp2 = select i1 %tmp1, i32 0, i32 10 + %tmp3 = sub i32 %b, %tmp2 + ret i32 %tmp3 + } From djg at cray.com Thu Jun 21 09:42:44 2007 From: djg at cray.com (Dan Gohman) Date: Thu, 21 Jun 2007 09:42:44 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetLowering.h Message-ID: <200706211442.l5LEgiwL018997@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetLowering.h updated: 1.127 -> 1.128 --- Log message: Rename TargetLowering::getNumElements and 
friends to TargetLowering::getNumRegisters and similar, to avoid confusion with the actual number of elements for vector types. --- Diffs of the changes: (+4 -4) TargetLowering.h | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) Index: llvm/include/llvm/Target/TargetLowering.h diff -u llvm/include/llvm/Target/TargetLowering.h:1.127 llvm/include/llvm/Target/TargetLowering.h:1.128 --- llvm/include/llvm/Target/TargetLowering.h:1.127 Fri Jun 1 03:25:24 2007 +++ llvm/include/llvm/Target/TargetLowering.h Thu Jun 21 09:42:22 2007 @@ -347,12 +347,12 @@ return VT == MVT::iPTR ? PointerTy : VT; } - /// getNumElements - Return the number of registers that this ValueType will + /// getNumRegisters - Return the number of registers that this ValueType will /// eventually require. This is one for any types promoted to live in larger /// registers, but may be more than one for types (like i64) that are split /// into pieces. - unsigned getNumElements(MVT::ValueType VT) const { - return NumElementsForVT[VT]; + unsigned getNumRegisters(MVT::ValueType VT) const { + return NumRegistersForVT[VT]; } /// hasTargetDAGCombine - If true, the target has custom DAG combine @@ -1035,7 +1035,7 @@ /// RegClassForVT - This indicates the default register class to use for /// each ValueType the target supports natively. TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; - unsigned char NumElementsForVT[MVT::LAST_VALUETYPE]; + unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; /// TransformToType - For any value types we are promoting or expanding, this /// contains the value type that we are changing to. 
For Expanded types, this From djg at cray.com Thu Jun 21 09:42:45 2007 From: djg at cray.com (Dan Gohman) Date: Thu, 21 Jun 2007 09:42:45 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp TargetLowering.cpp Message-ID: <200706211442.l5LEgjRM019004@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: SelectionDAGISel.cpp updated: 1.466 -> 1.467 TargetLowering.cpp updated: 1.119 -> 1.120 --- Log message: Rename TargetLowering::getNumElements and friends to TargetLowering::getNumRegisters and similar, to avoid confusion with the actual number of elements for vector types. --- Diffs of the changes: (+19 -19) SelectionDAGISel.cpp | 28 ++++++++++++++-------------- TargetLowering.cpp | 10 +++++----- 2 files changed, 19 insertions(+), 19 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.466 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.467 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.466 Fri Jun 15 17:26:58 2007 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Thu Jun 21 09:42:22 2007 @@ -290,19 +290,19 @@ if (PN->use_empty()) continue; MVT::ValueType VT = TLI.getValueType(PN->getType()); - unsigned NumElements; + unsigned NumRegisters; if (VT != MVT::Vector) - NumElements = TLI.getNumElements(VT); + NumRegisters = TLI.getNumRegisters(VT); else { MVT::ValueType VT1,VT2; - NumElements = + NumRegisters = TLI.getVectorTypeBreakdown(cast(PN->getType()), VT1, VT2); } unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo(); - for (unsigned i = 0; i != NumElements; ++i) + for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i); } } @@ -343,7 +343,7 @@ // The common case is that we will only create one register for this // value. 
If we have that case, create and return the virtual register. - unsigned NV = TLI.getNumElements(VT); + unsigned NV = TLI.getNumRegisters(VT); if (NV == 1) { // If we are promoting this value, pick the next largest supported type. MVT::ValueType PromotedType = TLI.getTypeToTransformTo(VT); @@ -750,7 +750,7 @@ // Source must be expanded. This input value is actually coming from the // register pair InReg and InReg+1. MVT::ValueType DestVT = TLI.getTypeToExpandTo(VT); - unsigned NumVals = TLI.getNumElements(VT); + unsigned NumVals = TLI.getNumRegisters(VT); N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT); if (NumVals == 1) N = DAG.getNode(ISD::BIT_CONVERT, VT, N); @@ -3185,7 +3185,7 @@ unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) - NumRegs = TLI.getNumElements(OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); MVT::ValueType RegVT; MVT::ValueType ValueVT = OpInfo.ConstraintVT; @@ -3831,7 +3831,7 @@ // integers. Figure out what the destination type is and how many small // integers it turns into. MVT::ValueType NVT = getTypeToExpandTo(VT); - unsigned NumVals = getNumElements(VT); + unsigned NumVals = getNumRegisters(VT); for (unsigned i = 0; i != NumVals; ++i) { RetVals.push_back(NVT); // if it isn't first piece, alignment must be 1 @@ -4088,7 +4088,7 @@ // integers. Figure out what the source elt type is and how many small // integers it is. 
MVT::ValueType NVT = getTypeToExpandTo(VT); - unsigned NumVals = getNumElements(VT); + unsigned NumVals = getNumRegisters(VT); for (unsigned i = 0; i != NumVals; ++i) RetTys.push_back(NVT); } else { @@ -4507,7 +4507,7 @@ return DAG.getCopyToReg(getRoot(), Reg, Op); } else { DestVT = TLI.getTypeToExpandTo(SrcVT); - unsigned NumVals = TLI.getNumElements(SrcVT); + unsigned NumVals = TLI.getNumRegisters(SrcVT); if (NumVals == 1) return DAG.getCopyToReg(getRoot(), Reg, DAG.getNode(ISD::BIT_CONVERT, DestVT, Op)); @@ -4695,16 +4695,16 @@ // Remember that this register needs to added to the machine PHI node as // the input for this MBB. MVT::ValueType VT = TLI.getValueType(PN->getType()); - unsigned NumElements; + unsigned NumRegisters; if (VT != MVT::Vector) - NumElements = TLI.getNumElements(VT); + NumRegisters = TLI.getNumRegisters(VT); else { MVT::ValueType VT1,VT2; - NumElements = + NumRegisters = TLI.getVectorTypeBreakdown(cast(PN->getType()), VT1, VT2); } - for (unsigned i = 0, e = NumElements; i != e; ++i) + for (unsigned i = 0, e = NumRegisters; i != e; ++i) PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); } } Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp diff -u llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.119 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.120 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.119 Wed May 30 11:30:06 2007 +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp Thu Jun 21 09:42:22 2007 @@ -216,7 +216,7 @@ // Everything defaults to one. for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) - NumElementsForVT[i] = 1; + NumRegistersForVT[i] = 1; // Find the largest integer register class. unsigned LargestIntReg = MVT::i128; @@ -227,12 +227,12 @@ // many registers to represent as the previous ValueType. 
unsigned ExpandedReg = LargestIntReg; ++LargestIntReg; for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg) - NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1]; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; // Inspect all of the ValueType's possible, deciding how to process them. for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg) // If we are expanding this type, expand it! - if (getNumElements((MVT::ValueType)IntReg) != 1) + if (getNumRegisters((MVT::ValueType)IntReg) != 1) SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType, ValueTypeActions); else if (!isTypeLegal((MVT::ValueType)IntReg)) @@ -250,7 +250,7 @@ if (isTypeLegal(MVT::f64)) TransformToType[MVT::f64] = MVT::f64; else { - NumElementsForVT[MVT::f64] = NumElementsForVT[MVT::i64]; + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; SetValueTypeAction(MVT::f64, Expand, *this, TransformToType, ValueTypeActions); } @@ -260,7 +260,7 @@ SetValueTypeAction(MVT::f32, Promote, *this, TransformToType, ValueTypeActions); else { - NumElementsForVT[MVT::f32] = NumElementsForVT[MVT::i32]; + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; SetValueTypeAction(MVT::f32, Expand, *this, TransformToType, ValueTypeActions); } From djg at cray.com Thu Jun 21 09:48:49 2007 From: djg at cray.com (Dan Gohman) Date: Thu, 21 Jun 2007 09:48:49 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp TargetLowering.cpp Message-ID: <200706211448.l5LEmn1l019119@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: LegalizeDAG.cpp updated: 1.498 -> 1.499 TargetLowering.cpp updated: 1.120 -> 1.121 --- Log message: Tidy up ValueType names in comments. 
--- Diffs of the changes: (+5 -5) LegalizeDAG.cpp | 4 ++-- TargetLowering.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.498 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.499 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.498 Mon Jun 18 16:28:10 2007 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Thu Jun 21 09:48:26 2007 @@ -5649,8 +5649,8 @@ /// PackVectorOp - Given an operand of MVT::Vector type, convert it into the -/// equivalent operation that returns a scalar (e.g. F32) or packed value -/// (e.g. MVT::V4F32). When this is called, we know that PackedVT is the right +/// equivalent operation that returns a scalar (e.g. MVT::f32) or packed value +/// (e.g. MVT::v4f32). When this is called, we know that PackedVT is the right /// type for the result. SDOperand SelectionDAGLegalize::PackVectorOp(SDOperand Op, MVT::ValueType NewVT) { Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp diff -u llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.120 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.121 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.120 Thu Jun 21 09:42:22 2007 +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp Thu Jun 21 09:48:26 2007 @@ -243,10 +243,10 @@ else TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg; - // If the target does not have native F64 support, expand it to I64. We will + // If the target does not have native f64 support, expand it to i64. We will // be generating soft float library calls. If the target does not have native - // support for F32, promote it to F64 if it is legal. Otherwise, expand it to - // I32. + // support for f32, promote it to f64 if it is legal. Otherwise, expand it to + // i32. 
if (isTypeLegal(MVT::f64)) TransformToType[MVT::f64] = MVT::f64; else { From duraid at kinoko.c.u-tokyo.ac.jp Thu Jun 21 10:20:46 2007 From: duraid at kinoko.c.u-tokyo.ac.jp (Duraid Madina) Date: Fri, 22 Jun 2007 00:20:46 +0900 Subject: [llvm-commits] New "BigBlock" local register allocator Message-ID: <20070621152046.GA10368@kinoko.c.u-tokyo.ac.jp> Hi all, Attached is a new local register allocator tuned for big basic blocks. It is fast and doesn't use much memory, but it is *slightly* slower and does use more memory than the existing local allocator. Having said that, it quickly gets close-to-optimal allocations on very large basic blocks, and it doesn't give *terrible* results on complex functions, so it might make sense to use this allocator by default in a JIT context. I haven't yet been able to narrow down a case where the existing local allocator produces better code than this one. Anyway, to play with it, just drop it into lib/Codegen, but you'll also need to declare it in: include/CodeGen/Passes.h include/CodeGen/LinkAllCodegenComponents.h and for testing, you might want to edit lib/Codegen/Passes.cpp to make it the default allocator. I've built llvm-gcc in this way, and the results are pretty reasonable. The allocator hasn't choked on anything so far. As you'll quickly notice, the file is basically a copy of RegAllocLocal.cpp. The only bits changed are the bits implementing the new algorithm, which is very simple. It's just: "At every instruction, if you have to spill a register, greedily spill the one whose value isn't going to be read again for the longest amount of time." To do this, two passes over each basic block are performed. The first pass builds a table which lists the times that each virtual register is read. The second pass does the allocation proper, spilling registers according to the "won't be needed longest" rule. 
The only thing I am not happy about is the "InsnTimes" map, which just maps all (unallocated) instructions in the basic-block to the integers 0,1,2... Surely there is some way I can do without this map? (I guess I just need to thread a "currentTime" value through to chooseReg() and reloadVirtReg()?) I tried this but messed it up, I probably just need more coffee. ;) Seriously though, if anyone can find any problems or suggest any improvements, I'd be really happy to hear them. Well, the only other question I have is: can I check this in as a separate allocator, or should we "upgrade" the existing local allocator to this one? This allocator will never be as fast, or use as little memory as the "simple" local allocator, and for those reasons alone I'm thinking we may as well keep the existing local allocator. However, compared to linearscan, or almost any other imaginable "heavy duty" allocator, any difference in the efficiency is probably going to be imperceptible. (On my own JIT workload, BigBlock is >30% slower than Local, but even so, it is still <1% of the total codegen time.) OK, enough rambling: please take a look - flames welcome!! Duraid -------------- next part -------------- A non-text attachment was scrubbed... 
Name: RegAllocBigBlock.cpp Type: text/x-c++src Size: 34156 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20070622/d721f803/attachment.bin From dpatel at apple.com Thu Jun 21 12:24:08 2007 From: dpatel at apple.com (Devang Patel) Date: Thu, 21 Jun 2007 12:24:08 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Analysis/Dominators.h Message-ID: <200706211724.l5LHO8xL022175@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Analysis: Dominators.h updated: 1.105 -> 1.106 --- Log message: Move code to update dominator information after basic block is split from LoopSimplify.cpp to Dominator.cpp --- Diffs of the changes: (+10 -0) Dominators.h | 10 ++++++++++ 1 files changed, 10 insertions(+) Index: llvm/include/llvm/Analysis/Dominators.h diff -u llvm/include/llvm/Analysis/Dominators.h:1.105 llvm/include/llvm/Analysis/Dominators.h:1.106 --- llvm/include/llvm/Analysis/Dominators.h:1.105 Tue Jun 12 12:30:56 2007 +++ llvm/include/llvm/Analysis/Dominators.h Thu Jun 21 12:23:45 2007 @@ -302,6 +302,11 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + + /// splitBlock + /// BB is split and now it has one successor. Update dominator tree to + /// reflect this change. + void splitBlock(BasicBlock *BB); private: void calculate(Function& F); DomTreeNode *getNodeForBlock(BasicBlock *BB); @@ -587,6 +592,11 @@ AU.addRequired(); } + /// splitBlock + /// BB is split and now it has one successor. Update dominace frontier to + /// reflect this change. 
+ void splitBlock(BasicBlock *BB); + private: const DomSetType &calculate(const DominatorTree &DT, const DomTreeNode *Node); From dpatel at apple.com Thu Jun 21 12:24:14 2007 From: dpatel at apple.com (Devang Patel) Date: Thu, 21 Jun 2007 12:24:14 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Utils/CodeExtractor.cpp LoopSimplify.cpp Message-ID: <200706211724.l5LHOElg022184@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Utils: CodeExtractor.cpp updated: 1.57 -> 1.58 LoopSimplify.cpp updated: 1.104 -> 1.105 --- Log message: Move code to update dominator information after basic block is split from LoopSimplify.cpp to Dominator.cpp --- Diffs of the changes: (+22 -202) CodeExtractor.cpp | 15 --- LoopSimplify.cpp | 209 +++++------------------------------------------------- 2 files changed, 22 insertions(+), 202 deletions(-) Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp diff -u llvm/lib/Transforms/Utils/CodeExtractor.cpp:1.57 llvm/lib/Transforms/Utils/CodeExtractor.cpp:1.58 --- llvm/lib/Transforms/Utils/CodeExtractor.cpp:1.57 Thu Jun 7 17:17:16 2007 +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp Thu Jun 21 12:23:45 2007 @@ -140,19 +140,8 @@ // Okay, update dominator sets. The blocks that dominate the new one are the // blocks that dominate TIBB plus the new block itself. - if (DT) { - DomTreeNode *OPNode = DT->getNode(OldPred); - DomTreeNode *IDomNode = OPNode->getIDom(); - BasicBlock* idom = IDomNode->getBlock(); - DT->addNewBlock(NewBB, idom); - - // Additionally, NewBB replaces OldPred as the immediate dominator of blocks - Function *F = Header->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) - if (DT->getIDomBlock(I) == OldPred) { - DT->changeImmediateDominator(I, NewBB); - } - } + if (DT) + DT->splitBlock(NewBB); // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. 
Index: llvm/lib/Transforms/Utils/LoopSimplify.cpp diff -u llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.104 llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.105 --- llvm/lib/Transforms/Utils/LoopSimplify.cpp:1.104 Mon Jun 11 18:31:22 2007 +++ llvm/lib/Transforms/Utils/LoopSimplify.cpp Thu Jun 21 12:23:45 2007 @@ -61,7 +61,7 @@ // this is null. AliasAnalysis *AA; LoopInfo *LI; - + DominatorTree *DT; virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -85,9 +85,6 @@ void PlaceSplitBlockCarefully(BasicBlock *NewBB, std::vector &SplitPreds, Loop *L); - - void UpdateDomInfoForRevectoredPreds(BasicBlock *NewBB, - std::vector &PredBlocks); }; char LoopSimplify::ID = 0; @@ -106,6 +103,7 @@ bool Changed = false; LI = &getAnalysis(); AA = getAnalysisToUpdate(); + DT = &getAnalysis(); // Check to see that no blocks (other than the header) in loops have // predecessors that are not in loops. This is not valid for natural loops, @@ -341,6 +339,7 @@ PN->addIncoming(Constant::getNullValue(PN->getType()), NewBB); } } + return NewBB; } @@ -371,8 +370,10 @@ if (Loop *Parent = L->getParentLoop()) Parent->addBasicBlockToLoop(NewBB, *LI); - UpdateDomInfoForRevectoredPreds(NewBB, OutsideBlocks); - + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate()) + DF->splitBlock(NewBB); + // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); @@ -401,8 +402,11 @@ if (SuccLoop) SuccLoop->addBasicBlockToLoop(NewBB, *LI); - // Update dominator information (set, immdom, domtree, and domfrontier) - UpdateDomInfoForRevectoredPreds(NewBB, LoopBlocks); + // Update Dominator Information + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate()) + DF->splitBlock(NewBB); + return NewBB; } @@ -507,7 +511,6 @@ /// created. 
/// Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { - DominatorTree *DT = getAnalysisToUpdate(); PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); if (PN == 0) return 0; // No known way to partition. @@ -523,8 +526,10 @@ BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, ".outer", OuterLoopPreds); - // Update dominator information (set, immdom, domtree, and domfrontier) - UpdateDomInfoForRevectoredPreds(NewBB, OuterLoopPreds); + // Update dominator information + DT->splitBlock(NewBB); + if (DominanceFrontier *DF = getAnalysisToUpdate()) + DF->splitBlock(NewBB); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -677,184 +682,10 @@ // loop and all parent loops. L->addBasicBlockToLoop(BEBlock, *LI); - // Update dominator information (set, immdom, domtree, and domfrontier) - UpdateDomInfoForRevectoredPreds(BEBlock, BackedgeBlocks); -} - -// Returns true if BasicBlock A dominates at least one block in vector B -// Helper function for UpdateDomInfoForRevectoredPreds -static bool BlockDominatesAny(BasicBlock* A, const std::vector& B, - DominatorTree &DT) { - for (std::vector::const_iterator BI = B.begin(), BE = B.end(); - BI != BE; ++BI) { - if (DT.dominates(A, *BI)) - return true; - } - return false; -} - -/// UpdateDomInfoForRevectoredPreds - This method is used to update -/// dominator trees and dominance frontiers after a new block has -/// been added to the CFG. -/// -/// This only supports the case when an existing block (known as "NewBBSucc"), -/// had some of its predecessors factored into a new basic block. This -/// transformation inserts a new basic block ("NewBB"), with a single -/// unconditional branch to NewBBSucc, and moves some predecessors of -/// "NewBBSucc" to now branch to NewBB. These predecessors are listed in -/// PredBlocks, even though they are the same as -/// pred_begin(NewBB)/pred_end(NewBB). 
-/// -void LoopSimplify::UpdateDomInfoForRevectoredPreds(BasicBlock *NewBB, - std::vector &PredBlocks) { - assert(!PredBlocks.empty() && "No predblocks??"); - assert(NewBB->getTerminator()->getNumSuccessors() == 1 - && "NewBB should have a single successor!"); - BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0); - DominatorTree &DT = getAnalysis(); - - // The newly inserted basic block will dominate existing basic blocks iff the - // PredBlocks dominate all of the non-pred blocks. If all predblocks dominate - // the non-pred blocks, then they all must be the same block! - // - bool NewBBDominatesNewBBSucc = true; - { - BasicBlock *OnePred = PredBlocks[0]; - unsigned i = 1, e = PredBlocks.size(); - for (i = 1; !DT.isReachableFromEntry(OnePred); ++i) { - assert(i != e && "Didn't find reachable pred?"); - OnePred = PredBlocks[i]; - } - - for (; i != e; ++i) - if (PredBlocks[i] != OnePred && DT.isReachableFromEntry(OnePred)){ - NewBBDominatesNewBBSucc = false; - break; - } - - if (NewBBDominatesNewBBSucc) - for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc); - PI != E; ++PI) - if (*PI != NewBB && !DT.dominates(NewBBSucc, *PI)) { - NewBBDominatesNewBBSucc = false; - break; - } - } - - // The other scenario where the new block can dominate its successors are when - // all predecessors of NewBBSucc that are not NewBB are dominated by NewBBSucc - // already. - if (!NewBBDominatesNewBBSucc) { - NewBBDominatesNewBBSucc = true; - for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc); - PI != E; ++PI) - if (*PI != NewBB && !DT.dominates(NewBBSucc, *PI)) { - NewBBDominatesNewBBSucc = false; - break; - } - } - - - // Update DominatorTree information if it is active. - - // Find NewBB's immediate dominator and create new dominator tree node for NewBB. 
- BasicBlock *NewBBIDom = 0; - unsigned i = 0; - for (i = 0; i < PredBlocks.size(); ++i) - if (DT.isReachableFromEntry(PredBlocks[i])) { - NewBBIDom = PredBlocks[i]; - break; - } - assert(i != PredBlocks.size() && "No reachable preds?"); - for (i = i + 1; i < PredBlocks.size(); ++i) { - if (DT.isReachableFromEntry(PredBlocks[i])) - NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]); - } - assert(NewBBIDom && "No immediate dominator found??"); - - // Create the new dominator tree node... and set the idom of NewBB. - DomTreeNode *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom); - - // If NewBB strictly dominates other blocks, then it is now the immediate - // dominator of NewBBSucc. Update the dominator tree as appropriate. - if (NewBBDominatesNewBBSucc) { - DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc); - DT.changeImmediateDominator(NewBBSuccNode, NewBBNode); - } - - // Update dominance frontier information... - if (DominanceFrontier *DF = getAnalysisToUpdate()) { - // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the - // DF(PredBlocks[0]) without the stuff that the new block does not dominate - // a predecessor of. - if (NewBBDominatesNewBBSucc) { - DominanceFrontier::iterator DFI = DF->find(PredBlocks[0]); - if (DFI != DF->end()) { - DominanceFrontier::DomSetType Set = DFI->second; - // Filter out stuff in Set that we do not dominate a predecessor of. - for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(), - E = Set.end(); SetI != E;) { - bool DominatesPred = false; - for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI); - PI != E; ++PI) - if (DT.dominates(NewBB, *PI)) - DominatesPred = true; - if (!DominatesPred) - Set.erase(SetI++); - else - ++SetI; - } - - DF->addBasicBlock(NewBB, Set); - } - - } else { - // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate - // NewBBSucc, but it does dominate itself (and there is an edge (NewBB -> - // NewBBSucc)). 
NewBBSucc is the single successor of NewBB. - DominanceFrontier::DomSetType NewDFSet; - NewDFSet.insert(NewBBSucc); - DF->addBasicBlock(NewBB, NewDFSet); - } - - // Now we must loop over all of the dominance frontiers in the function, - // replacing occurrences of NewBBSucc with NewBB in some cases. All - // blocks that dominate a block in PredBlocks and contained NewBBSucc in - // their dominance frontier must be updated to contain NewBB instead. - // - for (Function::iterator FI = NewBB->getParent()->begin(), - FE = NewBB->getParent()->end(); FI != FE; ++FI) { - DominanceFrontier::iterator DFI = DF->find(FI); - if (DFI == DF->end()) continue; // unreachable block. - - // Only consider dominators of NewBBSucc - if (!DFI->second.count(NewBBSucc)) continue; - - if (BlockDominatesAny(FI, PredBlocks, DT)) { - // If NewBBSucc should not stay in our dominator frontier, remove it. - // We remove it unless there is a predecessor of NewBBSucc that we - // dominate, but we don't strictly dominate NewBBSucc. - bool ShouldRemove = true; - if ((BasicBlock*)FI == NewBBSucc - || !DT.dominates(FI, NewBBSucc)) { - // Okay, we know that PredDom does not strictly dominate NewBBSucc. - // Check to see if it dominates any predecessors of NewBBSucc. 
- for (pred_iterator PI = pred_begin(NewBBSucc), - E = pred_end(NewBBSucc); PI != E; ++PI) - if (DT.dominates(FI, *PI)) { - ShouldRemove = false; - break; - } - - if (ShouldRemove) - DF->removeFromFrontier(DFI, NewBBSucc); - DF->addToFrontier(DFI, NewBB); - - break; - } - } - } - } + // Update dominator information + DT->splitBlock(BEBlock); + if (DominanceFrontier *DF = getAnalysisToUpdate()) + DF->splitBlock(BEBlock); } From dpatel at apple.com Thu Jun 21 12:24:15 2007 From: dpatel at apple.com (Devang Patel) Date: Thu, 21 Jun 2007 12:24:15 -0500 Subject: [llvm-commits] CVS: llvm/lib/VMCore/Dominators.cpp Message-ID: <200706211724.l5LHOFWB022189@zion.cs.uiuc.edu> Changes in directory llvm/lib/VMCore: Dominators.cpp updated: 1.117 -> 1.118 --- Log message: Move code to update dominator information after basic block is split from LoopSimplify.cpp to Dominator.cpp --- Diffs of the changes: (+184 -0) Dominators.cpp | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 184 insertions(+) Index: llvm/lib/VMCore/Dominators.cpp diff -u llvm/lib/VMCore/Dominators.cpp:1.117 llvm/lib/VMCore/Dominators.cpp:1.118 --- llvm/lib/VMCore/Dominators.cpp:1.117 Tue Jun 12 12:50:25 2007 +++ llvm/lib/VMCore/Dominators.cpp Thu Jun 21 12:23:45 2007 @@ -63,6 +63,89 @@ static RegisterPass E("domtree", "Dominator Tree Construction", true); +// NewBB is split and now it has one successor. Update dominator tree to +// reflect this change. 
+void DominatorTree::splitBlock(BasicBlock *NewBB) { + + assert(NewBB->getTerminator()->getNumSuccessors() == 1 + && "NewBB should have a single successor!"); + BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0); + + std::vector PredBlocks; + for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB); + PI != PE; ++PI) + PredBlocks.push_back(*PI); + + assert(!PredBlocks.empty() && "No predblocks??"); + + // The newly inserted basic block will dominate existing basic blocks iff the + // PredBlocks dominate all of the non-pred blocks. If all predblocks dominate + // the non-pred blocks, then they all must be the same block! + // + bool NewBBDominatesNewBBSucc = true; + { + BasicBlock *OnePred = PredBlocks[0]; + unsigned i = 1, e = PredBlocks.size(); + for (i = 1; !isReachableFromEntry(OnePred); ++i) { + assert(i != e && "Didn't find reachable pred?"); + OnePred = PredBlocks[i]; + } + + for (; i != e; ++i) + if (PredBlocks[i] != OnePred && isReachableFromEntry(OnePred)){ + NewBBDominatesNewBBSucc = false; + break; + } + + if (NewBBDominatesNewBBSucc) + for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc); + PI != E; ++PI) + if (*PI != NewBB && !dominates(NewBBSucc, *PI)) { + NewBBDominatesNewBBSucc = false; + break; + } + } + + // The other scenario where the new block can dominate its successors are when + // all predecessors of NewBBSucc that are not NewBB are dominated by NewBBSucc + // already. + if (!NewBBDominatesNewBBSucc) { + NewBBDominatesNewBBSucc = true; + for (pred_iterator PI = pred_begin(NewBBSucc), E = pred_end(NewBBSucc); + PI != E; ++PI) + if (*PI != NewBB && !dominates(NewBBSucc, *PI)) { + NewBBDominatesNewBBSucc = false; + break; + } + } + + + // Find NewBB's immediate dominator and create new dominator tree node for NewBB. 
+ BasicBlock *NewBBIDom = 0; + unsigned i = 0; + for (i = 0; i < PredBlocks.size(); ++i) + if (isReachableFromEntry(PredBlocks[i])) { + NewBBIDom = PredBlocks[i]; + break; + } + assert(i != PredBlocks.size() && "No reachable preds?"); + for (i = i + 1; i < PredBlocks.size(); ++i) { + if (isReachableFromEntry(PredBlocks[i])) + NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]); + } + assert(NewBBIDom && "No immediate dominator found??"); + + // Create the new dominator tree node... and set the idom of NewBB. + DomTreeNode *NewBBNode = addNewBlock(NewBB, NewBBIDom); + + // If NewBB strictly dominates other blocks, then it is now the immediate + // dominator of NewBBSucc. Update the dominator tree as appropriate. + if (NewBBDominatesNewBBSucc) { + DomTreeNode *NewBBSuccNode = getNode(NewBBSucc); + changeImmediateDominator(NewBBSuccNode, NewBBNode); + } +} + unsigned DominatorTree::DFSPass(BasicBlock *V, InfoRec &VInfo, unsigned N) { // This is more understandable as a recursive algorithm, but we can't use the @@ -520,6 +603,107 @@ static RegisterPass G("domfrontier", "Dominance Frontier Construction", true); +// NewBB is split and now it has one successor. Update dominace frontier to +// reflect this change. +void DominanceFrontier::splitBlock(BasicBlock *NewBB) { + + assert(NewBB->getTerminator()->getNumSuccessors() == 1 + && "NewBB should have a single successor!"); + BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0); + + std::vector PredBlocks; + for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB); + PI != PE; ++PI) + PredBlocks.push_back(*PI); + + assert(!PredBlocks.empty() && "No predblocks??"); + + DominatorTree &DT = getAnalysis(); + bool NewBBDominatesNewBBSucc = true; + if (!DT.dominates(NewBB, NewBBSucc)) + NewBBDominatesNewBBSucc = false; + + // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the + // DF(PredBlocks[0]) without the stuff that the new block does not dominate + // a predecessor of. 
+ if (NewBBDominatesNewBBSucc) { + DominanceFrontier::iterator DFI = find(PredBlocks[0]); + if (DFI != end()) { + DominanceFrontier::DomSetType Set = DFI->second; + // Filter out stuff in Set that we do not dominate a predecessor of. + for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(), + E = Set.end(); SetI != E;) { + bool DominatesPred = false; + for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI); + PI != E; ++PI) + if (DT.dominates(NewBB, *PI)) + DominatesPred = true; + if (!DominatesPred) + Set.erase(SetI++); + else + ++SetI; + } + + addBasicBlock(NewBB, Set); + } + + } else { + // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate + // NewBBSucc, but it does dominate itself (and there is an edge (NewBB -> + // NewBBSucc)). NewBBSucc is the single successor of NewBB. + DominanceFrontier::DomSetType NewDFSet; + NewDFSet.insert(NewBBSucc); + addBasicBlock(NewBB, NewDFSet); + } + + // Now we must loop over all of the dominance frontiers in the function, + // replacing occurrences of NewBBSucc with NewBB in some cases. All + // blocks that dominate a block in PredBlocks and contained NewBBSucc in + // their dominance frontier must be updated to contain NewBB instead. + // + for (Function::iterator FI = NewBB->getParent()->begin(), + FE = NewBB->getParent()->end(); FI != FE; ++FI) { + DominanceFrontier::iterator DFI = find(FI); + if (DFI == end()) continue; // unreachable block. + + // Only consider dominators of NewBBSucc + if (!DFI->second.count(NewBBSucc)) continue; + + bool BlockDominatesAny = false; + for (std::vector::const_iterator BI = PredBlocks.begin(), + BE = PredBlocks.end(); BI != BE; ++BI) { + if (DT.dominates(FI, *BI)) { + BlockDominatesAny = true; + break; + } + } + + if (BlockDominatesAny) { + // If NewBBSucc should not stay in our dominator frontier, remove it. + // We remove it unless there is a predecessor of NewBBSucc that we + // dominate, but we don't strictly dominate NewBBSucc. 
+ bool ShouldRemove = true; + if ((BasicBlock*)FI == NewBBSucc + || !DT.dominates(FI, NewBBSucc)) { + // Okay, we know that PredDom does not strictly dominate NewBBSucc. + // Check to see if it dominates any predecessors of NewBBSucc. + for (pred_iterator PI = pred_begin(NewBBSucc), + E = pred_end(NewBBSucc); PI != E; ++PI) + if (DT.dominates(FI, *PI)) { + ShouldRemove = false; + break; + } + + if (ShouldRemove) + removeFromFrontier(DFI, NewBBSucc); + addToFrontier(DFI, NewBB); + + break; + } + } + } +} + namespace { class DFCalculateWorkObject { public: From resistor at mac.com Thu Jun 21 12:58:15 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 12:58:15 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706211758.l5LHwF3O022797@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.48 -> 1.49 --- Log message: Change lots of sets from std::set to SmallPtrSet. This reduces the time required to optimize 253.perlbmk from 10.9s to 5.3s. 
--- Diffs of the changes: (+99 -92) GVNPRE.cpp | 191 +++++++++++++++++++++++++++++++------------------------------ 1 files changed, 99 insertions(+), 92 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.48 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.49 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.48 Wed Jun 20 20:59:05 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Thu Jun 21 12:57:53 2007 @@ -27,6 +27,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -85,7 +86,7 @@ std::map expressionNumbering; std::set maximalExpressions; - std::set maximalValues; + SmallPtrSet maximalValues; uint32_t nextValueNumber; @@ -103,7 +104,7 @@ return maximalExpressions; } - std::set& getMaximalValues() { return maximalValues; } + SmallPtrSet& getMaximalValues() { return maximalValues; } void erase(Value* v); }; } @@ -346,8 +347,8 @@ ValueTable VN; std::vector createdExpressions; - std::map > availableOut; - std::map > anticipatedIn; + std::map > availableOut; + std::map > anticipatedIn; // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -358,44 +359,44 @@ // Helper fuctions // FIXME: eliminate or document these better - void dump(const std::set& s) const; - void clean(std::set& set); - Value* find_leader(std::set& vals, + void dump(const SmallPtrSet& s) const; + void clean(SmallPtrSet& set); + Value* find_leader(SmallPtrSet& vals, uint32_t v); Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ); - void phi_translate_set(std::set& anticIn, BasicBlock* pred, - BasicBlock* succ, std::set& out); + void phi_translate_set(SmallPtrSet& anticIn, BasicBlock* pred, + BasicBlock* succ, SmallPtrSet& out); - void topo_sort(std::set& set, + void 
topo_sort(SmallPtrSet& set, std::vector& vec); void cleanup(); bool elimination(); - void val_insert(std::set& s, Value* v); - void val_replace(std::set& s, Value* v); + void val_insert(SmallPtrSet& s, Value* v); + void val_replace(SmallPtrSet& s, Value* v); bool dependsOnInvoke(Value* V); void buildsets_availout(BasicBlock::iterator I, - std::set& currAvail, - std::set& currPhis, - std::set& currExps, - std::set& currTemps); + SmallPtrSet& currAvail, + SmallPtrSet& currPhis, + SmallPtrSet& currExps, + SmallPtrSet& currTemps); void buildsets_anticout(BasicBlock* BB, - std::set& anticOut, + SmallPtrSet& anticOut, std::set& visited); bool buildsets_anticin(BasicBlock* BB, - std::set& anticOut, - std::set& currExps, - std::set& currTemps, + SmallPtrSet& anticOut, + SmallPtrSet& currExps, + SmallPtrSet& currTemps, std::set& visited); unsigned buildsets(Function& F); void insertion_pre(Value* e, BasicBlock* BB, std::map& avail, - std::set& new_set); + SmallPtrSet& new_set); unsigned insertion_mergepoint(std::vector& workList, df_iterator D, - std::set& new_set); + SmallPtrSet& new_set); bool insertion(Function& F); }; @@ -418,8 +419,8 @@ /// find_leader - Given a set and a value number, return the first /// element of the set with that value number, or 0 if no such element /// is present -Value* GVNPRE::find_leader(std::set& vals, uint32_t v) { - for (std::set::iterator I = vals.begin(), E = vals.end(); +Value* GVNPRE::find_leader(SmallPtrSet& vals, uint32_t v) { + for (SmallPtrSet::iterator I = vals.begin(), E = vals.end(); I != E; ++I) if (v == VN.lookup(*I)) return *I; @@ -429,7 +430,7 @@ /// val_insert - Insert a value into a set only if there is not a value /// with the same value number already in the set -void GVNPRE::val_insert(std::set& s, Value* v) { +void GVNPRE::val_insert(SmallPtrSet& s, Value* v) { uint32_t num = VN.lookup(v); Value* leader = find_leader(s, num); if (leader == 0) @@ -438,7 +439,7 @@ /// val_replace - Insert a value into a set, replacing 
any values already in /// the set that have the same value number -void GVNPRE::val_replace(std::set& s, Value* v) { +void GVNPRE::val_replace(SmallPtrSet& s, Value* v) { uint32_t num = VN.lookup(v); Value* leader = find_leader(s, num); while (leader != 0) { @@ -546,10 +547,10 @@ } /// phi_translate_set - Perform phi translation on every element of a set -void GVNPRE::phi_translate_set(std::set& anticIn, +void GVNPRE::phi_translate_set(SmallPtrSet& anticIn, BasicBlock* pred, BasicBlock* succ, - std::set& out) { - for (std::set::iterator I = anticIn.begin(), + SmallPtrSet& out) { + for (SmallPtrSet::iterator I = anticIn.begin(), E = anticIn.end(); I != E; ++I) { Value* V = phi_translate(*I, pred, succ); if (V != 0) @@ -574,7 +575,7 @@ /// clean - Remove all non-opaque values from the set whose operands are not /// themselves in the set, as well as all values that depend on invokes (see /// above) -void GVNPRE::clean(std::set& set) { +void GVNPRE::clean(SmallPtrSet& set) { std::vector worklist; topo_sort(set, worklist); @@ -584,7 +585,7 @@ if (BinaryOperator* BO = dyn_cast(v)) { bool lhsValid = !isa(BO->getOperand(0)); if (!lhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) if (VN.lookup(*I) == VN.lookup(BO->getOperand(0))) { lhsValid = true; @@ -595,7 +596,7 @@ bool rhsValid = !isa(BO->getOperand(1)); if (!rhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) if (VN.lookup(*I) == VN.lookup(BO->getOperand(1))) { rhsValid = true; @@ -609,7 +610,7 @@ } else if (CmpInst* C = dyn_cast(v)) { bool lhsValid = !isa(C->getOperand(0)); if (!lhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) if (VN.lookup(*I) == VN.lookup(C->getOperand(0))) { lhsValid = true; @@ -620,7 +621,7 @@ bool rhsValid = 
!isa(C->getOperand(1)); if (!rhsValid) - for (std::set::iterator I = set.begin(), E = set.end(); + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) if (VN.lookup(*I) == VN.lookup(C->getOperand(1))) { rhsValid = true; @@ -637,19 +638,19 @@ /// topo_sort - Given a set of values, sort them by topological /// order into the provided vector. -void GVNPRE::topo_sort(std::set& set, std::vector& vec) { - std::set toErase; - for (std::set::iterator I = set.begin(), E = set.end(); +void GVNPRE::topo_sort(SmallPtrSet& set, std::vector& vec) { + SmallPtrSet toErase; + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) { if (BinaryOperator* BO = dyn_cast(*I)) - for (std::set::iterator SI = set.begin(); SI != E; ++SI) { + for (SmallPtrSet::iterator SI = set.begin(); SI != E; ++SI) { if (VN.lookup(BO->getOperand(0)) == VN.lookup(*SI) || VN.lookup(BO->getOperand(1)) == VN.lookup(*SI)) { toErase.insert(*SI); } } else if (CmpInst* C = dyn_cast(*I)) - for (std::set::iterator SI = set.begin(); SI != E; ++SI) { + for (SmallPtrSet::iterator SI = set.begin(); SI != E; ++SI) { if (VN.lookup(C->getOperand(0)) == VN.lookup(*SI) || VN.lookup(C->getOperand(1)) == VN.lookup(*SI)) { toErase.insert(*SI); @@ -658,13 +659,13 @@ } std::vector Q; - for (std::set::iterator I = set.begin(), E = set.end(); + for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) { - if (toErase.find(*I) == toErase.end()) + if (toErase.count(*I) == 0) Q.push_back(*I); } - std::set visited; + SmallPtrSet visited; while (!Q.empty()) { Value* e = Q.back(); @@ -673,10 +674,10 @@ Value* r = find_leader(set, VN.lookup(BO->getOperand(1))); if (l != 0 && isa(l) && - visited.find(l) == visited.end()) + visited.count(l) == 0) Q.push_back(l); else if (r != 0 && isa(r) && - visited.find(r) == visited.end()) + visited.count(r) == 0) Q.push_back(r); else { vec.push_back(e); @@ -688,10 +689,10 @@ Value* r = find_leader(set, VN.lookup(C->getOperand(1))); if (l != 0 && isa(l) 
&& - visited.find(l) == visited.end()) + visited.count(l) == 0) Q.push_back(l); else if (r != 0 && isa(r) && - visited.find(r) == visited.end()) + visited.count(r) == 0) Q.push_back(r); else { vec.push_back(e); @@ -707,9 +708,9 @@ } /// dump - Dump a set of values to standard error -void GVNPRE::dump(const std::set& s) const { +void GVNPRE::dump(const SmallPtrSet& s) const { DOUT << "{ "; - for (std::set::iterator I = s.begin(), E = s.end(); + for (SmallPtrSet::iterator I = s.begin(), E = s.end(); I != E; ++I) { DEBUG((*I)->dump()); } @@ -782,10 +783,10 @@ /// buildsets_availout - When calculating availability, handle an instruction /// by inserting it into the appropriate sets void GVNPRE::buildsets_availout(BasicBlock::iterator I, - std::set& currAvail, - std::set& currPhis, - std::set& currExps, - std::set& currTemps) { + SmallPtrSet& currAvail, + SmallPtrSet& currPhis, + SmallPtrSet& currExps, + SmallPtrSet& currTemps) { // Handle PHI nodes... if (PHINode* p = dyn_cast(I)) { VN.lookup_or_add(p); @@ -830,7 +831,7 @@ /// buildsets_anticout - When walking the postdom tree, calculate the ANTIC_OUT /// set as a function of the ANTIC_IN set of the block's predecessors void GVNPRE::buildsets_anticout(BasicBlock* BB, - std::set& anticOut, + SmallPtrSet& anticOut, std::set& visited) { if (BB->getTerminator()->getNumSuccessors() == 1) { if (visited.find(BB->getTerminator()->getSuccessor(0)) == visited.end()) @@ -845,15 +846,18 @@ for (unsigned i = 1; i < BB->getTerminator()->getNumSuccessors(); ++i) { BasicBlock* currSucc = BB->getTerminator()->getSuccessor(i); - std::set& succAnticIn = anticipatedIn[currSucc]; + SmallPtrSet& succAnticIn = anticipatedIn[currSucc]; - std::set temp; - std::insert_iterator > temp_ins(temp, temp.begin()); - std::set_intersection(anticOut.begin(), anticOut.end(), - succAnticIn.begin(), succAnticIn.end(), temp_ins); - - anticOut.clear(); - anticOut.insert(temp.begin(), temp.end()); + std::vector temp; + + for (SmallPtrSet::iterator I = 
anticOut.begin(), + E = anticOut.end(); I != E; ++I) + if (succAnticIn.count(*I) == 0) + temp.push_back(*I); + + for (std::vector::iterator I = temp.begin(), E = temp.end(); + I != E; ++I) + anticOut.erase(*I); } } } @@ -862,26 +866,29 @@ /// each block. ANTIC_IN is then a function of ANTIC_OUT and the GEN /// sets populated in buildsets_availout bool GVNPRE::buildsets_anticin(BasicBlock* BB, - std::set& anticOut, - std::set& currExps, - std::set& currTemps, + SmallPtrSet& anticOut, + SmallPtrSet& currExps, + SmallPtrSet& currTemps, std::set& visited) { - std::set& anticIn = anticipatedIn[BB]; - std::set old (anticIn.begin(), anticIn.end()); + SmallPtrSet& anticIn = anticipatedIn[BB]; + SmallPtrSet old (anticIn.begin(), anticIn.end()); buildsets_anticout(BB, anticOut, visited); - std::set S; - std::insert_iterator > s_ins(S, S.begin()); - std::set_difference(anticOut.begin(), anticOut.end(), - currTemps.begin(), currTemps.end(), s_ins); - + SmallPtrSet S; + for (SmallPtrSet::iterator I = anticOut.begin(), + E = anticOut.end(); I != E; ++I) + if (currTemps.count(*I) == 0) + S.insert(*I); + anticIn.clear(); - std::insert_iterator > ai_ins(anticIn, anticIn.begin()); - std::set_difference(currExps.begin(), currExps.end(), - currTemps.begin(), currTemps.end(), ai_ins); + + for (SmallPtrSet::iterator I = currExps.begin(), + E = currExps.end(); I != E; ++I) + if (currTemps.count(*I) == 0) + anticIn.insert(*I); - for (std::set::iterator I = S.begin(), E = S.end(); + for (SmallPtrSet::iterator I = S.begin(), E = S.end(); I != E; ++I) { // For non-opaque values, we should already have a value numbering. // However, for opaques, such as constants within PHI nodes, it is @@ -904,9 +911,9 @@ /// buildsets - Phase 1 of the main algorithm. Construct the AVAIL_OUT /// and the ANTIC_IN sets. 
unsigned GVNPRE::buildsets(Function& F) { - std::map > generatedExpressions; - std::map > generatedPhis; - std::map > generatedTemporaries; + std::map > generatedExpressions; + std::map > generatedPhis; + std::map > generatedTemporaries; DominatorTree &DT = getAnalysis(); @@ -917,10 +924,10 @@ E = df_end(DT.getRootNode()); DI != E; ++DI) { // Get the sets to update for this block - std::set& currExps = generatedExpressions[DI->getBlock()]; - std::set& currPhis = generatedPhis[DI->getBlock()]; - std::set& currTemps = generatedTemporaries[DI->getBlock()]; - std::set& currAvail = availableOut[DI->getBlock()]; + SmallPtrSet& currExps = generatedExpressions[DI->getBlock()]; + SmallPtrSet& currPhis = generatedPhis[DI->getBlock()]; + SmallPtrSet& currTemps = generatedTemporaries[DI->getBlock()]; + SmallPtrSet& currAvail = availableOut[DI->getBlock()]; BasicBlock* BB = DI->getBlock(); @@ -957,7 +964,7 @@ unsigned iterations = 0; while (changed) { changed = false; - std::set anticOut; + SmallPtrSet anticOut; // Top-down walk of the postdominator tree for (df_iterator PDI = @@ -984,7 +991,7 @@ /// the main block void GVNPRE::insertion_pre(Value* e, BasicBlock* BB, std::map& avail, - std::set& new_set) { + SmallPtrSet& new_set) { for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { Value* e2 = avail[*PI]; if (!find_leader(availableOut[*PI], VN.lookup(e2))) { @@ -1016,7 +1023,7 @@ VN.add(newVal, VN.lookup(U)); - std::set& predAvail = availableOut[*PI]; + SmallPtrSet& predAvail = availableOut[*PI]; val_replace(predAvail, newVal); std::map::iterator av = avail.find(*PI); @@ -1048,7 +1055,7 @@ /// block for the possibility of a partial redundancy. 
If present, eliminate it unsigned GVNPRE::insertion_mergepoint(std::vector& workList, df_iterator D, - std::set& new_set) { + SmallPtrSet& new_set) { bool changed_function = false; bool new_stuff = false; @@ -1112,7 +1119,7 @@ DominatorTree &DT = getAnalysis(); - std::map > new_sets; + std::map > new_sets; bool new_stuff = true; while (new_stuff) { new_stuff = false; @@ -1123,16 +1130,16 @@ if (BB == 0) continue; - std::set& new_set = new_sets[BB]; - std::set& availOut = availableOut[BB]; - std::set& anticIn = anticipatedIn[BB]; + SmallPtrSet& new_set = new_sets[BB]; + SmallPtrSet& availOut = availableOut[BB]; + SmallPtrSet& anticIn = anticipatedIn[BB]; new_set.clear(); // Replace leaders with leaders inherited from dominator if (DI->getIDom() != 0) { - std::set& dom_set = new_sets[DI->getIDom()->getBlock()]; - for (std::set::iterator I = dom_set.begin(), + SmallPtrSet& dom_set = new_sets[DI->getIDom()->getBlock()]; + for (SmallPtrSet::iterator I = dom_set.begin(), E = dom_set.end(); I != E; ++I) { new_set.insert(*I); val_replace(availOut, *I); From sabre at nondot.org Thu Jun 21 13:09:47 2007 From: sabre at nondot.org (Chris Lattner) Date: Thu, 21 Jun 2007 13:09:47 -0500 Subject: [llvm-commits] CVS: llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll Message-ID: <200706211809.l5LI9lks023109@zion.cs.uiuc.edu> Changes in directory llvm/test/Transforms/InstCombine: 2007-06-21-DivCompareMiscomp.ll added (r1.1) --- Log message: new testcase miscompiled by instcombine, reduced from perl --- Diffs of the changes: (+9 -0) 2007-06-21-DivCompareMiscomp.ll | 9 +++++++++ 1 files changed, 9 insertions(+) Index: llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll diff -c /dev/null llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll:1.1 *** /dev/null Thu Jun 21 13:09:35 2007 --- llvm/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll Thu Jun 21 13:09:25 2007 *************** *** 0 **** --- 1,9 ---- + ; RUN: llvm-as < %s | opt 
-instcombine | llvm-dis | grep {ret i1 true} + ; rdar://5278853 + + define i1 @test(i32 %tmp468) { + %tmp470 = udiv i32 %tmp468, 4 ; [#uses=2] + %tmp475 = icmp ult i32 %tmp470, 1073741824 ; [#uses=1] + ret i1 %tmp475 + } + From sabre at nondot.org Thu Jun 21 13:11:42 2007 From: sabre at nondot.org (Chris Lattner) Date: Thu, 21 Jun 2007 13:11:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/InstructionCombining.cpp Message-ID: <200706211811.l5LIBgQG023159@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: InstructionCombining.cpp updated: 1.774 -> 1.775 --- Log message: Significantly improve the documentation of the instcombine divide/compare transformation. Also, keep track of which end of the integer interval overflows occur on. This fixes Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll and rdar://5278853, a miscompilation of perl. --- Diffs of the changes: (+50 -34) InstructionCombining.cpp | 84 +++++++++++++++++++++++++++-------------------- 1 files changed, 50 insertions(+), 34 deletions(-) Index: llvm/lib/Transforms/Scalar/InstructionCombining.cpp diff -u llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.774 llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.775 --- llvm/lib/Transforms/Scalar/InstructionCombining.cpp:1.774 Wed Jun 20 18:46:26 2007 +++ llvm/lib/Transforms/Scalar/InstructionCombining.cpp Thu Jun 21 13:11:19 2007 @@ -5131,12 +5131,7 @@ if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate()) return 0; if (DivRHS->isZero()) - return 0; // Don't hack on div by zero - - // Initialize the variables that will indicate the nature of the - // range check. - bool LoOverflow = false, HiOverflow = false; - ConstantInt *LoBound = 0, *HiBound = 0; + return 0; // The ProdOV computation fails on divide by zero. // Compute Prod = CI * DivRHS. We are essentially solving an equation // of form X/C1=C2. 
We solve for X by multiplying C1 (DivRHS) and @@ -5151,87 +5146,108 @@ ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode - ICmpInst::Predicate predicate = ICI.getPredicate(); + ICmpInst::Predicate Pred = ICI.getPredicate(); + // Figure out the interval that is being checked. For example, a comparison + // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). + // Compute this interval based on the constants involved and the signedness of + // the compare/divide. This computes a half-open interval, keeping track of + // whether either value in the interval overflows. After analysis each + // overflow variable is set to 0 if it's corresponding bound variable is valid + // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. + int LoOverflow = 0, HiOverflow = 0; + ConstantInt *LoBound = 0, *HiBound = 0; + + if (!DivIsSigned) { // udiv + // e.g. X/5 op 3 --> [15, 20) LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || AddWithOverflow(HiBound, LoBound, DivRHS, false); + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false); } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 - // Can't overflow. + // Can't overflow. e.g. X/2 op 0 --> [-1, 2) LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); HiBound = DivRHS; } else if (CmpRHSV.isPositive()) { // (X / pos) op pos - LoBound = Prod; - LoOverflow = ProdOV; - HiOverflow = ProdOV || AddWithOverflow(HiBound, Prod, DivRHS, true); + LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) + HiOverflow = LoOverflow = ProdOV; + if (!HiOverflow) + HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true); } else { // (X / pos) op neg + // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS)); LoOverflow = AddWithOverflow(LoBound, Prod, - cast(DivRHSH), true); + cast(DivRHSH), true) ? 
-1 : 0; HiBound = AddOne(Prod); - HiOverflow = ProdOV; + HiOverflow = ProdOV ? -1 : 0; } } else { // Divisor is < 0. if (CmpRHSV == 0) { // (X / neg) op 0 + // e.g. X/-5 op 0 --> [-4, 5) LoBound = AddOne(DivRHS); HiBound = cast(ConstantExpr::getNeg(DivRHS)); - if (HiBound == DivRHS) - return 0; // - INTMIN = INTMIN + if (HiBound == DivRHS) { // -INTMIN = INTMIN + HiOverflow = 1; // [INTMIN+1, overflow) + HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + } } else if (CmpRHSV.isPositive()) { // (X / neg) op pos - HiOverflow = LoOverflow = ProdOV; + // e.g. X/-5 op 3 --> [-19, -14) + HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) - LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), - true); + LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), true) ?-1:0; HiBound = AddOne(Prod); } else { // (X / neg) op neg + // e.g. X/-5 op -3 --> [15, 20) LoBound = Prod; - LoOverflow = HiOverflow = ProdOV; + LoOverflow = HiOverflow = ProdOV ? 1 : 0; HiBound = Subtract(Prod, DivRHS); } - // Dividing by a negate swaps the condition. - predicate = ICmpInst::getSwappedPredicate(predicate); + // Dividing by a negative swaps the condition. LT <-> GT + Pred = ICmpInst::getSwappedPredicate(Pred); } Value *X = DivI->getOperand(0); - switch (predicate) { + switch (Pred) { default: assert(0 && "Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); else if (LoOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - true, ICI); + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); else if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, - false, ICI); + return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: - if (LoOverflow) + if (LoOverflow == +1) // Low bound is greater than input range. + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (LoOverflow == -1) // Low bound is less than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - return new ICmpInst(predicate, X, LoBound); + return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: - if (HiOverflow) + if (HiOverflow == +1) // High bound greater than input range. return ReplaceInstUsesWith(ICI, ConstantInt::getFalse()); - if (predicate == ICmpInst::ICMP_UGT) + else if (HiOverflow == -1) // High bound less than input range. 
+ return ReplaceInstUsesWith(ICI, ConstantInt::getTrue()); + if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); else return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); From evan.cheng at apple.com Thu Jun 21 13:23:04 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 21 Jun 2007 13:23:04 -0500 Subject: [llvm-commits] CVS: llvm/test/CodeGen/ARM/ifcvt2.ll ifcvt6.ll Message-ID: <200706211823.l5LIN4PA023387@zion.cs.uiuc.edu> Changes in directory llvm/test/CodeGen/ARM: ifcvt2.ll updated: 1.1 -> 1.2 ifcvt6.ll updated: 1.1 -> 1.2 --- Log message: Fix tests. --- Diffs of the changes: (+3 -1) ifcvt2.ll | 2 +- ifcvt6.ll | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/test/CodeGen/ARM/ifcvt2.ll diff -u llvm/test/CodeGen/ARM/ifcvt2.ll:1.1 llvm/test/CodeGen/ARM/ifcvt2.ll:1.2 --- llvm/test/CodeGen/ARM/ifcvt2.ll:1.1 Wed Jun 20 13:26:15 2007 +++ llvm/test/CodeGen/ARM/ifcvt2.ll Thu Jun 21 13:22:42 2007 @@ -1,5 +1,5 @@ ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion -; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxlt | wc -l | grep 2 +; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxlt | wc -l | grep 1 ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxgt | wc -l | grep 1 ; RUN: llvm-as < %s | llc -march=arm -enable-arm-if-conversion | grep bxge | wc -l | grep 1 Index: llvm/test/CodeGen/ARM/ifcvt6.ll diff -u llvm/test/CodeGen/ARM/ifcvt6.ll:1.1 llvm/test/CodeGen/ARM/ifcvt6.ll:1.2 --- llvm/test/CodeGen/ARM/ifcvt6.ll:1.1 Wed Jun 20 13:26:15 2007 +++ llvm/test/CodeGen/ARM/ifcvt6.ll Thu Jun 21 13:22:42 2007 @@ -16,3 +16,5 @@ UnifiedReturnBlock: ; preds = %entry ret void } + +declare i32 @bar(...) 
From resistor at mac.com Thu Jun 21 16:26:04 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 16:26:04 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/ADT/DepthFirstIterator.h Message-ID: <200706212126.l5LLQ45x027311@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/ADT: DepthFirstIterator.h updated: 1.14 -> 1.15 --- Log message: Have internal df_iterator's use SmallPtrSet instead of std::set. This provides compile time speedups to any pass using df_iterator. --- Diffs of the changes: (+5 -3) DepthFirstIterator.h | 8 +++++--- 1 files changed, 5 insertions(+), 3 deletions(-) Index: llvm/include/llvm/ADT/DepthFirstIterator.h diff -u llvm/include/llvm/ADT/DepthFirstIterator.h:1.14 llvm/include/llvm/ADT/DepthFirstIterator.h:1.15 --- llvm/include/llvm/ADT/DepthFirstIterator.h:1.14 Thu Apr 21 15:13:50 2005 +++ llvm/include/llvm/ADT/DepthFirstIterator.h Thu Jun 21 16:25:36 2007 @@ -35,6 +35,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/iterator" +#include "llvm/ADT/SmallPtrSet.h" #include #include @@ -58,8 +59,8 @@ // Generic Depth First Iterator -template::NodeType*>, +template::NodeType*, 8>, bool ExtStorage = false, class GT = GraphTraits > class df_iterator : public forward_iterator, public df_iterator_storage { @@ -189,7 +190,8 @@ // Provide global definitions of inverse depth first iterators... 
-template ::NodeType*>, +template ::NodeType*, 8>, bool External = false> struct idf_iterator : public df_iterator, SetTy, External> { idf_iterator(const df_iterator, SetTy, External> &V) From clattner at apple.com Thu Jun 21 16:38:19 2007 From: clattner at apple.com (Chris Lattner) Date: Thu, 21 Jun 2007 14:38:19 -0700 Subject: [llvm-commits] CVS: llvm/include/llvm/ADT/DepthFirstIterator.h In-Reply-To: <200706212126.l5LLQ45x027311@zion.cs.uiuc.edu> References: <200706212126.l5LLQ45x027311@zion.cs.uiuc.edu> Message-ID: <9C55FBC4-2BA5-4DF7-829B-1F6BAD2A7B40@apple.com> > Have internal df_iterator's use SmallPtrSet instead of std::set. > This provides compile time speedups to any pass using df_iterator. Can you remove the #include of <set> now? -Chris > > > --- > Diffs of the changes: (+5 -3) > > DepthFirstIterator.h | 8 +++++--- > 1 files changed, 5 insertions(+), 3 deletions(-) > > > Index: llvm/include/llvm/ADT/DepthFirstIterator.h > diff -u llvm/include/llvm/ADT/DepthFirstIterator.h:1.14 llvm/ > include/llvm/ADT/DepthFirstIterator.h:1.15 > --- llvm/include/llvm/ADT/DepthFirstIterator.h:1.14 Thu Apr 21 > 15:13:50 2005 > +++ llvm/include/llvm/ADT/DepthFirstIterator.h Thu Jun 21 16:25:36 > 2007 > @@ -35,6 +35,7 @@ > > #include "llvm/ADT/GraphTraits.h" > #include "llvm/ADT/iterator" > +#include "llvm/ADT/SmallPtrSet.h" > #include > #include > > @@ -58,8 +59,8 @@ > > > // Generic Depth First Iterator > -template - std::set GraphTraits::NodeType*>, > +template +class SetType = llvm::SmallPtrSet GraphTraits::NodeType*, 8>, > bool ExtStorage = false, class GT = GraphTraits > > class df_iterator : public forward_iterator ptrdiff_t>, > public df_iterator_storage { > @@ -189,7 +190,8 @@ > > > // Provide global definitions of inverse depth first iterators... 
> -template GraphTraits::NodeType*>, > +template + class SetTy = llvm::SmallPtrSet GraphTraits::NodeType*, 8>, > bool External = false> > struct idf_iterator : public df_iterator, SetTy, > External> { > idf_iterator(const df_iterator, SetTy, External> &V) > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From djg at cray.com Thu Jun 21 16:56:21 2007 From: djg at cray.com (Dan Gohman) Date: Thu, 21 Jun 2007 16:56:21 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td Message-ID: <20070621215621.GR5693@village.us.cray.com> > I'm sorry to be such a pain, but this seems like a step backward. > We've gone from having an explicit flag in the .td files to having > another magic table in the .cpp file (somewhat amusing because > 'duplicable' just made the opposite transition). > > How about this proposal (Obviously feel free to pick better names for > these things): > > 1. Reintroduce the 'isremat-able' flag, set it to true for all the > instructions that are *potentially* rematerializable. > 2. Add a virtual target hook that can override the flag: > "TII::isReallyRematerializable(Machineinstr*)". > 3. Introduce a new non-virtual method: > bool TII::isRematerializable(Machineinstr *MI) { > return MI->flags->isrematable && isReallyRematerializable(MI); > } I tried this, and got circular dependencies between libLLVMAnalysis.a, libLLVMTarget.a, and libLLVMCodeGen.a. I think it's because the actual code for 3. uses MachineInstr::getOpcode. > > This achieves two things: > > 1. Just looking at the .td file, you can tell which instructions are > candidates for remat. > 2. The isRematerializable predicate is faster for instructions that > are not remat-able. > 3. 
The isReallyRematerializable only needs to be implemented by > targets with instructions that are remat-able only in some cases > (like the x86 instructions). > > To me, #1 is the killer feature. In general, I'd like to move away > from having tables (either explicit, like the one in > X86RegisterInfo::foldMemoryOperand, or just big switch stmts) to > having properties on .td file entries. That makes it much more clear > what is going on when inspecting the .td files. > > I'm sorry I didn't look at your patch when you asked for comments, > but does this proposal sound sane? *shrug*. Adding isReMaterializable flags to all the load instructions in the X86 files isn't unambiguously prettier though. But I've already strayed from my tangent here :-}. Attached is a patch which does points 1 and 2 of what you describe above. Dan -- Dan Gohman, Cray Inc. -------------- next part -------------- Index: include/llvm/Target/TargetInstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/include/llvm/Target/TargetInstrInfo.h,v retrieving revision 1.132 diff -u -r1.132 TargetInstrInfo.h --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -78,6 +78,10 @@ // controls execution. It may be set to 'always'. const unsigned M_PREDICABLE = 1 << 12; +// M_REMATERIALIZIBLE - Set if this instruction can be trivally re-materialized +// at any time, e.g. constant generation, load from constant pool. +const unsigned M_REMATERIALIZIBLE = 1 << 13; + // M_CLOBBERS_PRED - Set if this instruction may clobbers the condition code // register and / or registers that are used to predicate instructions. const unsigned M_CLOBBERS_PRED = 1 << 14; @@ -260,6 +264,13 @@ return get(Opcode).Flags & M_PREDICABLE; } + /// isReMaterializableOpcode - Return true if this opcode can ever be trivially + /// rematerializable. 
Use use isReallyReMaterializable to test the specific + /// instructions for trivial rematerializability, considering their operands. + bool isReMaterializableOpcode(MachineOpCode Opcode) const { + return get(Opcode).Flags & M_REMATERIALIZIBLE; + } + bool clobbersPredicate(MachineOpCode Opcode) const { return get(Opcode).Flags & M_CLOBBERS_PRED; } @@ -301,14 +312,16 @@ return 0; } - /// isTriviallyReMaterializable - If the specified machine instruction can - /// be trivally re-materialized at any time, e.g. constant generation or - /// loads from constant pools. If not, return false. This predicate must + /// isReallyReMaterializable - For instructions with opcodes for which + /// the M_REMATERIALIZABLE flag is set, this function tests whether the + /// instruction itself is actually trivially rematerializable, considering + /// its operands. This is used for targets that have instructions that are + /// only trivially rematerializable for specific uses. This predicate must /// return false if the instruction has any side effects other than /// producing the value from the load, or if it requres any address /// registers that are not always available. - virtual bool isTriviallyReMaterializable(MachineInstr *MI) const { - return false; + virtual bool isReallyReMaterializable(MachineInstr *MI) const { + return true; } /// convertToThreeAddress - This method must be implemented by targets that Index: lib/CodeGen/LiveIntervalAnalysis.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp,v retrieving revision 1.248 diff -u -r1.248 LiveIntervalAnalysis.cpp --- lib/CodeGen/LiveIntervalAnalysis.cpp +++ lib/CodeGen/LiveIntervalAnalysis.cpp @@ -340,7 +340,8 @@ // instructions to be re-materialized as well. 
int FrameIdx = 0; if (vi.DefInst && - (tii_->isTriviallyReMaterializable(vi.DefInst) || + ((tii_->isReMaterializableOpcode(vi.DefInst->getOpcode()) && + tii_->isReallyReMaterializable(vi.DefInst)) || (tii_->isLoadFromStackSlot(vi.DefInst, FrameIdx) && mf_->getFrameInfo()->isFixedObjectIndex(FrameIdx)))) interval.remat = vi.DefInst; Index: lib/CodeGen/VirtRegMap.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/CodeGen/VirtRegMap.cpp,v retrieving revision 1.113 diff -u -r1.113 VirtRegMap.cpp --- lib/CodeGen/VirtRegMap.cpp +++ lib/CodeGen/VirtRegMap.cpp @@ -663,7 +663,8 @@ // If this instruction is being rematerialized, just remove it! int FrameIdx; - if (TII->isTriviallyReMaterializable(&MI) || + if ((TII->isReMaterializableOpcode(MI.getOpcode()) && + TII->isReallyReMaterializable(&MI)) || TII->isLoadFromStackSlot(&MI, FrameIdx)) { bool Remove = true; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { Index: lib/Target/Target.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/Target.td,v retrieving revision 1.105 diff -u -r1.105 Target.td --- lib/Target/Target.td +++ lib/Target/Target.td @@ -186,6 +186,7 @@ bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? + bit isReMaterializable = 0; // Is this instruction re-materializable? bit isPredicable = 0; // Is this instruction predicable? bit hasDelaySlot = 0; // Does this instruction have an delay slot? bit usesCustomDAGSchedInserter = 0; // Pseudo instr needing special help. 
Index: lib/Target/ARM/ARMInstrInfo.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.cpp,v retrieving revision 1.39 diff -u -r1.39 ARMInstrInfo.cpp --- lib/Target/ARM/ARMInstrInfo.cpp +++ lib/Target/ARM/ARMInstrInfo.cpp @@ -130,20 +130,6 @@ return 0; } -bool ARMInstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: break; - case ARM::LDRcp: - case ARM::MOVi: - case ARM::MVNi: - case ARM::MOVi2pieces: - case ARM::tLDRcp: - // These instructions are always trivially rematerializable. - return true; - } - return false; -} - static unsigned getUnindexedOpcode(unsigned Opc) { switch (Opc) { default: break; Index: lib/Target/ARM/ARMInstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.h,v retrieving revision 1.16 diff -u -r1.16 ARMInstrInfo.h --- lib/Target/ARM/ARMInstrInfo.h +++ lib/Target/ARM/ARMInstrInfo.h @@ -87,7 +87,6 @@ unsigned &SrcReg, unsigned &DstReg) const; virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; - virtual bool isTriviallyReMaterializable(MachineInstr *MI) const; virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrInfo.td,v retrieving revision 1.113 diff -u -r1.113 ARMInstrInfo.td --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -677,6 +677,7 @@ [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
+let isReMaterializable = 1 in def LDRcp : AI2<(ops GPR:$dst, addrmode2:$addr), "ldr", " $dst, $addr", []>; @@ -810,6 +811,7 @@ def MOVs : AI1<(ops GPR:$dst, so_reg:$src), "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>; +let isReMaterializable = 1 in def MOVi : AI1<(ops GPR:$dst, so_imm:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>; @@ -917,6 +919,7 @@ "mvn", " $dst, $src", [(set GPR:$dst, (not GPR:$src))]>; def MVNs : AI<(ops GPR:$dst, so_reg:$src), "mvn", " $dst, $src", [(set GPR:$dst, (not so_reg:$src))]>; +let isReMaterializable = 1 in def MVNi : AI<(ops GPR:$dst, so_imm:$imm), "mvn", " $dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>; @@ -1187,6 +1190,7 @@ // Large immediate handling. // Two piece so_imms. +let isReMaterializable = 1 in def MOVi2pieces : AI1x2<(ops GPR:$dst, so_imm2part:$src), "mov", " $dst, $src", [(set GPR:$dst, so_imm2part:$src)]>; Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/ARM/ARMInstrThumb.td,v retrieving revision 1.32 diff -u -r1.32 ARMInstrThumb.td --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -267,6 +267,7 @@ [(set GPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. +let isReMaterializable = 1 in def tLDRcp : TIs<(ops GPR:$dst, i32imm:$addr), "ldr $dst, $addr", []>; } // isLoad Index: lib/Target/X86/X86InstrFPStack.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrFPStack.td,v retrieving revision 1.10 diff -u -r1.10 X86InstrFPStack.td --- lib/Target/X86/X86InstrFPStack.td +++ lib/Target/X86/X86InstrFPStack.td @@ -356,6 +356,7 @@ // Floating point loads & stores. 
def FpLD32m : FpI<(ops RFP:$dst, f32mem:$src), ZeroArgFP, [(set RFP:$dst, (extloadf64f32 addr:$src))]>; +let isReMaterializable = 1 in def FpLD64m : FpI<(ops RFP:$dst, f64mem:$src), ZeroArgFP, [(set RFP:$dst, (loadf64 addr:$src))]>; def FpILD16m : FpI<(ops RFP:$dst, i16mem:$src), ZeroArgFP, @@ -413,10 +414,12 @@ def FXCH : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9; // Floating point constant loads. +let isReMaterializable = 1 in { def FpLD0 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm0)]>; def FpLD1 : FpI<(ops RFP:$dst), ZeroArgFP, [(set RFP:$dst, fp64imm1)]>; +} def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9; def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9; Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.cpp,v retrieving revision 1.92 diff -u -r1.92 X86InstrInfo.cpp --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -112,20 +112,11 @@ } -bool X86InstrInfo::isTriviallyReMaterializable(MachineInstr *MI) const { +bool X86InstrInfo::isReallyReMaterializable(MachineInstr *MI) const { + assert(isReMaterializableOpcode(MI->getOpcode()) && + "Check isReMaterializableOpcode before checking isReallyReMaterializable"); switch (MI->getOpcode()) { default: break; - case X86::FpLD0: - case X86::FpLD1: - case X86::MOV8ri: - case X86::MOV16ri: - case X86::MOV32ri: - case X86::MMX_V_SET0: - case X86::MMX_V_SETALLONES: - case X86::V_SET0: - case X86::V_SETALLONES: - // These instructions are always trivially rematerializable. - return true; case X86::MOV8rm: case X86::MOV16rm: case X86::MOV16_rm: @@ -146,7 +137,9 @@ MI->getOperand(2).getImmedValue() == 1 && MI->getOperand(3).getReg() == 0; } - return false; + // All other isReMaterializable instructions are always trivially + // rematerializable. 
+ return true; } /// convertToThreeAddress - This method must be implemented by targets that Index: lib/Target/X86/X86InstrInfo.h =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.h,v retrieving revision 1.67 diff -u -r1.67 X86InstrInfo.h --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -239,7 +239,7 @@ unsigned& destReg) const; unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const; - bool isTriviallyReMaterializable(MachineInstr *MI) const; + bool isReallyReMaterializable(MachineInstr *MI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target Index: lib/Target/X86/X86InstrInfo.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrInfo.td,v retrieving revision 1.308 diff -u -r1.308 X86InstrInfo.td --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -617,6 +617,7 @@ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (ops GR32:$dst, GR32:$src), "mov{l} {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (ops GR8 :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(set GR8:$dst, imm:$src)]>; @@ -626,6 +627,7 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (ops GR32:$dst, i32imm:$src), "mov{l} {$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; +} def MOV8mi : Ii8 <0xC6, MRM0m, (ops i8mem :$dst, i8imm :$src), "mov{b} {$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; @@ -636,6 +638,7 @@ "mov{l} {$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; +let isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (ops GR8 :$dst, i8mem :$src), "mov{b} {$src, $dst|$dst, $src}", [(set GR8:$dst, (load 
addr:$src))]>; @@ -645,6 +648,7 @@ def MOV32rm : I<0x8B, MRMSrcMem, (ops GR32:$dst, i32mem:$src), "mov{l} {$src, $dst|$dst, $src}", [(set GR32:$dst, (load addr:$src))]>; +} def MOV8mr : I<0x88, MRMDestMem, (ops i8mem :$dst, GR8 :$src), "mov{b} {$src, $dst|$dst, $src}", @@ -2447,10 +2451,12 @@ "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32_rr : I<0x89, MRMDestReg, (ops GR32_:$dst, GR32_:$src), "mov{l} {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in { def MOV16_rm : I<0x8B, MRMSrcMem, (ops GR16_:$dst, i16mem:$src), "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32_rm : I<0x8B, MRMSrcMem, (ops GR32_:$dst, i32mem:$src), "mov{l} {$src, $dst|$dst, $src}", []>; +} def MOV16_mr : I<0x89, MRMDestMem, (ops i16mem:$dst, GR16_:$src), "mov{w} {$src, $dst|$dst, $src}", []>, OpSize; def MOV32_mr : I<0x89, MRMDestMem, (ops i32mem:$dst, GR32_:$src), Index: lib/Target/X86/X86InstrMMX.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrMMX.td,v retrieving revision 1.33 diff -u -r1.33 X86InstrMMX.td --- lib/Target/X86/X86InstrMMX.td +++ lib/Target/X86/X86InstrMMX.td @@ -178,6 +178,7 @@ // Data Transfer Instructions def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src), "movd {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (ops VR64:$dst, i32mem:$src), "movd {$src, $dst|$dst, $src}", []>; def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (ops i32mem:$dst, VR64:$src), @@ -185,6 +186,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src), "movq {$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; @@ -503,12 +505,14 @@ // Alias instructions that map zero vector to pxor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. 
-def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), - "pxor $dst, $dst", - [(set VR64:$dst, (v1i64 immAllZerosV))]>; -def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), - "pcmpeqd $dst, $dst", - [(set VR64:$dst, (v1i64 immAllOnesV))]>; +let isReMaterializable = 1 in { + def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), + "pxor $dst, $dst", + [(set VR64:$dst, (v1i64 immAllZerosV))]>; + def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (ops VR64:$dst), + "pcmpeqd $dst, $dst", + [(set VR64:$dst, (v1i64 immAllOnesV))]>; +} //===----------------------------------------------------------------------===// // Non-Instruction Patterns Index: lib/Target/X86/X86InstrSSE.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrSSE.td,v retrieving revision 1.184 diff -u -r1.184 X86InstrSSE.td --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -260,6 +260,7 @@ // Move Instructions def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src), "movss {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src), "movss {$src, $dst|$dst, $src}", [(set FR32:$dst, (loadf32 addr:$src))]>; @@ -480,6 +481,7 @@ // Move Instructions def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src), "movaps {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src), "movaps {$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -759,6 +761,7 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. 
+let isReMaterializable = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (ops VR128:$dst), "xorps $dst, $dst", [(set VR128:$dst, (v4f32 immAllZerosV))]>; @@ -844,6 +847,7 @@ // Move Instructions def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src), "movsd {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src), "movsd {$src, $dst|$dst, $src}", [(set FR64:$dst, (loadf64 addr:$src))]>; @@ -1065,6 +1069,7 @@ // Move Instructions def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops VR128:$dst, VR128:$src), "movapd {$src, $dst|$dst, $src}", []>; +let isReMaterializable = 1 in def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops VR128:$dst, f128mem:$src), "movapd {$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv2f64 addr:$src))]>; @@ -1818,9 +1823,10 @@ // Alias instructions that map zero vector to pxor / xorp* for sse. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. -def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), - "pcmpeqd $dst, $dst", - [(set VR128:$dst, (v2f64 immAllOnesV))]>; +let isReMaterializable = 1 in + def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst), + "pcmpeqd $dst, $dst", + [(set VR128:$dst, (v2f64 immAllOnesV))]>; // FR64 to 128-bit vector conversion. 
def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src), Index: lib/Target/X86/X86InstrX86-64.td =================================================================== RCS file: /var/cvs/llvm/llvm/lib/Target/X86/X86InstrX86-64.td,v retrieving revision 1.17 diff -u -r1.17 X86InstrX86-64.td --- lib/Target/X86/X86InstrX86-64.td +++ lib/Target/X86/X86InstrX86-64.td @@ -187,6 +187,7 @@ "mov{q} {$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)]>; +let isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (ops GR64:$dst, i64mem:$src), "mov{q} {$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; Index: utils/TableGen/CodeGenInstruction.h =================================================================== RCS file: /var/cvs/llvm/llvm/utils/TableGen/CodeGenInstruction.h,v retrieving revision 1.30 diff -u -r1.30 CodeGenInstruction.h --- utils/TableGen/CodeGenInstruction.h +++ utils/TableGen/CodeGenInstruction.h @@ -91,6 +91,7 @@ bool isConvertibleToThreeAddress; bool isCommutable; bool isTerminator; + bool isReMaterializable; bool hasDelaySlot; bool usesCustomDAGSchedInserter; bool hasVariableNumberOfOperands; Index: utils/TableGen/CodeGenTarget.cpp =================================================================== RCS file: /var/cvs/llvm/llvm/utils/TableGen/CodeGenTarget.cpp,v retrieving revision 1.94 diff -u -r1.94 CodeGenTarget.cpp --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -365,6 +365,7 @@ isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress"); isCommutable = R->getValueAsBit("isCommutable"); isTerminator = R->getValueAsBit("isTerminator"); + isReMaterializable = R->getValueAsBit("isReMaterializable"); hasDelaySlot = R->getValueAsBit("hasDelaySlot"); usesCustomDAGSchedInserter = R->getValueAsBit("usesCustomDAGSchedInserter"); hasCtrlDep = R->getValueAsBit("hasCtrlDep"); Index: utils/TableGen/InstrInfoEmitter.cpp =================================================================== 
RCS file: /var/cvs/llvm/llvm/utils/TableGen/InstrInfoEmitter.cpp,v retrieving revision 1.62 diff -u -r1.62 InstrInfoEmitter.cpp --- utils/TableGen/InstrInfoEmitter.cpp +++ utils/TableGen/InstrInfoEmitter.cpp @@ -240,6 +240,7 @@ if (Inst.isConvertibleToThreeAddress) OS << "|M_CONVERTIBLE_TO_3_ADDR"; if (Inst.isCommutable) OS << "|M_COMMUTABLE"; if (Inst.isTerminator) OS << "|M_TERMINATOR_FLAG"; + if (Inst.isReMaterializable) OS << "|M_REMATERIALIZIBLE"; if (Inst.clobbersPred) OS << "|M_CLOBBERS_PRED"; if (Inst.isNotDuplicable) OS << "|M_NOT_DUPLICABLE"; if (Inst.usesCustomDAGSchedInserter) From dpatel at apple.com Thu Jun 21 17:29:25 2007 From: dpatel at apple.com (Devang Patel) Date: Thu, 21 Jun 2007 17:29:25 -0500 Subject: [llvm-commits] CVS: llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp Message-ID: <200706212229.l5LMTPnl028512@zion.cs.uiuc.edu> Changes in directory llvm/lib/Analysis/IPA: CallGraphSCCPass.cpp updated: 1.24 -> 1.25 --- Log message: CallGraphSCCPass manager may require other passes. Use schedulePass() to accommodate these requirements instead of directly assigning a manager to new CallGraph PassManager. --- Diffs of the changes: (+1 -1) CallGraphSCCPass.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp diff -u llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp:1.24 llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp:1.25 --- llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp:1.24 Wed May 2 20:11:53 2007 +++ llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp Thu Jun 21 17:29:02 2007 @@ -178,7 +178,7 @@ // [3] Assign manager to manage this new manager. 
This may create // and push new managers into PMS Pass *P = dynamic_cast(CGP); - P->assignPassManager(PMS); + TPM->schedulePass(P); // [4] Push new manager into PMS PMS.push(CGP); From sabre at nondot.org Thu Jun 21 18:23:54 2007 From: sabre at nondot.org (Chris Lattner) Date: Thu, 21 Jun 2007 18:23:54 -0500 Subject: [llvm-commits] CVS: llvm/lib/Support/SmallPtrSet.cpp Message-ID: <200706212323.l5LNNsC9029521@zion.cs.uiuc.edu> Changes in directory llvm/lib/Support: SmallPtrSet.cpp updated: 1.6 -> 1.7 --- Log message: Two changes: 1. Make SmallPtrSet::erase faster in the small case by replacing a memmove with a pointer copy. 2. Fix a bug where the null terminator at the end of the array in the small case was not copied --- Diffs of the changes: (+5 -4) SmallPtrSet.cpp | 9 +++++---- 1 files changed, 5 insertions(+), 4 deletions(-) Index: llvm/lib/Support/SmallPtrSet.cpp diff -u llvm/lib/Support/SmallPtrSet.cpp:1.6 llvm/lib/Support/SmallPtrSet.cpp:1.7 --- llvm/lib/Support/SmallPtrSet.cpp:1.6 Sat Apr 14 16:50:21 2007 +++ llvm/lib/Support/SmallPtrSet.cpp Thu Jun 21 18:23:32 2007 @@ -54,9 +54,8 @@ for (void **APtr = SmallArray, **E = SmallArray+NumElements; APtr != E; ++APtr) if (*APtr == Ptr) { - // If it is in the set, move everything over, replacing this element. - memmove(APtr, APtr+1, sizeof(void*)*(E-APtr-1)); - // Clear the end element. + // If it is in the set, replace this element. + *APtr = E[-1]; E[-1] = getEmptyMarker(); --NumElements; return true; @@ -151,7 +150,9 @@ if (that.isSmall()) { CurArraySize = that.CurArraySize; CurArray = &SmallArray[0]; - memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize); + // Copy the entire contents of the array, including the -1's and the null + // terminator. + memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1)); } else { CurArraySize = that.NumElements < 64 ? 
128 : that.NumElements*2; CurArray = new void*[CurArraySize+1]; From dpatel at apple.com Thu Jun 21 18:28:14 2007 From: dpatel at apple.com (dpatel at apple.com) Date: Thu, 21 Jun 2007 16:28:14 -0700 (PDT) Subject: [llvm-commits] [128706] Fix 1519. Message-ID: <20070621232814.6918AA0C5AC0@src> Revision: 128706 Author: dpatel Date: 2007-06-21 16:28:14 -0700 (Thu, 21 Jun 2007) Log Message: ----------- Fix 1519. Patch by Rafael Espindola. Modified Paths: -------------- apple-local/branches/llvm/gcc/llvm-types.cpp Modified: apple-local/branches/llvm/gcc/llvm-types.cpp =================================================================== --- apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-21 22:36:49 UTC (rev 128705) +++ apple-local/branches/llvm/gcc/llvm-types.cpp 2007-06-21 23:28:14 UTC (rev 128706) @@ -1070,6 +1070,8 @@ void allFieldsAreNotBitFields() { AllBitFields = false; + // Next field is not a bitfield. + LastFieldStartsAtNonByteBoundry = false; } unsigned getGCCStructAlignmentInBytes() const { From clattner at apple.com Thu Jun 21 18:49:17 2007 From: clattner at apple.com (clattner at apple.com) Date: Thu, 21 Jun 2007 16:49:17 -0700 (PDT) Subject: [llvm-commits] [128707] Check in Rafael's hack that works around PR1521, allowing bootstrap on x86-64. Message-ID: <20070621234917.5AE13A0EF8D0@src> Revision: 128707 Author: clattner Date: 2007-06-21 16:49:16 -0700 (Thu, 21 Jun 2007) Log Message: ----------- Check in Rafael's hack that works around PR1521, allowing bootstrap on x86-64. This should be reverted when PR1521 is fixed. 
Modified Paths: -------------- apple-local/branches/llvm/gcc/c-lex.c Modified: apple-local/branches/llvm/gcc/c-lex.c =================================================================== --- apple-local/branches/llvm/gcc/c-lex.c 2007-06-21 23:28:14 UTC (rev 128706) +++ apple-local/branches/llvm/gcc/c-lex.c 2007-06-21 23:49:16 UTC (rev 128707) @@ -818,6 +818,36 @@ return itk_none; } +/* APPLE LOCAL begin llvm hack for PR1521 */ +#define PART_PRECISION (sizeof (cpp_num_part) * CHAR_BIT) +/* Sign extend a number, with PRECISION significant bits and all + others assumed clear, to fill out a cpp_num structure. */ +static cpp_num +my_cpp_num_sign_extend (cpp_num num, size_t precision) +{ + if (num.high) + printf("%lu\n", num.high); + if (!num.unsignedp) + { + if (precision > PART_PRECISION) + { + precision -= PART_PRECISION; + if (precision < PART_PRECISION + && (num.high & (cpp_num_part) 1 << (precision - 1))) + num.high |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision)); + } + else if (num.low & (cpp_num_part) 1 << (precision - 1)) + { + if (precision < PART_PRECISION) + num.low |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision)); + num.high = ~(cpp_num_part) 0; + } + } + + return num; +} +/* APPLE LOCAL llvm end */ + /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */ static tree interpret_integer (const cpp_token *token, unsigned int flags) @@ -828,7 +858,8 @@ cpp_options *options = cpp_get_options (parse_in); integer = cpp_interpret_integer (parse_in, token, flags); - integer = cpp_num_sign_extend (integer, options->precision); + /* APPLE LOCAL llvm hack for PR1521 */ + integer = my_cpp_num_sign_extend (integer, options->precision); /* The type of a constant with a U suffix is straightforward. 
*/ if (flags & CPP_N_UNSIGNED) From resistor at mac.com Thu Jun 21 19:11:40 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 19:11:40 -0500 Subject: [llvm-commits] CVS: llvm/lib/Support/SmallPtrSet.cpp Message-ID: <200706220011.l5M0BeA0030375@zion.cs.uiuc.edu> Changes in directory llvm/lib/Support: SmallPtrSet.cpp updated: 1.7 -> 1.8 --- Log message: Fix a bug in SmallPtrSet that was causing GVNPRE to enter an infinite loop. --- Diffs of the changes: (+1 -1) SmallPtrSet.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Support/SmallPtrSet.cpp diff -u llvm/lib/Support/SmallPtrSet.cpp:1.7 llvm/lib/Support/SmallPtrSet.cpp:1.8 --- llvm/lib/Support/SmallPtrSet.cpp:1.7 Thu Jun 21 18:23:32 2007 +++ llvm/lib/Support/SmallPtrSet.cpp Thu Jun 21 19:11:18 2007 @@ -154,7 +154,7 @@ // terminator. memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1)); } else { - CurArraySize = that.NumElements < 64 ? 128 : that.NumElements*2; + CurArraySize = that.NumElements < 64 ? 128 : that.CurArraySize*2; CurArray = new void*[CurArraySize+1]; memset(CurArray, -1, CurArraySize*sizeof(void*)); From resistor at mac.com Thu Jun 21 19:20:52 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 19:20:52 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706220020.l5M0KqfY030774@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.49 -> 1.50 --- Log message: Make a bunch of optimizations for compile time to GVNPRE, including smarter set unions, deferring blocks rather than computing maximal sets, and smarter use of sets. With these enhancements, the time to optimize 273.perlbmk goes from 5.3s to 2.7s. 
--- Diffs of the changes: (+41 -17) GVNPRE.cpp | 58 +++++++++++++++++++++++++++++++++++++++++----------------- 1 files changed, 41 insertions(+), 17 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.49 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.50 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.49 Thu Jun 21 12:57:53 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Thu Jun 21 19:20:30 2007 @@ -25,6 +25,7 @@ #include "llvm/Function.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -106,6 +107,7 @@ } SmallPtrSet& getMaximalValues() { return maximalValues; } void erase(Value* v); + unsigned size(); }; } @@ -331,6 +333,12 @@ maximalExpressions.erase(create_expression(C)); } +/// size - Return the number of assigned value numbers +unsigned ValueTable::size() { + // NOTE: zero is never assigned + return nextValueNumber; +} + //===----------------------------------------------------------------------===// // GVNPRE Pass //===----------------------------------------------------------------------===// @@ -365,7 +373,7 @@ uint32_t v); Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ); void phi_translate_set(SmallPtrSet& anticIn, BasicBlock* pred, - BasicBlock* succ, SmallPtrSet& out); + BasicBlock* succ, SmallPtrSet& out) ; void topo_sort(SmallPtrSet& set, std::vector& vec); @@ -381,10 +389,10 @@ SmallPtrSet& currPhis, SmallPtrSet& currExps, SmallPtrSet& currTemps); - void buildsets_anticout(BasicBlock* BB, + bool buildsets_anticout(BasicBlock* BB, SmallPtrSet& anticOut, std::set& visited); - bool buildsets_anticin(BasicBlock* BB, + unsigned buildsets_anticin(BasicBlock* BB, SmallPtrSet& anticOut, SmallPtrSet& currExps, SmallPtrSet& currTemps, @@ -395,7 +403,7 @@ std::map& avail, SmallPtrSet& new_set); unsigned 
insertion_mergepoint(std::vector& workList, - df_iterator D, + df_iterator& D, SmallPtrSet& new_set); bool insertion(Function& F); @@ -830,13 +838,12 @@ /// buildsets_anticout - When walking the postdom tree, calculate the ANTIC_OUT /// set as a function of the ANTIC_IN set of the block's predecessors -void GVNPRE::buildsets_anticout(BasicBlock* BB, +bool GVNPRE::buildsets_anticout(BasicBlock* BB, SmallPtrSet& anticOut, std::set& visited) { if (BB->getTerminator()->getNumSuccessors() == 1) { - if (visited.find(BB->getTerminator()->getSuccessor(0)) == visited.end()) - phi_translate_set(VN.getMaximalValues(), BB, - BB->getTerminator()->getSuccessor(0), anticOut); + if (visited.count(BB->getTerminator()->getSuccessor(0)) == 0) + return true; else phi_translate_set(anticipatedIn[BB->getTerminator()->getSuccessor(0)], BB, BB->getTerminator()->getSuccessor(0), anticOut); @@ -860,12 +867,14 @@ anticOut.erase(*I); } } + + return false; } /// buildsets_anticin - Walk the postdom tree, calculating ANTIC_OUT for /// each block. ANTIC_IN is then a function of ANTIC_OUT and the GEN /// sets populated in buildsets_availout -bool GVNPRE::buildsets_anticin(BasicBlock* BB, +unsigned GVNPRE::buildsets_anticin(BasicBlock* BB, SmallPtrSet& anticOut, SmallPtrSet& currExps, SmallPtrSet& currTemps, @@ -873,7 +882,9 @@ SmallPtrSet& anticIn = anticipatedIn[BB]; SmallPtrSet old (anticIn.begin(), anticIn.end()); - buildsets_anticout(BB, anticOut, visited); + bool defer = buildsets_anticout(BB, anticOut, visited); + if (defer) + return 0; SmallPtrSet S; for (SmallPtrSet::iterator I = anticOut.begin(), @@ -887,7 +898,11 @@ E = currExps.end(); I != E; ++I) if (currTemps.count(*I) == 0) anticIn.insert(*I); - + + BitVector numbers(VN.size()); + for (SmallPtrSet::iterator I = anticIn.begin(), + E = anticIn.end(); I != E; ++I) + numbers.set(VN.lookup(*I)-1); for (SmallPtrSet::iterator I = S.begin(), E = S.end(); I != E; ++I) { // For non-opaque values, we should already have a value numbering. 
@@ -896,16 +911,17 @@ // so now. if (!isa(*I) && !isa(*I)) VN.lookup_or_add(*I); - val_insert(anticIn, *I); + if (!numbers.test(VN.lookup(*I)-1)) + anticIn.insert(*I); } clean(anticIn); anticOut.clear(); if (old.size() != anticIn.size()) - return true; + return 2; else - return false; + return 1; } /// buildsets - Phase 1 of the main algorithm. Construct the AVAIL_OUT @@ -974,10 +990,18 @@ if (BB == 0) continue; - visited.insert(BB); - changed |= buildsets_anticin(BB, anticOut, generatedTemporaries[BB], + + unsigned ret = buildsets_anticin(BB, anticOut, generatedTemporaries[BB], generatedExpressions[BB], visited); + + if (ret == 0) { + changed = true; + break; + } else { + visited.insert(BB); + changed |= (ret == 2); + } } iterations++; @@ -1054,7 +1078,7 @@ /// insertion_mergepoint - When walking the dom tree, check at each merge /// block for the possibility of a partial redundancy. If present, eliminate it unsigned GVNPRE::insertion_mergepoint(std::vector& workList, - df_iterator D, + df_iterator& D, SmallPtrSet& new_set) { bool changed_function = false; bool new_stuff = false; From resistor at mac.com Thu Jun 21 19:43:44 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 19:43:44 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706220043.l5M0hiKs031277@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.50 -> 1.51 --- Log message: Reserve space in vectors before topologically sorting into them. This improves the time to optimize 403.gcc from 28s to 23.5s. 
--- Diffs of the changes: (+2 -0) GVNPRE.cpp | 2 ++ 1 files changed, 2 insertions(+) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.50 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.51 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.50 Thu Jun 21 19:20:30 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Thu Jun 21 19:43:22 2007 @@ -585,6 +585,7 @@ /// above) void GVNPRE::clean(SmallPtrSet& set) { std::vector worklist; + worklist.reserve(set.size()); topo_sort(set, worklist); for (unsigned i = 0; i < worklist.size(); ++i) { @@ -1173,6 +1174,7 @@ // If there is more than one predecessor... if (pred_begin(BB) != pred_end(BB) && ++pred_begin(BB) != pred_end(BB)) { std::vector workList; + workList.reserve(anticIn.size()); topo_sort(anticIn, workList); DOUT << "Merge Block: " << BB->getName() << "\n"; From dalej at apple.com Thu Jun 21 19:55:19 2007 From: dalej at apple.com (Dale Johannesen) Date: Thu, 21 Jun 2007 19:55:19 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86AsmPrinter.cpp Message-ID: <200706220055.l5M0tJFs031535@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86AsmPrinter.cpp updated: 1.243 -> 1.244 --- Log message: Quote complex names for Darwin X86 and ARM. --- Diffs of the changes: (+7 -1) X86AsmPrinter.cpp | 8 +++++++- 1 files changed, 7 insertions(+), 1 deletion(-) Index: llvm/lib/Target/X86/X86AsmPrinter.cpp diff -u llvm/lib/Target/X86/X86AsmPrinter.cpp:1.243 llvm/lib/Target/X86/X86AsmPrinter.cpp:1.244 --- llvm/lib/Target/X86/X86AsmPrinter.cpp:1.243 Thu Jun 14 10:00:27 2007 +++ llvm/lib/Target/X86/X86AsmPrinter.cpp Thu Jun 21 19:54:56 2007 @@ -120,7 +120,13 @@ DW.BeginModule(&M); } - return AsmPrinter::doInitialization(M); + AsmPrinter::doInitialization(M); + + // Darwin wants symbols to be quoted if they have complex names. 
+ if (Subtarget->isTargetDarwin()) + Mang->setUseQuotes(true); + + return false; } bool X86SharedAsmPrinter::doFinalization(Module &M) { From evan.cheng at apple.com Thu Jun 21 20:36:14 2007 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 21 Jun 2007 20:36:14 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Message-ID: <200706220136.l5M1aEb0032141@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: ScheduleDAGRRList.cpp updated: 1.30 -> 1.31 --- Log message: std::set is really really terrible. Switch to SmallPtrSet to reduce compile time. For Duraid's example, the overall isel time is reduced from 0.6255 sec to 0.1876 sec. --- Diffs of the changes: (+4 -4) ScheduleDAGRRList.cpp | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp diff -u llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp:1.30 llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp:1.31 --- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp:1.30 Thu Apr 26 14:40:56 2007 +++ llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Thu Jun 21 20:35:51 2007 @@ -118,7 +118,7 @@ /// it is not the last use of its first operand, add it to the CommuteSet if /// possible. It will be commuted when it is translated to a MI. void ScheduleDAGRRList::CommuteNodesToReducePressure() { - std::set OperandSeen; + SmallPtrSet OperandSeen; for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. SUnit *SU = Sequence[i]; if (!SU) continue; @@ -680,13 +680,13 @@ // FIXME: This is probably too slow! 
static void isReachable(SUnit *SU, SUnit *TargetSU, - std::set &Visited, bool &Reached) { + SmallPtrSet &Visited, bool &Reached) { if (Reached) return; if (SU == TargetSU) { Reached = true; return; } - if (!Visited.insert(SU).second) return; + if (!Visited.insert(SU)) return; for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) @@ -694,7 +694,7 @@ } static bool isReachable(SUnit *SU, SUnit *TargetSU) { - std::set Visited; + SmallPtrSet Visited; bool Reached = false; isReachable(SU, TargetSU, Visited, Reached); return Reached; From resistor at mac.com Thu Jun 21 22:14:26 2007 From: resistor at mac.com (Owen Anderson) Date: Thu, 21 Jun 2007 22:14:26 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706220314.l5M3EQbW001173@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.51 -> 1.52 --- Log message: Avoid excessive calls to find_leader when calculating AVAIL_OUT. This reduces the time to optimize 403.gcc from 23.5s to 21.9s. 
--- Diffs of the changes: (+76 -30) GVNPRE.cpp | 106 +++++++++++++++++++++++++++++++++++++++++++------------------ 1 files changed, 76 insertions(+), 30 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.51 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.52 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.51 Thu Jun 21 19:43:22 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Thu Jun 21 22:14:03 2007 @@ -367,45 +367,47 @@ // Helper fuctions // FIXME: eliminate or document these better - void dump(const SmallPtrSet& s) const; - void clean(SmallPtrSet& set); + void dump(const SmallPtrSet& s) const __attribute__((noinline)); + void clean(SmallPtrSet& set) __attribute__((noinline)); Value* find_leader(SmallPtrSet& vals, - uint32_t v); - Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ); + uint32_t v) __attribute__((noinline)); + Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) __attribute__((noinline)); void phi_translate_set(SmallPtrSet& anticIn, BasicBlock* pred, - BasicBlock* succ, SmallPtrSet& out) ; + BasicBlock* succ, SmallPtrSet& out) __attribute__((noinline)); void topo_sort(SmallPtrSet& set, - std::vector& vec); + std::vector& vec) __attribute__((noinline)); - void cleanup(); - bool elimination(); + void cleanup() __attribute__((noinline)); + bool elimination() __attribute__((noinline)); - void val_insert(SmallPtrSet& s, Value* v); - void val_replace(SmallPtrSet& s, Value* v); - bool dependsOnInvoke(Value* V); + void val_insert(SmallPtrSet& s, Value* v) __attribute__((noinline)); + void val_replace(SmallPtrSet& s, Value* v) __attribute__((noinline)); + bool dependsOnInvoke(Value* V) __attribute__((noinline)); void buildsets_availout(BasicBlock::iterator I, SmallPtrSet& currAvail, SmallPtrSet& currPhis, SmallPtrSet& currExps, - SmallPtrSet& currTemps); + SmallPtrSet& currTemps, + BitVector& availNumbers, + BitVector& expNumbers) __attribute__((noinline)); bool 
buildsets_anticout(BasicBlock* BB, SmallPtrSet& anticOut, - std::set& visited); + std::set& visited) __attribute__((noinline)); unsigned buildsets_anticin(BasicBlock* BB, SmallPtrSet& anticOut, SmallPtrSet& currExps, SmallPtrSet& currTemps, - std::set& visited); - unsigned buildsets(Function& F); + std::set& visited) __attribute__((noinline)); + unsigned buildsets(Function& F) __attribute__((noinline)); void insertion_pre(Value* e, BasicBlock* BB, std::map& avail, - SmallPtrSet& new_set); + SmallPtrSet& new_set) __attribute__((noinline)); unsigned insertion_mergepoint(std::vector& workList, df_iterator& D, - SmallPtrSet& new_set); - bool insertion(Function& F); + SmallPtrSet& new_set) __attribute__((noinline)); + bool insertion(Function& F) __attribute__((noinline)); }; @@ -795,10 +797,15 @@ SmallPtrSet& currAvail, SmallPtrSet& currPhis, SmallPtrSet& currExps, - SmallPtrSet& currTemps) { + SmallPtrSet& currTemps, + BitVector& availNumbers, + BitVector& expNumbers) { // Handle PHI nodes... if (PHINode* p = dyn_cast(I)) { VN.lookup_or_add(p); + expNumbers.resize(VN.size()); + availNumbers.resize(VN.size()); + currPhis.insert(p); // Handle binary ops... @@ -806,13 +813,26 @@ Value* leftValue = BO->getOperand(0); Value* rightValue = BO->getOperand(1); - VN.lookup_or_add(BO); + unsigned num = VN.lookup_or_add(BO); + expNumbers.resize(VN.size()); + availNumbers.resize(VN.size()); if (isa(leftValue)) - val_insert(currExps, leftValue); + if (!expNumbers.test(VN.lookup(leftValue)-1)) { + currExps.insert(leftValue); + expNumbers.set(VN.lookup(leftValue)-1); + } + if (isa(rightValue)) - val_insert(currExps, rightValue); - val_insert(currExps, BO); + if (!expNumbers.test(VN.lookup(rightValue)-1)) { + currExps.insert(rightValue); + expNumbers.set(VN.lookup(rightValue)-1); + } + + if (!expNumbers.test(VN.lookup(BO)-1)) { + currExps.insert(BO); + expNumbers.set(num-1); + } // Handle cmp ops... 
} else if (CmpInst* C = dyn_cast(I)) { @@ -820,21 +840,41 @@ Value* rightValue = C->getOperand(1); VN.lookup_or_add(C); - + + unsigned num = VN.lookup_or_add(C); + expNumbers.resize(VN.size()); + availNumbers.resize(VN.size()); + if (isa(leftValue)) - val_insert(currExps, leftValue); + if (!expNumbers.test(VN.lookup(leftValue)-1)) { + currExps.insert(leftValue); + expNumbers.set(VN.lookup(leftValue)-1); + } if (isa(rightValue)) - val_insert(currExps, rightValue); - val_insert(currExps, C); - + if (!expNumbers.test(VN.lookup(rightValue)-1)) { + currExps.insert(rightValue); + expNumbers.set(VN.lookup(rightValue)-1); + } + + if (!expNumbers.test(VN.lookup(C)-1)) { + currExps.insert(C); + expNumbers.set(num-1); + } + // Handle unsupported ops } else if (!I->isTerminator()){ VN.lookup_or_add(I); + expNumbers.resize(VN.size()); + availNumbers.resize(VN.size()); + currTemps.insert(I); } if (!I->isTerminator()) - val_insert(currAvail, I); + if (!availNumbers.test(VN.lookup(I)-1)) { + currAvail.insert(I); + availNumbers.set(VN.lookup(I)-1); + } } /// buildsets_anticout - When walking the postdom tree, calculate the ANTIC_OUT @@ -953,10 +993,16 @@ currAvail.insert(availableOut[DI->getIDom()->getBlock()].begin(), availableOut[DI->getIDom()->getBlock()].end()); + BitVector availNumbers(VN.size()); + for (SmallPtrSet::iterator I = currAvail.begin(), + E = currAvail.end(); I != E; ++I) + availNumbers.set(VN.lookup(*I)); + BitVector expNumbers(VN.size()); for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) - buildsets_availout(BI, currAvail, currPhis, currExps, currTemps); + buildsets_availout(BI, currAvail, currPhis, currExps, + currTemps, availNumbers, expNumbers); } From duraid at octopus.com.au Fri Jun 22 03:27:41 2007 From: duraid at octopus.com.au (Duraid Madina) Date: Fri, 22 Jun 2007 03:27:41 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/Passes.h LinkAllCodegenComponents.h Message-ID: <200706220827.l5M8Rfcl016629@zion.cs.uiuc.edu> 
Changes in directory llvm/include/llvm/CodeGen: Passes.h updated: 1.27 -> 1.28 LinkAllCodegenComponents.h updated: 1.5 -> 1.6 --- Log message: check in the BigBlock local register allocator --- Diffs of the changes: (+10 -0) LinkAllCodegenComponents.h | 1 + Passes.h | 9 +++++++++ 2 files changed, 10 insertions(+) Index: llvm/include/llvm/CodeGen/Passes.h diff -u llvm/include/llvm/CodeGen/Passes.h:1.27 llvm/include/llvm/CodeGen/Passes.h:1.28 --- llvm/include/llvm/CodeGen/Passes.h:1.27 Fri Jun 8 12:18:56 2007 +++ llvm/include/llvm/CodeGen/Passes.h Fri Jun 22 03:27:12 2007 @@ -70,6 +70,15 @@ /// FunctionPass *createLocalRegisterAllocator(); + /// BigBlockRegisterAllocation Pass - The BigBlock register allocator + /// munches single basic blocks at a time, like the local register + /// allocator. While the BigBlock allocator is a little slower, and uses + /// somewhat more memory than the local register allocator, it tends to + /// yield the best allocations (of any of the allocators) for blocks that + /// have hundreds or thousands of instructions in sequence. + /// + FunctionPass *createBigBlockRegisterAllocator(); + /// LinearScanRegisterAllocation Pass - This pass implements the linear scan /// register allocation algorithm, a global register allocator. 
/// Index: llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h diff -u llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h:1.5 llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h:1.6 --- llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h:1.5 Sun Dec 17 05:13:13 2006 +++ llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h Fri Jun 22 03:27:12 2007 @@ -30,6 +30,7 @@ (void) llvm::createSimpleRegisterAllocator(); (void) llvm::createLocalRegisterAllocator(); + (void) llvm::createBigBlockRegisterAllocator(); (void) llvm::createLinearScanRegisterAllocator(); (void) llvm::createBFS_DAGScheduler(NULL, NULL, NULL); From duraid at octopus.com.au Fri Jun 22 03:27:41 2007 From: duraid at octopus.com.au (Duraid Madina) Date: Fri, 22 Jun 2007 03:27:41 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/RegAllocBigBlock.cpp Message-ID: <200706220827.l5M8RfGi016628@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen: RegAllocBigBlock.cpp added (r1.1) --- Log message: check in the BigBlock local register allocator --- Diffs of the changes: (+852 -0) RegAllocBigBlock.cpp | 852 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 852 insertions(+) Index: llvm/lib/CodeGen/RegAllocBigBlock.cpp diff -c /dev/null llvm/lib/CodeGen/RegAllocBigBlock.cpp:1.1 *** /dev/null Fri Jun 22 03:27:22 2007 --- llvm/lib/CodeGen/RegAllocBigBlock.cpp Fri Jun 22 03:27:12 2007 *************** *** 0 **** --- 1,852 ---- + //===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===// + // + // The LLVM Compiler Infrastructure + // + // This file was developed by the LLVM research group and is distributed under + // the University of Illinois Open Source License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This register allocator is derived from RegAllocLocal.cpp. Like it, this + // allocator works on one basic block at a time, oblivious to others. 
+ // However, the algorithm used here is suited for long blocks of + // instructions - registers are spilled by greedily choosing those holding + // values that will not be needed for the longest amount of time. This works + // particularly well for blocks with 10 or more times as many instructions + // as machine registers, but can be used for general code. + // + //===----------------------------------------------------------------------===// + // + // TODO: - automagically invoke linearscan for (groups of) small BBs? + // - break ties when picking regs? (probably not worth it in a + // JIT context) + // + //===----------------------------------------------------------------------===// + + #define DEBUG_TYPE "regalloc" + #include "llvm/BasicBlock.h" + #include "llvm/CodeGen/Passes.h" + #include "llvm/CodeGen/MachineFunctionPass.h" + #include "llvm/CodeGen/MachineInstr.h" + #include "llvm/CodeGen/SSARegMap.h" + #include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/LiveVariables.h" + #include "llvm/CodeGen/RegAllocRegistry.h" + #include "llvm/Target/TargetInstrInfo.h" + #include "llvm/Target/TargetMachine.h" + #include "llvm/Support/CommandLine.h" + #include "llvm/Support/Debug.h" + #include "llvm/Support/Compiler.h" + #include "llvm/ADT/IndexedMap.h" + #include "llvm/ADT/DenseMap.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/Statistic.h" + #include + using namespace llvm; + + STATISTIC(NumStores, "Number of stores added"); + STATISTIC(NumLoads , "Number of loads added"); + STATISTIC(NumFolded, "Number of loads/stores folded into instructions"); + + namespace { + static RegisterRegAlloc + bigBlockRegAlloc("bigblock", " Big-block register allocator", + createBigBlockRegisterAllocator); + + struct VRegKeyInfo { + static inline unsigned getEmptyKey() { return -1U; } + static inline unsigned getTombstoneKey() { return -2U; } + static unsigned getHashValue(const unsigned &Key) { return Key; } + }; + + class VISIBILITY_HIDDEN RABigBlock : 
public MachineFunctionPass { + public: + static char ID; + RABigBlock() : MachineFunctionPass((intptr_t)&ID) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const MRegisterInfo *RegInfo; + LiveVariables *LV; + + // InsnTimes - maps machine instructions to their "execute times" + std::map InsnTimes; + + // VRegReadTable - maps VRegs in a BB to the set of times they are read + DenseMap*, VRegKeyInfo> VRegReadTable; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + std::map StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. + IndexedMap Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector PhysRegsUsed; + + // PhysRegsUseOrder - This contains a list of the physical registers that + // currently have a virtual register value in them. This list provides an + // ordering of registers, imposing a reallocation order. This list is only + // used if all registers are allocated and we have to spill one, in which + // case we spill the least recently used register. Entries at the front of + // the list are the least recently used registers, entries at the back are + // the most recently used. 
+ // + std::vector PhysRegsUseOrder; + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + std::vector VirtRegModified; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= MRegisterInfo::FirstVirtualRegister; + if (VirtRegModified.size() <= Reg) VirtRegModified.resize(Reg+1); + VirtRegModified[Reg] = Val; + } + + bool isVirtRegModified(unsigned Reg) const { + assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size() + && "Illegal virtual register!"); + return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister]; + } + + void MarkPhysRegRecentlyUsed(unsigned Reg) { + if (PhysRegsUseOrder.empty() || + PhysRegsUseOrder.back() == Reg) return; // Already most recently used + + for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) + if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) { + unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle + PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); + // Add it to the end of the list + PhysRegsUseOrder.push_back(RegMatch); + if (RegMatch == Reg) + return; // Found an exact match, exit early + } + } + + public: + virtual const char *getPassName() const { + return "BigBlock Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the 
specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + /// FillVRegReadTable - Fill out the table of vreg read times given a BB + void FillVRegReadTable(MachineBasicBlock &MBB); + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. + /// + bool areRegsEqual(unsigned R1, unsigned R2) const { + if (R1 == R2) return true; + for (const unsigned *AliasSet = RegInfo->getAliasSet(R2); + *AliasSet; ++AliasSet) { + if (*AliasSet == R1) return true; + } + return false; + } + + /// getStackSpaceFor - This returns the frame index of the specified virtual + /// register on the stack, allocating space if necessary. + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. 
+ /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// liberatePhysReg - Make sure the specified physical register is available + /// for use. If there is currently a value in it, it is either moved out of + /// the way or spilled to memory. + /// + void liberatePhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, + unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// chooseReg - Pick a physical register to hold the specified + /// virtual register by choosing the one which will be read furthest + /// in the future. + /// + unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + + /// reloadVirtReg - This method transforms the specified specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register. If register pressure is high, and it is + /// possible, it tries to fold the load of the virtual register into the + /// instruction itself. It avoids doing this if register pressure is low to + /// improve the chance that subsequent instructions can use the reloaded + /// value. This method returns the modified instruction. 
+ /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum); + + }; + char RABigBlock::ID = 0; + } + + /// getStackSpaceFor - This allocates space for the specified virtual register + /// to be held on the stack. + int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + std::map::iterator I =StackSlotForVirtReg.lower_bound(VirtReg); + + if (I != StackSlotForVirtReg.end() && I->first == VirtReg) + return I->second; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot... + StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx)); + return FrameIdx; + } + + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void RABigBlock::removePhysReg(unsigned PhysReg) { + PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used + + std::vector::iterator It = + std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg); + if (It != PhysRegsUseOrder.end()) + PhysRegsUseOrder.erase(It); + } + + + /// spillVirtReg - This method spills the value specified by PhysReg into the + /// virtual register slot specified by VirtReg. It then updates the RA data + /// structures to indicate the fact that PhysReg is now available. + /// + void RABigBlock::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + assert(VirtReg && "Spilling a physical register is illegal!" 
+ " Must not have appropriate kill for the register or use exists beyond" + " the intended one."); + DOUT << " Spilling register " << RegInfo->getName(PhysReg) + << " containing %reg" << VirtReg; + if (!isVirtRegModified(VirtReg)) + DOUT << " which has not been modified, so no store necessary!"; + + // Otherwise, there is a virtual register corresponding to this physical + // register. We only need to spill it into its stack slot if it has been + // modified. + if (isVirtRegModified(VirtReg)) { + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DOUT << " to stack slot #" << FrameIndex; + RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC); + ++NumStores; // Update statistics + } + + getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + + DOUT << "\n"; + removePhysReg(PhysReg); + } + + + /// spillPhysReg - This method spills the specified physical register into the + /// virtual register slot associated with it. If OnlyVirtRegs is set to true, + /// then the request is ignored if the physical register does not contain a + /// virtual register. + /// + void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs) { + if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! + assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); + if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) + spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); + } else { + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register. + PhysRegsUsed[*AliasSet] != -2) // If allocatable. 
+ if (PhysRegsUsed[*AliasSet] == 0) { + // This must have been a dead def due to something like this: + // %EAX := + // := op %AL + // No more use of %EAX, %AH, etc. + // %EAX isn't dead upon definition, but %AH is. However %AH isn't + // an operand of definition MI so it's not marked as such. + DOUT << " Register " << RegInfo->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it frame live list\n"; + removePhysReg(*AliasSet); + } else + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } + } + + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; + PhysRegsUseOrder.push_back(PhysReg); // New use of PhysReg + } + + + /// isPhysRegAvailable - Return true if the specified physical register is free + /// and available for use. This also includes checking to see if aliased + /// registers are all free... + /// + bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use? + return false; // Can't use this reg then. + return true; + } + + + //////// FIX THIS: + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. 
+ /// + unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; + } + + + /// liberatePhysReg - Make sure the specified physical register is available for + /// use. If there is currently a value in it, it is either moved out of the way + /// or spilled to memory. + /// + void RABigBlock::liberatePhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &I, + unsigned PhysReg) { + spillPhysReg(MBB, I, PhysReg); + } + + /// chooseReg - Pick a physical register to hold the specified + /// virtual register by choosing the one whose value will be read + /// furthest in the future. + /// + unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg) { + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + // First check to see if we have a free register of the requested type... + unsigned PhysReg = getFreeReg(RC); + + // If we didn't find an unused register, find the one which will be + // read at the most distant point in time. + if (PhysReg == 0) { + unsigned delay=0, longest_delay=0; + SmallVector *ReadTimes; + + unsigned curTime = InsnTimes[I]; + + // for all physical regs in the RC, + for(TargetRegisterClass::iterator pReg = RC->begin(); + pReg != RC->end(); ++pReg) { + // how long until they're read? 
+ if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs + ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]]; + SmallVector::iterator pt = + std::lower_bound(ReadTimes->begin(), + ReadTimes->end(), + curTime); + delay = *pt - curTime; + + if(delay > longest_delay) { + longest_delay = delay; + PhysReg = *pReg; + } + } + } + + assert(PhysReg && "couldn't grab a register from the table?"); + // TODO: assert that RC->contains(PhysReg) / handle aliased registers + + // since we needed to look in the table we need to spill this register. + spillPhysReg(MBB, I, PhysReg); + } + + // assign the vreg to our chosen physical register + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; // and return it + } + + + /// reloadVirtReg - This method transforms an instruction with a virtual + /// register use to one that references a physical register. It does this as + /// follows: + /// + /// 1) If the register is already in a physical register, it uses it. + /// 2) Otherwise, if there is a free physical register, it uses that. + /// 3) Otherwise, it calls chooseReg() to get the physical register + /// holding the most distantly needed value, generating a spill in + /// the process. + /// + /// This method returns the modified instruction. + MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + + // If the virtual register is already available in a physical register, + // just update the instruction and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); + return MI; + } + + // Otherwise, if we have free physical registers available to hold the + // value, use them. + const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); + unsigned PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // we have a free register, so use it. 
+ assignVirtToPhysReg(VirtReg, PhysReg); + } else { // no free registers available. + // try to fold the spill into the instruction + if(MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)) { + ++NumFolded; + // Since we changed the address of MI, make sure to update live variables + // to know that the new instruction has the properties of the old one. + LV->instructionChanged(MI, FMI); + return MBB.insert(MBB.erase(MI), FMI); + } + + // determine which of the physical registers we'll kill off, since we + // couldn't fold. + PhysReg = chooseReg(MBB, MI, VirtReg); + } + + // this virtual register is now unmodified (since we just reloaded it) + markVirtRegModified(VirtReg, false); + + DOUT << " Reloading %reg" << VirtReg << " into " + << RegInfo->getName(PhysReg) << "\n"; + + // Add move instruction(s) + RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + return MI; + } + + /// Fill out the vreg read timetable. Since ReadTime increases + /// monotonically, the individual readtime sets will be sorted + /// in ascending order. + void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII; + unsigned ReadTime; + + for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) { + MachineInstr *MI = MII; + + InsnTimes[MI] = ReadTime; + + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // look for vreg reads.. + if (MO.isRegister() && !MO.isDef() && MO.getReg() && + MRegisterInfo::isVirtualRegister(MO.getReg())) { + // ..and add them to the read table. 
+ if(!VRegReadTable[MO.getReg()]) + VRegReadTable[MO.getReg()] = new SmallVector; + + VRegReadTable[MO.getReg()]->push_back(ReadTime); + } + } + + } + + } + + void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + const TargetInstrInfo &TII = *TM->getInstrInfo(); + + DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + + // If this is the first basic block in the machine function, add live-in + // registers as active. + if (&MBB == &*MF->begin()) { + for (MachineFunction::livein_iterator I = MF->livein_begin(), + E = MF->livein_end(); I != E; ++I) { + unsigned Reg = I->first; + MF->setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + PhysRegsUseOrder.push_back(Reg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUseOrder.push_back(*AliasSet); + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + const TargetInstrDescriptor &TID = TII.get(MI->getOpcode()); + DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; + DOUT << " Regs have values: "; + for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + DOUT << "[" << RegInfo->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + DOUT << "\n"); + + // Loop over the implicit uses, making sure that they are at the head of the + // use order list, so they don't get reallocated. 
+ if (TID.ImplicitUses) { + for (const unsigned *ImplicitUses = TID.ImplicitUses; + *ImplicitUses; ++ImplicitUses) + MarkPhysRegRecentlyUsed(*ImplicitUses); + } + + SmallVector Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isKill()) + Kills.push_back(MO.getReg()); + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + MRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (MRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. 
+ continue; + } + + if (PhysReg) { + DOUT << " Last use of " << RegInfo->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Last use of " + << RegInfo->getName(*AliasSet) + << "[%reg" << VirtReg <<"], removing it from live set\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() && + MRegisterInfo::isPhysicalRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + + MF->setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + PhysRegsUseOrder.push_back(Reg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + PhysRegsUseOrder.push_back(*AliasSet); + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + // Loop over the implicit defs, spilling them as well. 
+ if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + bool IsNonAllocatable = PhysRegsUsed[Reg] == -2; + if (!IsNonAllocatable) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUseOrder.push_back(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->setPhysRegUsed(Reg); + + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + if (!IsNonAllocatable) { + PhysRegsUseOrder.push_back(*AliasSet); + PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now + } + MF->setPhysRegUsed(*AliasSet); + } + } + } + } + + SmallVector DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (MO.isRegister() && MO.isDef() && MO.getReg() && + MRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = chooseReg(MBB, MI, DestVirtReg); + MF->setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + MI->getOperand(i).setReg(DestPhysReg); // Assign the output register + } + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. 
+ // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (MRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } + + if (PhysReg) { + DOUT << " Register " << RegInfo->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it frame live list\n"; + removePhysReg(PhysReg); + for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DOUT << " Register " << RegInfo->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it frame live list\n"; + removePhysReg(*AliasSet); + } + } + } + } + + // Finally, if this is a noop copy instruction, zap it. + unsigned SrcReg, DstReg; + if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) { + LV->removeVirtualRegistersKilled(MI); + LV->removeVirtualRegistersDead(MI); + MBB.erase(MI); + } + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. + for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + + #if 0 + // This checking code is very expensive. 
+ bool AllOk = true; + for (unsigned i = MRegisterInfo::FirstVirtualRegister, + e = MF->getSSARegMap()->getLastVirtReg(); i <= e; ++i) + if (unsigned PR = Virt2PhysRegMap[i]) { + cerr << "Register still mapped: " << i << " -> " << PR << "\n"; + AllOk = false; + } + assert(AllOk && "Virtual registers still in phys regs?"); + #endif + + // Clear any physical register which appear live at the end of the basic + // block, but which do not hold any virtual registers. e.g., the stack + // pointer. + PhysRegsUseOrder.clear(); + } + + /// runOnMachineFunction - Register allocate the whole function + /// + bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) { + DOUT << "Machine Function " << "\n"; + MF = &Fn; + TM = &Fn.getTarget(); + RegInfo = TM->getRegisterInfo(); + LV = &getAnalysis(); + + PhysRegsUsed.assign(RegInfo->getNumRegs(), -1); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = RegInfo->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
+ } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg()); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // fill out the read timetable + FillVRegReadTable(*MBB); + // use it to allocate the BB + AllocateBasicBlock(*MBB); + // clear it + VRegReadTable.clear(); + } + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + Virt2PhysRegMap.clear(); + return true; + } + + FunctionPass *llvm::createBigBlockRegisterAllocator() { + return new RABigBlock(); + } + From rspencer at reidspencer.com Fri Jun 22 05:22:09 2007 From: rspencer at reidspencer.com (Reid Spencer) Date: Fri, 22 Jun 2007 06:22:09 -0400 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/RegAllocBigBlock.cpp In-Reply-To: <200706220827.l5M8RfGi016628@zion.cs.uiuc.edu> References: <200706220827.l5M8RfGi016628@zion.cs.uiuc.edu> Message-ID: <467BA2D1.2050406@reidspencer.com> Hi Duraid .. This looks nifty. Thanks for adding it. Some minor comments ... Reid. 
Duraid Madina wrote: >Changes in directory llvm/lib/CodeGen: > >RegAllocBigBlock.cpp added (r1.1) >--- >Log message: > >check in the BigBlock local register allocator > > > >--- >Diffs of the changes: (+852 -0) > > RegAllocBigBlock.cpp | 852 +++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 852 insertions(+) > > >Index: llvm/lib/CodeGen/RegAllocBigBlock.cpp >diff -c /dev/null llvm/lib/CodeGen/RegAllocBigBlock.cpp:1.1 >*** /dev/null Fri Jun 22 03:27:22 2007 >--- llvm/lib/CodeGen/RegAllocBigBlock.cpp Fri Jun 22 03:27:12 2007 >*************** >*** 0 **** >--- 1,852 ---- >+ //===- RegAllocBigBlock.cpp - A register allocator for large basic blocks -===// >+ // >+ // The LLVM Compiler Infrastructure >+ // >+ // This file was developed by the LLVM research group and is distributed under > > Really? Wasn't the author Duraid? >+ // the University of Illinois Open Source License. See LICENSE.TXT for details. >+ // >+ //===----------------------------------------------------------------------===// >+ // >+ // This register allocator is derived from RegAllocLocal.cpp. Like it, this >+ // allocator works on one basic block at a time, oblivious to others. >+ // However, the algorithm used here is suited for long blocks of >+ // instructions - registers are spilled by greedily choosing those holding >+ // values that will not be needed for the longest amount of time. This works >+ // particularly well for blocks with 10 or more times as many instructions >+ // as machine registers, but can be used for general code. >+ // >+ //===----------------------------------------------------------------------===// >+ // >+ // TODO: - automagically invoke linearscan for (groups of) small BBs? >+ // - break ties when picking regs? (probably not worth it in a >+ // JIT context) >+ // >+ //===----------------------------------------------------------------------===// > > This entire comment block belongs above the RABigBlock class and should be a doxygen comment (3 /). 
At this file level the comment should just say "This file implements the RABigBlock class". >+ >+ #define DEBUG_TYPE "regalloc" > > >+ #include "llvm/BasicBlock.h" >+ #include "llvm/CodeGen/Passes.h" >+ #include "llvm/CodeGen/MachineFunctionPass.h" >+ #include "llvm/CodeGen/MachineInstr.h" >+ #include "llvm/CodeGen/SSARegMap.h" >+ #include "llvm/CodeGen/MachineFrameInfo.h" >+ #include "llvm/CodeGen/LiveVariables.h" >+ #include "llvm/CodeGen/RegAllocRegistry.h" >+ #include "llvm/Target/TargetInstrInfo.h" >+ #include "llvm/Target/TargetMachine.h" >+ #include "llvm/Support/CommandLine.h" >+ #include "llvm/Support/Debug.h" >+ #include "llvm/Support/Compiler.h" >+ #include "llvm/ADT/IndexedMap.h" >+ #include "llvm/ADT/DenseMap.h" >+ #include "llvm/ADT/SmallVector.h" >+ #include "llvm/ADT/Statistic.h" >+ #include >+ using namespace llvm; >+ >+ STATISTIC(NumStores, "Number of stores added"); >+ STATISTIC(NumLoads , "Number of loads added"); >+ STATISTIC(NumFolded, "Number of loads/stores folded into instructions"); > > >+ >+ namespace { >+ static RegisterRegAlloc >+ bigBlockRegAlloc("bigblock", " Big-block register allocator", >+ createBigBlockRegisterAllocator); >+ >+ struct VRegKeyInfo { >+ static inline unsigned getEmptyKey() { return -1U; } >+ static inline unsigned getTombstoneKey() { return -2U; } >+ static unsigned getHashValue(const unsigned &Key) { return Key; } >+ }; > > This struct needs a doxygen comment >+ >+ class VISIBILITY_HIDDEN RABigBlock : public MachineFunctionPass { > > This is where you need to move the big comment at the start of the file to. >+ public: >+ static char ID; > > Please add a doxygen comment for this variable. >+ RABigBlock() : MachineFunctionPass((intptr_t)&ID) {} >+ private: >+ const TargetMachine *TM; >+ MachineFunction *MF; >+ const MRegisterInfo *RegInfo; >+ LiveVariables *LV; > > Please use a ///< comment to describe each of these member variables like you do for the ones that follow. 
>+ >+ // InsnTimes - maps machine instructions to their "execute times" >+ std::map InsnTimes; >+ >+ // VRegReadTable - maps VRegs in a BB to the set of times they are read >+ DenseMap*, VRegKeyInfo> VRegReadTable; >+ >+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these >+ // values are spilled. >+ std::map StackSlotForVirtReg; >+ >+ // Virt2PhysRegMap - This map contains entries for each virtual register >+ // that is currently available in a physical register. >+ IndexedMap Virt2PhysRegMap; > > Please make the above comments into doxygen comments by using /// >+ >+ unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { >+ return Virt2PhysRegMap[VirtReg]; >+ } > > This is mis-placed. Please move this function down to where the other functions are located. >+ >+ // PhysRegsUsed - This array is effectively a map, containing entries for >+ // each physical register that currently has a value (ie, it is in >+ // Virt2PhysRegMap). The value mapped to is the virtual register >+ // corresponding to the physical register (the inverse of the >+ // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned >+ // because it is used by a future instruction, and to -2 if it is not >+ // allocatable. If the entry for a physical register is -1, then the >+ // physical register is "not in the map". >+ // >+ std::vector PhysRegsUsed; >+ >+ // PhysRegsUseOrder - This contains a list of the physical registers that >+ // currently have a virtual register value in them. This list provides an >+ // ordering of registers, imposing a reallocation order. This list is only >+ // used if all registers are allocated and we have to spill one, in which >+ // case we spill the least recently used register. Entries at the front of >+ // the list are the least recently used registers, entries at the back are >+ // the most recently used. 
>+ // >+ std::vector PhysRegsUseOrder; >+ >+ // VirtRegModified - This bitset contains information about which virtual >+ // registers need to be spilled back to memory when their registers are >+ // scavenged. If a virtual register has simply been rematerialized, there >+ // is no reason to spill it to memory when we need the register back. >+ // >+ std::vector VirtRegModified; > > Again, these member vars need to use /// instead of // in order for these comments to be picked up by doxygen. >+ >+ void markVirtRegModified(unsigned Reg, bool Val = true) { > > This function needs a doxygen comment. >+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); >+ Reg -= MRegisterInfo::FirstVirtualRegister; >+ if (VirtRegModified.size() <= Reg) VirtRegModified.resize(Reg+1); > > It helps readability and debugging if you put the body of the if on a separate line from the condition. >+ VirtRegModified[Reg] = Val; >+ } >+ >+ bool isVirtRegModified(unsigned Reg) const { > > This function needs a doxygen comment. >+ assert(MRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); >+ assert(Reg - MRegisterInfo::FirstVirtualRegister < VirtRegModified.size() >+ && "Illegal virtual register!"); >+ return VirtRegModified[Reg - MRegisterInfo::FirstVirtualRegister]; >+ } >+ >+ void MarkPhysRegRecentlyUsed(unsigned Reg) { > > This function needs a doxygen comment. >+ if (PhysRegsUseOrder.empty() || >+ PhysRegsUseOrder.back() == Reg) return; // Already most recently used >+ >+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) >+ if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) { >+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle >+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); >+ // Add it to the end of the list >+ PhysRegsUseOrder.push_back(RegMatch); >+ if (RegMatch == Reg) >+ return; // Found an exact match, exit early >+ } >+ } >+ >+ public: >+ virtual const char *getPassName() const { > > Doxygen comment.
>+ return "BigBlock Register Allocator"; >+ } >+ >+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { > > Doxygen comment. >+ AU.addRequired(); >+ AU.addRequiredID(PHIEliminationID); >+ AU.addRequiredID(TwoAddressInstructionPassID); >+ MachineFunctionPass::getAnalysisUsage(AU); >+ } >+ >+ private: >+ /// runOnMachineFunction - Register allocate the whole function > > This won't get picked up by doxygen. Either it needs to be 2 lines long or you need to use @brief >+ bool runOnMachineFunction(MachineFunction &Fn); >+ >+ /// AllocateBasicBlock - Register allocate the specified basic block. > > Same. >+ void AllocateBasicBlock(MachineBasicBlock &MBB); >+ >+ /// FillVRegReadTable - Fill out the table of vreg read times given a BB > > Same. >+ void FillVRegReadTable(MachineBasicBlock &MBB); >+ >+ /// areRegsEqual - This method returns true if the specified registers are >+ /// related to each other. To do this, it checks to see if they are equal >+ /// or if the first register is in the alias set of the second register. >+ /// >+ bool areRegsEqual(unsigned R1, unsigned R2) const { >+ if (R1 == R2) return true; >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(R2); >+ *AliasSet; ++AliasSet) { >+ if (*AliasSet == R1) return true; >+ } >+ return false; >+ } >+ >+ /// getStackSpaceFor - This returns the frame index of the specified virtual >+ /// register on the stack, allocating space if necessary. >+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); >+ >+ /// removePhysReg - This method marks the specified physical register as no >+ /// longer being in use. >+ /// >+ void removePhysReg(unsigned PhysReg); >+ >+ /// spillVirtReg - This method spills the value specified by PhysReg into >+ /// the virtual register slot specified by VirtReg. It then updates the RA >+ /// data structures to indicate the fact that PhysReg is now available. 
>+ /// >+ void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, >+ unsigned VirtReg, unsigned PhysReg); >+ >+ /// spillPhysReg - This method spills the specified physical register into >+ /// the virtual register slot associated with it. If OnlyVirtRegs is set to >+ /// true, then the request is ignored if the physical register does not >+ /// contain a virtual register. >+ /// >+ void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, >+ unsigned PhysReg, bool OnlyVirtRegs = false); >+ >+ /// assignVirtToPhysReg - This method updates local state so that we know >+ /// that PhysReg is the proper container for VirtReg now. The physical >+ /// register must not be used for anything else when this is called. >+ /// >+ void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); >+ >+ /// liberatePhysReg - Make sure the specified physical register is available >+ /// for use. If there is currently a value in it, it is either moved out of >+ /// the way or spilled to memory. >+ /// >+ void liberatePhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, >+ unsigned PhysReg); >+ >+ /// isPhysRegAvailable - Return true if the specified physical register is >+ /// free and available for use. This also includes checking to see if >+ /// aliased registers are all free... >+ /// >+ bool isPhysRegAvailable(unsigned PhysReg) const; >+ >+ /// getFreeReg - Look to see if there is a free register available in the >+ /// specified register class. If not, return 0. >+ /// >+ unsigned getFreeReg(const TargetRegisterClass *RC); >+ >+ /// chooseReg - Pick a physical register to hold the specified >+ /// virtual register by choosing the one which will be read furthest >+ /// in the future. >+ /// >+ unsigned chooseReg(MachineBasicBlock &MBB, MachineInstr *MI, >+ unsigned VirtReg); >+ >+ /// reloadVirtReg - This method transforms the specified specified virtual >+ /// register use to refer to a physical register. 
This method may do this >+ /// in one of several ways: if the register is available in a physical >+ /// register already, it uses that physical register. If the value is not >+ /// in a physical register, and if there are physical registers available, >+ /// it loads it into a register. If register pressure is high, and it is >+ /// possible, it tries to fold the load of the virtual register into the >+ /// instruction itself. It avoids doing this if register pressure is low to >+ /// improve the chance that subsequent instructions can use the reloaded >+ /// value. This method returns the modified instruction. >+ /// >+ MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, >+ unsigned OpNum); >+ >+ }; >+ char RABigBlock::ID = 0; >+ } >+ >+ /// getStackSpaceFor - This allocates space for the specified virtual register >+ /// to be held on the stack. >+ int RABigBlock::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { >+ // Find the location Reg would belong... >+ std::map::iterator I =StackSlotForVirtReg.lower_bound(VirtReg); >+ >+ if (I != StackSlotForVirtReg.end() && I->first == VirtReg) >+ return I->second; // Already has space allocated? >+ >+ // Allocate a new stack object for this spill location... >+ int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), >+ RC->getAlignment()); >+ >+ // Assign the slot... >+ StackSlotForVirtReg.insert(I, std::make_pair(VirtReg, FrameIdx)); >+ return FrameIdx; >+ } >+ >+ >+ /// removePhysReg - This method marks the specified physical register as no >+ /// longer being in use. 
>+ /// >+ void RABigBlock::removePhysReg(unsigned PhysReg) { >+ PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used >+ >+ std::vector::iterator It = >+ std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg); >+ if (It != PhysRegsUseOrder.end()) >+ PhysRegsUseOrder.erase(It); >+ } >+ >+ >+ /// spillVirtReg - This method spills the value specified by PhysReg into the >+ /// virtual register slot specified by VirtReg. It then updates the RA data >+ /// structures to indicate the fact that PhysReg is now available. >+ /// >+ void RABigBlock::spillVirtReg(MachineBasicBlock &MBB, >+ MachineBasicBlock::iterator I, >+ unsigned VirtReg, unsigned PhysReg) { >+ assert(VirtReg && "Spilling a physical register is illegal!" >+ " Must not have appropriate kill for the register or use exists beyond" >+ " the intended one."); >+ DOUT << " Spilling register " << RegInfo->getName(PhysReg) >+ << " containing %reg" << VirtReg; >+ if (!isVirtRegModified(VirtReg)) >+ DOUT << " which has not been modified, so no store necessary!"; >+ >+ // Otherwise, there is a virtual register corresponding to this physical >+ // register. We only need to spill it into its stack slot if it has been >+ // modified. >+ if (isVirtRegModified(VirtReg)) { >+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); >+ int FrameIndex = getStackSpaceFor(VirtReg, RC); >+ DOUT << " to stack slot #" << FrameIndex; >+ RegInfo->storeRegToStackSlot(MBB, I, PhysReg, FrameIndex, RC); >+ ++NumStores; // Update statistics >+ } >+ >+ getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available >+ >+ DOUT << "\n"; >+ removePhysReg(PhysReg); >+ } >+ >+ >+ /// spillPhysReg - This method spills the specified physical register into the >+ /// virtual register slot associated with it. If OnlyVirtRegs is set to true, >+ /// then the request is ignored if the physical register does not contain a >+ /// virtual register. 
>+ /// >+ void RABigBlock::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, >+ unsigned PhysReg, bool OnlyVirtRegs) { >+ if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! >+ assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); >+ if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) >+ spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); >+ } else { >+ // If the selected register aliases any other registers, we must make >+ // sure that one of the aliases isn't alive. >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); >+ *AliasSet; ++AliasSet) >+ if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register. >+ PhysRegsUsed[*AliasSet] != -2) // If allocatable. >+ if (PhysRegsUsed[*AliasSet] == 0) { >+ // This must have been a dead def due to something like this: >+ // %EAX := >+ // := op %AL >+ // No more use of %EAX, %AH, etc. >+ // %EAX isn't dead upon definition, but %AH is. However %AH isn't >+ // an operand of definition MI so it's not marked as such. >+ DOUT << " Register " << RegInfo->getName(*AliasSet) >+ << " [%reg" << *AliasSet >+ << "] is never used, removing it frame live list\n"; >+ removePhysReg(*AliasSet); >+ } else >+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); >+ } >+ } >+ >+ >+ /// assignVirtToPhysReg - This method updates local state so that we know >+ /// that PhysReg is the proper container for VirtReg now. The physical >+ /// register must not be used for anything else when this is called. >+ /// >+ void RABigBlock::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { >+ assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); >+ // Update information to note the fact that this register was just used, and >+ // it holds VirtReg. 
>+ PhysRegsUsed[PhysReg] = VirtReg; >+ getVirt2PhysRegMapSlot(VirtReg) = PhysReg; >+ PhysRegsUseOrder.push_back(PhysReg); // New use of PhysReg >+ } >+ >+ >+ /// isPhysRegAvailable - Return true if the specified physical register is free >+ /// and available for use. This also includes checking to see if aliased >+ /// registers are all free... >+ /// >+ bool RABigBlock::isPhysRegAvailable(unsigned PhysReg) const { >+ if (PhysRegsUsed[PhysReg] != -1) return false; >+ >+ // If the selected register aliases any other allocated registers, it is >+ // not free! >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); >+ *AliasSet; ++AliasSet) >+ if (PhysRegsUsed[*AliasSet] != -1) // Aliased register in use? >+ return false; // Can't use this reg then. >+ return true; >+ } >+ >+ >+ //////// FIX THIS: > > /// is sufficient. Also, please use "FIXME" or "TODO" as they are more standard for searches and IDE highlighting. >+ /// getFreeReg - Look to see if there is a free register available in the >+ /// specified register class. If not, return 0. >+ /// >+ unsigned RABigBlock::getFreeReg(const TargetRegisterClass *RC) { >+ // Get iterators defining the range of registers that are valid to allocate in >+ // this class, which also specifies the preferred allocation order. >+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); >+ TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); >+ >+ for (; RI != RE; ++RI) >+ if (isPhysRegAvailable(*RI)) { // Is reg unused? >+ assert(*RI != 0 && "Cannot use register!"); >+ return *RI; // Found an unused register! >+ } >+ return 0; >+ } >+ >+ >+ /// liberatePhysReg - Make sure the specified physical register is available for >+ /// use. If there is currently a value in it, it is either moved out of the way >+ /// or spilled to memory. 
>+ /// >+ void RABigBlock::liberatePhysReg(MachineBasicBlock &MBB, >+ MachineBasicBlock::iterator &I, >+ unsigned PhysReg) { >+ spillPhysReg(MBB, I, PhysReg); >+ } >+ >+ /// chooseReg - Pick a physical register to hold the specified >+ /// virtual register by choosing the one whose value will be read >+ /// furthest in the future. >+ /// >+ unsigned RABigBlock::chooseReg(MachineBasicBlock &MBB, MachineInstr *I, >+ unsigned VirtReg) { >+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); >+ // First check to see if we have a free register of the requested type... >+ unsigned PhysReg = getFreeReg(RC); >+ >+ // If we didn't find an unused register, find the one which will be >+ // read at the most distant point in time. >+ if (PhysReg == 0) { >+ unsigned delay=0, longest_delay=0; >+ SmallVector *ReadTimes; >+ >+ unsigned curTime = InsnTimes[I]; >+ >+ // for all physical regs in the RC, >+ for(TargetRegisterClass::iterator pReg = RC->begin(); >+ pReg != RC->end(); ++pReg) { >+ // how long until they're read? >+ if(PhysRegsUsed[*pReg]>0) { // ignore non-allocatable regs >+ ReadTimes = VRegReadTable[PhysRegsUsed[*pReg]]; >+ SmallVector::iterator pt = >+ std::lower_bound(ReadTimes->begin(), >+ ReadTimes->end(), >+ curTime); > > >+ delay = *pt - curTime; >+ >+ if(delay > longest_delay) { >+ longest_delay = delay; >+ PhysReg = *pReg; >+ } >+ } >+ } >+ >+ assert(PhysReg && "couldn't grab a register from the table?"); >+ // TODO: assert that RC->contains(PhysReg) / handle aliased registers >+ >+ // since we needed to look in the table we need to spill this register. >+ spillPhysReg(MBB, I, PhysReg); >+ } >+ >+ // assign the vreg to our chosen physical register >+ assignVirtToPhysReg(VirtReg, PhysReg); >+ return PhysReg; // and return it >+ } >+ >+ >+ /// reloadVirtReg - This method transforms an instruction with a virtual >+ /// register use to one that references a physical register. 
It does this as >+ /// follows: >+ /// >+ /// 1) If the register is already in a physical register, it uses it. >+ /// 2) Otherwise, if there is a free physical register, it uses that. >+ /// 3) Otherwise, it calls chooseReg() to get the physical register >+ /// holding the most distantly needed value, generating a spill in >+ /// the process. >+ /// >+ /// This method returns the modified instruction. >+ MachineInstr *RABigBlock::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, >+ unsigned OpNum) { >+ unsigned VirtReg = MI->getOperand(OpNum).getReg(); >+ >+ // If the virtual register is already available in a physical register, >+ // just update the instruction and return. >+ if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { >+ MI->getOperand(OpNum).setReg(PR); >+ return MI; >+ } >+ >+ // Otherwise, if we have free physical registers available to hold the >+ // value, use them. >+ const TargetRegisterClass *RC = MF->getSSARegMap()->getRegClass(VirtReg); >+ unsigned PhysReg = getFreeReg(RC); >+ int FrameIndex = getStackSpaceFor(VirtReg, RC); >+ >+ if (PhysReg) { // we have a free register, so use it. >+ assignVirtToPhysReg(VirtReg, PhysReg); >+ } else { // no free registers available. >+ // try to fold the spill into the instruction >+ if(MachineInstr* FMI = RegInfo->foldMemoryOperand(MI, OpNum, FrameIndex)) { >+ ++NumFolded; >+ // Since we changed the address of MI, make sure to update live variables >+ // to know that the new instruction has the properties of the old one. >+ LV->instructionChanged(MI, FMI); >+ return MBB.insert(MBB.erase(MI), FMI); >+ } >+ >+ // determine which of the physical registers we'll kill off, since we >+ // couldn't fold. 
>+ PhysReg = chooseReg(MBB, MI, VirtReg); >+ } >+ >+ // this virtual register is now unmodified (since we just reloaded it) >+ markVirtRegModified(VirtReg, false); >+ >+ DOUT << " Reloading %reg" << VirtReg << " into " >+ << RegInfo->getName(PhysReg) << "\n"; >+ >+ // Add move instruction(s) >+ RegInfo->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); >+ ++NumLoads; // Update statistics >+ >+ MF->setPhysRegUsed(PhysReg); >+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register >+ return MI; >+ } >+ >+ /// Fill out the vreg read timetable. Since ReadTime increases >+ /// monotonically, the individual readtime sets will be sorted >+ /// in ascending order. >+ void RABigBlock::FillVRegReadTable(MachineBasicBlock &MBB) { >+ // loop over each instruction >+ MachineBasicBlock::iterator MII; >+ unsigned ReadTime; >+ >+ for(ReadTime=0, MII = MBB.begin(); MII != MBB.end(); ++ReadTime, ++MII) { >+ MachineInstr *MI = MII; >+ >+ InsnTimes[MI] = ReadTime; >+ >+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ // look for vreg reads.. >+ if (MO.isRegister() && !MO.isDef() && MO.getReg() && >+ MRegisterInfo::isVirtualRegister(MO.getReg())) { >+ // ..and add them to the read table. >+ if(!VRegReadTable[MO.getReg()]) >+ VRegReadTable[MO.getReg()] = new SmallVector; >+ >+ VRegReadTable[MO.getReg()]->push_back(ReadTime); >+ } >+ } >+ >+ } >+ >+ } >+ >+ void RABigBlock::AllocateBasicBlock(MachineBasicBlock &MBB) { >+ // loop over each instruction >+ MachineBasicBlock::iterator MII = MBB.begin(); >+ const TargetInstrInfo &TII = *TM->getInstrInfo(); >+ >+ DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); >+ if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); >+ >+ // If this is the first basic block in the machine function, add live-in >+ // registers as active. 
>+ if (&MBB == &*MF->begin()) { >+ for (MachineFunction::livein_iterator I = MF->livein_begin(), >+ E = MF->livein_end(); I != E; ++I) { >+ unsigned Reg = I->first; >+ MF->setPhysRegUsed(Reg); >+ PhysRegsUsed[Reg] = 0; // It is free and reserved now >+ PhysRegsUseOrder.push_back(Reg); >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); >+ *AliasSet; ++AliasSet) { >+ if (PhysRegsUsed[*AliasSet] != -2) { >+ PhysRegsUseOrder.push_back(*AliasSet); >+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now >+ MF->setPhysRegUsed(*AliasSet); >+ } >+ } >+ } >+ } >+ >+ // Otherwise, sequentially allocate each instruction in the MBB. >+ while (MII != MBB.end()) { >+ MachineInstr *MI = MII++; >+ const TargetInstrDescriptor &TID = TII.get(MI->getOpcode()); >+ DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; >+ DOUT << " Regs have values: "; >+ for (unsigned i = 0; i != RegInfo->getNumRegs(); ++i) >+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) >+ DOUT << "[" << RegInfo->getName(i) >+ << ",%reg" << PhysRegsUsed[i] << "] "; >+ DOUT << "\n"); >+ >+ // Loop over the implicit uses, making sure that they are at the head of the >+ // use order list, so they don't get reallocated. >+ if (TID.ImplicitUses) { >+ for (const unsigned *ImplicitUses = TID.ImplicitUses; >+ *ImplicitUses; ++ImplicitUses) >+ MarkPhysRegRecentlyUsed(*ImplicitUses); >+ } >+ >+ SmallVector Kills; >+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ if (MO.isRegister() && MO.isKill()) >+ Kills.push_back(MO.getReg()); >+ } >+ >+ // Get the used operands into registers. This has the potential to spill >+ // incoming values if we are out of registers. Note that we completely >+ // ignore physical register uses here. We assume that if an explicit >+ // physical register is referenced by the instruction, that it is guaranteed >+ // to be live-in, or the input is badly hosed. 
>+ // >+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ // here we are looking for only used operands (never def&use) >+ if (MO.isRegister() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && >+ MRegisterInfo::isVirtualRegister(MO.getReg())) >+ MI = reloadVirtReg(MBB, MI, i); >+ } >+ >+ // If this instruction is the last user of this register, kill the >+ // value, freeing the register being used, so it doesn't need to be >+ // spilled to memory. >+ // >+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) { >+ unsigned VirtReg = Kills[i]; >+ unsigned PhysReg = VirtReg; >+ if (MRegisterInfo::isVirtualRegister(VirtReg)) { >+ // If the virtual register was never materialized into a register, it >+ // might not be in the map, but it won't hurt to zero it out anyway. >+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); >+ PhysReg = PhysRegSlot; >+ PhysRegSlot = 0; >+ } else if (PhysRegsUsed[PhysReg] == -2) { >+ // Unallocatable register dead, ignore. >+ continue; >+ } >+ >+ if (PhysReg) { >+ DOUT << " Last use of " << RegInfo->getName(PhysReg) >+ << "[%reg" << VirtReg <<"], removing it from live set\n"; >+ removePhysReg(PhysReg); >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); >+ *AliasSet; ++AliasSet) { >+ if (PhysRegsUsed[*AliasSet] != -2) { >+ DOUT << " Last use of " >+ << RegInfo->getName(*AliasSet) >+ << "[%reg" << VirtReg <<"], removing it from live set\n"; >+ removePhysReg(*AliasSet); >+ } >+ } >+ } >+ } >+ >+ // Loop over all of the operands of the instruction, spilling registers that >+ // are defined, and marking explicit destinations in the PhysRegsUsed map. 
>+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ if (MO.isRegister() && MO.isDef() && !MO.isImplicit() && MO.getReg() && >+ MRegisterInfo::isPhysicalRegister(MO.getReg())) { >+ unsigned Reg = MO.getReg(); >+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. >+ >+ MF->setPhysRegUsed(Reg); >+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg >+ PhysRegsUsed[Reg] = 0; // It is free and reserved now >+ PhysRegsUseOrder.push_back(Reg); >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); >+ *AliasSet; ++AliasSet) { >+ if (PhysRegsUsed[*AliasSet] != -2) { >+ PhysRegsUseOrder.push_back(*AliasSet); >+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now >+ MF->setPhysRegUsed(*AliasSet); >+ } >+ } >+ } >+ } >+ >+ // Loop over the implicit defs, spilling them as well. >+ if (TID.ImplicitDefs) { >+ for (const unsigned *ImplicitDefs = TID.ImplicitDefs; >+ *ImplicitDefs; ++ImplicitDefs) { >+ unsigned Reg = *ImplicitDefs; >+ bool IsNonAllocatable = PhysRegsUsed[Reg] == -2; >+ if (!IsNonAllocatable) { >+ spillPhysReg(MBB, MI, Reg, true); >+ PhysRegsUseOrder.push_back(Reg); >+ PhysRegsUsed[Reg] = 0; // It is free and reserved now >+ } >+ MF->setPhysRegUsed(Reg); >+ >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); >+ *AliasSet; ++AliasSet) { >+ if (PhysRegsUsed[*AliasSet] != -2) { >+ if (!IsNonAllocatable) { >+ PhysRegsUseOrder.push_back(*AliasSet); >+ PhysRegsUsed[*AliasSet] = 0; // It is free and reserved now >+ } >+ MF->setPhysRegUsed(*AliasSet); >+ } >+ } >+ } >+ } >+ >+ SmallVector DeadDefs; >+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ if (MO.isRegister() && MO.isDead()) >+ DeadDefs.push_back(MO.getReg()); >+ } >+ >+ // Okay, we have allocated all of the source operands and spilled any values >+ // that would be destroyed by defs of this instruction. 
Loop over the >+ // explicit defs and assign them to a register, spilling incoming values if >+ // we need to scavenge a register. >+ // >+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { >+ MachineOperand& MO = MI->getOperand(i); >+ if (MO.isRegister() && MO.isDef() && MO.getReg() && >+ MRegisterInfo::isVirtualRegister(MO.getReg())) { >+ unsigned DestVirtReg = MO.getReg(); >+ unsigned DestPhysReg; >+ >+ // If DestVirtReg already has a value, use it. >+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) >+ DestPhysReg = chooseReg(MBB, MI, DestVirtReg); >+ MF->setPhysRegUsed(DestPhysReg); >+ markVirtRegModified(DestVirtReg); >+ MI->getOperand(i).setReg(DestPhysReg); // Assign the output register >+ } >+ } >+ >+ // If this instruction defines any registers that are immediately dead, >+ // kill them now. >+ // >+ for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { >+ unsigned VirtReg = DeadDefs[i]; >+ unsigned PhysReg = VirtReg; >+ if (MRegisterInfo::isVirtualRegister(VirtReg)) { >+ unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); >+ PhysReg = PhysRegSlot; >+ assert(PhysReg != 0); >+ PhysRegSlot = 0; >+ } else if (PhysRegsUsed[PhysReg] == -2) { >+ // Unallocatable register dead, ignore. >+ continue; >+ } >+ >+ if (PhysReg) { >+ DOUT << " Register " << RegInfo->getName(PhysReg) >+ << " [%reg" << VirtReg >+ << "] is never used, removing it frame live list\n"; >+ removePhysReg(PhysReg); >+ for (const unsigned *AliasSet = RegInfo->getAliasSet(PhysReg); >+ *AliasSet; ++AliasSet) { >+ if (PhysRegsUsed[*AliasSet] != -2) { >+ DOUT << " Register " << RegInfo->getName(*AliasSet) >+ << " [%reg" << *AliasSet >+ << "] is never used, removing it frame live list\n"; >+ removePhysReg(*AliasSet); >+ } >+ } >+ } >+ } >+ >+ // Finally, if this is a noop copy instruction, zap it. 
>+ unsigned SrcReg, DstReg; >+ if (TII.isMoveInstr(*MI, SrcReg, DstReg) && SrcReg == DstReg) { >+ LV->removeVirtualRegistersKilled(MI); >+ LV->removeVirtualRegistersDead(MI); >+ MBB.erase(MI); >+ } >+ } >+ >+ MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); >+ >+ // Spill all physical registers holding virtual registers now. >+ for (unsigned i = 0, e = RegInfo->getNumRegs(); i != e; ++i) >+ if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) >+ if (unsigned VirtReg = PhysRegsUsed[i]) >+ spillVirtReg(MBB, MI, VirtReg, i); >+ else >+ removePhysReg(i); >+ >+ #if 0 >+ // This checking code is very expensive. >+ bool AllOk = true; >+ for (unsigned i = MRegisterInfo::FirstVirtualRegister, >+ e = MF->getSSARegMap()->getLastVirtReg(); i <= e; ++i) >+ if (unsigned PR = Virt2PhysRegMap[i]) { >+ cerr << "Register still mapped: " << i << " -> " << PR << "\n"; >+ AllOk = false; >+ } >+ assert(AllOk && "Virtual registers still in phys regs?"); >+ #endif > > Really need to keep this #if 0 section? >+ >+ // Clear any physical register which appear live at the end of the basic >+ // block, but which do not hold any virtual registers. e.g., the stack >+ // pointer. >+ PhysRegsUseOrder.clear(); >+ } >+ >+ /// runOnMachineFunction - Register allocate the whole function >+ /// >+ bool RABigBlock::runOnMachineFunction(MachineFunction &Fn) { >+ DOUT << "Machine Function " << "\n"; >+ MF = &Fn; >+ TM = &Fn.getTarget(); >+ RegInfo = TM->getRegisterInfo(); >+ LV = &getAnalysis(); >+ >+ PhysRegsUsed.assign(RegInfo->getNumRegs(), -1); >+ >+ // At various places we want to efficiently check to see whether a register >+ // is allocatable. To handle this, we mark all unallocatable registers as >+ // being pinned down, permanently. >+ { >+ BitVector Allocable = RegInfo->getAllocatableSet(Fn); >+ for (unsigned i = 0, e = Allocable.size(); i != e; ++i) >+ if (!Allocable[i]) >+ PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
>+ } >+ >+ // initialize the virtual->physical register map to have a 'null' >+ // mapping for all virtual registers >+ Virt2PhysRegMap.grow(MF->getSSARegMap()->getLastVirtReg()); >+ >+ // Loop over all of the basic blocks, eliminating virtual register references >+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); >+ MBB != MBBe; ++MBB) { >+ // fill out the read timetable >+ FillVRegReadTable(*MBB); >+ // use it to allocate the BB >+ AllocateBasicBlock(*MBB); >+ // clear it >+ VRegReadTable.clear(); >+ } >+ >+ StackSlotForVirtReg.clear(); >+ PhysRegsUsed.clear(); >+ VirtRegModified.clear(); >+ Virt2PhysRegMap.clear(); >+ return true; >+ } >+ >+ FunctionPass *llvm::createBigBlockRegisterAllocator() { >+ return new RABigBlock(); >+ } >+ > > > >_______________________________________________ >llvm-commits mailing list >llvm-commits at cs.uiuc.edu >http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > > From djg at cray.com Fri Jun 22 09:59:42 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Message-ID: <200706221459.l5MExgg1028677@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Alpha: AlphaISelDAGToDAG.cpp updated: 1.70 -> 1.71 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. 
--- Diffs of the changes: (+1 -1) AlphaISelDAGToDAG.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp diff -u llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.70 llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.71 --- llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp:1.70 Tue Jun 19 09:13:56 2007 +++ llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp Fri Jun 22 09:59:07 2007 @@ -90,7 +90,7 @@ // see if the missing bits (0x1000) are already known zero if not, the zap // isn't okay to do, as it won't clear all the required bits. if (BitsToCheck && - !getTargetLowering().MaskedValueIsZero(LHS, BitsToCheck)) + !CurDAG->MaskedValueIsZero(LHS, BitsToCheck)) return 0; return Result; From djg at cray.com Fri Jun 22 09:59:45 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:45 -0500 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp LegalizeDAG.cpp SelectionDAG.cpp SelectionDAGISel.cpp TargetLowering.cpp Message-ID: <200706221459.l5MExji1028691@zion.cs.uiuc.edu> Changes in directory llvm/lib/CodeGen/SelectionDAG: DAGCombiner.cpp updated: 1.314 -> 1.315 LegalizeDAG.cpp updated: 1.499 -> 1.500 SelectionDAG.cpp updated: 1.408 -> 1.409 SelectionDAGISel.cpp updated: 1.467 -> 1.468 TargetLowering.cpp updated: 1.121 -> 1.122 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. 
--- Diffs of the changes: (+582 -581) DAGCombiner.cpp | 46 ++-- LegalizeDAG.cpp | 6 SelectionDAG.cpp | 546 +++++++++++++++++++++++++++++++++++++++++++++++++ SelectionDAGISel.cpp | 4 TargetLowering.cpp | 561 --------------------------------------------------- 5 files changed, 582 insertions(+), 581 deletions(-) Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp diff -u llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.314 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.315 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1.314 Thu Jun 21 02:39:16 2007 +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Jun 22 09:59:07 2007 @@ -896,9 +896,9 @@ uint64_t LHSZero, LHSOne; uint64_t RHSZero, RHSOne; uint64_t Mask = MVT::getIntVTBitMask(VT); - TLI.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero) { - TLI.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -957,9 +957,9 @@ uint64_t LHSZero, LHSOne; uint64_t RHSZero, RHSOne; uint64_t Mask = MVT::getIntVTBitMask(VT); - TLI.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero) { - TLI.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -1120,8 +1120,8 @@ // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. 
Handles (X&15) /s 4 -> X&15 >> 2 uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); - if (TLI.MaskedValueIsZero(N1, SignBit) && - TLI.MaskedValueIsZero(N0, SignBit)) + if (DAG.MaskedValueIsZero(N1, SignBit) && + DAG.MaskedValueIsZero(N0, SignBit)) return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1); // fold (sdiv X, pow2) -> simple ops after legalize if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && @@ -1214,8 +1214,8 @@ // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1); - if (TLI.MaskedValueIsZero(N1, SignBit) && - TLI.MaskedValueIsZero(N0, SignBit)) + if (DAG.MaskedValueIsZero(N1, SignBit) && + DAG.MaskedValueIsZero(N0, SignBit)) return DAG.getNode(ISD::UREM, VT, N0, N1); // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on @@ -1357,7 +1357,7 @@ if (N1C && N1C->isAllOnesValue()) return N0; // if (and x, c) is known to be zero, return 0 - if (N1C && TLI.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) + if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) return DAG.getConstant(0, VT); // reassociate and SDOperand RAND = ReassociateOps(ISD::AND, N0, N1); @@ -1371,7 +1371,7 @@ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType()); - if (TLI.MaskedValueIsZero(N0.getOperand(0), + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~N1C->getValue() & InMask)) { SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(), N0.getOperand(0)); @@ -1442,7 +1442,7 @@ MVT::ValueType EVT = LN0->getLoadedVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- if (TLI.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && + if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) { SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), @@ -1461,7 +1461,7 @@ MVT::ValueType EVT = LN0->getLoadedVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - if (TLI.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && + if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) && (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) { SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), @@ -1542,7 +1542,7 @@ return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && - TLI.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits)))) + DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits)))) return N1; // reassociate or SDOperand ROR = ReassociateOps(ISD::OR, N0, N1); @@ -1611,8 +1611,8 @@ uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue(); uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue(); - if (TLI.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - TLI.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0)); return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT)); } @@ -1914,7 +1914,7 @@ if (N1C && N1C->isNullValue()) return N0; // if (shl x, c) is known to be zero, return 0 - if (TLI.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) + if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT))) return DAG.getConstant(0, VT); if (N1C &&
SimplifyDemandedBits(SDOperand(N, 0))) return SDOperand(N, 0); @@ -2005,7 +2005,7 @@ // If the sign bit is known to be zero, switch this to a SRL. - if (TLI.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT))) + if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT))) return DAG.getNode(ISD::SRL, VT, N0, N1); return SDOperand(); } @@ -2031,7 +2031,7 @@ if (N1C && N1C->isNullValue()) return N0; // if (srl x, c) is known to be zero, return 0 - if (N1C && TLI.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits))) + if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits))) return DAG.getConstant(0, VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2) @@ -2068,7 +2068,7 @@ if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) { uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT); - TLI.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -2270,7 +2270,7 @@ unsigned OpBits = MVT::getSizeInBits(Op.getValueType()); unsigned MidBits = MVT::getSizeInBits(N0.getValueType()); unsigned DestBits = MVT::getSizeInBits(VT); - unsigned NumSignBits = TLI.ComputeNumSignBits(Op); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign @@ -2634,7 +2634,7 @@ return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (TLI.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -2644,7 +2644,7 @@ } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 
- if (TLI.MaskedValueIsZero(N0, 1ULL << (EVTBits-1))) + if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1))) return DAG.getZeroExtendInReg(N0, EVT); // fold operands of sext_in_reg based on knowledge that the top bits are not @@ -2666,7 +2666,7 @@ if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. - unsigned InSignBits = TLI.ComputeNumSignBits(N0.getOperand(0)); + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1)); } Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.499 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.500 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:1.499 Thu Jun 21 09:48:26 2007 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Fri Jun 22 09:59:07 2007 @@ -1449,7 +1449,7 @@ // The top bits of the promoted condition are not necessarily zero, ensure // that the value is properly zero extended. - if (!TLI.MaskedValueIsZero(Tmp2, + if (!DAG.MaskedValueIsZero(Tmp2, MVT::getIntVTBitMask(Tmp2.getValueType())^1)) Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1); break; @@ -2041,7 +2041,7 @@ case Promote: Tmp1 = PromoteOp(Node->getOperand(0)); // Promote the condition. // Make sure the condition is either zero or one. - if (!TLI.MaskedValueIsZero(Tmp1, + if (!DAG.MaskedValueIsZero(Tmp1, MVT::getIntVTBitMask(Tmp1.getValueType())^1)) Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1); break; @@ -4209,7 +4209,7 @@ // Okay, the shift amount isn't constant. However, if we can tell that it is // >= 32 or < 32, we can still simplify it, without knowing the actual value. 
uint64_t Mask = NVTBits, KnownZero, KnownOne; - TLI.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne); // If we know that the high bit of the shift amount is one, then we can do // this as a couple of simple shifts. Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.408 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.409 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:1.408 Tue Jun 19 09:13:56 2007 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri Jun 22 09:59:07 2007 @@ -936,6 +936,552 @@ return SDOperand(); } +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask, + unsigned Depth) const { + // The masks are not wide enough to represent this type! Should use APInt. + if (Op.getValueType() == MVT::i128) + return false; + + uint64_t KnownZero, KnownOne; + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bitsets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask, + uint64_t &KnownZero, uint64_t &KnownOne, + unsigned Depth) const { + KnownZero = KnownOne = 0; // Don't know anything. + if (Depth == 6 || Mask == 0) + return; // Limit search depth. + + // The masks are not wide enough to represent this type! Should use APInt. 
+ if (Op.getValueType() == MVT::i128) + return; + + uint64_t KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownZero; + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + case ISD::OR: + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + Mask &= ~KnownOne; + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + case ISD::XOR: { + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case ISD::SELECT: + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SELECT_CC: + ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) + KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); + return; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= SA->getValue(); + KnownOne <<= SA->getValue(); + KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero.
+ } + return; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + + uint64_t TypeMask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= MVT::getSizeInBits(VT)-ShAmt; + KnownZero |= HighBits; // High bits known zero. + } + return; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + MVT::ValueType VT = Op.getValueType(); + unsigned ShAmt = SA->getValue(); + + // Compute the new bits that are at the top now. + uint64_t TypeMask = MVT::getIntVTBitMask(VT); + + uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + uint64_t HighBits = (1ULL << ShAmt)-1; + HighBits <<= MVT::getSizeInBits(VT) - ShAmt; + if (HighBits & Mask) + InDemandedMask |= MVT::getIntVTSignBit(VT); + + ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero &= TypeMask; + KnownOne &= TypeMask; + KnownZero >>= ShAmt; + KnownOne >>= ShAmt; + + // Handle the sign bits. + uint64_t SignBit = MVT::getIntVTSignBit(VT); + SignBit >>= ShAmt; // Adjust to where it is now in the mask. + + if (KnownZero & SignBit) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne & SignBit) { + KnownOne |= HighBits; // New bits are known one. + } + } + return; + case ISD::SIGN_EXTEND_INREG: { + MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension.
Compute the demanded bits in the result that are not + // present in the input. + uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; + + uint64_t InSignBit = MVT::getIntVTSignBit(EVT); + int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + if (NewBits) + InputDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (KnownZero & InSignBit) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + return; + } + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: { + MVT::ValueType VT = Op.getValueType(); + unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; + KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); + KnownOne = 0; + return; + } + case ISD::LOAD: { + if (ISD::isZEXTLoad(Op.Val)) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + MVT::ValueType VT = LD->getLoadedVT(); + KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; + } + return; + } + case ISD::ZERO_EXTEND: { + uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); + uint64_t NewBits = (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= NewBits & Mask; + KnownOne &= ~NewBits; + return; + } + case ISD::SIGN_EXTEND: { + MVT::ValueType InVT = Op.getOperand(0).getValueType(); + unsigned InBits = MVT::getSizeInBits(InVT); + uint64_t InMask = MVT::getIntVTBitMask(InVT); + uint64_t InSignBit = 1ULL << (InBits-1); + uint64_t NewBits = (~InMask) & Mask; + uint64_t
InDemandedBits = Mask & InMask; + + // If any of the sign extended bits are demanded, we know that the sign + // bit is demanded. + if (NewBits & Mask) + InDemandedBits |= InSignBit; + + ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, Depth+1); + // If the sign bit is known zero or one, the top bits match. + if (KnownZero & InSignBit) { + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne & InSignBit) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + return; + } + case ISD::ANY_EXTEND: { + MVT::ValueType VT = Op.getOperand(0).getValueType(); + ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), + KnownZero, KnownOne, Depth+1); + return; + } + case ISD::TRUNCATE: { + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); + KnownZero &= OutMask; + KnownOne &= OutMask; + break; + } + case ISD::AssertZext: { + MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + uint64_t InMask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, + KnownOne, Depth+1); + KnownZero |= (~InMask) & Mask; + return; + } + case ISD::ADD: { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear.
+ uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), + CountTrailingZeros_64(~KnownZero2)); + + KnownZero = (1ULL << KnownZeroOut) - 1; + KnownOne = 0; + return; + } + case ISD::SUB: { + ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + if (!CLHS) return; + + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + MVT::ValueType VT = CLHS->getValueType(0); + if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear + unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); + uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit + MaskV = ~MaskV & MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the output + // top bits are zero, because we now know that the output is from [0-C]. + if ((KnownZero & MaskV) == MaskV) { + unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); + KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. + KnownOne = 0; // No one bits known. + } else { + KnownZero = KnownOne = 0; // Otherwise, nothing known. + } + } + return; + } + default: + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + } + return; + } +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ + MVT::ValueType VT = Op.getValueType(); + assert(MVT::isInteger(VT) && "Invalid VT!"); + unsigned VTBits = MVT::getSizeInBits(VT); + unsigned Tmp, Tmp2; + + if (Depth == 6) + return 1; // Limit search depth. + + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + return VTBits-Tmp; + + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); + // If negative, invert the bits, then look at it. + if (Val & MVT::getIntVTSignBit(VT)) + Val = ~Val; + + // Shift the bits so they are the leading bits in the int64_t. + Val <<= 64-VTBits; + + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::min(VTBits, CountLeadingZeros_64(Val)); + } + + case ISD::SIGN_EXTEND: + Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getValue() >= VTBits || // Bad shift. + C->getValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp, Tmp2); + + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. + if (TLI.getSetCCResultContents() == + TargetLowering::ZeroOrNegativeOneSetCCResult) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CRHS->isAllOnesValue()) { + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set.
+ if ((KnownZero|1) == Mask) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero & MVT::getIntVTSignBit(VT)) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + break; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0))) + if (CLHS->getValue() == 0) { + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero|1) == Mask) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero & MVT::getIntVTSignBit(VT)) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + break; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Handle LOADX separately here. EXTLOAD case will fallthrough. 
+ if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast(Op); + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = MVT::getSizeInBits(LD->getLoadedVT()); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = MVT::getSizeInBits(LD->getLoadedVT()); + return VTBits-Tmp; + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + if (NumBits > 1) return NumBits; + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + uint64_t KnownZero, KnownOne; + uint64_t Mask = MVT::getIntVTBitMask(VT); + ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + + uint64_t SignBit = MVT::getIntVTSignBit(VT); + if (KnownZero & SignBit) { // SignBit is 0 + Mask = KnownZero; + } else if (KnownOne & SignBit) { // SignBit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return 1; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask ^= ~0ULL; + Mask <<= 64-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::min(VTBits, CountLeadingZeros_64(Mask)); +} + /// getNode - Gets or creates the specified node. 
/// Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp diff -u llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.467 llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.468 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp:1.467 Thu Jun 21 09:42:22 2007 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri Jun 22 09:59:07 2007 @@ -5026,7 +5026,7 @@ // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. uint64_t NeededMask = DesiredMask & ~ActualMask; - if (getTargetLowering().MaskedValueIsZero(LHS, NeededMask)) + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) return true; // TODO: check to see if missing bits are just not demanded. @@ -5057,7 +5057,7 @@ uint64_t NeededMask = DesiredMask & ~ActualMask; uint64_t KnownZero, KnownOne; - getTargetLowering().ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp diff -u llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.121 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.122 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp:1.121 Thu Jun 21 09:48:26 2007 +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Jun 22 09:59:07 2007 @@ -375,7 +375,7 @@ if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -404,8 +404,8 @@ // the RHS. 
if (ConstantSDNode *RHSC = dyn_cast(Op.getOperand(1))) { uint64_t LHSZero, LHSOne; - ComputeMaskedBits(Op.getOperand(0), DemandedMask, - LHSZero, LHSOne, Depth+1); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), DemandedMask, + LHSZero, LHSOne, Depth+1); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -862,7 +862,7 @@ case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Just use ComputeMaskedBits to compute output bits. - ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); break; } @@ -874,337 +874,6 @@ return false; } -/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use -/// this predicate to simplify operations downstream. Mask is known to be zero -/// for bits that V cannot have. -bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask, - unsigned Depth) const { - // The masks are not wide enough to represent this type! Should use APInt. - if (Op.getValueType() == MVT::i128) - return false; - - uint64_t KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - return (KnownZero & Mask) == Mask; -} - -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bitsets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. -void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask, - uint64_t &KnownZero, uint64_t &KnownOne, - unsigned Depth) const { - KnownZero = KnownOne = 0; // Don't know anything. - if (Depth == 6 || Mask == 0) - return; // Limit search depth. - - // The masks are not wide enough to represent this type! Should use APInt. 
- if (Op.getValueType() == MVT::i128) - return; - - uint64_t KnownZero2, KnownOne2; - - switch (Op.getOpcode()) { - case ISD::Constant: - // We know all of the bits for a constant! - KnownOne = cast(Op)->getValue() & Mask; - KnownZero = ~KnownOne & Mask; - return; - case ISD::AND: - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownZero; - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; - return; - case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - Mask &= ~KnownOne; - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; - // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; - return; - case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are known if clear or set in both the LHS & RHS. - uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); - // Output known-1 are known to be set if set in only one of the LHS, RHS. 
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; - return; - } - case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; - return; - case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; - return; - case ISD::SETCC: - // If we know the result of a setcc has the top bits zero, use this info. - if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) - KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); - return; - case ISD::SHL: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { - ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), - KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero <<= SA->getValue(); - KnownOne <<= SA->getValue(); - KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 
- } - return; - case ISD::SRL: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { - MVT::ValueType VT = Op.getValueType(); - unsigned ShAmt = SA->getValue(); - - uint64_t TypeMask = MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, - KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero &= TypeMask; - KnownOne &= TypeMask; - KnownZero >>= ShAmt; - KnownOne >>= ShAmt; - - uint64_t HighBits = (1ULL << ShAmt)-1; - HighBits <<= MVT::getSizeInBits(VT)-ShAmt; - KnownZero |= HighBits; // High bits known zero. - } - return; - case ISD::SRA: - if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { - MVT::ValueType VT = Op.getValueType(); - unsigned ShAmt = SA->getValue(); - - // Compute the new bits that are at the top now. - uint64_t TypeMask = MVT::getIntVTBitMask(VT); - - uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; - // If any of the demanded bits are produced by the sign extension, we also - // demand the input sign bit. - uint64_t HighBits = (1ULL << ShAmt)-1; - HighBits <<= MVT::getSizeInBits(VT) - ShAmt; - if (HighBits & Mask) - InDemandedMask |= MVT::getIntVTSignBit(VT); - - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero &= TypeMask; - KnownOne &= TypeMask; - KnownZero >>= ShAmt; - KnownOne >>= ShAmt; - - // Handle the sign bits. - uint64_t SignBit = MVT::getIntVTSignBit(VT); - SignBit >>= ShAmt; // Adjust to where it is now in the mask. - - if (KnownZero & SignBit) { - KnownZero |= HighBits; // New bits are known zero. - } else if (KnownOne & SignBit) { - KnownOne |= HighBits; // New bits are known one. - } - } - return; - case ISD::SIGN_EXTEND_INREG: { - MVT::ValueType EVT = cast(Op.getOperand(1))->getVT(); - - // Sign extension. 
Compute the demanded bits in the result that are not - // present in the input. - uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; - - uint64_t InSignBit = MVT::getIntVTSignBit(EVT); - int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); - - // If the sign extended bits are demanded, we know that the sign - // bit is demanded. - if (NewBits) - InputDemandedBits |= InSignBit; - - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. - if (KnownZero & InSignBit) { // Input sign bit known clear - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne & InSignBit) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; - } - return; - } - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: { - MVT::ValueType VT = Op.getValueType(); - unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; - KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); - KnownOne = 0; - return; - } - case ISD::LOAD: { - if (ISD::isZEXTLoad(Op.Val)) { - LoadSDNode *LD = cast(Op); - MVT::ValueType VT = LD->getLoadedVT(); - KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; - } - return; - } - case ISD::ZERO_EXTEND: { - uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); - uint64_t NewBits = (~InMask) & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= NewBits & Mask; - KnownOne &= ~NewBits; - return; - } - case ISD::SIGN_EXTEND: { - MVT::ValueType InVT = Op.getOperand(0).getValueType(); - unsigned InBits = MVT::getSizeInBits(InVT); - uint64_t InMask = MVT::getIntVTBitMask(InVT); - uint64_t InSignBit = 1ULL << (InBits-1); - uint64_t NewBits = (~InMask) & Mask; - uint64_t 
InDemandedBits = Mask & InMask; - - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. - if (NewBits & Mask) - InDemandedBits |= InSignBit; - - ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, - KnownOne, Depth+1); - // If the sign bit is known zero or one, the top bits match. - if (KnownZero & InSignBit) { - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne & InSignBit) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; - } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; - } - return; - } - case ISD::ANY_EXTEND: { - MVT::ValueType VT = Op.getOperand(0).getValueType(); - ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), - KnownZero, KnownOne, Depth+1); - return; - } - case ISD::TRUNCATE: { - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); - KnownZero &= OutMask; - KnownOne &= OutMask; - break; - } - case ISD::AssertZext: { - MVT::ValueType VT = cast(Op.getOperand(1))->getVT(); - uint64_t InMask = MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; - return; - } - case ISD::ADD: { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - // Output known-0 bits are known if clear or set in both the low clear bits - // common to both LHS & RHS. For example, 8+(X<<3) is known to have the - // low 3 bits clear. 
- uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), - CountTrailingZeros_64(~KnownZero2)); - - KnownZero = (1ULL << KnownZeroOut) - 1; - KnownOne = 0; - return; - } - case ISD::SUB: { - ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0)); - if (!CLHS) return; - - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - MVT::ValueType VT = CLHS->getValueType(0); - if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear - unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); - uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit - MaskV = ~MaskV & MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); - - // If all of the MaskV bits are known to be zero, then we know the output - // top bits are zero, because we now know that the output is from [0-C]. - if ((KnownZero & MaskV) == MaskV) { - unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); - KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. - KnownOne = 0; // No one bits known. - } else { - KnownZero = KnownOne = 0; // Otherwise, nothing known. - } - } - return; - } - default: - // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_W_CHAIN: - case ISD::INTRINSIC_VOID: - computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne); - } - return; - } -} - /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
@@ -1212,6 +881,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -1223,222 +893,6 @@ KnownOne = 0; } -/// ComputeNumSignBits - Return the number of times the sign bit of the -/// register is replicated into the other bits. We know that at least 1 bit -/// is always equal to the sign bit (itself), but other cases can give us -/// information. For example, immediately after an "SRA X, 2", we know that -/// the top 3 bits are all equal to each other, so we return 3. -unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ - MVT::ValueType VT = Op.getValueType(); - assert(MVT::isInteger(VT) && "Invalid VT!"); - unsigned VTBits = MVT::getSizeInBits(VT); - unsigned Tmp, Tmp2; - - if (Depth == 6) - return 1; // Limit search depth. - - switch (Op.getOpcode()) { - default: break; - case ISD::AssertSext: - Tmp = MVT::getSizeInBits(cast(Op.getOperand(1))->getVT()); - return VTBits-Tmp+1; - case ISD::AssertZext: - Tmp = MVT::getSizeInBits(cast(Op.getOperand(1))->getVT()); - return VTBits-Tmp; - - case ISD::Constant: { - uint64_t Val = cast(Op)->getValue(); - // If negative, invert the bits, then look at it. - if (Val & MVT::getIntVTSignBit(VT)) - Val = ~Val; - - // Shift the bits so they are the leading bits in the int64_t. - Val <<= 64-VTBits; - - // Return # leading zeros. We use 'min' here in case Val was zero before - // shifting. We don't want to return '64' as for an i32 "0". - return std::min(VTBits, CountLeadingZeros_64(Val)); - } - - case ISD::SIGN_EXTEND: - Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); - return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; - - case ISD::SIGN_EXTEND_INREG: - // Max of the input and what this extends. 
- Tmp = MVT::getSizeInBits(cast(Op.getOperand(1))->getVT()); - Tmp = VTBits-Tmp+1; - - Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); - return std::max(Tmp, Tmp2); - - case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - // SRA X, C -> adds C sign bits. - if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - Tmp += C->getValue(); - if (Tmp > VTBits) Tmp = VTBits; - } - return Tmp; - case ISD::SHL: - if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - // shl destroys sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (C->getValue() >= VTBits || // Bad shift. - C->getValue() >= Tmp) break; // Shifted all sign bits out. - return Tmp - C->getValue(); - } - break; - case ISD::AND: - case ISD::OR: - case ISD::XOR: // NOT is handled here. - // Logical binary ops preserve the number of sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - return std::min(Tmp, Tmp2); - - case ISD::SELECT: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - return std::min(Tmp, Tmp2); - - case ISD::SETCC: - // If setcc returns 0/-1, all bits are sign bits. - if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult) - return VTBits; - break; - case ISD::ROTL: - case ISD::ROTR: - if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - unsigned RotAmt = C->getValue() & (VTBits-1); - - // Handle rotate right by N like a rotate left by 32-N. - if (Op.getOpcode() == ISD::ROTR) - RotAmt = (VTBits-RotAmt) & (VTBits-1); - - // If we aren't rotating out all of the known-in sign bits, return the - // number that are left. This handles rotl(sext(x), 1) for example. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp > RotAmt+1) return Tmp-RotAmt; - } - break; - case ISD::ADD: - // Add can have at most one carry bit. 
Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. - - // Special case decrementing a value (ADD X, -1): - if (ConstantSDNode *CRHS = dyn_cast(Op.getOperand(0))) - if (CRHS->isAllOnesValue()) { - uint64_t KnownZero, KnownOne; - uint64_t Mask = MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((KnownZero|1) == Mask) - return VTBits; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. - if (KnownZero & MVT::getIntVTSignBit(VT)) - return Tmp; - } - - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; - - case ISD::SUB: - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; - - // Handle NEG. - if (ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0))) - if (CLHS->getValue() == 0) { - uint64_t KnownZero, KnownOne; - uint64_t Mask = MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if ((KnownZero|1) == Mask) - return VTBits; - - // If the input is known to be positive (the sign bit is known clear), - // the output of the NEG has the same number of sign bits as the input. - if (KnownZero & MVT::getIntVTSignBit(VT)) - return Tmp2; - - // Otherwise, we treat this like a SUB. - } - - // Sub can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. 
- return std::min(Tmp, Tmp2)-1; - break; - case ISD::TRUNCATE: - // FIXME: it's tricky to do anything useful for this, but it is an important - // case for targets like X86. - break; - } - - // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(Op); - unsigned ExtType = LD->getExtensionType(); - switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = MVT::getSizeInBits(LD->getLoadedVT()); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = MVT::getSizeInBits(LD->getLoadedVT()); - return VTBits-Tmp; - } - } - - // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END || - Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth); - if (NumBits > 1) return NumBits; - } - - // Finally, if we can prove that the top bits of the result are 0's or 1's, - // use this information. - uint64_t KnownZero, KnownOne; - uint64_t Mask = MVT::getIntVTBitMask(VT); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); - - uint64_t SignBit = MVT::getIntVTSignBit(VT); - if (KnownZero & SignBit) { // SignBit is 0 - Mask = KnownZero; - } else if (KnownOne & SignBit) { // SignBit is 1; - Mask = KnownOne; - } else { - // Nothing known. - return 1; - } - - // Okay, we know that the sign bit in Mask is set. Use CLZ to determine - // the number of identical bits in the top of the input value. - Mask ^= ~0ULL; - Mask <<= 64-VTBits; - // Return # leading zeros. We use 'min' here in case Val was zero before - // shifting. We don't want to return '64' as for an i32 "0". 
- return std::min(VTBits, CountLeadingZeros_64(Mask)); -} - - - /// ComputeNumSignBitsForTargetNode - This method can be implemented by /// targets that want to expose additional information about sign bits to the /// DAG Combiner. @@ -1597,7 +1051,8 @@ cast(N0.getOperand(1))->getValue() == 1) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. - if (MaskedValueIsZero(N0, MVT::getIntVTBitMask(N0.getValueType())-1)){ + if (DAG.MaskedValueIsZero(N0, + MVT::getIntVTBitMask(N0.getValueType())-1)){ // Okay, get the un-inverted input value. SDOperand Val; if (N0.getOpcode() == ISD::XOR) @@ -1761,7 +1216,7 @@ if (N0.getOpcode() == ISD::XOR) // If we know that all of the inverted bits are zero, don't bother // performing the inversion. - if (MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue())) + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getValue())) return DAG.getSetCC(VT, N0.getOperand(0), DAG.getConstant(LHSR->getValue()^RHSC->getValue(), N0.getValueType()), Cond); From djg at cray.com Fri Jun 22 09:59:50 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:50 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/Target/TargetLowering.h Message-ID: <200706221459.l5MExoCh028698@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/Target: TargetLowering.h updated: 1.128 -> 1.129 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. 
--- Diffs of the changes: (+1 -21) TargetLowering.h | 22 +--------------------- 1 files changed, 1 insertion(+), 21 deletions(-) Index: llvm/include/llvm/Target/TargetLowering.h diff -u llvm/include/llvm/Target/TargetLowering.h:1.128 llvm/include/llvm/Target/TargetLowering.h:1.129 --- llvm/include/llvm/Target/TargetLowering.h:1.128 Thu Jun 21 09:42:22 2007 +++ llvm/include/llvm/Target/TargetLowering.h Fri Jun 22 09:59:07 2007 @@ -494,20 +494,6 @@ bool ShrinkDemandedConstant(SDOperand Op, uint64_t Demanded); }; - /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We - /// use this predicate to simplify operations downstream. Op and Mask are - /// known to be the same type. - bool MaskedValueIsZero(SDOperand Op, uint64_t Mask, unsigned Depth = 0) - const; - - /// ComputeMaskedBits - Determine which of the bits specified in Mask are - /// known to be either zero or one and return them in the KnownZero/KnownOne - /// bitsets. This code only analyzes bits in Mask, in order to short-circuit - /// processing. Targets can implement the computeMaskedBitsForTargetNode - /// method, to allow target nodes to be understood. - void ComputeMaskedBits(SDOperand Op, uint64_t Mask, uint64_t &KnownZero, - uint64_t &KnownOne, unsigned Depth = 0) const; - /// SimplifyDemandedBits - Look at Op. At this point, we know that only the /// DemandedMask bits of the result of Op are ever used downstream. If we can /// use this information to simplify Op, create a new simplified DAG node and @@ -527,15 +513,9 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth = 0) const; - /// ComputeNumSignBits - Return the number of times the sign bit of the - /// register is replicated into the other bits. We know that at least 1 bit - /// is always equal to the sign bit (itself), but other cases can give us - /// information. 
For example, immediately after an "SRA X, 2", we know that - /// the top 3 bits are all equal to each other, so we return 3. - unsigned ComputeNumSignBits(SDOperand Op, unsigned Depth = 0) const; - /// ComputeNumSignBitsForTargetNode - This method can be implemented by /// targets that want to expose additional information about sign bits to the /// DAG Combiner. From djg at cray.com Fri Jun 22 09:59:52 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:52 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Message-ID: <200706221459.l5MExqeg028710@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/Sparc: SparcISelDAGToDAG.cpp updated: 1.122 -> 1.123 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. --- Diffs of the changes: (+6 -2) SparcISelDAGToDAG.cpp | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) Index: llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp diff -u llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.122 llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.123 --- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp:1.122 Wed Mar 7 10:25:09 2007 +++ llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp Fri Jun 22 09:59:07 2007 @@ -112,6 +112,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth = 0) const; virtual std::vector @@ -261,6 +262,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { uint64_t KnownZero2, KnownOne2; KnownZero = KnownOne = 0; // Don't know anything. 
@@ -269,8 +271,10 @@ default: break; case SPISD::SELECT_ICC: case SPISD::SELECT_FCC: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, + Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, + Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); From djg at cray.com Fri Jun 22 09:59:52 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:52 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86ISelLowering.h Message-ID: <200706221459.l5MExqJ5028719@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/X86: X86ISelDAGToDAG.cpp updated: 1.149 -> 1.150 X86ISelLowering.cpp updated: 1.413 -> 1.414 X86ISelLowering.h updated: 1.100 -> 1.101 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. --- Diffs of the changes: (+3 -1) X86ISelDAGToDAG.cpp | 2 +- X86ISelLowering.cpp | 1 + X86ISelLowering.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.149 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.150 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.149 Wed Apr 11 17:29:46 2007 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Jun 22 09:59:07 2007 @@ -744,7 +744,7 @@ // On x86-64, the resultant disp must fit in 32-bits. isInt32(AM.Disp + CN->getSignExtended()) && // Check to see if the LHS & C is zero. 
- TLI.MaskedValueIsZero(N.getOperand(0), CN->getValue())) { + CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) { AM.Disp += CN->getValue(); return false; } Index: llvm/lib/Target/X86/X86ISelLowering.cpp diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.413 llvm/lib/Target/X86/X86ISelLowering.cpp:1.414 --- llvm/lib/Target/X86/X86ISelLowering.cpp:1.413 Mon Jun 18 19:13:10 2007 +++ llvm/lib/Target/X86/X86ISelLowering.cpp Fri Jun 22 09:59:07 2007 @@ -4506,6 +4506,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || Index: llvm/lib/Target/X86/X86ISelLowering.h diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.100 llvm/lib/Target/X86/X86ISelLowering.h:1.101 --- llvm/lib/Target/X86/X86ISelLowering.h:1.100 Tue Apr 24 16:16:55 2007 +++ llvm/lib/Target/X86/X86ISelLowering.h Fri Jun 22 09:59:07 2007 @@ -319,6 +319,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth = 0) const; SDOperand getReturnAddressFrameIndex(SelectionDAG &DAG); From djg at cray.com Fri Jun 22 09:59:53 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:53 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp PPCISelLowering.cpp PPCISelLowering.h Message-ID: <200706221459.l5MExrIo028731@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/PowerPC: PPCISelDAGToDAG.cpp updated: 1.230 -> 1.231 PPCISelLowering.cpp updated: 1.274 -> 1.275 PPCISelLowering.h updated: 1.65 -> 1.66 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. 
--- Diffs of the changes: (+8 -6) PPCISelDAGToDAG.cpp | 4 ++-- PPCISelLowering.cpp | 9 +++++---- PPCISelLowering.h | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.230 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.231 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.230 Mon Apr 2 00:59:42 2007 +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Fri Jun 22 09:59:07 2007 @@ -424,8 +424,8 @@ SDOperand Op1 = N->getOperand(1); uint64_t LKZ, LKO, RKZ, RKO; - TLI.ComputeMaskedBits(Op0, 0xFFFFFFFFULL, LKZ, LKO); - TLI.ComputeMaskedBits(Op1, 0xFFFFFFFFULL, RKZ, RKO); + CurDAG->ComputeMaskedBits(Op0, 0xFFFFFFFFULL, LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, 0xFFFFFFFFULL, RKZ, RKO); unsigned TargetMask = LKZ; unsigned InsertMask = RKZ; Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.274 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.275 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.274 Tue Jun 19 00:46:06 2007 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Fri Jun 22 09:59:07 2007 @@ -688,10 +688,10 @@ // disjoint. uint64_t LHSKnownZero, LHSKnownOne; uint64_t RHSKnownZero, RHSKnownOne; - ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); if (LHSKnownZero) { - ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if ((LHSKnownZero | RHSKnownZero) == ~0U) { @@ -742,7 +742,7 @@ // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. 
uint64_t LHSKnownZero, LHSKnownOne; - ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); if ((LHSKnownZero|~(unsigned)imm) == ~0U) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -850,7 +850,7 @@ // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. uint64_t LHSKnownZero, LHSKnownOne; - ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); if ((LHSKnownZero|~(unsigned)imm) == ~0U) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -3235,6 +3235,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { KnownZero = 0; KnownOne = 0; Index: llvm/lib/Target/PowerPC/PPCISelLowering.h diff -u llvm/lib/Target/PowerPC/PPCISelLowering.h:1.65 llvm/lib/Target/PowerPC/PPCISelLowering.h:1.66 --- llvm/lib/Target/PowerPC/PPCISelLowering.h:1.65 Tue Apr 3 08:59:52 2007 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h Fri Jun 22 09:59:07 2007 @@ -230,6 +230,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth = 0) const; virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI, From djg at cray.com Fri Jun 22 09:59:53 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:53 -0500 Subject: [llvm-commits] CVS: llvm/include/llvm/CodeGen/SelectionDAG.h Message-ID: <200706221459.l5MExrdC028724@zion.cs.uiuc.edu> Changes in directory llvm/include/llvm/CodeGen: SelectionDAG.h updated: 1.150 -> 1.151 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for 
the pre-legalize vector type changes. --- Diffs of the changes: (+23 -0) SelectionDAG.h | 23 +++++++++++++++++++++++ 1 files changed, 23 insertions(+) Index: llvm/include/llvm/CodeGen/SelectionDAG.h diff -u llvm/include/llvm/CodeGen/SelectionDAG.h:1.150 llvm/include/llvm/CodeGen/SelectionDAG.h:1.151 --- llvm/include/llvm/CodeGen/SelectionDAG.h:1.150 Sun Apr 22 18:15:29 2007 +++ llvm/include/llvm/CodeGen/SelectionDAG.h Fri Jun 22 09:59:07 2007 @@ -465,6 +465,29 @@ SDOperand FoldSetCC(MVT::ValueType VT, SDOperand N1, SDOperand N2, ISD::CondCode Cond); + /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We + /// use this predicate to simplify operations downstream. Op and Mask are + /// known to be the same type. + bool MaskedValueIsZero(SDOperand Op, uint64_t Mask, unsigned Depth = 0) + const; + + /// ComputeMaskedBits - Determine which of the bits specified in Mask are + /// known to be either zero or one and return them in the KnownZero/KnownOne + /// bitsets. This code only analyzes bits in Mask, in order to short-circuit + /// processing. Targets can implement the computeMaskedBitsForTargetNode + /// method in the TargetLowering class to allow target nodes to be understood. + void ComputeMaskedBits(SDOperand Op, uint64_t Mask, uint64_t &KnownZero, + uint64_t &KnownOne, unsigned Depth = 0) const; + + /// ComputeNumSignBits - Return the number of times the sign bit of the + /// register is replicated into the other bits. We know that at least 1 bit + /// is always equal to the sign bit (itself), but other cases can give us + /// information. For example, immediately after an "SRA X, 2", we know that + /// the top 3 bits are all equal to each other, so we return 3. Targets can + /// implement the ComputeNumSignBitsForTarget method in the TargetLowering + /// class to allow target nodes to be understood. 
+ unsigned ComputeNumSignBits(SDOperand Op, unsigned Depth = 0) const; + private: void RemoveNodeFromCSEMaps(SDNode *N); SDNode *AddNonLeafNodeToCSEMaps(SDNode *N); From djg at cray.com Fri Jun 22 09:59:50 2007 From: djg at cray.com (Dan Gohman) Date: Fri, 22 Jun 2007 09:59:50 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMISelLowering.cpp ARMISelLowering.h Message-ID: <200706221459.l5MExoLq028705@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMISelLowering.cpp updated: 1.58 -> 1.59 ARMISelLowering.h updated: 1.17 -> 1.18 --- Log message: Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from TargetLowering to SelectionDAG so that they have more convenient access to the current DAG, in preparation for the ValueType routines being changed from standalone functions to members of SelectionDAG for the pre-legalize vector type changes. --- Diffs of the changes: (+9 -8) ARMISelLowering.cpp | 16 ++++++++-------- ARMISelLowering.h | 1 + 2 files changed, 9 insertions(+), 8 deletions(-) Index: llvm/lib/Target/ARM/ARMISelLowering.cpp diff -u llvm/lib/Target/ARM/ARMISelLowering.cpp:1.58 llvm/lib/Target/ARM/ARMISelLowering.cpp:1.59 --- llvm/lib/Target/ARM/ARMISelLowering.cpp:1.58 Tue Jun 19 18:55:02 2007 +++ llvm/lib/Target/ARM/ARMISelLowering.cpp Fri Jun 22 09:59:07 2007 @@ -1254,9 +1254,8 @@ SDOperand RL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1), DAG.getConstant(0, MVT::i32)); - const TargetLowering &TL = DAG.getTargetLoweringInfo(); - unsigned LHSSB = TL.ComputeNumSignBits(Op.getOperand(0)); - unsigned RHSSB = TL.ComputeNumSignBits(Op.getOperand(1)); + unsigned LHSSB = DAG.ComputeNumSignBits(Op.getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(Op.getOperand(1)); SDOperand Lo, Hi; // Figure out how to lower this multiply. 
@@ -1265,8 +1264,8 @@ Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL); Hi = DAG.getNode(ISD::MULHS, MVT::i32, LL, RL); } else if (LHSSB == 32 && RHSSB == 32 && - TL.MaskedValueIsZero(Op.getOperand(0), 0xFFFFFFFF00000000ULL) && - TL.MaskedValueIsZero(Op.getOperand(1), 0xFFFFFFFF00000000ULL)) { + DAG.MaskedValueIsZero(Op.getOperand(0), 0xFFFFFFFF00000000ULL) && + DAG.MaskedValueIsZero(Op.getOperand(1), 0xFFFFFFFF00000000ULL)) { // If the inputs are zero extended, use mulhu. Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL); Hi = DAG.getNode(ISD::MULHU, MVT::i32, LL, RL); @@ -1757,6 +1756,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { KnownZero = 0; KnownOne = 0; @@ -1764,12 +1764,12 @@ default: break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; uint64_t KnownZeroRHS, KnownOneRHS; - ComputeMaskedBits(Op.getOperand(1), Mask, - KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, + KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; Index: llvm/lib/Target/ARM/ARMISelLowering.h diff -u llvm/lib/Target/ARM/ARMISelLowering.h:1.17 llvm/lib/Target/ARM/ARMISelLowering.h:1.18 --- llvm/lib/Target/ARM/ARMISelLowering.h:1.17 Tue Jun 19 16:05:09 2007 +++ llvm/lib/Target/ARM/ARMISelLowering.h Fri Jun 22 09:59:07 2007 @@ -108,6 +108,7 @@ uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const; ConstraintType getConstraintType(const std::string &Constraint) const; std::pair From resistor at mac.com Fri Jun 22 12:00:16 2007 From: resistor at mac.com (Owen Anderson) Date: Fri, 22 Jun 2007 12:00:16 -0500 Subject: [llvm-commits] CVS: llvm/lib/Target/ARM/ARMInstrInfo.td Message-ID: 
<200706221700.l5MH0GhB000406@zion.cs.uiuc.edu> Changes in directory llvm/lib/Target/ARM: ARMInstrInfo.td updated: 1.113 -> 1.114 --- Log message: Fix the build. --- Diffs of the changes: (+1 -1) ARMInstrInfo.td | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Target/ARM/ARMInstrInfo.td diff -u llvm/lib/Target/ARM/ARMInstrInfo.td:1.113 llvm/lib/Target/ARM/ARMInstrInfo.td:1.114 --- llvm/lib/Target/ARM/ARMInstrInfo.td:1.113 Tue Jun 19 16:05:09 2007 +++ llvm/lib/Target/ARM/ARMInstrInfo.td Fri Jun 22 11:59:54 2007 @@ -152,7 +152,7 @@ // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ - return TLI.ComputeNumSignBits(SDOperand(N,0)) >= 17; + return CurDAG->ComputeNumSignBits(SDOperand(N,0)) >= 17; }]>; From resistor at mac.com Fri Jun 22 12:05:03 2007 From: resistor at mac.com (Owen Anderson) Date: Fri, 22 Jun 2007 12:05:03 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706221705.l5MH53v8000588@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.52 -> 1.53 --- Log message: Remove some code that I was using for collecting performance information that should not have been committed. 
--- Diffs of the changes: (+18 -18) GVNPRE.cpp | 36 ++++++++++++++++++------------------ 1 files changed, 18 insertions(+), 18 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.52 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.53 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.52 Thu Jun 21 22:14:03 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Fri Jun 22 12:04:40 2007 @@ -367,47 +367,47 @@ // Helper fuctions // FIXME: eliminate or document these better - void dump(const SmallPtrSet& s) const __attribute__((noinline)); - void clean(SmallPtrSet& set) __attribute__((noinline)); + void dump(const SmallPtrSet& s) const; + void clean(SmallPtrSet& set); Value* find_leader(SmallPtrSet& vals, - uint32_t v) __attribute__((noinline)); - Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ) __attribute__((noinline)); + uint32_t v); + Value* phi_translate(Value* V, BasicBlock* pred, BasicBlock* succ); void phi_translate_set(SmallPtrSet& anticIn, BasicBlock* pred, - BasicBlock* succ, SmallPtrSet& out) __attribute__((noinline)); + BasicBlock* succ, SmallPtrSet& out); void topo_sort(SmallPtrSet& set, - std::vector& vec) __attribute__((noinline)); + std::vector& vec); - void cleanup() __attribute__((noinline)); - bool elimination() __attribute__((noinline)); + void cleanup(); + bool elimination(); - void val_insert(SmallPtrSet& s, Value* v) __attribute__((noinline)); - void val_replace(SmallPtrSet& s, Value* v) __attribute__((noinline)); - bool dependsOnInvoke(Value* V) __attribute__((noinline)); + void val_insert(SmallPtrSet& s, Value* v); + void val_replace(SmallPtrSet& s, Value* v); + bool dependsOnInvoke(Value* V); void buildsets_availout(BasicBlock::iterator I, SmallPtrSet& currAvail, SmallPtrSet& currPhis, SmallPtrSet& currExps, SmallPtrSet& currTemps, BitVector& availNumbers, - BitVector& expNumbers) __attribute__((noinline)); + BitVector& expNumbers); bool buildsets_anticout(BasicBlock* BB, SmallPtrSet& anticOut, - 
std::set& visited) __attribute__((noinline)); + std::set& visited); unsigned buildsets_anticin(BasicBlock* BB, SmallPtrSet& anticOut, SmallPtrSet& currExps, SmallPtrSet& currTemps, - std::set& visited) __attribute__((noinline)); - unsigned buildsets(Function& F) __attribute__((noinline)); + std::set& visited); + unsigned buildsets(Function& F); void insertion_pre(Value* e, BasicBlock* BB, std::map& avail, - SmallPtrSet& new_set) __attribute__((noinline)); + SmallPtrSet& new_set); unsigned insertion_mergepoint(std::vector& workList, df_iterator& D, - SmallPtrSet& new_set) __attribute__((noinline)); - bool insertion(Function& F) __attribute__((noinline)); + SmallPtrSet& new_set); + bool insertion(Function& F); }; From clattner at apple.com Fri Jun 22 12:05:06 2007 From: clattner at apple.com (Chris Lattner) Date: Fri, 22 Jun 2007 10:05:06 -0700 Subject: [llvm-commits] CVS: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp LegalizeDAG.cpp SelectionDAG.cpp SelectionDAGISel.cpp TargetLowering.cpp In-Reply-To: <200706221459.l5MExji1028691@zion.cs.uiuc.edu> References: <200706221459.l5MExji1028691@zion.cs.uiuc.edu> Message-ID: <6567C84E-6B8F-4610-A9CF-4692F19F861B@apple.com> On Jun 22, 2007, at 7:59 AM, Dan Gohman wrote: > Move ComputeMaskedBits, MaskedValueIsZero, and ComputeNumSignBits from > TargetLowering to SelectionDAG so that they have more convenient > access to the current DAG, in preparation for the ValueType routines > being changed from standalone functions to members of SelectionDAG for > the pre-legalize vector type changes. Ah, that's also a much more logical place for them anyway. They originally started out in DAGCombine, this is their third home :). 
Thanks Dan, -Chris From clattner at apple.com Fri Jun 22 12:09:58 2007 From: clattner at apple.com (Chris Lattner) Date: Fri, 22 Jun 2007 10:09:58 -0700 Subject: [llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.cpp X86InstrInfo.h X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td In-Reply-To: <20070621215621.GR5693@village.us.cray.com> References: <20070621215621.GR5693@village.us.cray.com> Message-ID: <74E967A1-557B-4774-AB85-1614C412DDCF@apple.com> >> How about this proposal (Obviously feel free to pick better names for >> these things): >> >> 1. Reintroduce the 'isremat-able' flag, set it to true for all the >> instructions that are *potentially* rematerializable. >> 2. Add a virtual target hook that can override the flag: >> "TII::isReallyRematerializable(Machineinstr*)". >> 3. Introduce a new non-virtual method: >> bool TII::isRematerializable(Machineinstr *MI) { >> return MI->flags->isrematable && isReallyRematerializable(MI); >> } > > I tried this, and got circular dependencies between libLLVMAnalysis.a, > libLLVMTarget.a, and libLLVMCodeGen.a. I think it's because the actual > code for 3. uses MachineInstr::getOpcode. It shouldn't need to, it can be an inline function that calls: MI->getInstrDescriptor()->isrematable() MachineInstrs have a direct pointer to their TargetInstrDescriptor record. They actually don't hold their opcode :) >> I'm sorry I didn't look at your patch when you asked for comments, >> but does this proposal sound sane? > > *shrug*. Adding isReMaterializable flags to all the load > instructions in > the X86 files isn't unambiguously prettier though. But I've already > strayed from my tangent here :-}. True, in the future we can add some smarts to tblgen... we already know what the loads are (from the patterns), so tblgen could do this automatically for every target. Unfortunately, the tblgen code that interprets the patterns is built into the DAGISelEmitter.cpp file. 
Someday we should refactor the code for interpreting the patterns out from the code that emits the isel. That way, other tblgen backends could use the patterns to autogenerate things (e.g. the "fold load into instruction" code). -Chris From resistor at mac.com Fri Jun 22 13:27:27 2007 From: resistor at mac.com (Owen Anderson) Date: Fri, 22 Jun 2007 13:27:27 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706221827.l5MIRRkp002247@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.53 -> 1.54 --- Log message: Perform fewer set insertions while calculating ANTIC_IN. This reduces the amount of time to optimize 403.gcc from 21.9s to 18.2s. --- Diffs of the changes: (+31 -40) GVNPRE.cpp | 71 ++++++++++++++++++++++++++----------------------------------- 1 files changed, 31 insertions(+), 40 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.53 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.54 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.53 Fri Jun 22 12:04:40 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Fri Jun 22 13:27:04 2007 @@ -818,20 +818,20 @@ availNumbers.resize(VN.size()); if (isa(leftValue)) - if (!expNumbers.test(VN.lookup(leftValue)-1)) { + if (!expNumbers.test(VN.lookup(leftValue))) { currExps.insert(leftValue); - expNumbers.set(VN.lookup(leftValue)-1); + expNumbers.set(VN.lookup(leftValue)); } if (isa(rightValue)) - if (!expNumbers.test(VN.lookup(rightValue)-1)) { + if (!expNumbers.test(VN.lookup(rightValue))) { currExps.insert(rightValue); - expNumbers.set(VN.lookup(rightValue)-1); + expNumbers.set(VN.lookup(rightValue)); } - if (!expNumbers.test(VN.lookup(BO)-1)) { + if (!expNumbers.test(VN.lookup(BO))) { currExps.insert(BO); - expNumbers.set(num-1); + expNumbers.set(num); } // Handle cmp ops... 
@@ -846,19 +846,19 @@ availNumbers.resize(VN.size()); if (isa(leftValue)) - if (!expNumbers.test(VN.lookup(leftValue)-1)) { + if (!expNumbers.test(VN.lookup(leftValue))) { currExps.insert(leftValue); - expNumbers.set(VN.lookup(leftValue)-1); + expNumbers.set(VN.lookup(leftValue)); } if (isa(rightValue)) - if (!expNumbers.test(VN.lookup(rightValue)-1)) { + if (!expNumbers.test(VN.lookup(rightValue))) { currExps.insert(rightValue); - expNumbers.set(VN.lookup(rightValue)-1); + expNumbers.set(VN.lookup(rightValue)); } - if (!expNumbers.test(VN.lookup(C)-1)) { + if (!expNumbers.test(VN.lookup(C))) { currExps.insert(C); - expNumbers.set(num-1); + expNumbers.set(num); } // Handle unsupported ops @@ -871,9 +871,9 @@ } if (!I->isTerminator()) - if (!availNumbers.test(VN.lookup(I)-1)) { + if (!availNumbers.test(VN.lookup(I))) { currAvail.insert(I); - availNumbers.set(VN.lookup(I)-1); + availNumbers.set(VN.lookup(I)); } } @@ -921,45 +921,36 @@ SmallPtrSet& currTemps, std::set& visited) { SmallPtrSet& anticIn = anticipatedIn[BB]; - SmallPtrSet old (anticIn.begin(), anticIn.end()); + unsigned old = anticIn.size(); bool defer = buildsets_anticout(BB, anticOut, visited); if (defer) return 0; - - SmallPtrSet S; - for (SmallPtrSet::iterator I = anticOut.begin(), - E = anticOut.end(); I != E; ++I) - if (currTemps.count(*I) == 0) - S.insert(*I); anticIn.clear(); + BitVector numbers(VN.size()); + for (SmallPtrSet::iterator I = anticOut.begin(), + E = anticOut.end(); I != E; ++I) { + anticIn.insert(*I); + numbers.set(VN.lookup_or_add(*I)); + } for (SmallPtrSet::iterator I = currExps.begin(), - E = currExps.end(); I != E; ++I) - if (currTemps.count(*I) == 0) + E = currExps.end(); I != E; ++I) { + if (!numbers.test(VN.lookup_or_add(*I))) { anticIn.insert(*I); + numbers.set(VN.lookup(*I)); + } + } + + for (SmallPtrSet::iterator I = currTemps.begin(), + E = currTemps.end(); I != E; ++I) + anticIn.erase(*I); - BitVector numbers(VN.size()); - for (SmallPtrSet::iterator I = anticIn.begin(), - 
E = anticIn.end(); I != E; ++I) - numbers.set(VN.lookup(*I)-1); - for (SmallPtrSet::iterator I = S.begin(), E = S.end(); - I != E; ++I) { - // For non-opaque values, we should already have a value numbering. - // However, for opaques, such as constants within PHI nodes, it is - // possible that they have not yet received a number. Make sure they do - // so now. - if (!isa(*I) && !isa(*I)) - VN.lookup_or_add(*I); - if (!numbers.test(VN.lookup(*I)-1)) - anticIn.insert(*I); - } - clean(anticIn); anticOut.clear(); - if (old.size() != anticIn.size()) + if (old != anticIn.size()) return 2; else return 1; From dpatel at apple.com Fri Jun 22 15:28:03 2007 From: dpatel at apple.com (dpatel at apple.com) Date: Fri, 22 Jun 2007 13:28:03 -0700 (PDT) Subject: [llvm-commits] [128742] Fix warning. Message-ID: <20070622202803.3E833A3C1333@src> Revision: 128742 Author: dpatel Date: 2007-06-22 13:28:02 -0700 (Fri, 22 Jun 2007) Log Message: ----------- Fix warning. Modified Paths: -------------- apple-local/branches/llvm/gcc/c-lex.c Modified: apple-local/branches/llvm/gcc/c-lex.c =================================================================== --- apple-local/branches/llvm/gcc/c-lex.c 2007-06-22 17:28:14 UTC (rev 128741) +++ apple-local/branches/llvm/gcc/c-lex.c 2007-06-22 20:28:02 UTC (rev 128742) @@ -826,7 +826,7 @@ my_cpp_num_sign_extend (cpp_num num, size_t precision) { if (num.high) - printf("%lu\n", num.high); + printf("%lu\n", (long unsigned int) num.high); if (!num.unsignedp) { if (precision > PART_PRECISION) From resistor at mac.com Fri Jun 22 16:31:41 2007 From: resistor at mac.com (Owen Anderson) Date: Fri, 22 Jun 2007 16:31:41 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706222131.l5MLVfHm006677@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.54 -> 1.55 --- Log message: Rework topo_sort to eliminate some behavior that scaled terribly.
This reduces the time to optimize 403.gcc from 18.2s to 17.5s, and has an even larger effect on larger testcases. --- Diffs of the changes: (+40 -57) GVNPRE.cpp | 97 +++++++++++++++++++++++++------------------------------------ 1 files changed, 40 insertions(+), 57 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.54 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.55 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.54 Fri Jun 22 13:27:04 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Fri Jun 22 16:31:16 2007 @@ -650,71 +650,54 @@ /// topo_sort - Given a set of values, sort them by topological /// order into the provided vector. void GVNPRE::topo_sort(SmallPtrSet& set, std::vector& vec) { - SmallPtrSet toErase; + SmallPtrSet visited; + std::vector stack; for (SmallPtrSet::iterator I = set.begin(), E = set.end(); I != E; ++I) { - if (BinaryOperator* BO = dyn_cast(*I)) - for (SmallPtrSet::iterator SI = set.begin(); SI != E; ++SI) { - if (VN.lookup(BO->getOperand(0)) == VN.lookup(*SI) || - VN.lookup(BO->getOperand(1)) == VN.lookup(*SI)) { - toErase.insert(*SI); + if (visited.count(*I) == 0) + stack.push_back(*I); + + while (!stack.empty()) { + Value* e = stack.back(); + + if (BinaryOperator* BO = dyn_cast(e)) { + Value* l = find_leader(set, VN.lookup(BO->getOperand(0))); + Value* r = find_leader(set, VN.lookup(BO->getOperand(1))); + + if (l != 0 && isa(l) && + visited.count(l) == 0) + stack.push_back(l); + else if (r != 0 && isa(r) && + visited.count(r) == 0) + stack.push_back(r); + else { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); } - } - else if (CmpInst* C = dyn_cast(*I)) - for (SmallPtrSet::iterator SI = set.begin(); SI != E; ++SI) { - if (VN.lookup(C->getOperand(0)) == VN.lookup(*SI) || - VN.lookup(C->getOperand(1)) == VN.lookup(*SI)) { - toErase.insert(*SI); + } else if (CmpInst* C = dyn_cast(e)) { + Value* l = find_leader(set, VN.lookup(C->getOperand(0))); + Value* r = find_leader(set, 
VN.lookup(C->getOperand(1))); + + if (l != 0 && isa(l) && + visited.count(l) == 0) + stack.push_back(l); + else if (r != 0 && isa(r) && + visited.count(r) == 0) + stack.push_back(r); + else { + vec.push_back(e); + visited.insert(e); + stack.pop_back(); } - } - } - - std::vector Q; - for (SmallPtrSet::iterator I = set.begin(), E = set.end(); - I != E; ++I) { - if (toErase.count(*I) == 0) - Q.push_back(*I); - } - - SmallPtrSet visited; - while (!Q.empty()) { - Value* e = Q.back(); - - if (BinaryOperator* BO = dyn_cast(e)) { - Value* l = find_leader(set, VN.lookup(BO->getOperand(0))); - Value* r = find_leader(set, VN.lookup(BO->getOperand(1))); - - if (l != 0 && isa(l) && - visited.count(l) == 0) - Q.push_back(l); - else if (r != 0 && isa(r) && - visited.count(r) == 0) - Q.push_back(r); - else { - vec.push_back(e); + } else { visited.insert(e); - Q.pop_back(); - } - } else if (CmpInst* C = dyn_cast(e)) { - Value* l = find_leader(set, VN.lookup(C->getOperand(0))); - Value* r = find_leader(set, VN.lookup(C->getOperand(1))); - - if (l != 0 && isa(l) && - visited.count(l) == 0) - Q.push_back(l); - else if (r != 0 && isa(r) && - visited.count(r) == 0) - Q.push_back(r); - else { vec.push_back(e); - visited.insert(e); - Q.pop_back(); + stack.pop_back(); } - } else { - visited.insert(e); - vec.push_back(e); - Q.pop_back(); } + + stack.clear(); } } From nicholas at mxc.ca Sat Jun 23 23:36:42 2007 From: nicholas at mxc.ca (Nick Lewycky) Date: Sat, 23 Jun 2007 23:36:42 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Message-ID: <200706240436.l5O4ag1N020264@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: PredicateSimplifier.cpp updated: 1.78 -> 1.79 --- Log message: Remove use of ETForest. Also cleaned up issues around unreachable basic blocks, and optimizing within one basic block. 
--- Diffs of the changes: (+326 -156) PredicateSimplifier.cpp | 482 ++++++++++++++++++++++++++++++++---------------- 1 files changed, 326 insertions(+), 156 deletions(-) Index: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp diff -u llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.78 llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.79 --- llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.78 Mon Jun 4 18:52:59 2007 +++ llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Sat Jun 23 23:36:20 2007 @@ -92,7 +92,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ET-Forest.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConstantRange.h" @@ -103,6 +102,7 @@ #include #include #include +#include using namespace llvm; STATISTIC(NumVarsReplaced, "Number of argument substitutions"); @@ -112,6 +112,186 @@ STATISTIC(NumSnuggle , "Number of comparisons snuggled"); namespace { + class DomTreeDFS { + public: + class Node { + friend class DomTreeDFS; + public: + typedef std::vector::iterator iterator; + typedef std::vector::const_iterator const_iterator; + + unsigned getDFSNumIn() const { return DFSin; } + unsigned getDFSNumOut() const { return DFSout; } + + BasicBlock *getBlock() const { return BB; } + + iterator begin() { return Children.begin(); } + iterator end() { return Children.end(); } + + const_iterator begin() const { return Children.begin(); } + const_iterator end() const { return Children.end(); } + + bool dominates(const Node *N) const { + return DFSin <= N->DFSin && DFSout >= N->DFSout; + } + + bool DominatedBy(const Node *N) const { + return N->dominates(this); + } + + /// Sorts by the number of descendants. With this, you can iterate + /// through a sorted list and the first matching entry is the most + /// specific match for your basic block. 
The order provided is stable; + /// DomTreeDFS::Nodes with the same number of descendants are sorted by + /// DFS in number. + bool operator<(const Node &N) const { + unsigned spread = DFSout - DFSin; + unsigned N_spread = N.DFSout - N.DFSin; + if (spread == N_spread) return DFSin < N.DFSin; + else return DFSout - DFSin < N.DFSout - N.DFSin; + } + bool operator>(const Node &N) const { return N < *this; } + + private: + unsigned DFSin, DFSout; + BasicBlock *BB; + + std::vector Children; + }; + + // XXX: this may be slow. Instead of using "new" for each node, consider + // putting them in a vector to keep them contiguous. + explicit DomTreeDFS(DominatorTree *DT) { + std::stack > S; + + Entry = new Node; + Entry->BB = DT->getRootNode()->getBlock(); + S.push(std::make_pair(Entry, DT->getRootNode())); + + NodeMap[Entry->BB] = Entry; + + while (!S.empty()) { + std::pair &Pair = S.top(); + Node *N = Pair.first; + DomTreeNode *DTNode = Pair.second; + S.pop(); + + for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end(); + I != E; ++I) { + Node *NewNode = new Node; + NewNode->BB = (*I)->getBlock(); + N->Children.push_back(NewNode); + S.push(std::make_pair(NewNode, *I)); + + NodeMap[NewNode->BB] = NewNode; + } + } + + renumber(); + +#ifndef NDEBUG + DEBUG(dump()); +#endif + } + +#ifndef NDEBUG + virtual +#endif + ~DomTreeDFS() { + std::stack S; + + S.push(Entry); + while (!S.empty()) { + Node *N = S.top(); S.pop(); + + for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) + S.push(*I); + + delete N; + } + } + + Node *getRootNode() const { return Entry; } + + Node *getNodeForBlock(BasicBlock *BB) const { + if (!NodeMap.count(BB)) return 0; + else return const_cast(this)->NodeMap[BB]; + } + + bool dominates(Instruction *I1, Instruction *I2) { + BasicBlock *BB1 = I1->getParent(), + *BB2 = I2->getParent(); + if (BB1 == BB2) { + if (isa(I1)) return false; + if (isa(I2)) return true; + if ( isa(I1) && !isa(I2)) return true; + if (!isa(I1) && isa(I2)) return 
false; + + for (BasicBlock::const_iterator I = BB2->begin(), E = BB2->end(); + I != E; ++I) { + if (&*I == I1) return true; + else if (&*I == I2) return false; + } + assert(!"Instructions not found in parent BasicBlock?"); + } else { + Node *Node1 = getNodeForBlock(BB1), + *Node2 = getNodeForBlock(BB2); + if (!Node1 || !Node2) return false; + return Node1->dominates(Node2); + } + } + private: + void renumber() { + std::stack > S; + unsigned n = 0; + + Entry->DFSin = ++n; + S.push(std::make_pair(Entry, Entry->begin())); + + while (!S.empty()) { + std::pair &Pair = S.top(); + Node *N = Pair.first; + Node::iterator &I = Pair.second; + + if (I == N->end()) { + N->DFSout = ++n; + S.pop(); + } else { + Node *Next = *I++; + Next->DFSin = ++n; + S.push(std::make_pair(Next, Next->begin())); + } + } + } + +#ifndef NDEBUG + virtual void dump() const { + dump(*cerr.stream()); + } + + void dump(std::ostream &os) const { + os << "Predicate simplifier DomTreeDFS: \n"; + dump(Entry, 0, os); + os << "\n\n"; + } + + void dump(Node *N, int depth, std::ostream &os) const { + ++depth; + for (int i = 0; i < depth; ++i) { os << " "; } + os << "[" << depth << "] "; + + os << N->getBlock()->getName() << " (" << N->getDFSNumIn() + << ", " << N->getDFSNumOut() << ")\n"; + + for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) + dump(*I, depth, os); + } +#endif + + Node *Entry; + std::map NodeMap; + }; + // SLT SGT ULT UGT EQ // 0 1 0 1 0 -- GT 10 // 0 1 0 1 1 -- GE 11 @@ -181,20 +361,6 @@ return Rev; } - /// This is a StrictWeakOrdering predicate that sorts ETNodes by how many - /// descendants they have. With this, you can iterate through a list sorted - /// by this operation and the first matching entry is the most specific - /// match for your basic block. The order provided is stable; ETNodes with - /// the same number of children are sorted by pointer address. 
- struct VISIBILITY_HIDDEN OrderByDominance { - bool operator()(const ETNode *LHS, const ETNode *RHS) const { - unsigned LHS_spread = LHS->getDFSNumOut() - LHS->getDFSNumIn(); - unsigned RHS_spread = RHS->getDFSNumOut() - RHS->getDFSNumIn(); - if (LHS_spread != RHS_spread) return LHS_spread < RHS_spread; - else return LHS < RHS; - } - }; - /// The InequalityGraph stores the relationships between values. /// Each Value in the graph is assigned to a Node. Nodes are pointer /// comparable for equality. The caller is expected to maintain the logical @@ -203,35 +369,37 @@ /// The InequalityGraph class may invalidate Node*s after any mutator call. /// @brief The InequalityGraph stores the relationships between values. class VISIBILITY_HIDDEN InequalityGraph { - ETNode *TreeRoot; + DomTreeDFS::Node *TreeRoot; InequalityGraph(); // DO NOT IMPLEMENT InequalityGraph(InequalityGraph &); // DO NOT IMPLEMENT public: - explicit InequalityGraph(ETNode *TreeRoot) : TreeRoot(TreeRoot) {} + explicit InequalityGraph(DomTreeDFS::Node *TreeRoot) : TreeRoot(TreeRoot){} class Node; /// An Edge is contained inside a Node making one end of the edge implicit /// and contains a pointer to the other end. The edge contains a lattice - /// value specifying the relationship and an ETNode specifying the root - /// in the dominator tree to which this edge applies. + /// value specifying the relationship and an DomTreeDFS::Node specifying + /// the root in the dominator tree to which this edge applies. 
class VISIBILITY_HIDDEN Edge { public: - Edge(unsigned T, LatticeVal V, ETNode *ST) + Edge(unsigned T, LatticeVal V, DomTreeDFS::Node *ST) : To(T), LV(V), Subtree(ST) {} unsigned To; LatticeVal LV; - ETNode *Subtree; + DomTreeDFS::Node *Subtree; bool operator<(const Edge &edge) const { if (To != edge.To) return To < edge.To; - else return OrderByDominance()(Subtree, edge.Subtree); + else return *Subtree < *edge.Subtree; } + bool operator<(unsigned to) const { return To < to; } + bool operator>(unsigned to) const { return To > to; } @@ -293,7 +461,7 @@ const_iterator begin() const { return Relations.begin(); } const_iterator end() const { return Relations.end(); } - iterator find(unsigned n, ETNode *Subtree) { + iterator find(unsigned n, DomTreeDFS::Node *Subtree) { iterator E = end(); for (iterator I = std::lower_bound(begin(), E, n); I != E && I->To == n; ++I) { @@ -303,7 +471,7 @@ return E; } - const_iterator find(unsigned n, ETNode *Subtree) const { + const_iterator find(unsigned n, DomTreeDFS::Node *Subtree) const { const_iterator E = end(); for (const_iterator I = std::lower_bound(begin(), E, n); I != E && I->To == n; ++I) { @@ -321,7 +489,7 @@ /// Updates the lattice value for a given node. Create a new entry if /// one doesn't exist, otherwise it merges the values. The new lattice /// value must not be inconsistent with any previously existing value. 
- void update(unsigned n, LatticeVal R, ETNode *Subtree) { + void update(unsigned n, LatticeVal R, DomTreeDFS::Node *Subtree) { assert(validPredicate(R) && "Invalid predicate."); iterator I = find(n, Subtree); if (I == end()) { @@ -360,9 +528,9 @@ struct VISIBILITY_HIDDEN NodeMapEdge { Value *V; unsigned index; - ETNode *Subtree; + DomTreeDFS::Node *Subtree; - NodeMapEdge(Value *V, unsigned index, ETNode *Subtree) + NodeMapEdge(Value *V, unsigned index, DomTreeDFS::Node *Subtree) : V(V), index(index), Subtree(Subtree) {} bool operator==(const NodeMapEdge &RHS) const { @@ -372,7 +540,7 @@ bool operator<(const NodeMapEdge &RHS) const { if (V != RHS.V) return V < RHS.V; - return OrderByDominance()(Subtree, RHS.Subtree); + else return *Subtree < *RHS.Subtree; } bool operator<(Value *RHS) const { @@ -397,7 +565,7 @@ /// Returns the node currently representing Value V, or zero if no such /// node exists. - unsigned getNode(Value *V, ETNode *Subtree) { + unsigned getNode(Value *V, DomTreeDFS::Node *Subtree) { NodeMapType::iterator E = NodeMap.end(); NodeMapEdge Edge(V, 0, Subtree); NodeMapType::iterator I = std::lower_bound(NodeMap.begin(), E, Edge); @@ -411,7 +579,7 @@ /// getOrInsertNode - always returns a valid node index, creating a node /// to match the Value if needed. - unsigned getOrInsertNode(Value *V, ETNode *Subtree) { + unsigned getOrInsertNode(Value *V, DomTreeDFS::Node *Subtree) { if (unsigned n = getNode(V, Subtree)) return n; else @@ -420,6 +588,9 @@ /// newNode - creates a new node for a given Value and returns the index. unsigned newNode(Value *V) { + assert(!isa(V) && "BBs may not be nodes."); + assert(V->getType() != Type::VoidTy && "Void node?"); + Nodes.push_back(Node(V)); NodeMapEdge MapEntry = NodeMapEdge(V, Nodes.size(), TreeRoot); @@ -432,7 +603,7 @@ /// If the Value is in the graph, return the canonical form. Otherwise, /// return the original Value. 
- Value *canonicalize(Value *V, ETNode *Subtree) { + Value *canonicalize(Value *V, DomTreeDFS::Node *Subtree) { if (isa(V)) return V; if (unsigned n = getNode(V, Subtree)) @@ -442,7 +613,8 @@ } /// isRelatedBy - true iff n1 op n2 - bool isRelatedBy(unsigned n1, unsigned n2, ETNode *Subtree, LatticeVal LV) { + bool isRelatedBy(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, + LatticeVal LV) { if (n1 == n2) return LV & EQ_BIT; Node *N1 = node(n1); @@ -455,7 +627,7 @@ // The add* methods assume that your input is logically valid and may // assertion-fail or infinitely loop if you attempt a contradiction. - void addEquality(unsigned n, Value *V, ETNode *Subtree) { + void addEquality(unsigned n, Value *V, DomTreeDFS::Node *Subtree) { assert(canonicalize(node(n)->getValue(), Subtree) == node(n)->getValue() && "Node's 'canonical' choice isn't best within this subtree."); @@ -504,7 +676,7 @@ /// addInequality - Sets n1 op n2. /// It is also an error to call this on an inequality that is already true. - void addInequality(unsigned n1, unsigned n2, ETNode *Subtree, + void addInequality(unsigned n1, unsigned n2, DomTreeDFS::Node *Subtree, LatticeVal LV1) { assert(n1 != n2 && "A node can't be inequal to itself."); @@ -529,7 +701,7 @@ for (Node::iterator I = N1->begin(), E = N1->end(); I != E; ++I) { if (I->LV != NE && I->To != n2) { - ETNode *Local_Subtree = NULL; + DomTreeDFS::Node *Local_Subtree = NULL; if (Subtree->DominatedBy(I->Subtree)) Local_Subtree = Subtree; else if (I->Subtree->DominatedBy(Subtree)) @@ -565,7 +737,7 @@ for (Node::iterator I = N2->begin(), E = N2->end(); I != E; ++I) { if (I->LV != NE && I->To != n1) { - ETNode *Local_Subtree = NULL; + DomTreeDFS::Node *Local_Subtree = NULL; if (Subtree->DominatedBy(I->Subtree)) Local_Subtree = Subtree; else if (I->Subtree->DominatedBy(Subtree)) @@ -661,16 +833,16 @@ /// the scope of a rooted subtree in the dominator tree. 
class VISIBILITY_HIDDEN ScopedRange { public: - ScopedRange(Value *V, ConstantRange CR, ETNode *ST) + ScopedRange(Value *V, ConstantRange CR, DomTreeDFS::Node *ST) : V(V), CR(CR), Subtree(ST) {} Value *V; ConstantRange CR; - ETNode *Subtree; + DomTreeDFS::Node *Subtree; bool operator<(const ScopedRange &range) const { if (V != range.V) return V < range.V; - else return OrderByDominance()(Subtree, range.Subtree); + else return Subtree < range.Subtree; } bool operator<(const Value *value) const { @@ -697,7 +869,7 @@ iterator begin() { return Ranges.begin(); } iterator end() { return Ranges.end(); } - iterator find(Value *V, ETNode *Subtree) { + iterator find(Value *V, DomTreeDFS::Node *Subtree) { iterator E = end(); for (iterator I = std::lower_bound(begin(), E, V); I != E && I->V == V; ++I) { @@ -707,7 +879,7 @@ return E; } - void update(Value *V, ConstantRange CR, ETNode *Subtree) { + void update(Value *V, ConstantRange CR, DomTreeDFS::Node *Subtree) { assert(!CR.isEmptySet() && "Empty ConstantRange!"); if (CR.isFullSet()) return; @@ -827,7 +999,7 @@ } #ifndef NDEBUG - bool isCanonical(Value *V, ETNode *Subtree, VRPSolver *VRP); + bool isCanonical(Value *V, DomTreeDFS::Node *Subtree, VRPSolver *VRP); #endif public: @@ -838,7 +1010,8 @@ // constant it constructs the single element range, otherwise it performs // a lookup. The width W must be retrieved from typeToWidth and may not // be zero. 
- ConstantRange rangeFromValue(Value *V, ETNode *Subtree, uint32_t W) { + ConstantRange rangeFromValue(Value *V, DomTreeDFS::Node *Subtree, + uint32_t W) { if (ConstantInt *C = dyn_cast(V)) { return ConstantRange(C->getValue()); } else if (isa(V)) { @@ -863,7 +1036,8 @@ return 0; } - bool isRelatedBy(Value *V1, Value *V2, ETNode *Subtree, LatticeVal LV) { + bool isRelatedBy(Value *V1, Value *V2, DomTreeDFS::Node *Subtree, + LatticeVal LV) { uint32_t W = typeToWidth(V1->getType()); if (!W) return false; @@ -922,8 +1096,8 @@ VRPSolver *VRP); void markBlock(VRPSolver *VRP); - void mergeInto(Value **I, unsigned n, Value *New, ETNode *Subtree, - VRPSolver *VRP) { + void mergeInto(Value **I, unsigned n, Value *New, + DomTreeDFS::Node *Subtree, VRPSolver *VRP) { assert(isCanonical(New, Subtree, VRP) && "Best choice not canonical?"); uint32_t W = typeToWidth(New->getType()); @@ -943,8 +1117,8 @@ applyRange(New, Merged, Subtree, VRP); } - void applyRange(Value *V, const ConstantRange &CR, ETNode *Subtree, - VRPSolver *VRP) { + void applyRange(Value *V, const ConstantRange &CR, + DomTreeDFS::Node *Subtree, VRPSolver *VRP) { assert(isCanonical(V, Subtree, VRP) && "Value not canonical."); if (const APInt *I = CR.getSingleElement()) { @@ -970,7 +1144,8 @@ update(V, Merged, Subtree); } - void addNotEquals(Value *V1, Value *V2, ETNode *Subtree, VRPSolver *VRP) { + void addNotEquals(Value *V1, Value *V2, DomTreeDFS::Node *Subtree, + VRPSolver *VRP) { uint32_t W = typeToWidth(V1->getType()); if (!W) return; @@ -1024,8 +1199,8 @@ } } - void addInequality(Value *V1, Value *V2, ETNode *Subtree, LatticeVal LV, - VRPSolver *VRP) { + void addInequality(Value *V1, Value *V2, DomTreeDFS::Node *Subtree, + LatticeVal LV, VRPSolver *VRP) { assert(!isRelatedBy(V1, V2, Subtree, LV) && "Asked to do useless work."); assert(isCanonical(V1, Subtree, VRP) && "Value not canonical."); @@ -1123,7 +1298,7 @@ Value *LHS, *RHS; ICmpInst::Predicate Op; - BasicBlock *ContextBB; + BasicBlock *ContextBB; // 
XXX use a DomTreeDFS::Node instead Instruction *ContextInst; }; std::deque WorkList; @@ -1131,37 +1306,14 @@ InequalityGraph &IG; UnreachableBlocks &UB; ValueRanges &VR; - - ETForest *Forest; - ETNode *Top; + DomTreeDFS *DTDFS; + DomTreeDFS::Node *Top; BasicBlock *TopBB; Instruction *TopInst; bool &modified; typedef InequalityGraph::Node Node; - /// IdomI - Determines whether one Instruction dominates another. - bool IdomI(Instruction *I1, Instruction *I2) const { - BasicBlock *BB1 = I1->getParent(), - *BB2 = I2->getParent(); - if (BB1 == BB2) { - if (isa(I1)) return false; - if (isa(I2)) return true; - if (isa(I1) && !isa(I2)) return true; - if (!isa(I1) && isa(I2)) return false; - - for (BasicBlock::const_iterator I = BB1->begin(), E = BB1->end(); - I != E; ++I) { - if (&*I == I1) return true; - if (&*I == I2) return false; - } - assert(!"Instructions not found in parent BasicBlock?"); - } else { - return Forest->properlyDominates(BB1, BB2); - } - return false; - } - /// Returns true if V1 is a better canonical value than V2. 
bool compare(Value *V1, Value *V2) const { if (isa(V1)) @@ -1179,22 +1331,48 @@ if (!I1 || !I2) return V1->getNumUses() < V2->getNumUses(); - return IdomI(I1, I2); + return DTDFS->dominates(I1, I2); } // below - true if the Instruction is dominated by the current context // block or instruction bool below(Instruction *I) { - if (TopInst) - return IdomI(TopInst, I); - else { - ETNode *Node = Forest->getNodeForBlock(I->getParent()); - return Node->DominatedBy(Top); + BasicBlock *BB = I->getParent(); + if (TopInst && TopInst->getParent() == BB) { + if (isa(TopInst)) return false; + if (isa(I)) return true; + if ( isa(TopInst) && !isa(I)) return true; + if (!isa(TopInst) && isa(I)) return false; + + for (BasicBlock::const_iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ++Iter) { + if (&*Iter == TopInst) return true; + else if (&*Iter == I) return false; + } + assert(!"Instructions not found in parent BasicBlock?"); + } else { + DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); + if (!Node) return false; + return Top->dominates(Node); } } + // aboveOrBelow - true if the Instruction either dominates or is dominated + // by the current context block or instruction + bool aboveOrBelow(Instruction *I) { + BasicBlock *BB = I->getParent(); + DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); + if (!Node) return false; + + return Top == Node || Top->dominates(Node) || Node->dominates(Top); + } + bool makeEqual(Value *V1, Value *V2) { DOUT << "makeEqual(" << *V1 << ", " << *V2 << ")\n"; + DOUT << "context is "; + if (TopInst) DOUT << "I: " << *TopInst << "\n"; + else DOUT << "BB: " << TopBB->getName() + << "(" << Top->getDFSNumIn() << ")\n"; assert(V1->getType() == V2->getType() && "Can't make two values with different types equal."); @@ -1396,8 +1574,7 @@ if (i) R = IG.node(Remove[i])->getValue(); // skip n2. 
if (Instruction *I2 = dyn_cast(R)) { - if (below(I2) || - Top->DominatedBy(Forest->getNodeForBlock(I2->getParent()))) + if (aboveOrBelow(I2)) defToOps(I2); } for (Value::use_iterator UI = V2->use_begin(), UE = V2->use_end(); @@ -1405,8 +1582,7 @@ Use &TheUse = UI.getUse(); ++UI; if (Instruction *I = dyn_cast(TheUse.getUser())) { - if (below(I) || - Top->DominatedBy(Forest->getNodeForBlock(I->getParent()))) + if (aboveOrBelow(I)) opsToDef(I); } } @@ -1422,8 +1598,7 @@ Value *V = TheUse.getUser(); if (!V->use_empty()) { if (Instruction *Inst = dyn_cast(V)) { - if (below(Inst) || - Top->DominatedBy(Forest->getNodeForBlock(Inst->getParent()))) + if (aboveOrBelow(Inst)) opsToDef(Inst); } } @@ -1465,27 +1640,32 @@ public: VRPSolver(InequalityGraph &IG, UnreachableBlocks &UB, ValueRanges &VR, - ETForest *Forest, bool &modified, BasicBlock *TopBB) + DomTreeDFS *DTDFS, bool &modified, BasicBlock *TopBB) : IG(IG), UB(UB), VR(VR), - Forest(Forest), - Top(Forest->getNodeForBlock(TopBB)), + DTDFS(DTDFS), + Top(DTDFS->getNodeForBlock(TopBB)), TopBB(TopBB), TopInst(NULL), - modified(modified) {} + modified(modified) + { + assert(Top && "VRPSolver created for unreachable basic block."); + } VRPSolver(InequalityGraph &IG, UnreachableBlocks &UB, ValueRanges &VR, - ETForest *Forest, bool &modified, Instruction *TopInst) + DomTreeDFS *DTDFS, bool &modified, Instruction *TopInst) : IG(IG), UB(UB), VR(VR), - Forest(Forest), + DTDFS(DTDFS), + Top(DTDFS->getNodeForBlock(TopInst->getParent())), + TopBB(TopInst->getParent()), TopInst(TopInst), modified(modified) { - TopBB = TopInst->getParent(); - Top = Forest->getNodeForBlock(TopBB); + assert(Top && "VRPSolver created for unreachable basic block."); + assert(Top->getBlock() == TopInst->getParent() && "Context mismatch."); } bool isRelatedBy(Value *V1, Value *V2, ICmpInst::Predicate Pred) const { @@ -1514,7 +1694,7 @@ Instruction *I = NULL) { DOUT << "adding " << *V1 << " " << Pred << " " << *V2; if (I) DOUT << " context: " << *I; - else 
DOUT << " default context"; + else DOUT << " default context (" << Top->getDFSNumIn() << ")"; DOUT << "\n"; assert(V1->getType() == V2->getType() && @@ -1856,7 +2036,7 @@ Operation &O = WorkList.front(); TopInst = O.ContextInst; TopBB = O.ContextBB; - Top = Forest->getNodeForBlock(TopBB); + Top = DTDFS->getNodeForBlock(TopBB); // XXX move this into Context O.LHS = IG.canonicalize(O.LHS, Top); O.RHS = IG.canonicalize(O.RHS, Top); @@ -1933,8 +2113,7 @@ } if (Instruction *I1 = dyn_cast(O.LHS)) { - if (below(I1) || - Top->DominatedBy(Forest->getNodeForBlock(I1->getParent()))) + if (aboveOrBelow(I1)) defToOps(I1); } if (isa(O.LHS) || isa(O.LHS)) { @@ -1943,15 +2122,13 @@ Use &TheUse = UI.getUse(); ++UI; if (Instruction *I = dyn_cast(TheUse.getUser())) { - if (below(I) || - Top->DominatedBy(Forest->getNodeForBlock(I->getParent()))) + if (aboveOrBelow(I)) opsToDef(I); } } } if (Instruction *I2 = dyn_cast(O.RHS)) { - if (below(I2) || - Top->DominatedBy(Forest->getNodeForBlock(I2->getParent()))) + if (aboveOrBelow(I2)) defToOps(I2); } if (isa(O.RHS) || isa(O.RHS)) { @@ -1960,9 +2137,7 @@ Use &TheUse = UI.getUse(); ++UI; if (Instruction *I = dyn_cast(TheUse.getUser())) { - if (below(I) || - Top->DominatedBy(Forest->getNodeForBlock(I->getParent()))) - + if (aboveOrBelow(I)) opsToDef(I); } } @@ -1984,7 +2159,8 @@ } #ifndef NDEBUG - bool ValueRanges::isCanonical(Value *V, ETNode *Subtree, VRPSolver *VRP) { + bool ValueRanges::isCanonical(Value *V, DomTreeDFS::Node *Subtree, + VRPSolver *VRP) { return V == VRP->IG.canonicalize(V, Subtree); } #endif @@ -1994,14 +2170,13 @@ /// can't be equal and will solve setcc instructions when possible. /// @brief Root of the predicate simplifier optimization. 
class VISIBILITY_HIDDEN PredicateSimplifier : public FunctionPass { - DominatorTree *DT; - ETForest *Forest; + DomTreeDFS *DTDFS; bool modified; InequalityGraph *IG; UnreachableBlocks UB; ValueRanges *VR; - std::vector WorkList; + std::vector WorkList; public: static char ID; // Pass identification, replacement for typeid @@ -2012,7 +2187,6 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -2027,14 +2201,14 @@ class VISIBILITY_HIDDEN Forwards : public InstVisitor { friend class InstVisitor; PredicateSimplifier *PS; - DomTreeNode *DTNode; + DomTreeDFS::Node *DTNode; public: InequalityGraph &IG; UnreachableBlocks &UB; ValueRanges &VR; - Forwards(PredicateSimplifier *PS, DomTreeNode *DTNode) + Forwards(PredicateSimplifier *PS, DomTreeDFS::Node *DTNode) : PS(PS), DTNode(DTNode), IG(*PS->IG), UB(PS->UB), VR(*PS->VR) {} void visitTerminatorInst(TerminatorInst &TI); @@ -2055,31 +2229,30 @@ // Used by terminator instructions to proceed from the current basic // block to the next. Verifies that "current" dominates "next", // then calls visitBasicBlock. - void proceedToSuccessors(DomTreeNode *Current) { - for (DomTreeNode::iterator I = Current->begin(), + void proceedToSuccessors(DomTreeDFS::Node *Current) { + for (DomTreeDFS::Node::iterator I = Current->begin(), E = Current->end(); I != E; ++I) { WorkList.push_back(*I); } } - void proceedToSuccessor(DomTreeNode *Next) { + void proceedToSuccessor(DomTreeDFS::Node *Next) { WorkList.push_back(Next); } // Visits each instruction in the basic block. 
- void visitBasicBlock(DomTreeNode *Node) { + void visitBasicBlock(DomTreeDFS::Node *Node) { BasicBlock *BB = Node->getBlock(); - ETNode *ET = Forest->getNodeForBlock(BB); DOUT << "Entering Basic Block: " << BB->getName() - << " (" << ET->getDFSNumIn() << ")\n"; + << " (" << Node->getDFSNumIn() << ")\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - visitInstruction(I++, Node, ET); + visitInstruction(I++, Node); } } // Tries to simplify each Instruction and add new properties to // the PropertySet. - void visitInstruction(Instruction *I, DomTreeNode *DT, ETNode *ET) { + void visitInstruction(Instruction *I, DomTreeDFS::Node *DT) { DOUT << "Considering instruction " << *I << "\n"; DEBUG(IG->dump()); @@ -2094,7 +2267,7 @@ #ifndef NDEBUG // Try to replace the whole instruction. - Value *V = IG->canonicalize(I, ET); + Value *V = IG->canonicalize(I, DT); assert(V == I && "Late instruction canonicalization."); if (V != I) { modified = true; @@ -2109,7 +2282,7 @@ // Try to substitute operands. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *Oper = I->getOperand(i); - Value *V = IG->canonicalize(Oper, ET); + Value *V = IG->canonicalize(Oper, DT); assert(V == Oper && "Late operand canonicalization."); if (V != Oper) { modified = true; @@ -2130,28 +2303,25 @@ }; bool PredicateSimplifier::runOnFunction(Function &F) { - DT = &getAnalysis(); - Forest = &getAnalysis(); - + DominatorTree *DT = &getAnalysis(); + DTDFS = new DomTreeDFS(DT); TargetData *TD = &getAnalysis(); - // XXX: should only act when numbers are out of date - Forest->updateDFSNumbers(); - DOUT << "Entering Function: " << F.getName() << "\n"; modified = false; - BasicBlock *RootBlock = &F.getEntryBlock(); - IG = new InequalityGraph(Forest->getNodeForBlock(RootBlock)); + DomTreeDFS::Node *Root = DTDFS->getRootNode(); + IG = new InequalityGraph(Root); VR = new ValueRanges(TD); - WorkList.push_back(DT->getRootNode()); + WorkList.push_back(Root); do { - DomTreeNode *DTNode = WorkList.back(); + DomTreeDFS::Node *DTNode = WorkList.back(); WorkList.pop_back(); if (!UB.isDead(DTNode->getBlock())) visitBasicBlock(DTNode); } while (!WorkList.empty()); + delete DTDFS; delete VR; delete IG; @@ -2179,21 +2349,21 @@ return; } - for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end(); + for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); I != E; ++I) { BasicBlock *Dest = (*I)->getBlock(); DOUT << "Branch thinking about %" << Dest->getName() - << "(" << PS->Forest->getNodeForBlock(Dest)->getDFSNumIn() << ")\n"; + << "(" << PS->DTDFS->getNodeForBlock(Dest)->getDFSNumIn() << ")\n"; if (Dest == TrueDest) { DOUT << "(" << DTNode->getBlock()->getName() << ") true set:\n"; - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, Dest); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, Dest); VRP.add(ConstantInt::getTrue(), Condition, ICmpInst::ICMP_EQ); VRP.solve(); DEBUG(IG.dump()); } else if (Dest == FalseDest) { DOUT << "(" << DTNode->getBlock()->getName() << ") 
false set:\n"; - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, Dest); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, Dest); VRP.add(ConstantInt::getFalse(), Condition, ICmpInst::ICMP_EQ); VRP.solve(); DEBUG(IG.dump()); @@ -2209,13 +2379,13 @@ // Set the EQProperty in each of the cases BBs, and the NEProperties // in the default BB. - for (DomTreeNode::iterator I = DTNode->begin(), E = DTNode->end(); + for (DomTreeDFS::Node::iterator I = DTNode->begin(), E = DTNode->end(); I != E; ++I) { BasicBlock *BB = (*I)->getBlock(); DOUT << "Switch thinking about BB %" << BB->getName() - << "(" << PS->Forest->getNodeForBlock(BB)->getDFSNumIn() << ")\n"; + << "(" << PS->DTDFS->getNodeForBlock(BB)->getDFSNumIn() << ")\n"; - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, BB); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, BB); if (BB == SI.getDefaultDest()) { for (unsigned i = 1, e = SI.getNumCases(); i < e; ++i) if (SI.getSuccessor(i) != BB) @@ -2230,7 +2400,7 @@ } void PredicateSimplifier::Forwards::visitAllocaInst(AllocaInst &AI) { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &AI); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &AI); VRP.add(Constant::getNullValue(AI.getType()), &AI, ICmpInst::ICMP_NE); VRP.solve(); } @@ -2240,7 +2410,7 @@ // avoid "load uint* null" -> null NE null. 
if (isa(Ptr)) return; - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &LI); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &LI); VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE); VRP.solve(); } @@ -2249,13 +2419,13 @@ Value *Ptr = SI.getPointerOperand(); if (isa(Ptr)) return; - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &SI); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &SI); VRP.add(Constant::getNullValue(Ptr->getType()), Ptr, ICmpInst::ICMP_NE); VRP.solve(); } void PredicateSimplifier::Forwards::visitSExtInst(SExtInst &SI) { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &SI); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &SI); uint32_t SrcBitWidth = cast(SI.getSrcTy())->getBitWidth(); uint32_t DstBitWidth = cast(SI.getDestTy())->getBitWidth(); APInt Min(APInt::getHighBitsSet(DstBitWidth, DstBitWidth-SrcBitWidth+1)); @@ -2266,7 +2436,7 @@ } void PredicateSimplifier::Forwards::visitZExtInst(ZExtInst &ZI) { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &ZI); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &ZI); uint32_t SrcBitWidth = cast(ZI.getSrcTy())->getBitWidth(); uint32_t DstBitWidth = cast(ZI.getDestTy())->getBitWidth(); APInt Max(APInt::getLowBitsSet(DstBitWidth, SrcBitWidth)); @@ -2284,7 +2454,7 @@ case Instruction::UDiv: case Instruction::SDiv: { Value *Divisor = BO.getOperand(1); - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(Constant::getNullValue(Divisor->getType()), Divisor, ICmpInst::ICMP_NE); VRP.solve(); @@ -2295,34 +2465,34 @@ switch (ops) { default: break; case Instruction::Shl: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); VRP.solve(); } break; case Instruction::AShr: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, 
PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_SLE); VRP.solve(); } break; case Instruction::LShr: case Instruction::UDiv: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); VRP.solve(); } break; case Instruction::URem: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); VRP.solve(); } break; case Instruction::And: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_ULE); VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_ULE); VRP.solve(); } break; case Instruction::Or: { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &BO); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &BO); VRP.add(&BO, BO.getOperand(0), ICmpInst::ICMP_UGE); VRP.add(&BO, BO.getOperand(1), ICmpInst::ICMP_UGE); VRP.solve(); @@ -2348,7 +2518,7 @@ case ICmpInst::ICMP_SGE: Pred = ICmpInst::ICMP_SGT; break; } if (Pred != IC.getPredicate()) { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &IC); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &IC); if (VRP.isRelatedBy(IC.getOperand(1), IC.getOperand(0), ICmpInst::ICMP_NE)) { ++NumSnuggle; @@ -2376,7 +2546,7 @@ } if (NextVal) { - VRPSolver VRP(IG, UB, VR, PS->Forest, PS->modified, &IC); + VRPSolver VRP(IG, UB, VR, PS->DTDFS, PS->modified, &IC); if (VRP.isRelatedBy(IC.getOperand(0), NextVal, ICmpInst::getInversePredicate(Pred))) { ICmpInst *NewIC = new ICmpInst(ICmpInst::ICMP_EQ, IC.getOperand(0), From nicholas at mxc.ca Sat Jun 23 23:40:38 2007 From: nicholas at mxc.ca (Nick Lewycky) Date: Sat, 23 Jun 2007 23:40:38 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Message-ID: 
<200706240440.l5O4ecQx020343@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: PredicateSimplifier.cpp updated: 1.79 -> 1.80 --- Log message: Remove tabs. --- Diffs of the changes: (+2 -2) PredicateSimplifier.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp diff -u llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.79 llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.80 --- llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.79 Sat Jun 23 23:36:20 2007 +++ llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Sat Jun 23 23:40:16 2007 @@ -234,7 +234,7 @@ } assert(!"Instructions not found in parent BasicBlock?"); } else { - Node *Node1 = getNodeForBlock(BB1), + Node *Node1 = getNodeForBlock(BB1), *Node2 = getNodeForBlock(BB2); if (!Node1 || !Node2) return false; return Node1->dominates(Node2); @@ -1351,7 +1351,7 @@ } assert(!"Instructions not found in parent BasicBlock?"); } else { - DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); + DomTreeDFS::Node *Node = DTDFS->getNodeForBlock(BB); if (!Node) return false; return Top->dominates(Node); } From resistor at mac.com Sun Jun 24 03:18:30 2007 From: resistor at mac.com (Owen Anderson) Date: Sun, 24 Jun 2007 03:18:30 -0500 Subject: [llvm-commits] CVS: llvm/test/Transforms/GVNPRE/basic.ll Message-ID: <200706240818.l5O8IUlq000892@zion.cs.uiuc.edu> Changes in directory llvm/test/Transforms/GVNPRE: basic.ll updated: 1.2 -> 1.3 --- Log message: Rename variables to expose the fact that this test is failing. 
--- Diffs of the changes: (+16 -16) basic.ll | 32 ++++++++++++++++---------------- 1 files changed, 16 insertions(+), 16 deletions(-) Index: llvm/test/Transforms/GVNPRE/basic.ll diff -u llvm/test/Transforms/GVNPRE/basic.ll:1.2 llvm/test/Transforms/GVNPRE/basic.ll:1.3 --- llvm/test/Transforms/GVNPRE/basic.ll:1.2 Mon Jun 11 23:40:48 2007 +++ llvm/test/Transforms/GVNPRE/basic.ll Sun Jun 24 03:17:41 2007 @@ -1,38 +1,38 @@ -; RUN: llvm-as < %s | opt -gvnpre | llvm-dis | not grep {%t3 =} -; RUN: llvm-as < %s | opt -gvnpre | llvm-dis | not grep {%t9 =} +; RUN: llvm-as < %s | opt -gvnpre | llvm-dis | not grep {%z3 =} +; RUN: llvm-as < %s | opt -gvnpre | llvm-dis | not grep {%z9 =} define i32 @main() { block1: - %t1 = bitcast i32 0 to i32 ; [#uses=5] + %z1 = bitcast i32 0 to i32 ; [#uses=5] br label %block2 block2: ; preds = %block6, %block1 - %t2 = phi i32 [ %t1, %block1 ], [ %t3, %block6 ] ; [#uses=3] - %t3 = add i32 %t2, 1 ; [#uses=5] + %z2 = phi i32 [ %z1, %block1 ], [ %z3, %block6 ] ; [#uses=3] + %z3 = add i32 %z2, 1 ; [#uses=5] br i1 false, label %block3, label %block7 block3: ; preds = %block2 br i1 true, label %block4, label %block5 block4: ; preds = %block3 - %t4 = add i32 %t2, %t3 ; [#uses=1] - %t5 = bitcast i32 %t4 to i32 ; [#uses=1] - %t6 = add i32 %t1, %t5 ; [#uses=0] + %z4 = add i32 %z2, %z3 ; [#uses=1] + %z5 = bitcast i32 %z4 to i32 ; [#uses=1] + %z6 = add i32 %z1, %z5 ; [#uses=0] br label %block6 block5: ; preds = %block3 - %t7 = add i32 %t3, 1 ; [#uses=1] + %z7 = add i32 %z3, 1 ; [#uses=1] br label %block6 block6: ; preds = %block5, %block4 - %t8 = phi i32 [ %t1, %block4 ], [ %t7, %block5 ] ; [#uses=1] - %t9 = add i32 %t2, %t3 ; [#uses=2] - %t10 = add i32 %t9, %t8 ; [#uses=0] - %t11 = bitcast i32 12 to i32 ; [#uses=1] - %t12 = add i32 %t9, %t11 ; [#uses=1] - %t13 = add i32 %t12, %t3 ; [#uses=0] + %z8 = phi i32 [ %z1, %block4 ], [ %z7, %block5 ] ; [#uses=1] + %z9 = add i32 %z2, %z3 ; [#uses=2] + %z10 = add i32 %z9, %z8 ; [#uses=0] + %z11 = bitcast i32 12 to 
i32 ; [#uses=1] + %z12 = add i32 %z9, %z11 ; [#uses=1] + %z13 = add i32 %z12, %z3 ; [#uses=0] br label %block2 block7: ; preds = %block2 - ret i32 %t1 + ret i32 %z1 } From resistor at mac.com Sun Jun 24 03:42:46 2007 From: resistor at mac.com (Owen Anderson) Date: Sun, 24 Jun 2007 03:42:46 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/GVNPRE.cpp Message-ID: <200706240842.l5O8gkik001317@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: GVNPRE.cpp updated: 1.55 -> 1.56 --- Log message: Fix a silly mistake that was causing failures. --- Diffs of the changes: (+2 -2) GVNPRE.cpp | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) Index: llvm/lib/Transforms/Scalar/GVNPRE.cpp diff -u llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.55 llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.56 --- llvm/lib/Transforms/Scalar/GVNPRE.cpp:1.55 Fri Jun 22 16:31:16 2007 +++ llvm/lib/Transforms/Scalar/GVNPRE.cpp Sun Jun 24 03:42:24 2007 @@ -1013,8 +1013,8 @@ - unsigned ret = buildsets_anticin(BB, anticOut, generatedTemporaries[BB], - generatedExpressions[BB], visited); + unsigned ret = buildsets_anticin(BB, anticOut,generatedExpressions[BB], + generatedTemporaries[BB], visited); if (ret == 0) { changed = true; From nicholas at mxc.ca Sun Jun 24 15:14:47 2007 From: nicholas at mxc.ca (Nick Lewycky) Date: Sun, 24 Jun 2007 15:14:47 -0500 Subject: [llvm-commits] CVS: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Message-ID: <200706242014.l5OKElKr030253@zion.cs.uiuc.edu> Changes in directory llvm/lib/Transforms/Scalar: PredicateSimplifier.cpp updated: 1.80 -> 1.81 --- Log message: Fix value ranges. 
--- Diffs of the changes: (+1 -1) PredicateSimplifier.cpp | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Index: llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp diff -u llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.80 llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.81 --- llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp:1.80 Sat Jun 23 23:40:16 2007 +++ llvm/lib/Transforms/Scalar/PredicateSimplifier.cpp Sun Jun 24 15:14:22 2007 @@ -842,7 +842,7 @@ bool operator<(const ScopedRange &range) const { if (V != range.V) return V < range.V; - else return Subtree < range.Subtree; + else return *Subtree < *range.Subtree; } bool operator<(const Value *value) const { From romixlev at yahoo.com Sun Jun 24 16:18:06 2007 From: romixlev at yahoo.com (Roman Levenstein) Date: Sun, 24 Jun 2007 23:18:06 +0200 (CEST) Subject: [llvm-commits] [LLVMdev] BigBlock register allocator Message-ID: <50075.99914.qm@web56302.mail.re3.yahoo.com> Hi Duraid, Here is the promised patch. It makes the BigBlock regalloc faster (almost as fast as Local), removes some unused code derived from the Local regalloc and also fixes one or two bugs. More precisely: 1) InsnTimes map was removed completely 2) For each vector of VReg read occurrences in VRegReadTable, a current index is maintained. This allows inspecting only those occurrences that are in the future and avoids looking at the old references. 3) PhysRegsUseOrder is not used any more 4) There was a bug in your code when an instruction was changed due to the memory operands folding. The new instruction always had an incorrect time (i.e. 0), because it was not found in the InsnTimes map. This led to wrong results in chooseReg(). Fixed. Duraid, I am sending you the whole file, since there are quite a few changes. Please review it and commit if you think it is OK. I'm looking forward to getting from you some examples of very big BBs for testing. 
-Roman > --- Roman Levenstein schrieb: > > > Hi Duraid, > > > > > Hi everyone, > > > > > > Quick summary: > > > > > > LLVM now has a new register allocator particularly suitable for > > > compiling (very) large, machine-generated functions. > > > > Congrats! Very good job! > > > > > Longer story: > > > > > > I've recently been using LLVM in an application that involves > > JITing > > > > > > fairly large functions that have no control flow - they're just > > flat > > > sequences of instructions, anywhere from 100 to 10000+ in size. > > (The > > > control flow is all in the host program, which works out which > > > monster function to call and when.) > > > > > The default (linearscan) register allocator wasn't doing a good > > job, > > as > > > it doesn't (yet) have live range splitting. It would quickly use > > all > > > available registers and then get stuck, using only a single > > register > > > (and the stack, a lot!) for hundreds or thousands of instructions > > at > > > a time, greatly slowing (+bloating) the code. > > > > True. I'm working on the version of the linear scan based on > Wimmer's > > thesis. It supports live range splitting. I'd like to compare it > with > > yours. Do you have any good examples of those fairly large > functions > > that are just flat sequences of instructions, anywhere from 100 to > > 10000+ in size??? It would be nice, if you could send me those test > > cases (as C or ll files). I could use it then as a basis for a > > comparison and report about results. > > > > > The good news is the new "BigBlock" allocator turns out to > > > produce even better code than the local allocator when blocks > are > > very > > > large. We're talking a +10~20% speed boost on average. (If your > > basic > > > > > blocks are small, or there's not much register pressure, you'll > > > actually get the same code out of both local and BigBlock.) > > > > Do you have numbers comparing it to the current version of the > LLVM's > > linear scan? 
The win of your allocator over the linear scan should > be > > even better, I guess. > > > > > > > While BigBlock isn't (and never will be) as fast as the local > > > allocator, it's not much slower, doesn't use much memory, and is > > > certainly faster than linearscan. So if you're compiling very > > large, > > > (probably) machine-generated blocks of straight-line code, give > the > > > > > Local and BigBlock allocators a try, especially if you're JITing > > > things and compile time is important. > > > > I looked at your code. And I see some things that could be > > significantly sped up, e.g. > > - InsnTimes handling. I have the feeling, this map can be > eliminated > > completely. > > - use of the VRegReadTable. The vector of read occurrences can be > > shortened every time, you processed the corresponding instruction. > > This > > makes it shorter and makes searches inside this vector faster, thus > > making chooseReg much faster. Probably also some other > optimizations > > can be applied to the chooseReg function. > > - PhysRegsUseOrder - you remove some elements from the middle of > > this > > vector in removePhysReg. This is not a very efficient operation on > > the > > vectors, since it needs to copy the tail of the vector. I think > using > > a > > list data-structure could be much more efficient for this purpose > > > > I think these changes may significantly improve the performance of > > your BigBlock register allocator. I'll try to come up with some > more > > concrete proposals or even patches over the week-end or next week. > > > > -Roman __________________________________________________ Do You Yahoo!? Sie sind Spam leid? Yahoo! Mail verfügt über einen herausragenden Schutz gegen Massenmails. http://mail.yahoo.com -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: RegAllocBigBlock.cpp Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20070624/c3542125/attachment.pl