From lhames at gmail.com Mon Dec 14 00:49:42 2009 From: lhames at gmail.com (Lang Hames) Date: Mon, 14 Dec 2009 06:49:42 -0000 Subject: [llvm-commits] [llvm] r91273 - in /llvm/trunk: include/llvm/CodeGen/CalcSpillWeights.h lib/CodeGen/CalcSpillWeights.cpp lib/CodeGen/PreAllocSplitting.cpp lib/CodeGen/RegAllocLinearScan.cpp lib/CodeGen/RegAllocPBQP.cpp lib/CodeGen/SimpleRegisterCoalescing.cpp lib/CodeGen/SimpleRegisterCoalescing.h Message-ID: <200912140649.nBE6ngqC000877@zion.cs.uiuc.edu> Author: lhames Date: Mon Dec 14 00:49:42 2009 New Revision: 91273 URL: http://llvm.org/viewvc/llvm-project?rev=91273&view=rev Log: Moved spill weight calculation out of SimpleRegisterCoalescing and into its own pass: CalculateSpillWeights. Added: llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp Modified: llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Added: llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h?rev=91273&view=auto ============================================================================== --- llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h (added) +++ llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h Mon Dec 14 00:49:42 2009 @@ -0,0 +1,39 @@ +//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H +#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + + class LiveInterval; + + /// CalculateSpillWeights - Compute spill weights for all virtual register + /// live intervals. + class CalculateSpillWeights : public MachineFunctionPass { + public: + static char ID; + + CalculateSpillWeights() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + private: + /// Returns true if the given live interval is zero length. + bool isZeroLengthInterval(LiveInterval *li) const; + }; + +} + +#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H Added: llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp?rev=91273&view=auto ============================================================================== --- llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp (added) +++ llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp Mon Dec 14 00:49:42 2009 @@ -0,0 +1,154 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "calcspillweights" + +#include "llvm/Function.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +char CalculateSpillWeights::ID = 0; +static RegisterPass X("calcspillweights", + "Calculate spill weights"); + +void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired(); + au.addRequired(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { + + DEBUG(errs() << "********** Compute Spill Weights **********\n" + << "********** Function: " + << fn.getFunction()->getName() << '\n'); + + LiveIntervals *lis = &getAnalysis(); + MachineLoopInfo *loopInfo = &getAnalysis(); + const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); + MachineRegisterInfo *mri = &fn.getRegInfo(); + + SmallSet processed; + for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); + MachineLoop* loop = loopInfo->getLoopFor(mbb); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExiting = loop ? 
loop->isLoopExiting(mbb) : false; + + for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ++mii) { + const MachineInstr *mi = mii; + if (tii->isIdentityCopy(*mi)) + continue; + + if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + continue; + + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = mi->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. + if (!processed.insert(reg)) + continue; + + bool hasDef = mopi.isDef(); + bool hasUse = !hasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = mi->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != reg) + continue; + hasDef |= mopj.isDef(); + hasUse |= mopj.isUse(); + if (hasDef && hasUse) + break; + } + + LiveInterval &regInt = lis->getInterval(reg); + float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); + if (hasDef && isExiting) { + // Looks like this is a loop count variable update. + SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); + const LiveRange *dlr = + lis->getInterval(reg).getLiveRangeContaining(defIdx); + if (dlr->end > mbbEnd) + weight *= 3.0F; + } + regInt.weight += weight; + } + processed.clear(); + } + } + + for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { + LiveInterval &li = *I->second; + if (TargetRegisterInfo::isVirtualRegister(li.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. 
+ if (isZeroLengthInterval(&li)) { + li.weight = HUGE_VALF; + continue; + } + + bool isLoad = false; + SmallVector spillIs; + if (lis->isReMaterializable(li, spillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + if (isLoad) + li.weight *= 0.9F; + else + li.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair Hint = mri->getRegAllocationHint(li.reg); + if (Hint.first || Hint.second) + li.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; + } + } + + return false; +} + +/// Returns true if the given live interval is zero length. 
+bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { + for (LiveInterval::Ranges::const_iterator + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) + if (i->end.getPrevIndex() > i->start) + return false; + return true; +} Modified: llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp?rev=91273&r1=91272&r2=91273&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp (original) +++ llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp Mon Dec 14 00:49:42 2009 @@ -16,6 +16,7 @@ #define DEBUG_TYPE "pre-alloc-split" #include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" @@ -104,6 +105,7 @@ AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); if (StrongPHIElim) AU.addPreservedID(StrongPHIEliminationID); else Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=91273&r1=91272&r2=91273&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Mon Dec 14 00:49:42 2009 @@ -16,6 +16,7 @@ #include "VirtRegRewriter.h" #include "Spiller.h" #include "llvm/Function.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -187,6 +188,7 @@ // Make sure PassManager knows which analyses to make available // to coalescing and which analyses coalescing invalidates. 
AU.addRequiredTransitive(); + AU.addRequired(); if (PreSplitIntervals) AU.addRequiredID(PreAllocSplittingID); AU.addRequired(); Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=91273&r1=91272&r2=91273&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Mon Dec 14 00:49:42 2009 @@ -36,6 +36,7 @@ #include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -90,6 +91,7 @@ au.addRequired(); //au.addRequiredID(SplitCriticalEdgesID); au.addRequired(); + au.addRequired(); au.addRequired(); au.addPreserved(); au.addRequired(); Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=91273&r1=91272&r2=91273&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original) +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Dec 14 00:49:42 2009 @@ -2622,114 +2622,6 @@ ReMatDefs.clear(); } -/// Returns true if the given live interval is zero length. 
-static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { - for (LiveInterval::Ranges::const_iterator - i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end.getPrevIndex() > i->start) - return false; - return true; -} - - -void SimpleRegisterCoalescing::CalculateSpillWeights() { - SmallSet Processed; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* MBB = mbbi; - SlotIndex MBBEnd = li_->getMBBEndIdx(MBB); - MachineLoop* loop = loopInfo->getLoopFor(MBB); - unsigned loopDepth = loop ? loop->getLoopDepth() : 0; - bool isExiting = loop ? loop->isLoopExiting(MBB) : false; - - for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end(); - mii != mie; ++mii) { - const MachineInstr *MI = mii; - if (tii_->isIdentityCopy(*MI)) - continue; - - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - continue; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mopi = MI->getOperand(i); - if (!mopi.isReg() || mopi.getReg() == 0) - continue; - unsigned Reg = mopi.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) - continue; - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. - if (!Processed.insert(Reg)) - continue; - - bool HasDef = mopi.isDef(); - bool HasUse = !HasDef; - for (unsigned j = i+1; j != e; ++j) { - const MachineOperand &mopj = MI->getOperand(j); - if (!mopj.isReg() || mopj.getReg() != Reg) - continue; - HasDef |= mopj.isDef(); - HasUse |= mopj.isUse(); - if (HasDef && HasUse) - break; - } - - LiveInterval &RegInt = li_->getInterval(Reg); - float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); - if (HasDef && isExiting) { - // Looks like this is a loop count variable update. 
- SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR->end > MBBEnd) - Weight *= 3.0F; - } - RegInt.weight += Weight; - } - Processed.clear(); - } - } - - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI, li_)) { - LI.weight = HUGE_VALF; - continue; - } - - bool isLoad = false; - SmallVector SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. 
- LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } -} - - bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { mf_ = &fn; mri_ = &fn.getRegInfo(); @@ -2860,8 +2752,6 @@ } } - CalculateSpillWeights(); - DEBUG(dump()); return true; } Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h?rev=91273&r1=91272&r2=91273&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h (original) +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Mon Dec 14 00:49:42 2009 @@ -244,10 +244,6 @@ MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - /// CalculateSpillWeights - Compute spill weights for all virtual register - /// live intervals. - void CalculateSpillWeights(); - void printRegName(unsigned reg) const; }; From isanbard at gmail.com Mon Dec 14 00:51:20 2009 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 14 Dec 2009 06:51:20 -0000 Subject: [llvm-commits] [llvm] r91274 - in /llvm/trunk: include/llvm/Target/TargetInstrInfo.h lib/CodeGen/MachineBasicBlock.cpp lib/Target/X86/X86InstrInfo.cpp Message-ID: <200912140651.nBE6pKcZ000938@zion.cs.uiuc.edu> Author: void Date: Mon Dec 14 00:51:19 2009 New Revision: 91274 URL: http://llvm.org/viewvc/llvm-project?rev=91274&view=rev Log: Whitespace changes, comment clarification. No functional changes. 
Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=91274&r1=91273&r2=91274&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Mon Dec 14 00:51:19 2009 @@ -286,11 +286,10 @@ /// just return false, leaving TBB/FBB null. /// 2. If this block ends with only an unconditional branch, it sets TBB to be /// the destination block. - /// 3. If this block ends with an conditional branch and it falls through to - /// a successor block, it sets TBB to be the branch destination block and - /// a list of operands that evaluate the condition. These - /// operands can be passed to other TargetInstrInfo methods to create new - /// branches. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. /// 4. 
If this block ends with a conditional branch followed by an /// unconditional branch, it returns the 'true' destination in TBB, the /// 'false' destination in FBB, and a list of operands that evaluate the Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=91274&r1=91273&r2=91274&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Mon Dec 14 00:51:19 2009 @@ -483,16 +483,16 @@ MachineFunction::iterator FallThru = llvm::next(MachineFunction::iterator(this)); - // If this block ends with a conditional branch that falls through to its - // successor, set DestB as the successor. if (isCond) { + // If this block ends with a conditional branch that falls through to its + // successor, set DestB as the successor. if (DestB == 0 && FallThru != getParent()->end()) { DestB = FallThru; AddedFallThrough = true; } } else { // If this is an unconditional branch with no explicit dest, it must just be - // a fallthrough into DestB. + // a fallthrough into DestA. if (DestA == 0 && FallThru != getParent()->end()) { DestA = FallThru; AddedFallThrough = true; Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=91274&r1=91273&r2=91274&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Dec 14 00:51:19 2009 @@ -1624,14 +1624,17 @@ MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; - // Working from the bottom, when we see a non-terminator - // instruction, we're done. + + // Working from the bottom, when we see a non-terminator instruction, we're + // done. 
if (!isBrAnalysisUnpredicatedTerminator(I, *this)) break; - // A terminator that isn't a branch can't easily be handled - // by this analysis. + + // A terminator that isn't a branch can't easily be handled by this + // analysis. if (!I->getDesc().isBranch()) return true; + // Handle unconditional branches. if (I->getOpcode() == X86::JMP) { if (!AllowModify) { @@ -1642,8 +1645,10 @@ // If the block has any instructions after a JMP, delete them. while (llvm::next(I) != MBB.end()) llvm::next(I)->eraseFromParent(); + Cond.clear(); FBB = 0; + // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { TBB = 0; @@ -1651,14 +1656,17 @@ I = MBB.end(); continue; } + // TBB is used to indicate the unconditinal destination. TBB = I->getOperand(0).getMBB(); continue; } + // Handle conditional branches. X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. + // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { FBB = TBB; @@ -1666,24 +1674,26 @@ Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } - // Handle subsequent conditional branches. Only handle the case - // where all conditional branches branch to the same destination - // and their condition opcodes fit one of the special - // multi-branch idioms. + + // Handle subsequent conditional branches. Only handle the case where all + // conditional branches branch to the same destination and their condition + // opcodes fit one of the special multi-branch idioms. assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to - // the same destination. + + // Only handle the case where all conditional branches branch to the same + // destination. 
if (TBB != I->getOperand(0).getMBB()) return true; - X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); + // If the conditions are the same, we can leave them alone. + X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); if (OldBranchCode == BranchCode) continue; - // If they differ, see if they fit one of the known patterns. - // Theoretically we could handle more patterns here, but - // we shouldn't expect to see them if instruction selection - // has done a reasonable job. + + // If they differ, see if they fit one of the known patterns. Theoretically, + // we could handle more patterns here, but we shouldn't expect to see them + // if instruction selection has done a reasonable job. if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_E) || (OldBranchCode == X86::COND_E && @@ -1696,6 +1706,7 @@ BranchCode = X86::COND_NE_OR_P; else return true; + // Update the MachineOperand. Cond[0].setImm(BranchCode); } From lhames at gmail.com Mon Dec 14 01:43:26 2009 From: lhames at gmail.com (Lang Hames) Date: Mon, 14 Dec 2009 07:43:26 -0000 Subject: [llvm-commits] [llvm] r91275 - /llvm/trunk/lib/CodeGen/CMakeLists.txt Message-ID: <200912140743.nBE7hQ3W002834@zion.cs.uiuc.edu> Author: lhames Date: Mon Dec 14 01:43:25 2009 New Revision: 91275 URL: http://llvm.org/viewvc/llvm-project?rev=91275&view=rev Log: Added CalcSpillWeights to CMakeLists. 
Modified: llvm/trunk/lib/CodeGen/CMakeLists.txt Modified: llvm/trunk/lib/CodeGen/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CMakeLists.txt?rev=91275&r1=91274&r2=91275&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/CMakeLists.txt (original) +++ llvm/trunk/lib/CodeGen/CMakeLists.txt Mon Dec 14 01:43:25 2009 @@ -1,6 +1,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp BranchFolding.cpp + CalcSpillWeights.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp From edwintorok at gmail.com Mon Dec 14 06:38:19 2009 From: edwintorok at gmail.com (Torok Edwin) Date: Mon, 14 Dec 2009 12:38:19 -0000 Subject: [llvm-commits] [llvm] r91276 - /llvm/trunk/lib/System/Host.cpp Message-ID: <200912141238.nBECcJlE028673@zion.cs.uiuc.edu> Author: edwin Date: Mon Dec 14 06:38:18 2009 New Revision: 91276 URL: http://llvm.org/viewvc/llvm-project?rev=91276&view=rev Log: Add "generic" fallback. gcc warned that the function may not have a return value, indeed for non-intel and non-amd X86 CPUs it is right (VIA, etc.). 
Modified: llvm/trunk/lib/System/Host.cpp Modified: llvm/trunk/lib/System/Host.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Host.cpp?rev=91276&r1=91275&r2=91276&view=diff ============================================================================== --- llvm/trunk/lib/System/Host.cpp (original) +++ llvm/trunk/lib/System/Host.cpp Mon Dec 14 06:38:18 2009 @@ -292,6 +292,7 @@ return "generic"; } } + return "generic"; } #else std::string sys::getHostCPUName() { From ssen at apple.com Mon Dec 14 08:15:15 2009 From: ssen at apple.com (Shantonu Sen) Date: Mon, 14 Dec 2009 14:15:15 -0000 Subject: [llvm-commits] [llvm] r91277 - /llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Message-ID: <200912141415.nBEEFFpr031877@zion.cs.uiuc.edu> Author: ssen Date: Mon Dec 14 08:15:15 2009 New Revision: 91277 URL: http://llvm.org/viewvc/llvm-project?rev=91277&view=rev Log: Remove empty file completely Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91276&view=auto ============================================================================== (empty) From ssen at apple.com Mon Dec 14 08:18:23 2009 From: ssen at apple.com (Shantonu Sen) Date: Mon, 14 Dec 2009 06:18:23 -0800 Subject: [llvm-commits] [llvm] r91268 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <200912140511.nBE5B3Uj029684@zion.cs.uiuc.edu> References: <200912140511.nBE5B3Uj029684@zion.cs.uiuc.edu> Message-ID: <05B54FC2-9B70-4DF6-94D2-9AB0B370F05D@apple.com> There was a 0-byte test case left, which caused a test suite error. 
File completely removed in r91277 Shantonu Sent from my MacBook UNRESOLVED: LLVM::Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (4776 of 4969) ******************** TEST 'LLVM::Transforms/ScalarRepl/2009-12-11-NeonTypes.ll' FAILED ******************** Test has no run line! ******************** On Dec 13, 2009, at 9:11 PM, Chris Lattner wrote: > Author: lattner > Date: Sun Dec 13 23:11:02 2009 > New Revision: 91268 > > URL: http://llvm.org/viewvc/llvm-project?rev=91268&view=rev > Log: > revert r91184, because it causes a crash on a .bc file I just > sent to Bob. > > Modified: > llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp > llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > > Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91268&r1=91267&r2=91268&view=diff > > ============================================================================== > --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) > +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Sun Dec 13 23:11:02 2009 > @@ -102,27 +102,25 @@ > > int isSafeAllocaToScalarRepl(AllocaInst *AI); > > - void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > - uint64_t ArrayOffset, AllocaInfo &Info); > - void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, > - uint64_t &ArrayOffset, AllocaInfo &Info); > - void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, > - uint64_t MemSize, const Type *MemOpType, bool isStore, > - AllocaInfo &Info); > - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); > - unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); > + void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, > + AllocaInfo &Info); > + void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, > + AllocaInfo &Info); > + void 
isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, > + unsigned OpNo, AllocaInfo &Info); > + void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, > + AllocaInfo &Info); > > void DoScalarReplacement(AllocaInst *AI, > std::vector &WorkList); > void CleanupGEP(GetElementPtrInst *GEP); > - void CleanupAllocaUsers(Value *V); > + void CleanupAllocaUsers(AllocaInst *AI); > AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); > > - void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > - SmallVector &NewElts); > - void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, > - SmallVector &NewElts); > - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, > + void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, > + SmallVector &NewElts); > + > + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, > AllocaInst *AI, > SmallVector &NewElts); > void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, > @@ -362,12 +360,176 @@ > } > } > > - // Now that we have created the new alloca instructions, rewrite all the > - // uses of the old alloca. > - RewriteForScalarRepl(AI, AI, 0, ElementAllocas); > + // Now that we have created the alloca instructions that we want to use, > + // expand the getelementptr instructions to use them. 
> + while (!AI->use_empty()) { > + Instruction *User = cast(AI->use_back()); > + if (BitCastInst *BCInst = dyn_cast(User)) { > + RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); > + BCInst->eraseFromParent(); > + continue; > + } > + > + // Replace: > + // %res = load { i32, i32 }* %alloc > + // with: > + // %load.0 = load i32* %alloc.0 > + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 > + // %load.1 = load i32* %alloc.1 > + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 > + // (Also works for arrays instead of structs) > + if (LoadInst *LI = dyn_cast(User)) { > + Value *Insert = UndefValue::get(LI->getType()); > + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { > + Value *Load = new LoadInst(ElementAllocas[i], "load", LI); > + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); > + } > + LI->replaceAllUsesWith(Insert); > + LI->eraseFromParent(); > + continue; > + } > + > + // Replace: > + // store { i32, i32 } %val, { i32, i32 }* %alloc > + // with: > + // %val.0 = extractvalue { i32, i32 } %val, 0 > + // store i32 %val.0, i32* %alloc.0 > + // %val.1 = extractvalue { i32, i32 } %val, 1 > + // store i32 %val.1, i32* %alloc.1 > + // (Also works for arrays instead of structs) > + if (StoreInst *SI = dyn_cast(User)) { > + Value *Val = SI->getOperand(0); > + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { > + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); > + new StoreInst(Extract, ElementAllocas[i], SI); > + } > + SI->eraseFromParent(); > + continue; > + } > + > + GetElementPtrInst *GEPI = cast(User); > + // We now know that the GEP is of the form: GEP , 0, > + unsigned Idx = > + (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); > + > + assert(Idx < ElementAllocas.size() && "Index out of range?"); > + AllocaInst *AllocaToUse = ElementAllocas[Idx]; > + > + Value *RepValue; > + if (GEPI->getNumOperands() == 3) { > + // Do not insert a new getelementptr 
instruction with zero indices, only > + // to have it optimized out later. > + RepValue = AllocaToUse; > + } else { > + // We are indexing deeply into the structure, so we still need a > + // getelement ptr instruction to finish the indexing. This may be > + // expanded itself once the worklist is rerun. > + // > + SmallVector NewArgs; > + NewArgs.push_back(Constant::getNullValue( > + Type::getInt32Ty(AI->getContext()))); > + NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); > + RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), > + NewArgs.end(), "", GEPI); > + RepValue->takeName(GEPI); > + } > + > + // If this GEP is to the start of the aggregate, check for memcpys. > + if (Idx == 0 && GEPI->hasAllZeroIndices()) > + RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); > + > + // Move all of the users over to the new GEP. > + GEPI->replaceAllUsesWith(RepValue); > + // Delete the old GEP > + GEPI->eraseFromParent(); > + } > + > + // Finally, delete the Alloca instruction > + AI->eraseFromParent(); > NumReplaced++; > } > - > + > +/// isSafeElementUse - Check to see if this use is an allowed use for a > +/// getelementptr instruction of an array aggregate allocation. isFirstElt > +/// indicates whether Ptr is known to the start of the aggregate. 
> +void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, > + AllocaInfo &Info) { > + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); > + I != E; ++I) { > + Instruction *User = cast(*I); > + switch (User->getOpcode()) { > + case Instruction::Load: break; > + case Instruction::Store: > + // Store is ok if storing INTO the pointer, not storing the pointer > + if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); > + break; > + case Instruction::GetElementPtr: { > + GetElementPtrInst *GEP = cast(User); > + bool AreAllZeroIndices = isFirstElt; > + if (GEP->getNumOperands() > 1 && > + (!isa(GEP->getOperand(1)) || > + !cast(GEP->getOperand(1))->isZero())) > + // Using pointer arithmetic to navigate the array. > + return MarkUnsafe(Info); > + > + // Verify that any array subscripts are in range. > + for (gep_type_iterator GEPIt = gep_type_begin(GEP), > + E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { > + // Ignore struct elements, no extra checking needed for these. > + if (isa(*GEPIt)) > + continue; > + > + // This GEP indexes an array. Verify that this is an in-range > + // constant integer. Specifically, consider A[0][i]. We cannot know that > + // the user isn't doing invalid things like allowing i to index an > + // out-of-range subscript that accesses A[1]. Because of this, we have > + // to reject SROA of any accesses into structs where any of the > + // components are variables. > + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); > + if (!IdxVal) return MarkUnsafe(Info); > + > + // Are all indices still zero? 
> + AreAllZeroIndices &= IdxVal->isZero(); > + > + if (const ArrayType *AT = dyn_cast(*GEPIt)) { > + if (IdxVal->getZExtValue() >= AT->getNumElements()) > + return MarkUnsafe(Info); > + } else if (const VectorType *VT = dyn_cast(*GEPIt)) { > + if (IdxVal->getZExtValue() >= VT->getNumElements()) > + return MarkUnsafe(Info); > + } > + } > + > + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); > + if (Info.isUnsafe) return; > + break; > + } > + case Instruction::BitCast: > + if (isFirstElt) { > + isSafeUseOfBitCastedAllocation(cast(User), AI, Info); > + if (Info.isUnsafe) return; > + break; > + } > + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); > + return MarkUnsafe(Info); > + case Instruction::Call: > + if (MemIntrinsic *MI = dyn_cast(User)) { > + if (isFirstElt) { > + isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); > + if (Info.isUnsafe) return; > + break; > + } > + } > + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); > + return MarkUnsafe(Info); > + default: > + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); > + return MarkUnsafe(Info); > + } > + } > + return; // All users look ok :) > +} > + > /// AllUsersAreLoads - Return true if all users of this value are loads. > static bool AllUsersAreLoads(Value *Ptr) { > for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); > @@ -377,116 +539,72 @@ > return true; > } > > -/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to > -/// performing scalar replacement of alloca AI. The results are flagged in > -/// the Info parameter. Offset and ArrayOffset indicate the position within > -/// AI that is referenced by this instruction. 
> -void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > - uint64_t ArrayOffset, AllocaInfo &Info) { > - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { > - Instruction *User = cast(*UI); > - > - if (BitCastInst *BC = dyn_cast(User)) { > - isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); > - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { > - uint64_t GEPArrayOffset = ArrayOffset; > - uint64_t GEPOffset = Offset; > - isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); > - if (!Info.isUnsafe) > - isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); > - } else if (MemIntrinsic *MI = dyn_cast(UI)) { > - ConstantInt *Length = dyn_cast(MI->getLength()); > - if (Length) > - isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, > - UI.getOperandNo() == 1, Info); > - else > - MarkUnsafe(Info); > - } else if (LoadInst *LI = dyn_cast(User)) { > - if (!LI->isVolatile()) { > - const Type *LIType = LI->getType(); > - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), > - LIType, false, Info); > - } else > - MarkUnsafe(Info); > - } else if (StoreInst *SI = dyn_cast(User)) { > - // Store is ok if storing INTO the pointer, not storing the pointer > - if (!SI->isVolatile() && SI->getOperand(0) != I) { > - const Type *SIType = SI->getOperand(0)->getType(); > - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), > - SIType, true, Info); > - } else > - MarkUnsafe(Info); > - } else if (isa(UI)) { > - // If one user is DbgInfoIntrinsic then check if all users are > - // DbgInfoIntrinsics. 
> - if (OnlyUsedByDbgInfoIntrinsics(I)) { > - Info.needsCleanup = true; > - return; > - } > - MarkUnsafe(Info); > - } else { > - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); > - MarkUnsafe(Info); > - } > - if (Info.isUnsafe) return; > - } > -} > +/// isSafeUseOfAllocation - Check if this user is an allowed use for an > +/// aggregate allocation. > +void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, > + AllocaInfo &Info) { > + if (BitCastInst *C = dyn_cast(User)) > + return isSafeUseOfBitCastedAllocation(C, AI, Info); > + > + if (LoadInst *LI = dyn_cast(User)) > + if (!LI->isVolatile()) > + return;// Loads (returning a first class aggregrate) are always rewritable > + > + if (StoreInst *SI = dyn_cast(User)) > + if (!SI->isVolatile() && SI->getOperand(0) != AI) > + return;// Store is ok if storing INTO the pointer, not storing the pointer > + > + GetElementPtrInst *GEPI = dyn_cast(User); > + if (GEPI == 0) > + return MarkUnsafe(Info); > > -/// isSafeGEP - Check if a GEP instruction can be handled for scalar > -/// replacement. It is safe when all the indices are constant, in-bounds > -/// references, and when the resulting offset corresponds to an element within > -/// the alloca type. The results are flagged in the Info parameter. Upon > -/// return, Offset is adjusted as specified by the GEP indices. For the > -/// special case of a variable index to a 2-element array, ArrayOffset is set > -/// to the array element size. > -void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, > - uint64_t &Offset, uint64_t &ArrayOffset, > - AllocaInfo &Info) { > - gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); > - if (GEPIt == E) > - return; > + gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); > > - // The first GEP index must be zero. > - if (!isa(GEPIt.getOperand()) || > - !cast(GEPIt.getOperand())->isZero()) > + // The GEP is not safe to transform if not of the form "GEP , 0, ". 
> + if (I == E || > + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { > return MarkUnsafe(Info); > - if (++GEPIt == E) > - return; > + } > + > + ++I; > + if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? > > + bool IsAllZeroIndices = true; > + > // If the first index is a non-constant index into an array, see if we can > // handle it as a special case. > - const Type *ArrayEltTy = 0; > - if (ArrayOffset == 0 && Offset == 0) { > - if (const ArrayType *AT = dyn_cast(*GEPIt)) { > - if (!isa(GEPIt.getOperand())) { > - uint64_t NumElements = AT->getNumElements(); > - > - // If this is an array index and the index is not constant, we cannot > - // promote... that is unless the array has exactly one or two elements > - // in it, in which case we CAN promote it, but we have to canonicalize > - // this out if this is the only problem. > - if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) > - return MarkUnsafe(Info); > + if (const ArrayType *AT = dyn_cast(*I)) { > + if (!isa(I.getOperand())) { > + IsAllZeroIndices = 0; > + uint64_t NumElements = AT->getNumElements(); > + > + // If this is an array index and the index is not constant, we cannot > + // promote... that is unless the array has exactly one or two elements in > + // it, in which case we CAN promote it, but we have to canonicalize this > + // out if this is the only problem. > + if ((NumElements == 1 || NumElements == 2) && > + AllUsersAreLoads(GEPI)) { > Info.needsCleanup = true; > - ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); > - ArrayEltTy = AT->getElementType(); > - ++GEPIt; > + return; // Canonicalization required! > } > + return MarkUnsafe(Info); > } > } > - > + > // Walk through the GEP type indices, checking the types that this indexes > // into. > - for (; GEPIt != E; ++GEPIt) { > + for (; I != E; ++I) { > // Ignore struct elements, no extra checking needed for these. 
> - if (isa(*GEPIt)) > + if (isa(*I)) > continue; > + > + ConstantInt *IdxVal = dyn_cast(I.getOperand()); > + if (!IdxVal) return MarkUnsafe(Info); > > - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); > - if (!IdxVal) > - return MarkUnsafe(Info); > - > - if (const ArrayType *AT = dyn_cast(*GEPIt)) { > + // Are all indices still zero? > + IsAllZeroIndices &= IdxVal->isZero(); > + > + if (const ArrayType *AT = dyn_cast(*I)) { > // This GEP indexes an array. Verify that this is an in-range constant > // integer. Specifically, consider A[0][i]. We cannot know that the user > // isn't doing invalid things like allowing i to index an out-of-range > @@ -494,254 +612,144 @@ > // of any accesses into structs where any of the components are variables. > if (IdxVal->getZExtValue() >= AT->getNumElements()) > return MarkUnsafe(Info); > - } else { > - const VectorType *VT = dyn_cast(*GEPIt); > - assert(VT && "unexpected type in GEP type iterator"); > + } else if (const VectorType *VT = dyn_cast(*I)) { > if (IdxVal->getZExtValue() >= VT->getNumElements()) > return MarkUnsafe(Info); > } > } > - > - // All the indices are safe. Now compute the offset due to this GEP and > - // check if the alloca has a component element at that offset. > - if (ArrayOffset == 0) { > - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); > - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), > - &Indices[0], Indices.size()); > - } else { > - // Both array elements have the same type, so it suffices to check one of > - // them. Copy the GEP indices starting from the array index, but replace > - // that variable index with a constant zero. 
> - SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); > - Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); > - const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); > - Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); > - } > - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) > - MarkUnsafe(Info); > -} > - > -/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI > -/// alloca or has an offset and size that corresponds to a component element > -/// within it. The offset checked here may have been formed from a GEP with a > -/// pointer bitcasted to a different type. > -void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, > - uint64_t ArrayOffset, uint64_t MemSize, > - const Type *MemOpType, bool isStore, > - AllocaInfo &Info) { > - // Check if this is a load/store of the entire alloca. > - if (Offset == 0 && ArrayOffset == 0 && > - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { > - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); > - // This is safe for MemIntrinsics (where MemOpType is 0), integer types > - // (which are essentially the same as the MemIntrinsics, especially with > - // regard to copying padding between elements), or references using the > - // aggregate type of the alloca. > - if (!MemOpType || isa(MemOpType) || UsesAggregateType) { > - if (!UsesAggregateType) { > - if (isStore) > - Info.isMemCpyDst = true; > - else > - Info.isMemCpySrc = true; > - } > - return; > - } > - } > - // Check if the offset/size correspond to a component within the alloca type. > - const Type *T = AI->getAllocatedType(); > - if (TypeHasComponent(T, Offset, MemSize) && > - (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) > - return; > - > - return MarkUnsafe(Info); > + > + // If there are any non-simple uses of this getelementptr, make sure to reject > + // them. 
> + return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); > } > > -/// TypeHasComponent - Return true if T has a component type with the > -/// specified offset and size. If Size is zero, do not check the size. > -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { > - const Type *EltTy; > - uint64_t EltSize; > - if (const StructType *ST = dyn_cast(T)) { > - const StructLayout *Layout = TD->getStructLayout(ST); > - unsigned EltIdx = Layout->getElementContainingOffset(Offset); > - EltTy = ST->getContainedType(EltIdx); > - EltSize = TD->getTypeAllocSize(EltTy); > - Offset -= Layout->getElementOffset(EltIdx); > - } else if (const ArrayType *AT = dyn_cast(T)) { > - EltTy = AT->getElementType(); > - EltSize = TD->getTypeAllocSize(EltTy); > - Offset %= EltSize; > - } else { > - return false; > +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory > +/// intrinsic can be promoted by SROA. At this point, we know that the operand > +/// of the memintrinsic is a pointer to the beginning of the allocation. > +void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, > + unsigned OpNo, AllocaInfo &Info) { > + // If not constant length, give up. > + ConstantInt *Length = dyn_cast(MI->getLength()); > + if (!Length) return MarkUnsafe(Info); > + > + // If not the whole aggregate, give up. > + if (Length->getZExtValue() != > + TD->getTypeAllocSize(AI->getType()->getElementType())) > + return MarkUnsafe(Info); > + > + // We only know about memcpy/memset/memmove. > + if (!isa(MI)) > + return MarkUnsafe(Info); > + > + // Otherwise, we can transform it. Determine whether this is a memcpy/set > + // into or out of the aggregate. > + if (OpNo == 1) > + Info.isMemCpyDst = true; > + else { > + assert(OpNo == 2); > + Info.isMemCpySrc = true; > } > - if (Offset == 0 && (Size == 0 || EltSize == Size)) > - return true; > - // Check if the component spans multiple elements. 
> - if (Offset + Size > EltSize) > - return false; > - return TypeHasComponent(EltTy, Offset, Size); > } > > -/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite > -/// the instruction I, which references it, to use the separate elements. > -/// Offset indicates the position within AI that is referenced by this > -/// instruction. > -void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > - SmallVector &NewElts) { > - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ) { > - Instruction *User = cast(*UI++); > +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast > +/// from an alloca are safe for SROA of that alloca. > +void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, > + AllocaInfo &Info) { > + for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); > + UI != E; ++UI) { > + if (BitCastInst *BCU = dyn_cast(UI)) { > + isSafeUseOfBitCastedAllocation(BCU, AI, Info); > + } else if (MemIntrinsic *MI = dyn_cast(UI)) { > + isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); > + } else if (StoreInst *SI = dyn_cast(UI)) { > + if (SI->isVolatile()) > + return MarkUnsafe(Info); > + > + // If storing the entire alloca in one chunk through a bitcasted pointer > + // to integer, we can transform it. This happens (for example) when you > + // cast a {i32,i32}* to i64* and store through it. This is similar to the > + // memcpy case and occurs in various "byval" cases and emulated memcpys. 
> + if (isa(SI->getOperand(0)->getType()) && > + TD->getTypeAllocSize(SI->getOperand(0)->getType()) == > + TD->getTypeAllocSize(AI->getType()->getElementType())) { > + Info.isMemCpyDst = true; > + continue; > + } > + return MarkUnsafe(Info); > + } else if (LoadInst *LI = dyn_cast(UI)) { > + if (LI->isVolatile()) > + return MarkUnsafe(Info); > > - if (BitCastInst *BC = dyn_cast(User)) { > - if (BC->getOperand(0) == AI) > - BC->setOperand(0, NewElts[0]); > - // If the bitcast type now matches the operand type, it will be removed > - // after processing its uses. > - RewriteForScalarRepl(BC, AI, Offset, NewElts); > - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { > - RewriteGEP(GEPI, AI, Offset, NewElts); > - } else if (MemIntrinsic *MI = dyn_cast(User)) { > - ConstantInt *Length = dyn_cast(MI->getLength()); > - uint64_t MemSize = Length->getZExtValue(); > - if (Offset == 0 && > - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) > - RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); > - } else if (LoadInst *LI = dyn_cast(User)) { > - const Type *LIType = LI->getType(); > - if (LIType == AI->getAllocatedType()) { > - // Replace: > - // %res = load { i32, i32 }* %alloc > - // with: > - // %load.0 = load i32* %alloc.0 > - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 > - // %load.1 = load i32* %alloc.1 > - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 > - // (Also works for arrays instead of structs) > - Value *Insert = UndefValue::get(LIType); > - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { > - Value *Load = new LoadInst(NewElts[i], "load", LI); > - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); > - } > - LI->replaceAllUsesWith(Insert); > - LI->eraseFromParent(); > - } else if (isa(LIType) && > - TD->getTypeAllocSize(LIType) == > - TD->getTypeAllocSize(AI->getAllocatedType())) { > - // If this is a load of the entire alloca to an integer, rewrite it. 
> - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); > + // If loading the entire alloca in one chunk through a bitcasted pointer > + // to integer, we can transform it. This happens (for example) when you > + // cast a {i32,i32}* to i64* and load through it. This is similar to the > + // memcpy case and occurs in various "byval" cases and emulated memcpys. > + if (isa(LI->getType()) && > + TD->getTypeAllocSize(LI->getType()) == > + TD->getTypeAllocSize(AI->getType()->getElementType())) { > + Info.isMemCpySrc = true; > + continue; > } > - } else if (StoreInst *SI = dyn_cast(User)) { > - Value *Val = SI->getOperand(0); > - const Type *SIType = Val->getType(); > - if (SIType == AI->getAllocatedType()) { > - // Replace: > - // store { i32, i32 } %val, { i32, i32 }* %alloc > - // with: > - // %val.0 = extractvalue { i32, i32 } %val, 0 > - // store i32 %val.0, i32* %alloc.0 > - // %val.1 = extractvalue { i32, i32 } %val, 1 > - // store i32 %val.1, i32* %alloc.1 > - // (Also works for arrays instead of structs) > - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { > - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); > - new StoreInst(Extract, NewElts[i], SI); > - } > - SI->eraseFromParent(); > - } else if (isa(SIType) && > - TD->getTypeAllocSize(SIType) == > - TD->getTypeAllocSize(AI->getAllocatedType())) { > - // If this is a store of the entire alloca from an integer, rewrite it. > - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); > + return MarkUnsafe(Info); > + } else if (isa(UI)) { > + // If one user is DbgInfoIntrinsic then check if all users are > + // DbgInfoIntrinsics. > + if (OnlyUsedByDbgInfoIntrinsics(BC)) { > + Info.needsCleanup = true; > + return; > } > + else > + MarkUnsafe(Info); > } > - } > - // Delete unused instructions and identity bitcasts. 
> - if (I->use_empty()) > - I->eraseFromParent(); > - else if (BitCastInst *BC = dyn_cast(I)) { > - if (BC->getDestTy() == BC->getSrcTy()) { > - BC->replaceAllUsesWith(BC->getOperand(0)); > - BC->eraseFromParent(); > + else { > + return MarkUnsafe(Info); > } > + if (Info.isUnsafe) return; > } > } > > -/// FindElementAndOffset - Return the index of the element containing Offset > -/// within the specified type, which must be either a struct or an array. > -/// Sets T to the type of the element and Offset to the offset within that > -/// element. > -unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { > - unsigned Idx = 0; > - if (const StructType *ST = dyn_cast(T)) { > - const StructLayout *Layout = TD->getStructLayout(ST); > - Idx = Layout->getElementContainingOffset(Offset); > - T = ST->getContainedType(Idx); > - Offset -= Layout->getElementOffset(Idx); > - } else { > - const ArrayType *AT = dyn_cast(T); > - assert(AT && "unexpected type for scalar replacement"); > - T = AT->getElementType(); > - uint64_t EltSize = TD->getTypeAllocSize(T); > - Idx = (unsigned)(Offset / EltSize); > - Offset -= Idx * EltSize; > - } > - return Idx; > -} > - > -/// RewriteGEP - Check if this GEP instruction moves the pointer across > -/// elements of the alloca that are being split apart, and if so, rewrite > -/// the GEP to be relative to the new element. > -void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, > - SmallVector &NewElts) { > - Instruction *Val = GEPI; > - > - uint64_t OldOffset = Offset; > - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); > - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), > - &Indices[0], Indices.size()); > - > - const Type *T = AI->getAllocatedType(); > - unsigned OldIdx = FindElementAndOffset(T, OldOffset); > - if (GEPI->getOperand(0) == AI) > - OldIdx = ~0U; // Force the GEP to be rewritten. 
> - > - T = AI->getAllocatedType(); > - uint64_t EltOffset = Offset; > - unsigned Idx = FindElementAndOffset(T, EltOffset); > - > - // If this GEP moves the pointer across elements of the alloca that are > - // being split, then it needs to be rewritten. > - if (Idx != OldIdx) { > - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); > - SmallVector NewArgs; > - NewArgs.push_back(Constant::getNullValue(i32Ty)); > - while (EltOffset != 0) { > - unsigned EltIdx = FindElementAndOffset(T, EltOffset); > - NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); > - } > - if (NewArgs.size() > 1) { > - Val = GetElementPtrInst::CreateInBounds(NewElts[Idx], NewArgs.begin(), > - NewArgs.end(), "", GEPI); > - Val->takeName(GEPI); > - if (Val->getType() != GEPI->getType()) > - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); > - } else { > - Val = NewElts[Idx]; > - // Insert a new bitcast. If the types match, it will be removed after > - // handling all of its uses. > - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); > - Val->takeName(GEPI); > +/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes > +/// to its first element. Transform users of the cast to use the new values > +/// instead. > +void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, > + SmallVector &NewElts) { > + Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); > + while (UI != UE) { > + Instruction *User = cast(*UI++); > + if (BitCastInst *BCU = dyn_cast(User)) { > + RewriteBitCastUserOfAlloca(BCU, AI, NewElts); > + if (BCU->use_empty()) BCU->eraseFromParent(); > + continue; > } > > - GEPI->replaceAllUsesWith(Val); > - GEPI->eraseFromParent(); > - } > + if (MemIntrinsic *MI = dyn_cast(User)) { > + // This must be memcpy/memmove/memset of the entire aggregate. > + // Split into one per element. 
> + RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); > + continue; > + } > + > + if (StoreInst *SI = dyn_cast(User)) { > + // If this is a store of the entire alloca from an integer, rewrite it. > + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); > + continue; > + } > > - RewriteForScalarRepl(Val, AI, Offset, NewElts); > + if (LoadInst *LI = dyn_cast(User)) { > + // If this is a load of the entire alloca to an integer, rewrite it. > + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); > + continue; > + } > + > + // Otherwise it must be some other user of a gep of the first pointer. Just > + // leave these alone. > + continue; > + } > } > > /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. > /// Rewrite it to copy or set the elements of the scalarized memory. > -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, > +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, > AllocaInst *AI, > SmallVector &NewElts) { > > @@ -753,10 +761,10 @@ > LLVMContext &Context = MI->getContext(); > unsigned MemAlignment = MI->getAlignment(); > if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy > - if (Inst == MTI->getRawDest()) > + if (BCInst == MTI->getRawDest()) > OtherPtr = MTI->getRawSource(); > else { > - assert(Inst == MTI->getRawSource()); > + assert(BCInst == MTI->getRawSource()); > OtherPtr = MTI->getRawDest(); > } > } > @@ -790,7 +798,7 @@ > // Process each element of the aggregate. 
> Value *TheFn = MI->getOperand(0); > const Type *BytePtrTy = MI->getRawDest()->getType(); > - bool SROADest = MI->getRawDest() == Inst; > + bool SROADest = MI->getRawDest() == BCInst; > > Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); > > @@ -802,9 +810,9 @@ > if (OtherPtr) { > Value *Idx[2] = { Zero, > ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; > - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, > + OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, > OtherPtr->getNameStr()+"."+Twine(i), > - MI); > + MI); > uint64_t EltOffset; > const PointerType *OtherPtrTy = cast(OtherPtr->getType()); > if (const StructType *ST = > @@ -929,9 +937,15 @@ > // Extract each element out of the integer according to its structure offset > // and store the element value to the individual alloca. > Value *SrcVal = SI->getOperand(0); > - const Type *AllocaEltTy = AI->getAllocatedType(); > + const Type *AllocaEltTy = AI->getType()->getElementType(); > uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); > > + // If this isn't a store of an integer to the whole alloca, it may be a store > + // to the first element. Just ignore the store in this case and normal SROA > + // will handle it. > + if (!isa(SrcVal->getType()) || > + TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) > + return; > // Handle tail padding by extending the operand > if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) > SrcVal = new ZExtInst(SrcVal, > @@ -1045,9 +1059,16 @@ > SmallVector &NewElts) { > // Extract each element out of the NewElts according to its structure offset > // and form the result value. > - const Type *AllocaEltTy = AI->getAllocatedType(); > + const Type *AllocaEltTy = AI->getType()->getElementType(); > uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); > > + // If this isn't a load of the whole alloca to an integer, it may be a load > + // of the first element. 
Just ignore the load in this case and normal SROA > + // will handle it. > + if (!isa(LI->getType()) || > + TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) > + return; > + > DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI > << '\n'); > > @@ -1121,6 +1142,7 @@ > LI->eraseFromParent(); > } > > + > /// HasPadding - Return true if the specified type has any structure or > /// alignment padding, false otherwise. > static bool HasPadding(const Type *Ty, const TargetData &TD) { > @@ -1170,10 +1192,14 @@ > // the users are safe to transform. > AllocaInfo Info; > > - isSafeForScalarRepl(AI, AI, 0, 0, Info); > - if (Info.isUnsafe) { > - DEBUG(errs() << "Cannot transform: " << *AI << '\n'); > - return 0; > + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); > + I != E; ++I) { > + isSafeUseOfAllocation(cast(*I), AI, Info); > + if (Info.isUnsafe) { > + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " > + << **I << '\n'); > + return 0; > + } > } > > // Okay, we know all the users are promotable. If the aggregate is a memcpy > @@ -1182,7 +1208,7 @@ > // types, but may actually be used. In these cases, we refuse to promote the > // struct. > if (Info.isMemCpySrc && Info.isMemCpyDst && > - HasPadding(AI->getAllocatedType(), *TD)) > + HasPadding(AI->getType()->getElementType(), *TD)) > return 0; > > // If we require cleanup, return 1, otherwise return 3. > @@ -1219,15 +1245,15 @@ > // Insert the new GEP instructions, which are properly indexed. 
> SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); > Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); > - Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), > - Indices.begin(), > - Indices.end(), > - GEPI->getName()+".0",GEPI); > + Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), > + Indices.begin(), > + Indices.end(), > + GEPI->getName()+".0", GEPI); > Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); > - Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), > - Indices.begin(), > - Indices.end(), > - GEPI->getName()+".1", GEPI); > + Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), > + Indices.begin(), > + Indices.end(), > + GEPI->getName()+".1", GEPI); > // Replace all loads of the variable index GEP with loads from both > // indexes and a select. > while (!GEPI->use_empty()) { > @@ -1238,24 +1264,22 @@ > LI->replaceAllUsesWith(R); > LI->eraseFromParent(); > } > + GEPI->eraseFromParent(); > } > > + > /// CleanupAllocaUsers - If SROA reported that it can promote the specified > /// allocation, but only if cleaned up, perform the cleanups required. > -void SROA::CleanupAllocaUsers(Value *V) { > +void SROA::CleanupAllocaUsers(AllocaInst *AI) { > // At this point, we know that the end result will be SROA'd and promoted, so > // we can insert ugly code if required so long as sroa+mem2reg will clean it > // up. 
> - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); > UI != E; ) { > User *U = *UI++; > - if (isa(U)) { > - CleanupAllocaUsers(U); > - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { > + if (GetElementPtrInst *GEPI = dyn_cast(U)) > CleanupGEP(GEPI); > - CleanupAllocaUsers(GEPI); > - if (GEPI->use_empty()) GEPI->eraseFromParent(); > - } else { > + else { > Instruction *I = cast(U); > SmallVector DbgInUses; > if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { > @@ -1371,7 +1395,7 @@ > > // Compute the offset that this GEP adds to the pointer. > SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); > - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), > + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), > &Indices[0], Indices.size()); > // See if all uses can be converted. > if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, > @@ -1433,7 +1457,7 @@ > if (GetElementPtrInst *GEP = dyn_cast(User)) { > // Compute the offset that this GEP adds to the pointer. 
> SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); > - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), > + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), > &Indices[0], Indices.size()); > ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); > GEP->eraseFromParent(); > > Modified: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91268&r1=91267&r2=91268&view=diff > > ============================================================================== > --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (original) > +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Sun Dec 13 23:11:02 2009 > @@ -1,68 +0,0 @@ > -; RUN: opt < %s -scalarrepl -S | FileCheck %s > -; Radar 7441282 > - > -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" > -target triple = "thumbv7-apple-darwin10" > - > -%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } > -%struct.int16x8_t = type { <8 x i16> } > -%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } > -%union..0anon = type { %struct.int16x8x2_t } > - > -define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { > -; CHECK: @test > -; CHECK-NOT: alloca > -; CHECK: "alloca point" > -entry: > - %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] > - %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] > - %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] > - %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] > - %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] > - %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] > - %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] > - %"alloca point" = bitcast i32 0 to i32 
; [#uses=0] > - %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - store <8 x i16> %tmp.0, <8 x i16>* %1 > - store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr > - %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] > - store <8 x i16> %4, <8 x i16>* %2, align 16 > - %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] > - store <8 x i16> %7, <8 x i16>* %5, align 16 > - %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] > - %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] > - %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] > - %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] > - %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] > - %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > - store <8 x i16> %14, <8 x i16>* %15 > - %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] > - %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] > - store <8 x i16> %16, <8 x i16>* %17 > - %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] > - %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] 
> - %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] > - call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) > - %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] > - %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] > - call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) > - %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] > - %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] > - %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] > - call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) > - br label %return > - > -; CHECK: store <8 x i16> > -; CHECK: store <8 x i16> > - > -return: ; preds = %entry > - ret void > -} > - > -declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091214/b1afe358/attachment.html From stoklund at 2pi.dk Mon Dec 14 09:48:46 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 14 Dec 2009 07:48:46 -0800 Subject: [llvm-commits] [llvm] r91273 - in /llvm/trunk: include/llvm/CodeGen/CalcSpillWeights.h lib/CodeGen/CalcSpillWeights.cpp lib/CodeGen/PreAllocSplitting.cpp lib/CodeGen/RegAllocLinearScan.cpp lib/CodeGen/RegAllocPBQP.cpp lib/CodeGen/SimpleRegisterCoalescing.cpp lib/CodeGen/SimpleRegisterCoalescing.h In-Reply-To: <200912140649.nBE6ngqC000877@zion.cs.uiuc.edu> References: <200912140649.nBE6ngqC000877@zion.cs.uiuc.edu> Message-ID: <949F384C-AAE3-4C80-959E-1DE703DB7CED@2pi.dk> On Dec 13, 2009, at 10:49 PM, Lang Hames wrote: > Author: lhames > Date: Mon Dec 14 00:49:42 2009 > New Revision: 91273 > > URL: http://llvm.org/viewvc/llvm-project?rev=91273&view=rev > Log: > Moved spill weight calculation out of SimpleRegisterCoalescing and into its own pass: CalculateSpillWeights. Very nice! I was wondering what that method was doing in the coalescer. > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h (added) > +++ llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h Mon Dec 14 00:49:42 2009 I think you can do without the header file. Just put a CalculateSpillWeightsID in Passes.h instead. > +/// Returns true if the given live interval is zero length. > +bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { Should this be a method on LiveInterval instead? /jakob -------------- next part -------------- A non-text attachment was scrubbed... 
Name: smime.p7s Type: application/pkcs7-signature Size: 1929 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091214/b8f1c7e5/attachment.bin From dpatel at apple.com Mon Dec 14 10:18:46 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 14 Dec 2009 16:18:46 -0000 Subject: [llvm-commits] [llvm] r91278 - in /llvm/trunk/lib/CodeGen/AsmPrinter: DwarfDebug.cpp DwarfDebug.h Message-ID: <200912141618.nBEGIkvN004012@zion.cs.uiuc.edu> Author: dpatel Date: Mon Dec 14 10:18:45 2009 New Revision: 91278 URL: http://llvm.org/viewvc/llvm-project?rev=91278&view=rev Log: Use DW_AT_specification to point to DIE describing function declaration. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=91278&r1=91277&r2=91278&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Dec 14 10:18:45 2009 @@ -906,7 +906,7 @@ continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) - ElemDie = createMemberSubprogramDIE(DISubprogram(Element.getNode())); + ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); else ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); Buffer.addChild(ElemDie); @@ -1098,11 +1098,13 @@ return MemberDie; } -/// createRawSubprogramDIE - Create new partially incomplete DIE. This is -/// a helper routine used by createMemberSubprogramDIE and -/// createSubprogramDIE. -DIE *DwarfDebug::createRawSubprogramDIE(const DISubprogram &SP) { - DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); +/// createSubprogramDIE - Create new DIE using SP. 
+DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { + DIE *SPDie = ModuleCU->getDIE(SP.getNode()); + if (SPDie) + return SPDie; + + SPDie = new DIE(dwarf::DW_TAG_subprogram); addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); @@ -1144,52 +1146,7 @@ ContainingTypeMap.insert(std::make_pair(SPDie, WeakVH(SP.getContainingType().getNode()))); } - return SPDie; -} - -/// createMemberSubprogramDIE - Create new member DIE using SP. This routine -/// always returns a die with DW_AT_declaration attribute. -DIE *DwarfDebug::createMemberSubprogramDIE(const DISubprogram &SP) { - DIE *SPDie = ModuleCU->getDIE(SP.getNode()); - if (!SPDie) - SPDie = createSubprogramDIE(SP); - - // If SPDie has DW_AT_declaration then reuse it. - if (!SP.isDefinition()) - return SPDie; - - // Otherwise create new DIE for the declaration. First push definition - // DIE at the top level. - if (TopLevelDIEs.insert(SPDie)) - TopLevelDIEsVector.push_back(SPDie); - - SPDie = createRawSubprogramDIE(SP); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Args.getElement(i).getNode())); - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? - SPDie->addChild(Arg); - } - - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - return SPDie; -} - -/// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP) { - DIE *SPDie = ModuleCU->getDIE(SP.getNode()); - if (SPDie) - return SPDie; - - SPDie = createRawSubprogramDIE(SP); - - if (!SP.isDefinition()) { + if (MakeDecl || !SP.isDefinition()) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. 
Do not add arguments for subprogram definition. They will @@ -1310,6 +1267,28 @@ DIE *SPDie = ModuleCU->getDIE(SPNode); assert (SPDie && "Unable to find subprogram DIE!"); + DISubprogram SP(SPNode); + if (SP.isDefinition() && !SP.getContext().isCompileUnit()) { + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Args.getElement(i).getNode())); + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + + ModuleCU->addDie(SPDie); + } + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=91278&r1=91277&r2=91278&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon Dec 14 10:18:45 2009 @@ -350,17 +350,7 @@ DIE *createMemberDIE(const DIDerivedType &DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(const DISubprogram &SP); - - /// createMemberSubprogramDIE - Create new member DIE using SP. This - /// routine always returns a die with DW_AT_declaration attribute. - - DIE *createMemberSubprogramDIE(const DISubprogram &SP); - - /// createRawSubprogramDIE - Create new partially incomplete DIE. 
This is - /// a helper routine used by createMemberSubprogramDIE and - /// createSubprogramDIE. - DIE *createRawSubprogramDIE(const DISubprogram &SP); + DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); /// findCompileUnit - Get the compile unit for the given descriptor. /// From gohman at apple.com Mon Dec 14 10:37:30 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 16:37:30 -0000 Subject: [llvm-commits] [llvm] r91280 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141637.nBEGbU2I004678@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 10:37:29 2009 New Revision: 91280 URL: http://llvm.org/viewvc/llvm-project?rev=91280&view=rev Log: Minor code cleanups. Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91280&r1=91279&r2=91280&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 10:37:29 2009 @@ -228,19 +228,17 @@ if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); - DeadInsts.pop_back(); + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } - } I->eraseFromParent(); Changed = true; From gohman at apple.com Mon Dec 14 10:52:55 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 16:52:55 -0000 Subject: [llvm-commits] [llvm] r91281 - 
/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141652.nBEGqtEZ005267@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 10:52:55 2009 New Revision: 91281 URL: http://llvm.org/viewvc/llvm-project?rev=91281&view=rev Log: Remove the code in LSR that manually hoists expansions out of loops; SCEVExpander does this automatically. Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91281&r1=91280&r2=91281&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 10:52:55 2009 @@ -109,8 +109,7 @@ public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) { - } + LoopPass(&ID), TLI(tli) {} bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -381,14 +380,12 @@ void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, SmallVectorImpl &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI); + Instruction *IP); void dump() const; }; } @@ -402,27 +399,11 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI) { - // Figure out where we *really* want to insert this code. In particular, if - // the user is inside of a loop that is nested inside of L, we really don't - // want to insert this expression before the user, we'd rather pull it out as - // many loops as possible. 
- Instruction *BaseInsertPt = IP; - - // Figure out the most-nested loop that IP is in. - Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - - // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out - // the preheader of the outer-most loop where NewBase is not loop invariant. - if (L->contains(IP->getParent())) - while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { - BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); - InsertLoop = InsertLoop->getParentLoop(); - } - - Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + Instruction *IP) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -441,7 +422,6 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, SmallVectorImpl &DeadInsts) { if (!isa(Inst)) { // By default, insert code at the user instruction. @@ -471,7 +451,7 @@ } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt); // Replace the use of the operand Value with the new Phi we just created. 
Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -533,7 +513,7 @@ PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -1778,7 +1758,7 @@ RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, *LI, + Rewriter, L, this, DeadInsts); // Mark old value we replaced as possibly dead, so that it is eliminated From gohman at apple.com Mon Dec 14 10:57:08 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 16:57:08 -0000 Subject: [llvm-commits] [llvm] r91282 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141657.nBEGv8lj005396@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 10:57:08 2009 New Revision: 91282 URL: http://llvm.org/viewvc/llvm-project?rev=91282&view=rev Log: LSR itself doesn't need DominatorTree. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91282&r1=91281&r2=91282&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 10:57:08 2009 @@ -26,7 +26,6 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Type.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -86,7 +85,6 @@ class LoopStrengthReduce : public LoopPass { IVUsers *IU; LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -118,12 +116,11 @@ // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominanceFrontier>(); - AU.addPreserved<DominatorTree>(); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); AU.addRequired<IVUsers>(); @@ -2724,7 +2721,6 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; From gohman at apple.com Mon Dec 14 11:02:34 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:02:34 -0000 Subject: [llvm-commits] [llvm] r91283 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141702.nBEH2YDB005622@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:02:34 2009 New Revision: 91283 URL: http://llvm.org/viewvc/llvm-project?rev=91283&view=rev Log: LSR itself doesn't need LoopInfo. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91283&r1=91282&r2=91283&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:02:34 2009 @@ -27,7 +27,6 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" @@ -84,7 +83,6 @@ class LoopStrengthReduce : public LoopPass { IVUsers *IU; - LoopInfo *LI; ScalarEvolution *SE; bool Changed; @@ -115,12 +113,11 @@ // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); + AU.addPreserved("loops"); AU.addPreserved("domfrontier"); AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); AU.addRequired<IVUsers>(); @@ -259,7 +256,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. 
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (!newLoop->contains(L->getHeader())) return false; } return true; @@ -2720,7 +2717,6 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis(); - LI = &getAnalysis(); SE = &getAnalysis(); Changed = false; From grosbach at apple.com Mon Dec 14 11:02:56 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 17:02:56 -0000 Subject: [llvm-commits] [llvm] r91284 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200912141702.nBEH2uSv005645@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 11:02:55 2009 New Revision: 91284 URL: http://llvm.org/viewvc/llvm-project?rev=91284&view=rev Log: add ldrexd/strexd instructions Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91284&r1=91283&r2=91284&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 11:02:55 2009 @@ -1710,11 +1710,15 @@ def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, "ldrex", "\t$dest, [$ptr]", []>; +def LDREXD : AIldrex<0b00, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", + []>; } let mayStore = 1 in { def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), - NoItinerary, + NoItinerary, "strexb", "\t$success, $src, [$ptr]", []>; def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), @@ -1722,9 +1726,14 @@ "strexh", "\t$success, $src, [$ptr]", []>; def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), - NoItinerary, + NoItinerary, "strex", "\t$success, $src, [$ptr]", []>; +def STREXD : AIstrex<0b00, (outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + NoItinerary, + 
"strexd", "\t$success, $src, $src2, [$ptr]", + []>; } //===----------------------------------------------------------------------===// From gohman at apple.com Mon Dec 14 11:06:50 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:06:50 -0000 Subject: [llvm-commits] [llvm] r91286 - in /llvm/trunk: include/llvm/Analysis/LoopInfo.h lib/Analysis/IVUsers.cpp Message-ID: <200912141706.nBEH6oLg005799@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:06:50 2009 New Revision: 91286 URL: http://llvm.org/viewvc/llvm-project?rev=91286&view=rev Log: Drop Loop::isNotAlreadyContainedIn in favor of Loop::contains. The former was just exposing a LoopInfoBase implementation detail. Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h llvm/trunk/lib/Analysis/IVUsers.cpp Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopInfo.h?rev=91286&r1=91285&r2=91286&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/LoopInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/LoopInfo.h Mon Dec 14 11:06:50 2009 @@ -976,13 +976,6 @@ void removeBlock(BasicBlock *BB) { LI.removeBlock(BB); } - - static bool isNotAlreadyContainedIn(const Loop *SubLoop, - const Loop *ParentLoop) { - return - LoopInfoBase::isNotAlreadyContainedIn(SubLoop, - ParentLoop); - } }; Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91286&r1=91285&r2=91286&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Mon Dec 14 11:06:50 2009 @@ -53,7 +53,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. 
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (!newLoop->contains(L->getHeader())) return false; } return true; From gohman at apple.com Mon Dec 14 11:08:09 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:08:09 -0000 Subject: [llvm-commits] [llvm] r91287 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141708.nBEH89Yh005869@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:08:09 2009 New Revision: 91287 URL: http://llvm.org/viewvc/llvm-project?rev=91287&view=rev Log: Delete an unused variable. Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91287&r1=91286&r2=91287&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:08:09 2009 @@ -90,10 +90,6 @@ /// particular stride. std::map IVsByStride; - /// StrideNoReuse - Keep track of all the strides whose ivs cannot be - /// reused (nor should they be rewritten to reuse other strides). - SmallSet StrideNoReuse; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. 
SmallVector DeadInsts; @@ -983,17 +979,13 @@ const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { - if (StrideNoReuse.count(Stride)) - return SE->getIntegerSCEV(0, Stride->getType()); - if (const SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa(SI->first) || - StrideNoReuse.count(SI->first)) + if (SI == IVsByStride.end() || !isa(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map::iterator UI = @@ -2766,7 +2758,6 @@ // We're done analyzing this loop; release all the state we built up for it. IVsByStride.clear(); - StrideNoReuse.clear(); // Clean up after ourselves if (!DeadInsts.empty()) From gohman at apple.com Mon Dec 14 11:10:45 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:10:45 -0000 Subject: [llvm-commits] [llvm] r91288 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141710.nBEHAjtL006068@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:10:44 2009 New Revision: 91288 URL: http://llvm.org/viewvc/llvm-project?rev=91288&view=rev Log: Don't bother cleaning up if there's nothing to clean up. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91288&r1=91287&r2=91288&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:10:44 2009 @@ -2754,14 +2754,14 @@ // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - } - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + } // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. From gohman at apple.com Mon Dec 14 11:12:51 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:12:51 -0000 Subject: [llvm-commits] [llvm] r91289 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141712.nBEHCpDj006196@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:12:51 2009 New Revision: 91289 URL: http://llvm.org/viewvc/llvm-project?rev=91289&view=rev Log: Instead of having a ScalarEvolution pointer member in BasedUser, just pass the ScalarEvolution pointer into the functions which need it. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91289&r1=91288&r2=91289&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:12:51 2009 @@ -325,9 +325,6 @@ /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { - /// SE - The current ScalarEvolution object. - ScalarEvolution *SE; - /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -359,9 +356,9 @@ bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(se->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -370,12 +367,14 @@ void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts, + ScalarEvolution *SE); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP); + Instruction *IP, + ScalarEvolution *SE); void dump() const; }; } @@ -389,7 +388,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander 
&Rewriter, - Instruction *IP) { + Instruction *IP, + ScalarEvolution *SE) { Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to @@ -412,7 +412,8 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts, + ScalarEvolution *SE) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -441,7 +442,7 @@ } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt); + Rewriter, InsertPt, SE); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -503,7 +504,7 @@ PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt); + Rewriter, InsertPt, SE); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -1745,7 +1746,7 @@ User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, Rewriter, L, this, - DeadInsts); + DeadInsts, SE); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. From gohman at apple.com Mon Dec 14 11:14:32 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:14:32 -0000 Subject: [llvm-commits] [llvm] r91291 - /llvm/trunk/include/llvm/Analysis/IVUsers.h Message-ID: <200912141714.nBEHEWEZ006374@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:14:32 2009 New Revision: 91291 URL: http://llvm.org/viewvc/llvm-project?rev=91291&view=rev Log: Make the IVUses member private. 
Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/IVUsers.h?rev=91291&r1=91290&r2=91291&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/IVUsers.h (original) +++ llvm/trunk/include/llvm/Analysis/IVUsers.h Mon Dec 14 11:14:32 2009 @@ -175,11 +175,11 @@ ScalarEvolution *SE; SmallPtrSet Processed; -public: /// IVUses - A list of all tracked IV uses of induction variable expressions /// we are interested in. ilist IVUses; +public: /// IVUsesByStride - A mapping from the strides in StrideOrder to the /// uses in IVUses. std::map IVUsesByStride; From gohman at apple.com Mon Dec 14 11:19:06 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:19:06 -0000 Subject: [llvm-commits] [llvm] r91293 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141719.nBEHJ690006551@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:19:06 2009 New Revision: 91293 URL: http://llvm.org/viewvc/llvm-project?rev=91293&view=rev Log: Remove unnecessary #includes. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91293&r1=91292&r2=91293&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:19:06 2009 @@ -24,7 +24,6 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" @@ -33,7 +32,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" From gohman at apple.com Mon Dec 14 11:31:01 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:31:01 -0000 Subject: [llvm-commits] [llvm] r91295 - in /llvm/trunk/lib: Analysis/IVUsers.cpp Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912141731.nBEHV2Z7006982@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:31:01 2009 New Revision: 91295 URL: http://llvm.org/viewvc/llvm-project?rev=91295&view=rev Log: Fix a thinko; isNotAlreadyContainedIn had a built-in negative, so the condition was inverted when the code was converted to contains(). 
Modified: llvm/trunk/lib/Analysis/IVUsers.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91295&r1=91294&r2=91295&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Mon Dec 14 11:31:01 2009 @@ -53,7 +53,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!newLoop->contains(L->getHeader())) + if (newLoop->contains(L->getHeader())) return false; } return true; Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91295&r1=91294&r2=91295&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 14 11:31:01 2009 @@ -250,7 +250,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!newLoop->contains(L->getHeader())) + if (newLoop->contains(L->getHeader())) return false; } return true; From gohman at apple.com Mon Dec 14 11:35:17 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 17:35:17 -0000 Subject: [llvm-commits] [llvm] r91296 - /llvm/trunk/lib/Analysis/IVUsers.cpp Message-ID: <200912141735.nBEHZHW2007105@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 11:35:17 2009 New Revision: 91296 URL: http://llvm.org/viewvc/llvm-project?rev=91296&view=rev Log: Clear the Processed set when it is no longer used, and clear the IVUses list in releaseMemory(). 
Modified: llvm/trunk/lib/Analysis/IVUsers.cpp Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91296&r1=91295&r2=91296&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Mon Dec 14 11:35:17 2009 @@ -307,6 +307,7 @@ for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) AddUsersIfInteresting(I); + Processed.clear(); return false; } @@ -369,7 +370,7 @@ void IVUsers::releaseMemory() { IVUsesByStride.clear(); StrideOrder.clear(); - Processed.clear(); + IVUses.clear(); } void IVStrideUse::deleted() { From clattner at apple.com Mon Dec 14 11:47:37 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 09:47:37 -0800 Subject: [llvm-commits] [llvm] r91277 - /llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <200912141415.nBEEFFpr031877@zion.cs.uiuc.edu> References: <200912141415.nBEEFFpr031877@zion.cs.uiuc.edu> Message-ID: Doh, thanks Shantonu! 
-Chris On Dec 14, 2009, at 6:15 AM, Shantonu Sen wrote: > Author: ssen > Date: Mon Dec 14 08:15:15 2009 > New Revision: 91277 > > URL: http://llvm.org/viewvc/llvm-project?rev=91277&view=rev > Log: > Remove empty file completely > > Removed: > llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > > Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91276&view=auto > > ============================================================================== > (empty) > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From daniel at zuster.org Mon Dec 14 11:58:27 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 14 Dec 2009 17:58:27 -0000 Subject: [llvm-commits] [llvm] r91301 - /llvm/trunk/utils/NewNightlyTest.pl Message-ID: <200912141758.nBEHwSG2008318@zion.cs.uiuc.edu> Author: ddunbar Date: Mon Dec 14 11:58:27 2009 New Revision: 91301 URL: http://llvm.org/viewvc/llvm-project?rev=91301&view=rev Log: NNT: Always create the -sentdata.txt file. Modified: llvm/trunk/utils/NewNightlyTest.pl Modified: llvm/trunk/utils/NewNightlyTest.pl URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/NewNightlyTest.pl?rev=91301&r1=91300&r2=91301&view=diff ============================================================================== --- llvm/trunk/utils/NewNightlyTest.pl (original) +++ llvm/trunk/utils/NewNightlyTest.pl Mon Dec 14 11:58:27 2009 @@ -393,10 +393,8 @@ # to our central server via the post method # #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -sub SendData { - $host = $_[0]; - $file = $_[1]; - $variables = $_[2]; +sub WriteSentData { + $variables = $_[0]; # Write out the "...-sentdata.txt" file. 
@@ -406,6 +404,12 @@ $sentdata.= "$x => $value\n"; } WriteFile "$Prefix-sentdata.txt", $sentdata; +} + +sub SendData { + $host = $_[0]; + $file = $_[1]; + $variables = $_[2]; if (!($SUBMITAUX eq "")) { system "$SUBMITAUX \"$Prefix-sentdata.txt\""; @@ -797,6 +801,9 @@ 'a_file_sizes' => "" ); +# Write out the "...-sentdata.txt" file. +WriteSentData \%hash_of_data; + if ($SUBMIT || !($SUBMITAUX eq "")) { my $response = SendData $SUBMITSERVER,$SUBMITSCRIPT,\%hash_of_data; if( $VERBOSE) { print "============================\n$response"; } From daniel at zuster.org Mon Dec 14 11:58:33 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 14 Dec 2009 17:58:33 -0000 Subject: [llvm-commits] [llvm] r91302 - /llvm/trunk/utils/NewNightlyTest.pl Message-ID: <200912141758.nBEHwXvQ008332@zion.cs.uiuc.edu> Author: ddunbar Date: Mon Dec 14 11:58:33 2009 New Revision: 91302 URL: http://llvm.org/viewvc/llvm-project?rev=91302&view=rev Log: NNT: Use [e]grep -a when scanning logs, it's possible they will have non-text characters in them, in which case the grep will just return 'Binary file matches' and the whole thing falls over. Modified: llvm/trunk/utils/NewNightlyTest.pl Modified: llvm/trunk/utils/NewNightlyTest.pl URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/NewNightlyTest.pl?rev=91302&r1=91301&r2=91302&view=diff ============================================================================== --- llvm/trunk/utils/NewNightlyTest.pl (original) +++ llvm/trunk/utils/NewNightlyTest.pl Mon Dec 14 11:58:33 2009 @@ -507,8 +507,8 @@ } RunAppendingLoggedCommand("(time -p $NICE $MAKECMD $MAKEOPTS)", $BuildLog, "BUILD"); - if (`grep '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 || - `grep '^$MAKECMD: \*\*\*.*Stop.' $BuildLog | wc -l` + 0) { + if (`grep -a '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 || + `grep -a '^$MAKECMD: \*\*\*.*Stop.' 
$BuildLog | wc -l` + 0) { return 0; } @@ -535,15 +535,15 @@ $LLCBetaOpts = `$MAKECMD print-llcbeta-option`; my $ProgramsTable; - if (`grep '^$MAKECMD\[^:]: .*Error' $ProgramTestLog | wc -l` + 0) { + if (`grep -a '^$MAKECMD\[^:]: .*Error' $ProgramTestLog | wc -l` + 0) { $ProgramsTable="Error running test $SubDir\n"; print "ERROR TESTING\n"; - } elsif (`grep '^$MAKECMD\[^:]: .*No rule to make target' $ProgramTestLog | wc -l` + 0) { + } elsif (`grep -a '^$MAKECMD\[^:]: .*No rule to make target' $ProgramTestLog | wc -l` + 0) { $ProgramsTable="Makefile error running tests $SubDir!\n"; print "ERROR TESTING\n"; } else { # Create a list of the tests which were run... - system "egrep 'TEST-(PASS|FAIL)' < $ProgramTestLog ". + system "egrep -a 'TEST-(PASS|FAIL)' < $ProgramTestLog ". "| sort > $Prefix-$SubDir-Tests.txt"; } $ProgramsTable = ReadFile "report.nightly.csv"; From grosbach at apple.com Mon Dec 14 12:31:20 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 18:31:20 -0000 Subject: [llvm-commits] [llvm] r91305 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrInfo.td Message-ID: <200912141831.nBEIVKq2009591@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 12:31:20 2009 New Revision: 91305 URL: http://llvm.org/viewvc/llvm-project?rev=91305&view=rev Log: ARM memory barrier instructions are not predicable Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=91305&r1=91304&r2=91305&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Mon Dec 14 12:31:20 2009 @@ -201,6 +201,19 @@ let Pattern = pattern; list Predicates = [IsARM]; } +// A few are not predicable +class InoP pattern> + : InstARM { + let OutOperandList 
= oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, asm); + let Pattern = pattern; + let isPredicable = 0; + list Predicates = [IsARM]; +} // Same as I except it can optionally modify CPSR. Note it's modeled as // an input operand since by default it's a zero register. It will @@ -241,6 +254,10 @@ string asm, list pattern> : XI; +class AInoP pattern> + : InoP; // Ctrl flow instructions class ABI opcod, dag oops, dag iops, InstrItinClass itin, Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91305&r1=91304&r2=91305&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 12:31:20 2009 @@ -1574,8 +1574,8 @@ // // memory barriers protect the atomic sequences -let isPredicable = 0, hasSideEffects = 1 in { -def Int_MemBarrierV7 : AI<(outs), (ins), +let hasSideEffects = 1 in { +def Int_MemBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "", [(ARMMemBarrier)]>, @@ -1585,7 +1585,7 @@ let Inst{3-0} = 0b1111; } -def Int_SyncBarrierV7 : AI<(outs), (ins), +def Int_SyncBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "", [(ARMSyncBarrier)]>, From grosbach at apple.com Mon Dec 14 12:36:32 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 18:36:32 -0000 Subject: [llvm-commits] [llvm] r91307 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200912141836.nBEIaX8E009855@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 12:36:32 2009 New Revision: 91307 URL: http://llvm.org/viewvc/llvm-project?rev=91307&view=rev Log: whitespace Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91307&r1=91306&r2=91307&view=diff 
============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 12:36:32 2009 @@ -1684,7 +1684,6 @@ "${:comment} ATOMIC_SWAP_I32 PSEUDO!", [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; - def ATOMIC_CMP_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", From grosbach at apple.com Mon Dec 14 12:56:48 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 18:56:48 -0000 Subject: [llvm-commits] [llvm] r91310 - /llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Message-ID: <200912141856.nBEIumWl010790@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 12:56:47 2009 New Revision: 91310 URL: http://llvm.org/viewvc/llvm-project?rev=91310&view=rev Log: add Thumb2 atomic and memory barrier instruction definitions Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91310&r1=91309&r2=91310&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Dec 14 12:56:47 2009 @@ -1065,6 +1065,66 @@ RegConstraint<"$false = $dst">; //===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +// memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def t2Int_MemBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dmb", "", + [(ARMMemBarrier)]> { + // FIXME: add support for options other than a full system DMB +} + +def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dsb", "", + [(ARMSyncBarrier)]> { + // FIXME: add support for options other than a full system DSB +} +} + +let 
mayLoad = 1 in { +def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexb", "\t$dest, [$ptr]", "", + []>; +def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexh", "\t$dest, [$ptr]", "", + []>; +def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrex", "\t$dest, [$ptr]", "", + []>; +def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", "", + []>; +} + +let mayStore = 1 in { +def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$success, $src, [$ptr]", "", + []>; +def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$success, $src, [$ptr]", "", + []>; +def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$success, $src, [$ptr]", "", + []>; +def t2STREXD : Thumb2I<(outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", "", + []>; +} + +//===----------------------------------------------------------------------===// // TLS Instructions // From echristo at apple.com Mon Dec 14 13:07:26 2009 From: echristo at apple.com (Eric Christopher) Date: Mon, 14 Dec 2009 19:07:26 -0000 Subject: [llvm-commits] [llvm] r91312 - /llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll Message-ID: <200912141907.nBEJ7QpA011270@zion.cs.uiuc.edu> Author: echristo Date: Mon Dec 14 13:07:25 2009 New Revision: 91312 URL: http://llvm.org/viewvc/llvm-project?rev=91312&view=rev Log: Add radar fixed in comment. 
Modified: llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll Modified: llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll?rev=91312&r1=91311&r2=91312&view=diff ============================================================================== --- llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll (original) +++ llvm/trunk/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll Mon Dec 14 13:07:25 2009 @@ -1,20 +1,21 @@ +; Test for rdar://7452967 ; RUN: opt < %s -licm -disable-output define void @foo (i8* %v) { entry: br i1 undef, label %preheader, label %return - + preheader: br i1 undef, label %loop, label %return - + loop: indirectbr i8* undef, [label %preheader, label %stuff] - + stuff: %0 = load i8* undef, align 1 br label %loop - + return: ret void -} \ No newline at end of file +} From grosbach at apple.com Mon Dec 14 13:24:11 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 19:24:11 -0000 Subject: [llvm-commits] [llvm] r91313 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb2.td Message-ID: <200912141924.nBEJOBNQ012144@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 13:24:11 2009 New Revision: 91313 URL: http://llvm.org/viewvc/llvm-project?rev=91313&view=rev Log: correct selection requirements for thumb2 vs. 
arm versions of the barrier intrinsics Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91313&r1=91312&r2=91313&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 13:24:11 2009 @@ -1579,7 +1579,7 @@ Pseudo, NoItinerary, "dmb", "", [(ARMMemBarrier)]>, - Requires<[HasV7]> { + Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB let Inst{3-0} = 0b1111; @@ -1589,7 +1589,7 @@ Pseudo, NoItinerary, "dsb", "", [(ARMSyncBarrier)]>, - Requires<[HasV7]> { + Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91313&r1=91312&r2=91313&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Dec 14 13:24:11 2009 @@ -1073,14 +1073,16 @@ def t2Int_MemBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "", - [(ARMMemBarrier)]> { + [(ARMMemBarrier)]>, + Requires<[IsThumb2]> { // FIXME: add support for options other than a full system DMB } def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "", - [(ARMSyncBarrier)]> { + [(ARMSyncBarrier)]>, + Requires<[IsThumb2]> { // FIXME: add support for options other than a full system DSB } } From gohman at apple.com Mon Dec 14 13:32:32 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 19:32:32 -0000 Subject: [llvm-commits] [llvm] 
r91316 - in /llvm/trunk: include/llvm/Support/raw_ostream.h lib/Support/raw_ostream.cpp Message-ID: <200912141932.nBEJWW2u012521@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 13:32:31 2009 New Revision: 91316 URL: http://llvm.org/viewvc/llvm-project?rev=91316&view=rev Log: Micro-optimize these functions in the case where they are not inlined. Modified: llvm/trunk/include/llvm/Support/raw_ostream.h llvm/trunk/lib/Support/raw_ostream.cpp Modified: llvm/trunk/include/llvm/Support/raw_ostream.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/raw_ostream.h?rev=91316&r1=91315&r2=91316&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/raw_ostream.h (original) +++ llvm/trunk/include/llvm/Support/raw_ostream.h Mon Dec 14 13:32:31 2009 @@ -186,14 +186,12 @@ // Inline fast path, particulary for constant strings where a sufficiently // smart compiler will simplify strlen. - this->operator<<(StringRef(Str)); - return *this; + return this->operator<<(StringRef(Str)); } raw_ostream &operator<<(const std::string &Str) { // Avoid the fast path, it would only increase code size for a marginal win. 
- write(Str.data(), Str.length()); - return *this; + return write(Str.data(), Str.length()); } raw_ostream &operator<<(unsigned long N); @@ -202,13 +200,11 @@ raw_ostream &operator<<(long long N); raw_ostream &operator<<(const void *P); raw_ostream &operator<<(unsigned int N) { - this->operator<<(static_cast(N)); - return *this; + return this->operator<<(static_cast(N)); } raw_ostream &operator<<(int N) { - this->operator<<(static_cast(N)); - return *this; + return this->operator<<(static_cast(N)); } raw_ostream &operator<<(double N); Modified: llvm/trunk/lib/Support/raw_ostream.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/raw_ostream.cpp?rev=91316&r1=91315&r2=91316&view=diff ============================================================================== --- llvm/trunk/lib/Support/raw_ostream.cpp (original) +++ llvm/trunk/lib/Support/raw_ostream.cpp Mon Dec 14 13:32:31 2009 @@ -209,8 +209,7 @@ } raw_ostream &raw_ostream::operator<<(double N) { - this->operator<<(ftostr(N)); - return *this; + return this->operator<<(ftostr(N)); } From sabre at nondot.org Mon Dec 14 13:34:33 2009 From: sabre at nondot.org (Chris Lattner) Date: Mon, 14 Dec 2009 19:34:33 -0000 Subject: [llvm-commits] [llvm] r91318 - /llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Message-ID: <200912141934.nBEJYX2L012628@zion.cs.uiuc.edu> Author: lattner Date: Mon Dec 14 13:34:32 2009 New Revision: 91318 URL: http://llvm.org/viewvc/llvm-project?rev=91318&view=rev Log: fix an obvious bug found by clang++ and collapse a redundant if. Here's the diagnostic from clang: /Volumes/Data/dgregor/Projects/llvm/lib/Target/CppBackend/CPPBackend.cpp:989:23: warning: 'gv' is always NULL in this context         printConstant(gv);                       ^ 1 diagnostic generated. 
Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp?rev=91318&r1=91317&r2=91318&view=diff ============================================================================== --- llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp (original) +++ llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Mon Dec 14 13:34:32 2009 @@ -976,21 +976,20 @@ nl(Out); printType(GV->getType()); if (GV->hasInitializer()) { - Constant* Init = GV->getInitializer(); + Constant *Init = GV->getInitializer(); printType(Init->getType()); - if (Function* F = dyn_cast(Init)) { + if (Function *F = dyn_cast(Init)) { nl(Out)<< "/ Function Declarations"; nl(Out); printFunctionHead(F); } else if (GlobalVariable* gv = dyn_cast(Init)) { nl(Out) << "// Global Variable Declarations"; nl(Out); printVariableHead(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(gv); - } - if (GlobalVariable* gv = dyn_cast(Init)) { + nl(Out) << "// Global Variable Definitions"; nl(Out); printVariableBody(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(Init); } } } From gohman at apple.com Mon Dec 14 13:43:17 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 19:43:17 -0000 Subject: [llvm-commits] [llvm] r91319 - in /llvm/trunk: include/llvm/Pass.h lib/VMCore/Pass.cpp Message-ID: <200912141943.nBEJhHhM012971@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 13:43:09 2009 New Revision: 91319 URL: http://llvm.org/viewvc/llvm-project?rev=91319&view=rev Log: Move several function bodies which are rarely inlined out of line. 
Modified: llvm/trunk/include/llvm/Pass.h llvm/trunk/lib/VMCore/Pass.cpp Modified: llvm/trunk/include/llvm/Pass.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Pass.h?rev=91319&r1=91318&r2=91319&view=diff ============================================================================== --- llvm/trunk/include/llvm/Pass.h (original) +++ llvm/trunk/include/llvm/Pass.h Mon Dec 14 13:43:09 2009 @@ -111,12 +111,10 @@ virtual void assignPassManager(PMStack &, PassManagerType = PMT_Unknown) {} /// Check if available pass managers are suitable for this pass or not. - virtual void preparePassManager(PMStack &) {} + virtual void preparePassManager(PMStack &); /// Return what kind of Pass Manager can manage this pass. - virtual PassManagerType getPotentialPassManagerType() const { - return PMT_Unknown; - } + virtual PassManagerType getPotentialPassManagerType() const; // Access AnalysisResolver inline void setResolver(AnalysisResolver *AR) { @@ -132,9 +130,7 @@ /// particular analysis result to this function, it can then use the /// getAnalysis() function, below. /// - virtual void getAnalysisUsage(AnalysisUsage &) const { - // By default, no analysis results are used, all are invalidated. - } + virtual void getAnalysisUsage(AnalysisUsage &) const; /// releaseMemory() - This member can be implemented by a pass if it wants to /// be able to release its memory when it is no longer needed. The default @@ -147,11 +143,11 @@ /// Optionally implement this function to release pass memory when it is no /// longer used. /// - virtual void releaseMemory() {} + virtual void releaseMemory(); /// verifyAnalysis() - This member can be implemented by a analysis pass to /// check state of analysis information. 
- virtual void verifyAnalysis() const {} + virtual void verifyAnalysis() const; // dumpPassStructure - Implement the -debug-passes=PassStructure option virtual void dumpPassStructure(unsigned Offset = 0); @@ -221,9 +217,7 @@ PassManagerType T = PMT_ModulePassManager); /// Return what kind of Pass Manager can manage this pass. - virtual PassManagerType getPotentialPassManagerType() const { - return PMT_ModulePassManager; - } + virtual PassManagerType getPotentialPassManagerType() const; explicit ModulePass(intptr_t pid) : Pass(pid) {} explicit ModulePass(const void *pid) : Pass(pid) {} @@ -245,7 +239,7 @@ /// and if it does, the overloaded version of initializePass may get access to /// these passes with getAnalysis<>. /// - virtual void initializePass() {} + virtual void initializePass(); /// ImmutablePasses are never run. /// @@ -276,7 +270,7 @@ /// doInitialization - Virtual method overridden by subclasses to do /// any necessary per-module initialization. /// - virtual bool doInitialization(Module &) { return false; } + virtual bool doInitialization(Module &); /// runOnFunction - Virtual method overriden by subclasses to do the /// per-function processing of the pass. @@ -286,7 +280,7 @@ /// doFinalization - Virtual method overriden by subclasses to do any post /// processing needed after all passes have run. /// - virtual bool doFinalization(Module &) { return false; } + virtual bool doFinalization(Module &); /// runOnModule - On a module, we run this pass by initializing, /// ronOnFunction'ing once for every function in the module, then by @@ -303,9 +297,7 @@ PassManagerType T = PMT_FunctionPassManager); /// Return what kind of Pass Manager can manage this pass. - virtual PassManagerType getPotentialPassManagerType() const { - return PMT_FunctionPassManager; - } + virtual PassManagerType getPotentialPassManagerType() const; }; @@ -328,12 +320,12 @@ /// doInitialization - Virtual method overridden by subclasses to do /// any necessary per-module initialization. 
/// - virtual bool doInitialization(Module &) { return false; } + virtual bool doInitialization(Module &); /// doInitialization - Virtual method overridden by BasicBlockPass subclasses /// to do any necessary per-function initialization. /// - virtual bool doInitialization(Function &) { return false; } + virtual bool doInitialization(Function &); /// runOnBasicBlock - Virtual method overriden by subclasses to do the /// per-basicblock processing of the pass. @@ -343,12 +335,12 @@ /// doFinalization - Virtual method overriden by BasicBlockPass subclasses to /// do any post processing needed after all passes have run. /// - virtual bool doFinalization(Function &) { return false; } + virtual bool doFinalization(Function &); /// doFinalization - Virtual method overriden by subclasses to do any post /// processing needed after all passes have run. /// - virtual bool doFinalization(Module &) { return false; } + virtual bool doFinalization(Module &); // To run this pass on a function, we simply call runOnBasicBlock once for @@ -360,9 +352,7 @@ PassManagerType T = PMT_BasicBlockPassManager); /// Return what kind of Pass Manager can manage this pass. - virtual PassManagerType getPotentialPassManagerType() const { - return PMT_BasicBlockPassManager; - } + virtual PassManagerType getPotentialPassManagerType() const; }; /// If the user specifies the -time-passes argument on an LLVM tool command line Modified: llvm/trunk/lib/VMCore/Pass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Pass.cpp?rev=91319&r1=91318&r2=91319&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Pass.cpp (original) +++ llvm/trunk/lib/VMCore/Pass.cpp Mon Dec 14 13:43:09 2009 @@ -41,6 +41,10 @@ // Force out-of-line virtual method. 
ModulePass::~ModulePass() { } +PassManagerType ModulePass::getPotentialPassManagerType() const { + return PMT_ModulePassManager; +} + bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const { return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0; } @@ -60,6 +64,27 @@ return "Unnamed pass: implement Pass::getPassName()"; } +void Pass::preparePassManager(PMStack &) { + // By default, don't do anything. +} + +PassManagerType Pass::getPotentialPassManagerType() const { + // Default implementation. + return PMT_Unknown; +} + +void Pass::getAnalysisUsage(AnalysisUsage &) const { + // By default, no analysis results are used, all are invalidated. +} + +void Pass::releaseMemory() { + // By default, don't do anything. +} + +void Pass::verifyAnalysis() const { + // By default, don't do anything. +} + // print - Print out the internal state of the pass. This is called by Analyze // to print out the contents of an analysis. Otherwise it is not necessary to // implement this method. @@ -79,6 +104,10 @@ // Force out-of-line virtual method. ImmutablePass::~ImmutablePass() { } +void ImmutablePass::initializePass() { + // By default, don't do anything. +} + //===----------------------------------------------------------------------===// // FunctionPass Implementation // @@ -107,6 +136,20 @@ return Changed | doFinalization(*F.getParent()); } +bool FunctionPass::doInitialization(Module &) { + // By default, don't do anything. + return false; +} + +bool FunctionPass::doFinalization(Module &) { + // By default, don't do anything. + return false; +} + +PassManagerType FunctionPass::getPotentialPassManagerType() const { + return PMT_FunctionPassManager; +} + //===----------------------------------------------------------------------===// // BasicBlockPass Implementation // @@ -121,6 +164,30 @@ return Changed | doFinalization(F); } +bool BasicBlockPass::doInitialization(Module &) { + // By default, don't do anything. 
+ return false; +} + +bool BasicBlockPass::doInitialization(Function &) { + // By default, don't do anything. + return false; +} + +bool BasicBlockPass::doFinalization(Function &) { + // By default, don't do anything. + return false; +} + +bool BasicBlockPass::doFinalization(Module &) { + // By default, don't do anything. + return false; +} + +PassManagerType BasicBlockPass::getPotentialPassManagerType() const { + return PMT_BasicBlockPassManager; +} + //===----------------------------------------------------------------------===// // Pass Registration mechanism // From gohman at apple.com Mon Dec 14 13:55:23 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 19:55:23 -0000 Subject: [llvm-commits] [llvm] r91320 - in /llvm/trunk: include/llvm/Config/ lib/Target/X86/Disassembler/ Message-ID: <200912141955.nBEJtNj4013407@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 13:55:22 2009 New Revision: 91320 URL: http://llvm.org/viewvc/llvm-project?rev=91320&view=rev Log: Add svn:ignore entries for the Disassembler files. 
Modified: llvm/trunk/include/llvm/Config/ (props changed) llvm/trunk/lib/Target/X86/Disassembler/ (props changed) Propchange: llvm/trunk/include/llvm/Config/ ------------------------------------------------------------------------------ --- svn:ignore (original) +++ svn:ignore Mon Dec 14 13:55:22 2009 @@ -3,3 +3,4 @@ AsmPrinters.def Targets.def AsmParsers.def +Disassemblers.def Propchange: llvm/trunk/lib/Target/X86/Disassembler/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Mon Dec 14 13:55:22 2009 @@ -0,0 +1,8 @@ +Debug +Release +Release-Asserts +*.inc +Debug+Coverage-Asserts +Debug+Coverage +Release+Coverage +Debug+Checks From grosbach at apple.com Mon Dec 14 14:14:59 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 20:14:59 -0000 Subject: [llvm-commits] [llvm] r91321 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <200912142014.nBEKEx6p014220@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 14:14:59 2009 New Revision: 91321 URL: http://llvm.org/viewvc/llvm-project?rev=91321&view=rev Log: Thumb2 atomic operations Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=91321&r1=91320&r2=91321&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Dec 14 14:14:59 2009 @@ -3055,13 +3055,23 @@ .createVirtualRegister(ARM::GPRRegisterClass); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); unsigned ldrOpc, strOpc; switch (Size) { default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: ldrOpc = ARM::LDREXB; strOpc = ARM::STREXB; break; - case 2: ldrOpc = 
ARM::LDREXH; strOpc = ARM::STREXH; break; - case 4: ldrOpc = ARM::LDREX; strOpc = ARM::STREX; break; + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + break; } MachineFunction *MF = BB->getParent(); @@ -3088,10 +3098,10 @@ // bne exitMBB BB = loop1MBB; AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPrr)) + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(dest).addReg(oldval)); - BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(exitMBB).addImm(ARMCC::NE) - .addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); @@ -3102,10 +3112,10 @@ BB = loop2MBB; AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) .addReg(ptr)); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPri)) + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(loop1MBB).addImm(ARMCC::NE) - .addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) + .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); @@ -3130,12 +3140,22 @@ unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); unsigned ldrOpc, strOpc; switch (Size) { default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: ldrOpc = ARM::LDREXB; strOpc = ARM::STREXB; break; - case 2: ldrOpc = ARM::LDREXH; strOpc = ARM::STREXH; break; - case 4: ldrOpc = ARM::LDREX; strOpc = ARM::STREX; break; + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + break; } MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -3156,8 +3176,8 @@ // loopMBB: // ldrex dest, ptr - // add tmp, dest, incr - // strex scratch, tmp, ptr + // <binop> scratch2, dest, incr + // strex scratch, scratch2, ptr // cmp scratch, #0 // bne- loopMBB // fallthrough --> exitMBB @@ -3169,10 +3189,10 @@ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) .addReg(ptr)); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::CMPri)) + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(ARM::Bcc)).addMBB(loopMBB).addImm(ARMCC::NE) - .addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); @@ -3189,38 +3209,57 @@ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { default: MI->dump(); llvm_unreachable("Unexpected instr type to insert"); - case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ADDrr); - - case ARM::ATOMIC_LOAD_AND_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ANDrr); - - case ARM::ATOMIC_LOAD_OR_I8: return EmitAtomicBinary(MI, BB, 1, ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I16: return EmitAtomicBinary(MI, BB, 2, ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I32: return EmitAtomicBinary(MI, BB, 4, ARM::ORRrr); - - case ARM::ATOMIC_LOAD_XOR_I8: return EmitAtomicBinary(MI, BB, 1, ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I16: return EmitAtomicBinary(MI, BB, 2, ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I32: return EmitAtomicBinary(MI, BB, 4, ARM::EORrr); - - case ARM::ATOMIC_LOAD_NAND_I8: return EmitAtomicBinary(MI, BB, 1, ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I16:return EmitAtomicBinary(MI, BB, 2, ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I32:return EmitAtomicBinary(MI, BB, 4, ARM::BICrr); - - case ARM::ATOMIC_LOAD_SUB_I8: return EmitAtomicBinary(MI, BB, 1, ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I16: return EmitAtomicBinary(MI, BB, 2, ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I32: return EmitAtomicBinary(MI, BB, 4, ARM::SUBrr); - - case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); - case ARM::ATOMIC_SWAP_I16: return 
EmitAtomicBinary(MI, BB, 2, 0); - case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); + case ARM::ATOMIC_LOAD_ADD_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + + case ARM::ATOMIC_LOAD_AND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + + case ARM::ATOMIC_LOAD_OR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + + case ARM::ATOMIC_LOAD_XOR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + + case ARM::ATOMIC_LOAD_NAND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + + case ARM::ATOMIC_LOAD_SUB_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); + case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); + case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); From johnny.chen at apple.com Mon Dec 14 15:01:46 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Mon, 14 Dec 2009 21:01:46 -0000 Subject: [llvm-commits] [llvm] r91327 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200912142101.nBEL1kL7016122@zion.cs.uiuc.edu> Author: johnny Date: Mon Dec 14 15:01:46 2009 New Revision: 91327 URL: http://llvm.org/viewvc/llvm-project?rev=91327&view=rev Log: Fixed encoding bits typo of ldrexd/strexd. Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91327&r1=91326&r2=91327&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 15:01:46 2009 @@ -1709,7 +1709,7 @@ def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, "ldrex", "\t$dest, [$ptr]", []>; -def LDREXD : AIldrex<0b00, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), +def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), NoItinerary, "ldrexd", "\t$dest, $dest2, [$ptr]", []>; @@ -1728,7 +1728,7 @@ NoItinerary, "strex", "\t$success, $src, [$ptr]", []>; -def STREXD : AIstrex<0b00, (outs GPR:$success), +def STREXD : AIstrex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$src2, GPR:$ptr), NoItinerary, "strexd", "\t$success, $src, $src2, [$ptr]", From Micah.Villmow at amd.com Mon 
Dec 14 15:10:43 2009 From: Micah.Villmow at amd.com (Villmow, Micah) Date: Mon, 14 Dec 2009 13:10:43 -0800 Subject: [llvm-commits] [llvm] r91158 - in /llvm/trunk: include/llvm/CodeGen/ValueTypes.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp In-Reply-To: <200912112131.nBBLVSbn029491@zion.cs.uiuc.edu> References: <200912112131.nBBLVSbn029491@zion.cs.uiuc.edu> Message-ID: <493720826E33B1459E7F5C253E6D4BB5BA49FE@ssanexmb2.amd.com> Dan, I've applied this patch set and it still does not work correctly. The problem seems to be an assert in TargetLowering.cpp. assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && "Mask size mismatches value type size!"); This occurs on a truncating store of a vector type. I've attached the test case that I am using to get past this. The problem seems to be that the BitWidth of the mask is the vector bitwidth and not the scalar bitwidth. This test case asserts on both my backend and x86. > -----Original Message----- > From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits- > bounces at cs.uiuc.edu] On Behalf Of Dan Gohman > Sent: Friday, December 11, 2009 1:31 PM > To: llvm-commits at cs.uiuc.edu > Subject: [llvm-commits] [llvm] r91158 - in /llvm/trunk: > include/llvm/CodeGen/ValueTypes.h > lib/CodeGen/SelectionDAG/DAGCombiner.cpp > lib/CodeGen/SelectionDAG/LegalizeDAG.cpp > lib/CodeGen/SelectionDAG/LegalizeTypes.h > lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp lib/ > > Author: djg > Date: Fri Dec 11 15:31:27 2009 > New Revision: 91158 > > URL: http://llvm.org/viewvc/llvm-project?rev=91158&view=rev > Log: > Implement vector widening, splitting, and scalarizing for > SIGN_EXTEND_INREG. 
> > Added: > llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll > Modified: > llvm/trunk/include/llvm/CodeGen/ValueTypes.h > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp > llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h > llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp > llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp > llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp > llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp > llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > > Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=91158&r1=91157 > &r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) > +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Fri Dec 11 15:31:27 > 2009 > @@ -166,6 +166,12 @@ > return *this; > } > } > + > + /// getScalarType - If this is a vector type, return the element > type, > + /// otherwise return this. > + MVT getScalarType() const { > + return isVector() ? getVectorElementType() : *this; > + } > > MVT getVectorElementType() const { > switch (SimpleTy) { > @@ -524,6 +530,12 @@ > return V; > } > > + /// getScalarType - If this is a vector type, return the element > type, > + /// otherwise return this. > + EVT getScalarType() const { > + return isVector() ? getVectorElementType() : *this; > + } > + > /// getVectorElementType - Given a vector type, return the type of > /// each element. 
> EVT getVectorElementType() const { > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91158&r > 1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Dec 11 > 15:31:27 2009 > @@ -119,7 +119,8 @@ > /// it can be simplified or if things it uses can be simplified by > bit > /// propagation. If so, return true. > bool SimplifyDemandedBits(SDValue Op) { > - APInt Demanded = > APInt::getAllOnesValue(Op.getValueSizeInBits()); > + unsigned BitWidth = > Op.getValueType().getScalarType().getSizeInBits(); > + APInt Demanded = APInt::getAllOnesValue(BitWidth); > return SimplifyDemandedBits(Op, Demanded); > } > > @@ -2441,7 +2442,7 @@ > ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); > ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); > EVT VT = N0.getValueType(); > - unsigned OpSizeInBits = VT.getSizeInBits(); > + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); > > // fold (shl c1, c2) -> c1<<c2 > if (N0C && N1C) > @@ -2457,7 +2458,7 @@ > return N0; > // if (shl x, c) is known to be zero, return 0 > if (DAG.MaskedValueIsZero(SDValue(N, 0), > - > APInt::getAllOnesValue(VT.getSizeInBits()))) > + APInt::getAllOnesValue(OpSizeInBits))) > return DAG.getConstant(0, VT); > // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), > (trunc c))). 
> if (N1.getOpcode() == ISD::TRUNCATE && > @@ -2533,6 +2534,7 @@ > ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); > ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); > EVT VT = N0.getValueType(); > + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); > > // fold (sra c1, c2) -> (sra c1, c2) > if (N0C && N1C) > @@ -2544,7 +2546,7 @@ > if (N0C && N0C->isAllOnesValue()) > return N0; > // fold (sra x, (setge c, size(x))) -> undef > - if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) > + if (N1C && N1C->getZExtValue() >= OpSizeInBits) > return DAG.getUNDEF(VT); > // fold (sra x, 0) -> x > if (N1C && N1C->isNullValue()) > @@ -2552,7 +2554,7 @@ > // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target > supports > // sext_inreg. > if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { > - unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C- > >getZExtValue(); > + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); > EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); > if ((!LegalOperations || > TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) > return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, > @@ -2563,7 +2565,7 @@ > if (N1C && N0.getOpcode() == ISD::SRA) { > if (ConstantSDNode *C1 = > dyn_cast<ConstantSDNode>(N0.getOperand(1))) { > unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); > - if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; > + if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; > return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, > N0.getOperand(0), > DAG.getConstant(Sum, N1C->getValueType(0))); > } > @@ -2579,9 +2581,8 @@ > const ConstantSDNode *N01C = > dyn_cast<ConstantSDNode>(N0.getOperand(1)); > if (N01C && N1C) { > // Determine what the truncate's result bitsize and type would > be. 
> - unsigned VTValSize = VT.getSizeInBits(); > EVT TruncVT = > - EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C- > >getZExtValue()); > + EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C- > >getZExtValue()); > // Determine the residual right-shift amount. > signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); > > @@ -2614,7 +2615,7 @@ > EVT TruncVT = N1.getValueType(); > SDValue N100 = N1.getOperand(0).getOperand(0); > APInt TruncC = N101C->getAPIntValue(); > - TruncC.trunc(TruncVT.getSizeInBits()); > + TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); > return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, > DAG.getNode(ISD::AND, N->getDebugLoc(), > TruncVT, > @@ -2643,7 +2644,7 @@ > ConstantSDNode *N0C = dyn_cast(N0); > ConstantSDNode *N1C = dyn_cast(N1); > EVT VT = N0.getValueType(); > - unsigned OpSizeInBits = VT.getSizeInBits(); > + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); > > // fold (srl c1, c2) -> c1 >>u c2 > if (N0C && N1C) > @@ -3036,7 +3037,7 @@ > else if (Op.getValueType().bitsGT(VT)) > Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); > return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, > Op, > - DAG.getValueType(N0.getValueType())); > + > DAG.getValueType(N0.getValueType().getScalarType())); > } > } > > @@ -3177,7 +3178,8 @@ > } else if (Op.getValueType().bitsGT(VT)) { > Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); > } > - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), > N0.getValueType()); > + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), > + N0.getValueType().getScalarType()); > } > > // Fold (zext (and (trunc x), cst)) -> (and x, cst), > @@ -3536,7 +3538,7 @@ > SDValue N1 = N->getOperand(1); > EVT VT = N->getValueType(0); > EVT EVT = cast(N1)->getVT(); > - unsigned VTBits = VT.getSizeInBits(); > + unsigned VTBits = VT.getScalarType().getSizeInBits(); > unsigned EVTBits = EVT.getSizeInBits(); > > // fold (sext_in_reg c1) -> c1 > @@ -3544,7 +3546,7 @@ > 
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, > N0, N1); > > // If the input is already sign extended, just drop the extension. > - if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) > + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) > return N0; > > // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, > minVT) pt2 > @@ -3559,7 +3561,7 @@ > // if x is small enough. > if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == > ISD::ANY_EXTEND) { > SDValue N00 = N0.getOperand(0); > - if (N00.getValueType().getSizeInBits() < EVTBits) > + if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) > return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, > N1); > } > > @@ -3583,11 +3585,11 @@ > // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" > above. > if (N0.getOpcode() == ISD::SRL) { > if (ConstantSDNode *ShAmt = > dyn_cast(N0.getOperand(1))) > - if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { > + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { > // We can turn this into an SRA iff the input to the SRL is > already sign > // extended enough. 
> unsigned InSignBits = > DAG.ComputeNumSignBits(N0.getOperand(0)); > - if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < > InSignBits) > + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) > return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, > N0.getOperand(0), N0.getOperand(1)); > } > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=91158&r > 1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Fri Dec 11 > 15:31:27 2009 > @@ -2294,9 +2294,16 @@ > // NOTE: we could fall back on load/store here too for targets > without > // SAR. However, it is doubtful that any exist. > EVT ExtraVT = cast(Node->getOperand(1))->getVT(); > - unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - > + EVT VT = Node->getValueType(0); > + EVT ShiftAmountTy = TLI.getShiftAmountTy(); > + if (ExtraVT.isVector()) ExtraVT = ExtraVT.getVectorElementType(); > + if (VT.isVector()) { > + ShiftAmountTy = VT; > + VT = VT.getVectorElementType(); > + } > + unsigned BitsDiff = VT.getSizeInBits() - > ExtraVT.getSizeInBits(); > - SDValue ShiftCst = DAG.getConstant(BitsDiff, > TLI.getShiftAmountTy()); > + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); > Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), > Node->getOperand(0), ShiftCst); > Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, > ShiftCst); > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=91158&r > 1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original) > 
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Fri Dec 11 > 15:31:27 2009 > @@ -517,6 +517,7 @@ > SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); > SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); > SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); > + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); > SDValue ScalarizeVecRes_SELECT(SDNode *N); > SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); > SDValue ScalarizeVecRes_SETCC(SDNode *N); > @@ -560,6 +561,7 @@ > void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue > &Hi); > void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); > void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue > &Hi); > + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue > &Hi); > void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); > void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); > void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, > @@ -602,6 +604,7 @@ > SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); > SDValue WidenVecRes_LOAD(SDNode* N); > SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); > + SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); > SDValue WidenVecRes_SELECT(SDNode* N); > SDValue WidenVecRes_SELECT_CC(SDNode* N); > SDValue WidenVecRes_UNDEF(SDNode *N); > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=9 > 1158&r1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp > (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Fri Dec > 11 15:31:27 2009 > @@ -179,6 +179,7 @@ > case ISD::FRINT: > case ISD::FNEARBYINT: > case ISD::FFLOOR: > + case ISD::SIGN_EXTEND_INREG: > QueryType = Node->getValueType(0); > break; > case ISD::SINT_TO_FP: > > Modified: 
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev > =91158&r1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp > (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Fri Dec > 11 15:31:27 2009 > @@ -54,6 +54,7 @@ > case ISD::INSERT_VECTOR_ELT: R = > ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; > case ISD::LOAD: R = > ScalarizeVecRes_LOAD(cast(N));break; > case ISD::SCALAR_TO_VECTOR: R = > ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; > + case ISD::SIGN_EXTEND_INREG: R = > ScalarizeVecRes_SIGN_EXTEND_INREG(N); break; > case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; > case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); > break; > case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; > @@ -195,6 +196,13 @@ > return InOp; > } > > +SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) > { > + EVT EltVT = N->getValueType(0).getVectorElementType(); > + SDValue LHS = GetScalarizedVector(N->getOperand(0)); > + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT, > + LHS, N->getOperand(1)); > +} > + > SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { > SDValue LHS = GetScalarizedVector(N->getOperand(1)); > return DAG.getNode(ISD::SELECT, N->getDebugLoc(), > @@ -401,6 +409,7 @@ > case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; > case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, > Hi); break; > case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, > Hi); break; > + case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, > Hi); break; > case ISD::LOAD: > SplitVecRes_LOAD(cast(N), Lo, Hi); > break; > @@ -700,6 +709,18 @@ > Hi = DAG.getUNDEF(HiVT); > } > > +void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode 
*N, > SDValue &Lo, > + SDValue &Hi) { > + SDValue LHSLo, LHSHi; > + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); > + DebugLoc dl = N->getDebugLoc(); > + > + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, > + N->getOperand(1)); > + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, > + N->getOperand(1)); > +} > + > void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, > SDValue &Hi) { > assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type > legalization!"); > @@ -1141,6 +1162,7 @@ > case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); > break; > case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; > case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); > break; > + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); > break; > case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; > case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; > case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; > @@ -1691,6 +1713,13 @@ > WidenVT, N->getOperand(0)); > } > > +SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) { > + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N- > >getValueType(0)); > + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); > + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), > + WidenVT, WidenLHS, N->getOperand(1)); > +} > + > SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { > EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N- > >getValueType(0)); > unsigned WidenNumElts = WidenVT.getVectorNumElements(); > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91158& > r1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) > +++ 
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri Dec 11 > 15:31:27 2009 > @@ -832,8 +832,12 @@ > } > > SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT > VT) { > + assert(!VT.isVector() && > + "getZeroExtendInReg should use the vector element type > instead of " > + "the vector type!"); > if (Op.getValueType() == VT) return Op; > - APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), > + unsigned BitWidth = > Op.getValueType().getScalarType().getSizeInBits(); > + APInt Imm = APInt::getLowBitsSet(BitWidth, > VT.getSizeInBits()); > return getNode(ISD::AND, DL, Op.getValueType(), Op, > getConstant(Imm, Op.getValueType())); > @@ -1481,7 +1485,7 @@ > if (Op.getValueType().isVector()) > return false; > > - unsigned BitWidth = Op.getValueSizeInBits(); > + unsigned BitWidth = > Op.getValueType().getScalarType().getSizeInBits(); > return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); > } > > @@ -1504,7 +1508,7 @@ > APInt &KnownZero, APInt > &KnownOne, > unsigned Depth) const { > unsigned BitWidth = Mask.getBitWidth(); > - assert(BitWidth == Op.getValueType().getSizeInBits() && > + assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() > && > "Mask size mismatches value type size!"); > > KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. 
> @@ -1761,7 +1765,7 @@ > } > case ISD::ZERO_EXTEND: { > EVT InVT = Op.getOperand(0).getValueType(); > - unsigned InBits = InVT.getSizeInBits(); > + unsigned InBits = InVT.getScalarType().getSizeInBits(); > APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - > InBits) & Mask; > APInt InMask = Mask; > InMask.trunc(InBits); > @@ -1775,7 +1779,7 @@ > } > case ISD::SIGN_EXTEND: { > EVT InVT = Op.getOperand(0).getValueType(); > - unsigned InBits = InVT.getSizeInBits(); > + unsigned InBits = InVT.getScalarType().getSizeInBits(); > APInt InSignBit = APInt::getSignBit(InBits); > APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - > InBits) & Mask; > APInt InMask = Mask; > @@ -1816,7 +1820,7 @@ > } > case ISD::ANY_EXTEND: { > EVT InVT = Op.getOperand(0).getValueType(); > - unsigned InBits = InVT.getSizeInBits(); > + unsigned InBits = InVT.getScalarType().getSizeInBits(); > APInt InMask = Mask; > InMask.trunc(InBits); > KnownZero.trunc(InBits); > @@ -1828,7 +1832,7 @@ > } > case ISD::TRUNCATE: { > EVT InVT = Op.getOperand(0).getValueType(); > - unsigned InBits = InVT.getSizeInBits(); > + unsigned InBits = InVT.getScalarType().getSizeInBits(); > APInt InMask = Mask; > InMask.zext(InBits); > KnownZero.zext(InBits); > @@ -1961,7 +1965,7 @@ > unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) > const{ > EVT VT = Op.getValueType(); > assert(VT.isInteger() && "Invalid VT!"); > - unsigned VTBits = VT.getSizeInBits(); > + unsigned VTBits = VT.getScalarType().getSizeInBits(); > unsigned Tmp, Tmp2; > unsigned FirstAnswer = 1; > > @@ -1988,7 +1992,7 @@ > } > > case ISD::SIGN_EXTEND: > - Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); > + Tmp = VTBits- > Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); > return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; > > case ISD::SIGN_EXTEND_INREG: > @@ -2624,6 +2628,9 @@ > assert(VT == N1.getValueType() && "Not an inreg extend!"); > assert(VT.isInteger() && EVT.isInteger() && 
> "Cannot *_EXTEND_INREG FP types"); > + assert(!EVT.isVector() && > + "AssertSExt/AssertZExt type should be the vector element > type " > + "rather than the vector type!"); > assert(EVT.bitsLE(VT) && "Not extending!"); > if (VT == EVT) return N1; // noop assertion. > break; > @@ -2633,12 +2640,15 @@ > assert(VT == N1.getValueType() && "Not an inreg extend!"); > assert(VT.isInteger() && EVT.isInteger() && > "Cannot *_EXTEND_INREG FP types"); > - assert(EVT.bitsLE(VT) && "Not extending!"); > + assert(!EVT.isVector() && > + "SIGN_EXTEND_INREG type should be the vector element type > rather " > + "than the vector type!"); > + assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); > if (EVT == VT) return N1; // Not actually extending > > if (N1C) { > APInt Val = N1C->getAPIntValue(); > - unsigned FromBits = cast(N2)->getVT().getSizeInBits(); > + unsigned FromBits = EVT.getSizeInBits(); > Val <<= Val.getBitWidth()-FromBits; > Val = Val.ashr(Val.getBitWidth()-FromBits); > return getConstant(Val, VT); > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=9115 > 8&r1=91157&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Dec 11 > 15:31:27 2009 > @@ -911,7 +911,7 @@ > TargetLoweringOpt &TLO, > unsigned Depth) const { > unsigned BitWidth = DemandedMask.getBitWidth(); > - assert(Op.getValueSizeInBits() == BitWidth && > + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth > && > "Mask size mismatches value type size!"); > APInt NewMask = DemandedMask; > DebugLoc dl = Op.getDebugLoc(); > @@ -1240,7 +1240,7 @@ > // demand the input sign bit. 
> APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); > if (HighBits.intersects(NewMask)) > - InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); > + InDemandedMask |= > APInt::getSignBit(VT.getScalarType().getSizeInBits()); > > if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, > KnownZero, KnownOne, TLO, Depth+1)) > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91158&r1=9115 > 7&r2=91158&view=diff > > ======================================================================= > ======= > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 11 15:31:27 > 2009 > @@ -595,6 +595,7 @@ > setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, > Expand); > setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, > Expand); > setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, > Expand); > + setOperationAction(ISD::SIGN_EXTEND_INREG, > (MVT::SimpleValueType)VT,Expand); > } > > // FIXME: In order to prevent SSE instructions being expanded to MMX > ones > > Added: llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll?rev=91158&view=aut > o > > ======================================================================= > ======= > --- llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll (added) > +++ llvm/trunk/test/CodeGen/X86/vec_ext_inreg.ll Fri Dec 11 15:31:27 > 2009 > @@ -0,0 +1,37 @@ > +; RUN: llc < %s -march=x86-64 > + > +define <8 x i32> @a(<8 x i32> %a) nounwind { > + %b = trunc <8 x i32> %a to <8 x i16> > + %c = sext <8 x i16> %b to <8 x i32> > + ret <8 x i32> %c > +} > + > +define <3 x i32> @b(<3 x i32> %a) nounwind { > + %b = trunc <3 x i32> %a to <3 x i16> > + %c = sext <3 x i16> %b to <3 x i32> > + ret <3 x i32> %c > +} > + > +define <1 x i32> @c(<1 x i32> %a) nounwind { > + %b = 
trunc <1 x i32> %a to <1 x i16> > + %c = sext <1 x i16> %b to <1 x i32> > + ret <1 x i32> %c > +} > + > +define <8 x i32> @d(<8 x i32> %a) nounwind { > + %b = trunc <8 x i32> %a to <8 x i16> > + %c = zext <8 x i16> %b to <8 x i32> > + ret <8 x i32> %c > +} > + > +define <3 x i32> @e(<3 x i32> %a) nounwind { > + %b = trunc <3 x i32> %a to <3 x i16> > + %c = zext <3 x i16> %b to <3 x i32> > + ret <3 x i32> %c > +} > + > +define <1 x i32> @f(<1 x i32> %a) nounwind { > + %b = trunc <1 x i32> %a to <1 x i16> > + %c = zext <1 x i16> %b to <1 x i32> > + ret <1 x i32> %c > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- A non-text attachment was scrubbed... Name: loads-custom.bc Type: application/octet-stream Size: 2268 bytes Desc: loads-custom.bc Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091214/fedf58cf/attachment.obj -------------- next part -------------- A non-text attachment was scrubbed... 
Name: loads-custom.ll Type: application/octet-stream Size: 12799 bytes Desc: loads-custom.ll Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091214/fedf58cf/attachment-0001.obj From grosbach at apple.com Mon Dec 14 15:24:17 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 21:24:17 -0000 Subject: [llvm-commits] [llvm] r91329 - in /llvm/trunk/lib/Target/ARM: ARMISelLowering.cpp ARMInstrInfo.td ARMInstrThumb2.td Message-ID: <200912142124.nBELOHgF017049@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 15:24:16 2009 New Revision: 91329 URL: http://llvm.org/viewvc/llvm-project?rev=91329&view=rev Log: Add ARMv6 memory and sync barrier instructions Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=91329&r1=91328&r2=91329&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Dec 14 15:24:16 2009 @@ -1474,17 +1474,24 @@ } } -static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); SDValue Res; unsigned isDeviceBarrier = cast(Op5)->getZExtValue(); if (isDeviceBarrier) { - Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, - Op.getOperand(0)); + if (Subtarget->hasV7Ops()) + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); } else { - Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, - Op.getOperand(0)); + if (Subtarget->hasV7Ops()) + Res = 
DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); } return Res; } @@ -2991,7 +2998,7 @@ case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91329&r1=91328&r2=91329&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 15:24:16 2009 @@ -46,8 +46,10 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; -def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>; -def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; @@ -96,9 +98,13 @@ def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; -def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, +def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, [SDNPHasChain]>; -def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER, +def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, + [SDNPHasChain]>; +def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, + [SDNPHasChain]>; +def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, [SDNPHasChain]>; //===----------------------------------------------------------------------===// @@ -1578,7 +1584,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "", - [(ARMMemBarrier)]>, + [(ARMMemBarrierV7)]>, Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB @@ -1588,12 +1594,30 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "", - [(ARMSyncBarrier)]>, + [(ARMSyncBarrierV7)]>, Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; } + +def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 5", + [(ARMMemBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DMB + // FIXME: add encoding +} + +def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 5", + [(ARMSyncBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DSB + // FIXME: add encoding +} } let usesCustomInserter = 1 in { Modified: 
llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91329&r1=91328&r2=91329&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Dec 14 15:24:16 2009 @@ -1073,7 +1073,7 @@ def t2Int_MemBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dmb", "", - [(ARMMemBarrier)]>, + [(ARMMemBarrierV7)]>, Requires<[IsThumb2]> { // FIXME: add support for options other than a full system DMB } @@ -1081,7 +1081,7 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), Pseudo, NoItinerary, "dsb", "", - [(ARMSyncBarrier)]>, + [(ARMSyncBarrierV7)]>, Requires<[IsThumb2]> { // FIXME: add support for options other than a full system DSB } From grosbach at apple.com Mon Dec 14 15:33:33 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 21:33:33 -0000 Subject: [llvm-commits] [llvm] r91333 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200912142133.nBELXXXG017503@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 15:33:32 2009 New Revision: 91333 URL: http://llvm.org/viewvc/llvm-project?rev=91333&view=rev Log: v6 sync insn copy/paste error Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91333&r1=91332&r2=91333&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 15:33:32 2009 @@ -1612,7 +1612,7 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), Pseudo, NoItinerary, - "mcr", "\tp15, 0, $zero, c7, c10, 5", + "mcr", "\tp15, 0, $zero, c7, c10, 4", [(ARMSyncBarrierV6 GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other 
than a full system DSB From grosbach at apple.com Mon Dec 14 15:48:46 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 21:48:46 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91336 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200912142148.nBELmkho018138@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 15:48:45 2009 New Revision: 91336 URL: http://llvm.org/viewvc/llvm-project?rev=91336&view=rev Log: Enable atomic builtins for ARM Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91336&r1=91335&r2=91336&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Dec 14 15:48:45 2009 @@ -5178,7 +5178,8 @@ C, C + 5); return true; } -#if defined(TARGET_ALPHA) || defined(TARGET_386) || defined(TARGET_POWERPC) +#if defined(TARGET_ALPHA) || defined(TARGET_386) || defined(TARGET_POWERPC) \ + || defined(TARGET_ARM) // gcc uses many names for the sync intrinsics // The type of the first argument is not reliable for choosing the // right llvm function; if the original type is not volatile, gcc has @@ -5189,18 +5190,33 @@ // Note that Intrinsic::getDeclaration expects the type list in reversed // order, while CreateCall expects the parameter list in normal order. 
case BUILT_IN_BOOL_COMPARE_AND_SWAP_1: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildCmpAndSwapAtomicBuiltin(exp, unsigned_char_type_node, true); return true; } case BUILT_IN_BOOL_COMPARE_AND_SWAP_2: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildCmpAndSwapAtomicBuiltin(exp, short_unsigned_type_node, true); return true; } case BUILT_IN_BOOL_COMPARE_AND_SWAP_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildCmpAndSwapAtomicBuiltin(exp, unsigned_type_node, true); return true; } case BUILT_IN_BOOL_COMPARE_AND_SWAP_8: { +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5211,6 +5227,9 @@ } case BUILT_IN_VAL_COMPARE_AND_SWAP_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5218,11 +5237,19 @@ case BUILT_IN_VAL_COMPARE_AND_SWAP_1: case BUILT_IN_VAL_COMPARE_AND_SWAP_2: case BUILT_IN_VAL_COMPARE_AND_SWAP_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif tree type = TREE_TYPE(exp); Result = BuildCmpAndSwapAtomicBuiltin(exp, type, false); return true; } case BUILT_IN_FETCH_AND_ADD_8: +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5234,6 +5261,9 @@ return true; } case BUILT_IN_FETCH_AND_SUB_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5241,10 +5271,17 @@ case BUILT_IN_FETCH_AND_SUB_1: case BUILT_IN_FETCH_AND_SUB_2: case BUILT_IN_FETCH_AND_SUB_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_load_sub); return true; } case BUILT_IN_FETCH_AND_OR_8: +#if defined(TARGET_ARM) + return false; 
+#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5252,10 +5289,17 @@ case BUILT_IN_FETCH_AND_OR_1: case BUILT_IN_FETCH_AND_OR_2: case BUILT_IN_FETCH_AND_OR_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_load_or); return true; } case BUILT_IN_FETCH_AND_AND_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5263,10 +5307,17 @@ case BUILT_IN_FETCH_AND_AND_1: case BUILT_IN_FETCH_AND_AND_2: case BUILT_IN_FETCH_AND_AND_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_load_and); return true; } case BUILT_IN_FETCH_AND_XOR_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5274,10 +5325,17 @@ case BUILT_IN_FETCH_AND_XOR_1: case BUILT_IN_FETCH_AND_XOR_2: case BUILT_IN_FETCH_AND_XOR_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_load_xor); return true; } case BUILT_IN_FETCH_AND_NAND_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5285,10 +5343,17 @@ case BUILT_IN_FETCH_AND_NAND_1: case BUILT_IN_FETCH_AND_NAND_2: case BUILT_IN_FETCH_AND_NAND_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_load_nand); return true; } case BUILT_IN_LOCK_TEST_AND_SET_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5296,11 +5361,18 @@ case BUILT_IN_LOCK_TEST_AND_SET_1: case BUILT_IN_LOCK_TEST_AND_SET_2: case BUILT_IN_LOCK_TEST_AND_SET_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif 
Result = BuildBinaryAtomicBuiltin(exp, Intrinsic::atomic_swap); return true; } case BUILT_IN_ADD_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5308,6 +5380,10 @@ case BUILT_IN_ADD_AND_FETCH_1: case BUILT_IN_ADD_AND_FETCH_2: case BUILT_IN_ADD_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { @@ -5340,6 +5416,9 @@ return true; } case BUILT_IN_SUB_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5347,6 +5426,10 @@ case BUILT_IN_SUB_AND_FETCH_1: case BUILT_IN_SUB_AND_FETCH_2: case BUILT_IN_SUB_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { @@ -5379,6 +5462,9 @@ return true; } case BUILT_IN_OR_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5386,6 +5472,10 @@ case BUILT_IN_OR_AND_FETCH_1: case BUILT_IN_OR_AND_FETCH_2: case BUILT_IN_OR_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { @@ -5425,6 +5515,10 @@ case BUILT_IN_AND_AND_FETCH_1: case BUILT_IN_AND_AND_FETCH_2: case BUILT_IN_AND_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { @@ -5457,6 +5551,9 @@ return true; } case BUILT_IN_XOR_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5464,6 +5561,10 
@@ case BUILT_IN_XOR_AND_FETCH_1: case BUILT_IN_XOR_AND_FETCH_2: case BUILT_IN_XOR_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { @@ -5496,6 +5597,9 @@ return true; } case BUILT_IN_NAND_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; @@ -5503,6 +5607,10 @@ case BUILT_IN_NAND_AND_FETCH_1: case BUILT_IN_NAND_AND_FETCH_2: case BUILT_IN_NAND_AND_FETCH_4: { +#if defined(TARGET_ARM) + if (TARGET_THUMB1 || !arm_arch6) + return false; +#endif const Type *ResultTy = ConvertType(TREE_TYPE(exp)); tree arglist = TREE_OPERAND(exp, 1); Value* C[2] = { From isanbard at gmail.com Mon Dec 14 15:49:44 2009 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 14 Dec 2009 21:49:44 -0000 Subject: [llvm-commits] [llvm] r91337 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Message-ID: <200912142149.nBELnijF018208@zion.cs.uiuc.edu> Author: void Date: Mon Dec 14 15:49:44 2009 New Revision: 91337 URL: http://llvm.org/viewvc/llvm-project?rev=91337&view=rev Log: The CIE says that the LSDA pointer in the FDE section is an "sdata4". That's fine, but we need it to actually be 4-bytes in the FDE. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=91337&r1=91336&r2=91337&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Mon Dec 14 15:49:44 2009 @@ -292,14 +292,13 @@ Asm->EmitULEB128Bytes(is4Byte ? 
4 : 8); Asm->EOL("Augmentation size"); + // We force 32-bits here because we've encoded our LSDA in the CIE with + // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree. if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, false); - else { - if (is4Byte) - Asm->EmitInt32((int)0); - else - Asm->EmitInt64((int)0); - } + EmitReference("exception", EHFrameInfo.Number, true, true); + else + Asm->EmitInt32((int)0); + Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); From johnny.chen at apple.com Mon Dec 14 15:51:34 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Mon, 14 Dec 2009 21:51:34 -0000 Subject: [llvm-commits] [llvm] r91339 - /llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Message-ID: <200912142151.nBELpYkm018291@zion.cs.uiuc.edu> Author: johnny Date: Mon Dec 14 15:51:34 2009 New Revision: 91339 URL: http://llvm.org/viewvc/llvm-project?rev=91339&view=rev Log: Add encoding bits "let Inst{11-4} = 0b00000000;" to BR_JTr to disambiguate between BR_JTr and STREXD. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91339&r1=91338&r2=91339&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Dec 14 15:51:34 2009 @@ -786,6 +786,7 @@ def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; From grosbach at apple.com Mon Dec 14 16:40:51 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 22:40:51 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91350 - in /llvm-gcc-4.2/trunk/gcc: llvm-convert.cpp llvm-internal.h Message-ID: <200912142240.nBEMeppR020572@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 16:40:51 2009 New Revision: 91350 URL: http://llvm.org/viewvc/llvm-project?rev=91350&view=rev Log: ARM Darwin doesn't require as strict a memory barrier Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp llvm-gcc-4.2/trunk/gcc/llvm-internal.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91350&r1=91349&r2=91350&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Dec 14 16:40:51 2009 @@ -4762,14 +4762,14 @@ TargetBuiltinCache.clear(); } -void TreeToLLVM::EmitMemoryBarrier(bool ll, bool ls, bool sl, bool ss) { +void TreeToLLVM::EmitMemoryBarrier(bool ll, bool ls, bool sl, bool ss, + bool device) { Value* C[5]; C[0] = ConstantInt::get(Type::getInt1Ty(Context), ll); C[1] = 
ConstantInt::get(Type::getInt1Ty(Context), ls); C[2] = ConstantInt::get(Type::getInt1Ty(Context), sl); C[3] = ConstantInt::get(Type::getInt1Ty(Context), ss); - // Be conservatively safe. - C[4] = ConstantInt::get(Type::getInt1Ty(Context), true); + C[4] = ConstantInt::get(Type::getInt1Ty(Context), device); Builder.CreateCall(Intrinsic::getDeclaration(TheModule, Intrinsic::memory_barrier), @@ -4792,7 +4792,11 @@ "cast"); // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Value *Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, id, Ty, 2), @@ -4821,7 +4825,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Value *Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -4831,7 +4839,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif if (isBool) Result = CastToUIntType(Builder.CreateICmpEQ(Result, C[1]), @@ -5399,7 +5411,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. 
- EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5409,7 +5425,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateAdd(Result, C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); @@ -5445,7 +5465,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5455,7 +5479,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateSub(Result, C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); @@ -5491,7 +5519,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. 
- EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5501,7 +5533,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateOr(Result, C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); @@ -5534,7 +5570,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5544,7 +5584,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateAnd(Result, C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); @@ -5580,7 +5624,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. 
- EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5590,7 +5638,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateXor(Result, C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); @@ -5626,7 +5678,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. - EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateCall(Intrinsic::getDeclaration(TheModule, @@ -5636,7 +5692,11 @@ // The gcc builtins are also full memory barriers. // FIXME: __sync_lock_test_and_set and __sync_lock_release require less. 
- EmitMemoryBarrier(true, true, true, true); +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) + EmitMemoryBarrier(true, true, true, true, false); +#else + EmitMemoryBarrier(true, true, true, true, true); +#endif Result = Builder.CreateAnd(Builder.CreateNot(Result), C[1]); Result = Builder.CreateIntToPtr(Result, ResultTy); Modified: llvm-gcc-4.2/trunk/gcc/llvm-internal.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-internal.h?rev=91350&r1=91349&r2=91350&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-internal.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-internal.h Mon Dec 14 16:40:51 2009 @@ -500,7 +500,7 @@ void EmitModifyOfRegisterVariable(tree_node *vardecl, Value *RHS); // Helpers for Builtin Function Expansion. - void EmitMemoryBarrier(bool ll, bool ls, bool sl, bool ss); + void EmitMemoryBarrier(bool ll, bool ls, bool sl, bool ss, bool device); Value *BuildVector(const std::vector &Elts); Value *BuildVector(Value *Elt, ...); Value *BuildVectorShuffle(Value *InVec1, Value *InVec2, ...); From bob.wilson at apple.com Mon Dec 14 16:44:23 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Mon, 14 Dec 2009 22:44:23 -0000 Subject: [llvm-commits] [llvm] r91352 - /llvm/trunk/docs/Makefile Message-ID: <200912142244.nBEMiN6L020736@zion.cs.uiuc.edu> Author: bwilson Date: Mon Dec 14 16:44:22 2009 New Revision: 91352 URL: http://llvm.org/viewvc/llvm-project?rev=91352&view=rev Log: Rearrange rules to add missing dependency and allow parallel makes. Modified: llvm/trunk/docs/Makefile Modified: llvm/trunk/docs/Makefile URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/Makefile?rev=91352&r1=91351&r2=91352&view=diff ============================================================================== --- llvm/trunk/docs/Makefile (original) +++ llvm/trunk/docs/Makefile Mon Dec 14 16:44:22 2009 @@ -100,7 +100,12 @@ $(FIND) . 
-type f -exec \ $(DataInstall) {} $(PROJ_docsdir)/ocamldoc/html \; -ocamldoc: regen-ocamldoc $(PROJ_OBJ_DIR)/ocamldoc.tar.gz +ocamldoc: regen-ocamldoc + $(Echo) Packaging ocamldoc documentation + $(Verb) $(RM) -rf $(PROJ_OBJ_DIR)/ocamldoc.tar* + $(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc + $(Verb) $(GZIP) $(PROJ_OBJ_DIR)/ocamldoc.tar + $(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/ regen-ocamldoc: $(Echo) Building ocamldoc documentation @@ -113,13 +118,6 @@ $(OCAMLDOC) -d $(PROJ_OBJ_DIR)/ocamldoc/html -sort -colorize-code -html \ `$(FIND) $(LEVEL)/bindings/ocaml -name "*.odoc" -exec echo -load '{}' ';'` -$(PROJ_OBJ_DIR)/ocamldoc.tar.gz: - $(Echo) Packaging ocamldoc documentation - $(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/ocamldoc.tar - $(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc - $(Verb) $(GZIP) $(PROJ_OBJ_DIR)/ocamldoc.tar - $(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/ - uninstall-local:: $(Echo) Uninstalling Documentation $(Verb) $(RM) -rf $(PROJ_docsdir) From dpatel at apple.com Mon Dec 14 16:47:09 2009 From: dpatel at apple.com (Devang Patel) Date: Mon, 14 Dec 2009 22:47:09 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91353 - /llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Message-ID: <200912142247.nBEMl9uw020846@zion.cs.uiuc.edu> Author: dpatel Date: Mon Dec 14 16:47:09 2009 New Revision: 91353 URL: http://llvm.org/viewvc/llvm-project?rev=91353&view=rev Log: Check for existing type descriptor in type cache. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=91353&r1=91352&r2=91353&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Mon Dec 14 16:47:09 2009 @@ -694,9 +694,18 @@ SFlags |= llvm::DIType::FlagAppleBlock; if (type_is_block_byref_struct(type)) SFlags |= llvm::DIType::FlagBlockByrefStruct; + DIDescriptor TyContext = findRegion(TYPE_CONTEXT(type)); + + // Check if this type is created while creating context information + // descriptor. + std::map::iterator I = TypeCache.find(type); + if (I != TypeCache.end()) + if (MDNode *TN = dyn_cast_or_null(I->second)) + return DIType(TN); + llvm::DICompositeType FwdDecl = DebugFactory.CreateCompositeType(Tag, - findRegion(TYPE_CONTEXT(type)), + TyContext, FwdName.c_str(), getOrCreateCompileUnit(Loc.file), Loc.line, From gohman at apple.com Mon Dec 14 17:08:09 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 23:08:09 -0000 Subject: [llvm-commits] [llvm] r91355 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Message-ID: <200912142308.nBEN89sq021688@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 17:08:09 2009 New Revision: 91355 URL: http://llvm.org/viewvc/llvm-project?rev=91355&view=rev Log: Fix this to properly clear the FastISel debug location. Thanks to Bill for spotting this! 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=91355&r1=91354&r2=91355&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Mon Dec 14 17:08:09 2009 @@ -390,7 +390,7 @@ FastISel *FastIS) { SDB->setCurDebugLoc(DebugLoc::getUnknownLoc()); if (FastIS) - SDB->setCurDebugLoc(DebugLoc::getUnknownLoc()); + FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc()); } void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, From gohman at apple.com Mon Dec 14 17:13:31 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 23:13:31 -0000 Subject: [llvm-commits] [llvm] r91356 - /llvm/trunk/include/llvm/CodeGen/ValueTypes.h Message-ID: <200912142313.nBENDVHd021865@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 17:13:31 2009 New Revision: 91356 URL: http://llvm.org/viewvc/llvm-project?rev=91356&view=rev Log: Update this comment. Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=91356&r1=91355&r2=91356&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Mon Dec 14 17:13:31 2009 @@ -47,7 +47,7 @@ f80 = 9, // This is a 80 bit floating point value f128 = 10, // This is a 128 bit floating point value ppcf128 = 11, // This is a PPC 128-bit floating point value - Flag = 12, // This is a condition code or machine flag. 
+ Flag = 12, // This glues nodes together during pre-RA sched isVoid = 13, // This has no value From gohman at apple.com Mon Dec 14 17:34:37 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 23:34:37 -0000 Subject: [llvm-commits] [llvm] r91360 - /llvm/trunk/include/llvm/Target/TargetLowering.h Message-ID: <200912142334.nBENYbkp022799@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 17:34:36 2009 New Revision: 91360 URL: http://llvm.org/viewvc/llvm-project?rev=91360&view=rev Log: Fix these asserts to check the invariant that the code actually depends on. Modified: llvm/trunk/include/llvm/Target/TargetLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=91360&r1=91359&r2=91360&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Mon Dec 14 17:34:36 2009 @@ -972,7 +972,7 @@ /// not work with the with specified type and indicate what to do about it. 
void setLoadExtAction(unsigned ExtType, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && + assert((unsigned)VT.SimpleTy*2 < 63 && ExtType < array_lengthof(LoadExtActions) && "Table isn't big enough!"); LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2); @@ -984,7 +984,7 @@ void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) && - (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE && + (unsigned)MemVT.SimpleTy*2 < 63 && "Table isn't big enough!"); TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL) << MemVT.SimpleTy*2); TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2; From gohman at apple.com Mon Dec 14 17:36:04 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 23:36:04 -0000 Subject: [llvm-commits] [llvm] r91361 - in /llvm/trunk/include/llvm/CodeGen: ValueTypes.h ValueTypes.td Message-ID: <200912142336.nBENa4Kp022852@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 17:36:03 2009 New Revision: 91361 URL: http://llvm.org/viewvc/llvm-project?rev=91361&view=rev Log: Move Flag and isVoid after the vector types, since bit arithmetic with those enum values is less common. 
Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h llvm/trunk/include/llvm/CodeGen/ValueTypes.td Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=91361&r1=91360&r2=91361&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Mon Dec 14 17:36:03 2009 @@ -47,35 +47,36 @@ f80 = 9, // This is a 80 bit floating point value f128 = 10, // This is a 128 bit floating point value ppcf128 = 11, // This is a PPC 128-bit floating point value - Flag = 12, // This glues nodes together during pre-RA sched - isVoid = 13, // This has no value - - v2i8 = 14, // 2 x i8 - v4i8 = 15, // 4 x i8 - v8i8 = 16, // 8 x i8 - v16i8 = 17, // 16 x i8 - v32i8 = 18, // 32 x i8 - v2i16 = 19, // 2 x i16 - v4i16 = 20, // 4 x i16 - v8i16 = 21, // 8 x i16 - v16i16 = 22, // 16 x i16 - v2i32 = 23, // 2 x i32 - v4i32 = 24, // 4 x i32 - v8i32 = 25, // 8 x i32 - v1i64 = 26, // 1 x i64 - v2i64 = 27, // 2 x i64 - v4i64 = 28, // 4 x i64 - - v2f32 = 29, // 2 x f32 - v4f32 = 30, // 4 x f32 - v8f32 = 31, // 8 x f32 - v2f64 = 32, // 2 x f64 - v4f64 = 33, // 4 x f64 + v2i8 = 12, // 2 x i8 + v4i8 = 13, // 4 x i8 + v8i8 = 14, // 8 x i8 + v16i8 = 15, // 16 x i8 + v32i8 = 16, // 32 x i8 + v2i16 = 17, // 2 x i16 + v4i16 = 18, // 4 x i16 + v8i16 = 19, // 8 x i16 + v16i16 = 20, // 16 x i16 + v2i32 = 21, // 2 x i32 + v4i32 = 22, // 4 x i32 + v8i32 = 23, // 8 x i32 + v1i64 = 24, // 1 x i64 + v2i64 = 25, // 2 x i64 + v4i64 = 26, // 4 x i64 + + v2f32 = 27, // 2 x f32 + v4f32 = 28, // 4 x f32 + v8f32 = 29, // 8 x f32 + v2f64 = 30, // 2 x f64 + v4f64 = 31, // 4 x f64 FIRST_VECTOR_VALUETYPE = v2i8, LAST_VECTOR_VALUETYPE = v4f64, + Flag = 32, // This glues nodes together during pre-RA sched + + isVoid = 33, // This has no value + LAST_VALUETYPE = 34, // This always remains at the end of the 
list. // This is the current maximum for LAST_VALUETYPE. Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.td?rev=91361&r1=91360&r2=91361&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.td (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td Mon Dec 14 17:36:03 2009 @@ -31,30 +31,31 @@ def f80 : ValueType<80 , 9>; // 80-bit floating point value def f128 : ValueType<128, 10>; // 128-bit floating point value def ppcf128: ValueType<128, 11>; // PPC 128-bit floating point value -def FlagVT : ValueType<0 , 12>; // Condition code or machine flag -def isVoid : ValueType<0 , 13>; // Produces no value -def v2i8 : ValueType<16 , 14>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 15>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 16>; // 8 x i8 vector value -def v16i8 : ValueType<128, 17>; // 16 x i8 vector value -def v32i8 : ValueType<256, 18>; // 32 x i8 vector value -def v2i16 : ValueType<32 , 19>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 20>; // 4 x i16 vector value -def v8i16 : ValueType<128, 21>; // 8 x i16 vector value -def v16i16 : ValueType<256, 22>; // 16 x i16 vector value -def v2i32 : ValueType<64 , 23>; // 2 x i32 vector value -def v4i32 : ValueType<128, 24>; // 4 x i32 vector value -def v8i32 : ValueType<256, 25>; // 8 x i32 vector value -def v1i64 : ValueType<64 , 26>; // 1 x i64 vector value -def v2i64 : ValueType<128, 27>; // 2 x i64 vector value -def v4i64 : ValueType<256, 28>; // 4 x f64 vector value - -def v2f32 : ValueType<64, 29>; // 2 x f32 vector value -def v4f32 : ValueType<128, 30>; // 4 x f32 vector value -def v8f32 : ValueType<256, 31>; // 8 x f32 vector value -def v2f64 : ValueType<128, 32>; // 2 x f64 vector value -def v4f64 : ValueType<256, 33>; // 4 x f64 vector value +def v2i8 : ValueType<16 , 12>; // 2 x i8 vector value +def v4i8 : 
ValueType<32 , 13>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 14>; // 8 x i8 vector value +def v16i8 : ValueType<128, 15>; // 16 x i8 vector value +def v32i8 : ValueType<256, 16>; // 32 x i8 vector value +def v2i16 : ValueType<32 , 17>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 18>; // 4 x i16 vector value +def v8i16 : ValueType<128, 19>; // 8 x i16 vector value +def v16i16 : ValueType<256, 20>; // 16 x i16 vector value +def v2i32 : ValueType<64 , 21>; // 2 x i32 vector value +def v4i32 : ValueType<128, 22>; // 4 x i32 vector value +def v8i32 : ValueType<256, 23>; // 8 x i32 vector value +def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value +def v2i64 : ValueType<128, 25>; // 2 x i64 vector value +def v4i64 : ValueType<256, 26>; // 4 x f64 vector value + +def v2f32 : ValueType<64, 27>; // 2 x f32 vector value +def v4f32 : ValueType<128, 28>; // 4 x f32 vector value +def v8f32 : ValueType<256, 29>; // 8 x f32 vector value +def v2f64 : ValueType<128, 30>; // 2 x f64 vector value +def v4f64 : ValueType<256, 31>; // 4 x f64 vector value + +def FlagVT : ValueType<0 , 32>; // Pre-RA sched glue +def isVoid : ValueType<0 , 33>; // Produces no value def MetadataVT: ValueType<0, 250>; // Metadata From gohman at apple.com Mon Dec 14 17:40:38 2009 From: gohman at apple.com (Dan Gohman) Date: Mon, 14 Dec 2009 23:40:38 -0000 Subject: [llvm-commits] [llvm] r91362 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec-trunc-store.ll Message-ID: <200912142340.nBENecaK023015@zion.cs.uiuc.edu> Author: djg Date: Mon Dec 14 17:40:38 2009 New Revision: 91362 URL: http://llvm.org/viewvc/llvm-project?rev=91362&view=rev Log: Fix integer cast code to handle vector types. 
Added: llvm/trunk/test/CodeGen/X86/vec-trunc-store.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91362&r1=91361&r2=91362&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 17:40:38 2009 @@ -5196,7 +5196,7 @@ // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, APInt::getLowBitsSet( - Value.getValueSizeInBits(), + Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getSizeInBits()))) return SDValue(N, 0); } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91362&r1=91361&r2=91362&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 17:40:38 2009 @@ -2354,6 +2354,10 @@ assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. 
+ assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -2361,8 +2365,12 @@ assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid sext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid sext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); break; @@ -2370,8 +2378,12 @@ assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid zext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid zext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -2380,8 +2392,12 @@ assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid anyext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid anyext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + 
Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); @@ -2390,14 +2406,19 @@ assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().bitsGT(VT) - && "Invalid truncate node, src < dst!"); + assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && + "Invalid truncate node, src < dst!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. - if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); @@ -3743,16 +3764,15 @@ assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load. 
- if (VT.isVector()) - assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && - "Invalid vector extload!"); - else - assert(MemVT.bitsLT(VT) && - "Should only be an extending load, not truncating!"); - assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && - "Cannot sign/zero extend a FP/Vector load!"); + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -3885,10 +3905,15 @@ if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); - assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == SVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91362&r1=91361&r2=91362&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 14 17:40:38 2009 @@ -596,6 +596,17 @@ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand); 
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } // FIXME: In order to prevent SSE instructions being expanded to MMX ones @@ -672,8 +683,6 @@ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand); setOperationAction(ISD::SELECT, MVT::v8i8, Promote); setOperationAction(ISD::SELECT, MVT::v4i16, Promote); setOperationAction(ISD::SELECT, MVT::v2i32, Promote); Added: llvm/trunk/test/CodeGen/X86/vec-trunc-store.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec-trunc-store.ll?rev=91362&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec-trunc-store.ll (added) +++ llvm/trunk/test/CodeGen/X86/vec-trunc-store.ll Mon Dec 14 17:40:38 2009 @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86-64 -disable-mmx | grep punpcklwd | count 2 + +define void @foo() nounwind { + %cti69 = trunc <8 x i32> undef to <8 x i16> ; <<8 x i16>> [#uses=1] + store <8 x i16> %cti69, <8 x i16>* undef + ret void +} + +define void @bar() nounwind { + %cti44 = trunc <4 x i32> undef to <4 x i16> ; 
<<4 x i16>> [#uses=1] + store <4 x i16> %cti44, <4 x i16>* undef + ret void +} From anton at korobeynikov.info Mon Dec 14 18:01:01 2009 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 15 Dec 2009 03:01:01 +0300 Subject: [llvm-commits] [llvm-gcc-4.2] r91350 - in /llvm-gcc-4.2/trunk/gcc: llvm-convert.cpp llvm-internal.h In-Reply-To: <200912142240.nBEMeppR020572@zion.cs.uiuc.edu> References: <200912142240.nBEMeppR020572@zion.cs.uiuc.edu> Message-ID: Hello, Jim > -  EmitMemoryBarrier(true, true, true, true); > +#if defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) > +  EmitMemoryBarrier(true, true, true, true, false); > +#else > +  EmitMemoryBarrier(true, true, true, true, true); > +#endif Could this be moved to some target hook? I don't think we'd want a chain of target ifdef's here. Thanks! -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From daniel at zuster.org Mon Dec 14 18:02:42 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 15 Dec 2009 00:02:42 -0000 Subject: [llvm-commits] [test-suite] r91369 - in /test-suite/trunk/External: Makefile SPEC/CFP2006/Makefile SPEC/CINT2000/Makefile SPEC/CINT2006/Makefile Message-ID: <200912150002.nBF02gac023921@zion.cs.uiuc.edu> Author: ddunbar Date: Mon Dec 14 18:02:42 2009 New Revision: 91369 URL: http://llvm.org/viewvc/llvm-project?rev=91369&view=rev Log: NightlyTest: Implement DISABLE_CXX for externals. 
Modified: test-suite/trunk/External/Makefile (contents, props changed) test-suite/trunk/External/SPEC/CFP2006/Makefile (contents, props changed) test-suite/trunk/External/SPEC/CINT2000/Makefile (contents, props changed) test-suite/trunk/External/SPEC/CINT2006/Makefile (contents, props changed) Modified: test-suite/trunk/External/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/External/Makefile?rev=91369&r1=91368&r2=91369&view=diff ============================================================================== --- test-suite/trunk/External/Makefile (original) +++ test-suite/trunk/External/Makefile Mon Dec 14 18:02:42 2009 @@ -39,4 +39,8 @@ PARALLEL_DIRS := $(filter-out Namd, $(PARALLEL_DIRS)) endif +ifdef DISABLE_CXX +PARALLEL_DIRS := $(filter-out Nurbs, $(PARALLEL_DIRS)) +endif + include Makefile.external Propchange: test-suite/trunk/External/Makefile ------------------------------------------------------------------------------ svn:executable = * Modified: test-suite/trunk/External/SPEC/CFP2006/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/External/SPEC/CFP2006/Makefile?rev=91369&r1=91368&r2=91369&view=diff ============================================================================== --- test-suite/trunk/External/SPEC/CFP2006/Makefile (original) +++ test-suite/trunk/External/SPEC/CFP2006/Makefile Mon Dec 14 18:02:42 2009 @@ -50,6 +50,12 @@ # 465.tonto - infinite loop, works at -O0, not at -O2 # 482.sphinx3 - generates control file at run time +ifdef DISABLE_CXX +PARALLEL_DIRS := $(filter-out 444.namd, $(PARALLEL_DIRS)) +PARALLEL_DIRS := $(filter-out 447.dealII, $(PARALLEL_DIRS)) +PARALLEL_DIRS := $(filter-out 450.soplex, $(PARALLEL_DIRS)) +endif + # Get the $(ARCH) setting include $(LEVEL)/Makefile.config Propchange: test-suite/trunk/External/SPEC/CFP2006/Makefile ------------------------------------------------------------------------------ svn:executable = * Modified: test-suite/trunk/External/SPEC/CINT2000/Makefile URL: 
http://llvm.org/viewvc/llvm-project/test-suite/trunk/External/SPEC/CINT2000/Makefile?rev=91369&r1=91368&r2=91369&view=diff ============================================================================== --- test-suite/trunk/External/SPEC/CINT2000/Makefile (original) +++ test-suite/trunk/External/SPEC/CINT2000/Makefile Mon Dec 14 18:02:42 2009 @@ -26,4 +26,8 @@ endif +ifdef DISABLE_CXX +PARALLEL_DIRS := $(filter-out 252.eon, $(PARALLEL_DIRS)) +endif + include $(LEVEL)/Makefile.programs Propchange: test-suite/trunk/External/SPEC/CINT2000/Makefile ------------------------------------------------------------------------------ svn:executable = * Modified: test-suite/trunk/External/SPEC/CINT2006/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/External/SPEC/CINT2006/Makefile?rev=91369&r1=91368&r2=91369&view=diff ============================================================================== --- test-suite/trunk/External/SPEC/CINT2006/Makefile (original) +++ test-suite/trunk/External/SPEC/CINT2006/Makefile Mon Dec 14 18:02:42 2009 @@ -24,6 +24,11 @@ 473.astar endif +ifdef DISABLE_CXX +PARALLEL_DIRS := $(filter-out 471.omnetpp, $(PARALLEL_DIRS)) +PARALLEL_DIRS := $(filter-out 483.xalancbmk, $(PARALLEL_DIRS)) +endif + # Get the $(ARCH) setting include $(LEVEL)/Makefile.config Propchange: test-suite/trunk/External/SPEC/CINT2006/Makefile ------------------------------------------------------------------------------ svn:executable = * From grosbach at apple.com Mon Dec 14 18:12:36 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 15 Dec 2009 00:12:36 -0000 Subject: [llvm-commits] [llvm] r91371 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <200912150012.nBF0Ca4W024302@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 18:12:35 2009 New Revision: 91371 URL: http://llvm.org/viewvc/llvm-project?rev=91371&view=rev Log: nand atomic requires opposite operand ordering Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: 
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=91371&r1=91370&r2=91371&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Dec 14 18:12:35 2009 @@ -3190,9 +3190,15 @@ // fallthrough --> exitMBB BB = loopMBB; AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); - if (BinOpcode) - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(dest).addReg(incr)).addReg(0); + if (BinOpcode) { + // operand order needs to go the other way for NAND + if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(incr).addReg(dest)).addReg(0); + else + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(dest).addReg(incr)).addReg(0); + } AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) .addReg(ptr)); From daniel at zuster.org Mon Dec 14 18:35:46 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 15 Dec 2009 00:35:46 -0000 Subject: [llvm-commits] [test-suite] r91375 - in /test-suite/trunk/MultiSource/Benchmarks: Prolangs-C/TimberWolfMC/main.c Prolangs-C/TimberWolfMC/mt.c Prolangs-C/archie-client/archie.c Prolangs-C/bison/main.c Prolangs-C/fixoutput/fixoutput.c Prolangs-C/unix-tbl/t1.c Ptrdist/ft/ft.c Ptrdist/ks/KS-2.c Ptrdist/ks/KS.h Message-ID: <200912150035.nBF0ZkE7025270@zion.cs.uiuc.edu> Author: ddunbar Date: Mon Dec 14 18:35:45 2009 New Revision: 91375 URL: http://llvm.org/viewvc/llvm-project?rev=91375&view=rev Log: Fix uses of 'void main ...', clang doesn't like this. 
Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/main.c test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/mt.c test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/archie-client/archie.c test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/bison/main.c test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/fixoutput/fixoutput.c test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/unix-tbl/t1.c test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ft/ft.c test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS-2.c test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS.h Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/main.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/main.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/main.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/main.c Mon Dec 14 18:35:45 2009 @@ -51,7 +51,7 @@ void TW_oldinput( FILE *fp ); void prepSpots(void); -void main( int argc , char *argv[]) +int main( int argc , char *argv[]) { FILE *fp , *fopen() ; @@ -330,6 +330,7 @@ fprintf(fpo,"\n\n************************************ \n\n"); fclose(fpo); exit(0); +return 0; } Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/mt.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/mt.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/mt.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/mt.c Mon Dec 14 18:35:45 2009 @@ -406,7 +406,7 @@ } #ifdef TEST1 -void main(void) +int main(void) { int selection; MT *t; @@ -475,5 +475,6 @@ default: 
printf("What?\n"); break; } } + return 0; } #endif Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/archie-client/archie.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/archie-client/archie.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/archie-client/archie.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/archie-client/archie.c Mon Dec 14 18:35:45 2009 @@ -63,7 +63,7 @@ #define ARCHIE_HOST "archie.rutgers.edu" -void main(int argc,char *argv[]) +int main(int argc,char *argv[]) { char *cur_arg; char qtype = '='; /* Default to exact string match */ @@ -287,4 +287,5 @@ netshut(); #endif exit(0); + return 0; } Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/bison/main.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/bison/main.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/bison/main.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/bison/main.c Mon Dec 14 18:35:45 2009 @@ -40,7 +40,7 @@ extern void output(void); extern void done(int k); -void main(int argc,char *argv[]) +int main(int argc,char *argv[]) { failure = 0; lineno = 0; @@ -78,6 +78,7 @@ /* output the tables and the parser to ftable. In file output. 
*/ output(); done(failure); + return 0; } /* functions to report errors which prevent a parser from being generated */ Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/fixoutput/fixoutput.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/fixoutput/fixoutput.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/fixoutput/fixoutput.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/fixoutput/fixoutput.c Mon Dec 14 18:35:45 2009 @@ -1,4 +1,5 @@ + #include "scanner.h" /* Useful Macro Definitions */ #include @@ -39,7 +40,7 @@ EOF_FLAG = 0; } -void main(void) +int main(void) { int CODE; /* TOKEN CODE */ int I1,I2; /* temp storage */ @@ -318,4 +319,5 @@ printf("****** %d format errors found in output of your scanner.\n", ERROR_CNT); exit(0); + return 0; } Modified: test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/unix-tbl/t1.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/unix-tbl/t1.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/unix-tbl/t1.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Prolangs-C/unix-tbl/t1.c Mon Dec 14 18:35:45 2009 @@ -33,7 +33,7 @@ int swapin(void); -void main(int argc,char *argv[]) +int main(int argc,char *argv[]) { extern init_options(); # ifdef unix @@ -45,6 +45,7 @@ # endif init_options(); exit(tbl(argc,argv)); +return 0; } Modified: test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ft/ft.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ft/ft.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ft/ft.c (original) +++ 
test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ft/ft.c Mon Dec 14 18:35:45 2009 @@ -65,7 +65,7 @@ */ int debug = 1; -void +int main(int argc, char *argv[]) { int nVertex; @@ -119,6 +119,7 @@ PrintHeapSize(stderr); #endif /* PLUS_STATS */ exit(0); + return 0; } Vertices * Modified: test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS-2.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS-2.c?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS-2.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS-2.c Mon Dec 14 18:35:45 2009 @@ -323,7 +323,7 @@ netStats[i].edgesCut, netStats[i].netsCut); } -void +int main(int argc, char **argv) { unsigned long p, iMax; @@ -405,4 +405,5 @@ PrintHeapSize(stderr); #endif /* PLUS_STATS */ exit(0); + return 0; } Modified: test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS.h URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS.h?rev=91375&r1=91374&r2=91375&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS.h (original) +++ test-suite/trunk/MultiSource/Benchmarks/Ptrdist/ks/KS.h Mon Dec 14 18:35:45 2009 @@ -95,4 +95,4 @@ float FindMaxGpAndSwap(); void SwapSubsetAndReset(unsigned long iMax); void PrintResults(int verbose); -void main(int argc, char **argv); +int main(int argc, char **argv); From grosbach at apple.com Mon Dec 14 18:36:25 2009 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 14 Dec 2009 16:36:25 -0800 Subject: [llvm-commits] [llvm-gcc-4.2] r91350 - in /llvm-gcc-4.2/trunk/gcc: llvm-convert.cpp llvm-internal.h In-Reply-To: References: <200912142240.nBEMeppR020572@zion.cs.uiuc.edu> Message-ID: On Dec 14, 2009, at 4:01 PM, Anton Korobeynikov wrote: >> - EmitMemoryBarrier(true, true, true, true); >> +#if 
defined(TARGET_ARM) && defined(CONFIG_DARWIN_H) >> + EmitMemoryBarrier(true, true, true, true, false); >> +#else >> + EmitMemoryBarrier(true, true, true, true, true); >> +#endif > Could this be moved to some target hook? I don't think we'd want a > chain of target ifdef's here. > Maybe? I agree having a bunch of ifdef stuff there is pretty ugly. I'm not sure how best to do target-specific bits in the front end, though, so I added these bits in the same manner the current PowerPC-specific portions are done. Likewise, the whole sequence for the atomics is conditionalized on whether the target is known to support the intrinsics, which is suboptimal. Much better would be to always create the intrinsics, then the legalizer (probably) could check if the back end explicitly supports them, and insert the library support calls if it doesn't. That sort of refactoring is beyond the scope of what I was looking to do here, though, so I just stayed with the current scheme. I'd wholeheartedly support it if someone with more frontend expertise than I wanted to make something like that happen, though. -Jim From isanbard at gmail.com Mon Dec 14 18:39:25 2009 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 15 Dec 2009 00:39:25 -0000 Subject: [llvm-commits] [llvm] r91376 - in /llvm/trunk: include/llvm/CodeGen/MachineBasicBlock.h lib/CodeGen/BranchFolding.cpp lib/CodeGen/MachineBasicBlock.cpp Message-ID: <200912150039.nBF0dPpY025409@zion.cs.uiuc.edu> Author: void Date: Mon Dec 14 18:39:24 2009 New Revision: 91376 URL: http://llvm.org/viewvc/llvm-project?rev=91376&view=rev Log: Revert these. 
They may have been causing 483_xalancbmk to fail: $ svn merge -c -91161 https://llvm.org/svn/llvm-project/llvm/trunk --- Reverse-merging r91161 into '.': U lib/CodeGen/BranchFolding.cpp U lib/CodeGen/MachineBasicBlock.cpp $ svn merge -c -91113 https://llvm.org/svn/llvm-project/llvm/trunk --- Reverse-merging r91113 into '.': G lib/CodeGen/MachineBasicBlock.cpp $ svn merge -c -91101 https://llvm.org/svn/llvm-project/llvm/trunk --- Reverse-merging r91101 into '.': U include/llvm/CodeGen/MachineBasicBlock.h G lib/CodeGen/MachineBasicBlock.cpp $ svn merge -c -91092 https://llvm.org/svn/llvm-project/llvm/trunk --- Reverse-merging r91092 into '.': G include/llvm/CodeGen/MachineBasicBlock.h G lib/CodeGen/MachineBasicBlock.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineBasicBlock.h llvm/trunk/lib/CodeGen/BranchFolding.cpp llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineBasicBlock.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineBasicBlock.h?rev=91376&r1=91375&r2=91376&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineBasicBlock.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineBasicBlock.h Mon Dec 14 18:39:24 2009 @@ -327,11 +327,6 @@ /// 'Old', change the code and CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); - /// BranchesToLandingPad - The basic block is a landing pad or branches only - /// to a landing pad. No other instructions are present other than the - /// unconditional branch. - bool BranchesToLandingPad(const MachineBasicBlock *MBB) const; - /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in /// the CFG to be inserted. If we have proven that MBB can only branch to /// DestA and DestB, remove any other MBB successors from the CFG. 
DestA and Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=91376&r1=91375&r2=91376&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original) +++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Mon Dec 14 18:39:24 2009 @@ -1205,11 +1205,11 @@ } } - // If the prior block doesn't fall through into this block and if this block - // doesn't fall through into some other block and it's not branching only to a - // landing pad, then see if we can find a place to move this block where a - // fall-through will happen. - if (!PrevBB.canFallThrough() && !MBB->BranchesToLandingPad(MBB)) { + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!PrevBB.canFallThrough()) { + // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); @@ -1221,32 +1221,28 @@ E = MBB->pred_end(); PI != E; ++PI) { // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; - MachineFunction::iterator PredNextBB = PredBB; ++PredNextBB; + MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - if (PredBB != MBB && !PredBB->canFallThrough() - && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) + if (PredBB != MBB && !PredBB->canFallThrough() && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { - // If the current block doesn't fall through, just move it. 
If the - // current block can fall through and does not end with a conditional - // branch, we need to append an unconditional jump to the (current) - // next block. To avoid a possible compile-time infinite loop, move - // blocks only backward in this case. - // - // Also, if there are already 2 branches here, we cannot add a third. - // I.e. we have the case: - // - // Bcc next - // B elsewhere - // next: + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. + // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = - llvm::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, 0, CurCond); } - MBB->moveAfter(PredBB); MadeChange = true; goto ReoptimizeBlock; Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=91376&r1=91375&r2=91376&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Mon Dec 14 18:39:24 2009 @@ -13,16 +13,15 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/BasicBlock.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include 
"llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Assembly/Writer.h" #include using namespace llvm; @@ -449,28 +448,10 @@ addSuccessor(New); } -/// BranchesToLandingPad - The basic block is a landing pad or branches only to -/// a landing pad. No other instructions are present other than the -/// unconditional branch. -bool -MachineBasicBlock::BranchesToLandingPad(const MachineBasicBlock *MBB) const { - SmallSet Visited; - const MachineBasicBlock *CurMBB = MBB; - - while (!CurMBB->isLandingPad()) { - if (CurMBB->succ_size() != 1) break; - if (!Visited.insert(CurMBB)) break; - CurMBB = *CurMBB->succ_begin(); - } - - return CurMBB->isLandingPad(); -} - /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the /// CFG to be inserted. If we have proven that MBB can only branch to DestA and /// DestB, remove any other MBB successors from the CFG. DestA and DestB can /// be null. -/// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. @@ -500,17 +481,16 @@ } MachineBasicBlock::succ_iterator SI = succ_begin(); - const MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; + MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; while (SI != succ_end()) { - const MachineBasicBlock *MBB = *SI; - if (MBB == DestA) { + if (*SI == DestA) { DestA = 0; ++SI; - } else if (MBB == DestB) { + } else if (*SI == DestB) { DestB = 0; ++SI; - } else if (MBB != OrigDestA && MBB != OrigDestB && - BranchesToLandingPad(MBB)) { + } else if ((*SI)->isLandingPad() && + *SI!=OrigDestA && *SI!=OrigDestB) { ++SI; } else { // Otherwise, this is a superfluous edge, remove it. 
@@ -518,14 +498,12 @@ MadeChange = true; } } - if (!AddedFallThrough) { assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!"); } else if (isCond) { assert(DestA == 0 && "MachineCFG is missing edges!"); } - return MadeChange; } From echristo at apple.com Mon Dec 14 18:40:55 2009 From: echristo at apple.com (Eric Christopher) Date: Tue, 15 Dec 2009 00:40:55 -0000 Subject: [llvm-commits] [llvm] r91377 - /llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp Message-ID: <200912150040.nBF0et2w025465@zion.cs.uiuc.edu> Author: echristo Date: Mon Dec 14 18:40:55 2009 New Revision: 91377 URL: http://llvm.org/viewvc/llvm-project?rev=91377&view=rev Log: Formatting. Modified: llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp?rev=91377&r1=91376&r2=91377&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp Mon Dec 14 18:40:55 2009 @@ -208,7 +208,7 @@ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode, - CodeModel::Model CMM) { + CodeModel::Model CMM) { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) From evan.cheng at apple.com Mon Dec 14 18:41:36 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 00:41:36 -0000 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll Message-ID: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 18:41:36 2009 New Revision: 91378 URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev Log: Propagate zext through logical shift.
Added: llvm/trunk/test/CodeGen/X86/setcc.ll llvm/trunk/test/CodeGen/X86/zext-shl.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 @@ -3278,6 +3278,16 @@ if (SCC.getNode()) return SCC; } + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); + } + return SDValue(); } Added: llvm/trunk/test/CodeGen/X86/setcc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t1: +; CHECK: seta %al +; CHECK: movzbl %al, %eax +; CHECK: shll $5, %eax + %0 = icmp ugt i16 %x, 26 ; [#uses=1] + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] + ret i16 %iftmp.1.0 +} + Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto ============================================================================== --- 
llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 @@ -0,0 +1,38 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t1: +; CHECK: shll +; CHECK-NOT: movzwl +; CHECK: ret + %0 = zext i8 %x to i16 + %1 = shl i16 %0, 5 + %2 = zext i16 %1 to i32 + ret i32 %2 +} + +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t2: +; CHECK: shrl +; CHECK-NOT: movzwl +; CHECK: ret + %0 = zext i8 %x to i16 + %1 = lshr i16 %0, 3 + %2 = zext i16 %1 to i32 + ret i32 %2 +} + +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { +entry: +; CHECK: t3: +; CHECK: shll +; CHECK-NOT: movzwl +; CHECK: ret + %0 = zext i8 %x to i16 + %1 = zext i8 %y to i16 + %2 = shl i16 %0, %1 + %3 = zext i16 %2 to i32 + ret i32 %3 +} From daniel at zuster.org Mon Dec 14 18:41:47 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 15 Dec 2009 00:41:47 -0000 Subject: [llvm-commits] [llvm] r91379 - /llvm/trunk/utils/NewNightlyTest.pl Message-ID: <200912150041.nBF0flOX025512@zion.cs.uiuc.edu> Author: ddunbar Date: Mon Dec 14 18:41:47 2009 New Revision: 91379 URL: http://llvm.org/viewvc/llvm-project?rev=91379&view=rev Log: NNT: Make sure stderr for build commands goes to log file, as intended but misdirected. 
Modified: llvm/trunk/utils/NewNightlyTest.pl Modified: llvm/trunk/utils/NewNightlyTest.pl URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/NewNightlyTest.pl?rev=91379&r1=91378&r2=91379&view=diff ============================================================================== --- llvm/trunk/utils/NewNightlyTest.pl (original) +++ llvm/trunk/utils/NewNightlyTest.pl Mon Dec 14 18:41:47 2009 @@ -317,9 +317,9 @@ } else { if ($VERBOSE) { print "$Title\n"; - print "$Command 2>&1 > $Log\n"; + print "$Command > $Log 2>&1\n"; } - system "$Command 2>&1 > $Log"; + system "$Command > $Log 2>&1"; } } @@ -336,9 +336,9 @@ } else { if ($VERBOSE) { print "$Title\n"; - print "$Command 2>&1 > $Log\n"; + print "$Command >> $Log 2>&1\n"; } - system "$Command 2>&1 >> $Log"; + system "$Command >> $Log 2>&1"; } } From evan.cheng at apple.com Mon Dec 14 18:52:11 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 00:52:11 -0000 Subject: [llvm-commits] [llvm] r91380 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/and_ops.ll Message-ID: <200912150052.nBF0qBhN025917@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 18:52:11 2009 New Revision: 91380 URL: http://llvm.org/viewvc/llvm-project?rev=91380&view=rev Log: Fold (zext (and x, cst)) -> (and (zext x), cst). 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91380&r1=91379&r2=91380&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:52:11 2009 @@ -3202,6 +3202,19 @@ X, DAG.getConstant(Mask, VT)); } + // Fold (zext (and x, cst)) -> (and (zext x), cst) + if (N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N0.getOperand(0).getOpcode() != ISD::TRUNCATE && + N0.getOperand(0).hasOneUse()) { + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)), + DAG.getConstant(Mask, VT)); + } + // fold (zext (load x)) -> (zext (truncate (zextload x))) if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=91380&r1=91379&r2=91380&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original) +++ llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Mon Dec 14 18:52:11 2009 @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 234 +; RUN: grep and %t1.s | count 230 ; RUN: grep andc %t1.s | count 85 -; RUN: grep andi %t1.s | count 37 -; RUN: grep andhi %t1.s | count 30 -; RUN: grep andbi %t1.s | count 4 +; RUN: grep andi %t1.s | count 39 +; RUN: grep andhi %t1.s | count 28 +; RUN: grep andbi %t1.s | count 2 target datalayout = 
"E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" From evan.cheng at apple.com Mon Dec 14 18:53:42 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 00:53:42 -0000 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll Message-ID: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 18:53:42 2009 New Revision: 91381 URL: http://llvm.org/viewvc/llvm-project?rev=91381&view=rev Log: Use sbb x, x to materialize carry bit in a GPR. The result is all one's or all zero's. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/CodeGen/X86/setcc.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91381&r1=91380&r2=91381&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 14 18:53:42 2009 @@ -5750,6 +5750,15 @@ return SDValue(); SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); + + // Use sbb x, x to materialize carry bit into a GPR. + if (X86CC == X86::COND_B) { + return DAG.getNode(ISD::AND, dl, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond), + DAG.getConstant(1, MVT::i8)); + } + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); } @@ -5902,9 +5911,18 @@ Cond = NewCond; } + // Look pass (and (setcc_carry (cmp ...)), 1). 
+ if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -5987,9 +6005,18 @@ Cond = LowerXALUO(Cond, DAG); #endif + // Look pass (and (setcc_carry (cmp ...)), 1). + if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -7376,6 +7403,7 @@ case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::SETCC: return "X86ISD::SETCC"; + case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -8950,11 +8978,42 @@ return SDValue(); } +static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast(N1); + EVT VT = N0.getValueType(); + + // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) + // since the result of setcc_c is all zero's or all ones. 
+ if (N1C && N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == X86ISD::SETCC_CARRY || + ((N00.getOpcode() == ISD::ANY_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND) && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) { + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); + APInt ShAmt = N1C->getAPIntValue(); + Mask = Mask.shl(ShAmt); + if (Mask != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + N00, DAG.getConstant(Mask, VT)); + } + } + + return SDValue(); +} /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts /// when possible. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector() && VT.isInteger() && + N->getOpcode() == ISD::SHL) + return PerformSHLCombine(N, DAG); + // On X86 with SSE2 support, we can transform this to a vector shift if // all elements are shifted by the same amount. We can't do this in legalize // because the a constant vector is typically transformed to a constant pool @@ -8962,7 +9021,6 @@ if (!Subtarget->hasSSE2()) return SDValue(); - EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=91381&r1=91380&r2=91381&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Dec 14 18:53:42 2009 @@ -118,6 +118,10 @@ /// operand produced by a CMP instruction. SETCC, + // Same as SETCC except it's materialized with a sbb and the value is all + // one's or all zero's. + SETCC_CARRY, + /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. 
Operand 2 is the condition code, and operand 3 is the /// flag operand produced by a CMP or TEST instruction. It also writes a Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=91381&r1=91380&r2=91381&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 @@ -1333,6 +1333,15 @@ X86_COND_NO, EFLAGS))]>, TB; } // isTwoAddress +// Use sbb to materialize carry flag into a GPR. +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), + "sbb{q}\t$dst, $dst", + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; + +def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C64r)>; + //===----------------------------------------------------------------------===// // Conversion Instructions... // Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=91381&r1=91380&r2=91381&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Dec 14 18:53:42 2009 @@ -87,6 +87,7 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; +def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, @@ -3059,6 +3060,21 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { +// Use sbb to materialize carry bit. 
+ +let Defs = [EFLAGS], isCodeGenOnly = 1 in { +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), + "sbb{b}\t$dst, $dst", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), + "sbb{w}\t$dst, $dst", + [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>, + OpSize; +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), + "sbb{l}\t$dst, $dst", + [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; +} // isCodeGenOnly + def SETEr : I<0x94, MRM0r, (outs GR8 :$dst), (ins), "sete\t$dst", @@ -4169,6 +4185,12 @@ GR16:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; +// (anyext (setcc_carry)) -> (zext (setcc_carry)) +def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C32r)>; + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// Modified: llvm/trunk/test/CodeGen/X86/setcc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91381&r1=91380&r2=91381&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/setcc.ll (original) +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:53:42 2009 @@ -1,4 +1,8 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; rdar://7329206 + +; Use sbb x, x to materialize carry bit in a GPR. The value is either +; all 1's or all 0's. 
define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { entry: @@ -11,3 +15,22 @@ ret i16 %iftmp.1.0 } +define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp { +entry: +; CHECK: t2: +; CHECK: sbbl %eax, %eax +; CHECK: andl $32, %eax + %0 = icmp ult i16 %x, 26 ; [#uses=1] + %iftmp.0.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] + ret i16 %iftmp.0.0 +} + +define i64 @t3(i64 %x) nounwind readnone ssp { +entry: +; CHECK: t3: +; CHECK: sbbq %rax, %rax +; CHECK: andq $64, %rax + %0 = icmp ult i64 %x, 18 ; [#uses=1] + %iftmp.2.0 = select i1 %0, i64 64, i64 0 ; [#uses=1] + ret i64 %iftmp.2.0 +} From clattner at apple.com Mon Dec 14 19:00:01 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 17:00:01 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: > Author: evancheng > Date: Mon Dec 14 18:41:36 2009 > New Revision: 91378 > > URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev > Log: > Propagate zest through logical shift. Is this really profitable in the general case? It seems that this should only be done if at least one of the inputs to the shift will fold away. Adding a nice "WouldFoldAZExt" predicate that would return true for zext/trunc would make sense, no? 
-Chris > > Added: > llvm/trunk/test/CodeGen/X86/setcc.ll > llvm/trunk/test/CodeGen/X86/zext-shl.ll > Modified: > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 > @@ -3278,6 +3278,16 @@ > if (SCC.getNode()) return SCC; > } > > + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) > + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && > + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && > + N0.hasOneUse()) { > + DebugLoc dl = N->getDebugLoc(); > + return DAG.getNode(N0.getOpcode(), dl, VT, > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); > + } > + > return SDValue(); > } > > > Added: llvm/trunk/test/CodeGen/X86/setcc.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto > > ============================================================================== > --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) > +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 > @@ -0,0 +1,13 @@ > +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s > + > +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t1: > +; CHECK: seta %al > +; CHECK: movzbl %al, %eax > +; CHECK: shll $5, %eax > + %0 = icmp ugt i16 %x, 26 ; [#uses=1] > + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] > + ret i16 %iftmp.1.0 > +} > + > > Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll > URL:
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) > +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 > @@ -0,0 +1,38 @@ > +; RUN: llc < %s -march=x86 | FileCheck %s > + > +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t1: > +; CHECK: shll > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = shl i16 %0, 5 > + %2 = zext i16 %1 to i32 > + ret i32 %2 > +} > + > +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t2: > +; CHECK: shrl > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = lshr i16 %0, 3 > + %2 = zext i16 %1 to i32 > + ret i32 %2 > +} > + > +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { > +entry: > +; CHECK: t3: > +; CHECK: shll > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = zext i8 %y to i16 > + %2 = shl i16 %0, %1 > + %3 = zext i16 %2 to i32 > + ret i32 %3 > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From wendling at apple.com Mon Dec 14 19:05:04 2009 From: wendling at apple.com (Bill Wendling) Date: Mon, 14 Dec 2009 17:05:04 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: <4C43CF46-A38E-4B9E-9AEB-5A0D20BED56C@apple.com> On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: > Author: evancheng > Date: Mon Dec 14 18:41:36 2009 > New Revision: 91378 > > URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev > Log: > Propagate 
zext through logical shift. > > Added: > llvm/trunk/test/CodeGen/X86/setcc.ll > llvm/trunk/test/CodeGen/X86/zext-shl.ll > Modified: > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 > @@ -3278,6 +3278,16 @@ > if (SCC.getNode()) return SCC; > } > > + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) > + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && > + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && Is it necessary that op 0 be zext? -bw From evan.cheng at apple.com Mon Dec 14 19:07:46 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 01:07:46 -0000 Subject: [llvm-commits] [test-suite] r91385 - /test-suite/trunk/Makefile.programs Message-ID: <200912150107.nBF17kkY026443@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 19:07:46 2009 New Revision: 91385 URL: http://llvm.org/viewvc/llvm-project?rev=91385&view=rev Log: Test -disable-16bit.
Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=91385&r1=91384&r2=91385&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Mon Dec 14 19:07:46 2009 @@ -225,13 +225,15 @@ LLCBETAOPTION := -sched=simple endif ifeq ($(ARCH),x86_64) -LLCBETAOPTION := -split-gep-gvn +LLCBETAOPTION := -disable-16bit +#-split-gep-gvn #-combiner-alias-analysis #-pre-alloc-split #-remat-pic-stub-load endif ifeq ($(ARCH),x86) -LLCBETAOPTION := -split-gep-gvn +LLCBETAOPTION := -disable-16bit +#-split-gep-gvn #-combiner-alias-analysis #-pre-alloc-split #-remat-pic-stub-load From jyasskin at google.com Mon Dec 14 19:25:52 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Mon, 14 Dec 2009 17:25:52 -0800 Subject: [llvm-commits] [PATCH] Fix http://llvm.org/PR5735: available_externally codegen Message-ID: Patch at http://llvm.org/bugs/attachment.cgi?id=3948 or http://codereview.appspot.com/179048. Jeffrey From grosbach at apple.com Mon Dec 14 19:35:31 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 15 Dec 2009 01:35:31 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91387 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200912150135.nBF1ZVBu027404@zion.cs.uiuc.edu> Author: grosbach Date: Mon Dec 14 19:35:31 2009 New Revision: 91387 URL: http://llvm.org/viewvc/llvm-project?rev=91387&view=rev Log: ARM doesn't support double-word atomics yet. Fix copy-paste error and a missed case. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91387&r1=91386&r2=91387&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Mon Dec 14 19:35:31 2009 @@ -5259,8 +5259,7 @@ } case BUILT_IN_FETCH_AND_ADD_8: #if defined(TARGET_ARM) - if (TARGET_THUMB1 || !arm_arch6) - return false; + return false; #endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) @@ -5544,6 +5543,9 @@ return true; } case BUILT_IN_AND_AND_FETCH_8: +#if defined(TARGET_ARM) + return false; +#endif #if defined(TARGET_POWERPC) if (!TARGET_64BIT) return false; From evan.cheng at apple.com Mon Dec 14 19:44:10 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 01:44:10 -0000 Subject: [llvm-commits] [llvm] r91390 - /llvm/trunk/lib/CodeGen/TailDuplication.cpp Message-ID: <200912150144.nBF1iAmH027740@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 19:44:10 2009 New Revision: 91390 URL: http://llvm.org/viewvc/llvm-project?rev=91390&view=rev Log: Tail duplication should zap a copy it inserted for SSA update if the copy is the only use of its source. 
Modified: llvm/trunk/lib/CodeGen/TailDuplication.cpp Modified: llvm/trunk/lib/CodeGen/TailDuplication.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TailDuplication.cpp?rev=91390&r1=91389&r2=91390&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TailDuplication.cpp (original) +++ llvm/trunk/lib/CodeGen/TailDuplication.cpp Mon Dec 14 19:44:10 2009 @@ -90,7 +90,8 @@ SmallSetVector &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, - SmallVector &TDBBs); + SmallVector &TDBBs, + SmallVector &Copies); void RemoveDeadBlock(MachineBasicBlock *MBB); }; @@ -194,7 +195,8 @@ MBB->succ_end()); SmallVector TDBBs; - if (TailDuplicate(MBB, MF, TDBBs)) { + SmallVector Copies; + if (TailDuplicate(MBB, MF, TDBBs, Copies)) { ++NumTails; // TailBB's immediate successors are now successors of those predecessors @@ -251,6 +253,21 @@ SSAUpdateVals.clear(); } + // Eliminate some of the copies inserted tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) { + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + } + if (PreRegAlloc && TailDupVerify) VerifyPHIs(MF, false); MadeChange = true; @@ -418,7 +435,8 @@ /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, - SmallVector &TDBBs) { + SmallVector &TDBBs, + SmallVector &Copies) { // Don't try to tail-duplicate single-block loops. if (TailBB->isSuccessor(TailBB)) return false; @@ -502,7 +520,7 @@ // Clone the contents of TailBB into PredBB. 
DenseMap LocalVRMap; - SmallVector, 4> Copies; + SmallVector, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); while (I != TailBB->end()) { MachineInstr *MI = &*I; @@ -510,7 +528,7 @@ if (MI->getOpcode() == TargetInstrInfo::PHI) { // Replace the uses of the def of the PHI with the register coming // from PredBB. - ProcessPHI(MI, TailBB, PredBB, LocalVRMap, Copies); + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); } else { // Replace def of virtual registers with new registers, and update // uses with PHI source register or the new registers. @@ -518,9 +536,12 @@ } } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(Copies[i].first); - TII->copyRegToReg(*PredBB, Loc, Copies[i].first, Copies[i].second, RC, RC); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC,RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -553,14 +574,14 @@ << "From MBB: " << *TailBB); if (PreRegAlloc) { DenseMap LocalVRMap; - SmallVector, 4> Copies; + SmallVector, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) { // Replace the uses of the def of the PHI with the register coming // from PredBB. 
MachineInstr *MI = &*I++; - ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, Copies); + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); if (MI->getParent()) MI->eraseFromParent(); } @@ -574,9 +595,12 @@ MI->eraseFromParent(); } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(Copies[i].first); - TII->copyRegToReg(*PrevBB, Loc, Copies[i].first, Copies[i].second, RC, RC); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC, RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); } } else { // No PHIs to worry about, just splice the instructions over. From evan.cheng at apple.com Mon Dec 14 19:45:29 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 17:45:29 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: <6ED59F80-2FD1-4C09-8325-BD4ED7C90EBE@apple.com> On Dec 14, 2009, at 5:00 PM, Chris Lattner wrote: > > On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: > >> Author: evancheng >> Date: Mon Dec 14 18:41:36 2009 >> New Revision: 91378 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >> Log: >> Propagate zest through logical shift. > > Is this really profitable in the general case? It seems that this should only be done if at least one of the inputs to the shift will fold away. Adding a nice "WouldFoldAZExt" predicate that would return true for zext/trunc would make sense, no? Are there cases where (zext (zext)) would not fold into a single zext? 
Evan > > -Chris > >> >> Added: >> llvm/trunk/test/CodeGen/X86/setcc.ll >> llvm/trunk/test/CodeGen/X86/zext-shl.ll >> Modified: >> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 >> @@ -3278,6 +3278,16 @@ >> if (SCC.getNode()) return SCC; >> } >> >> + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) >> + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && >> + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && >> + N0.hasOneUse()) { >> + DebugLoc dl = N->getDebugLoc(); >> + return DAG.getNode(N0.getOpcode(), dl, VT, >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); >> + } >> + >> return SDValue(); >> } >> >> >> Added: llvm/trunk/test/CodeGen/X86/setcc.ll >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto >> >> ============================================================================== >> --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) >> +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 >> @@ -0,0 +1,13 @@ >> +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s >> + >> +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t1: >> +; CHECK: seta %al >> +; CHECK: movzbl %al, %eax >> +; CHECK: shll $5, %eax >> + %0 = icmp ugt i16 %x, 26 ; [#uses=1] >> + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] >> + ret i16 %iftmp.1.0 >> +} >> + >> >> Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll >> URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto >> >> ============================================================================== >> --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) >> +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 >> @@ -0,0 +1,38 @@ >> +; RUN: llc < %s -march=x86 | FileCheck %s >> + >> +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t1: >> +; CHECK: shll >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = shl i16 %0, 5 >> + %2 = zext i16 %1 to i32 >> + ret i32 %2 >> +} >> + >> +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t2: >> +; CHECK: shrl >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = lshr i16 %0, 3 >> + %2 = zext i16 %1 to i32 >> + ret i32 %2 >> +} >> + >> +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { >> +entry: >> +; CHECK: t3: >> +; CHECK: shll >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = zext i8 %y to i16 >> + %2 = shl i16 %0, %1 >> + %3 = zext i16 %2 to i32 >> + ret i32 %3 >> +} >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From clattner at apple.com Mon Dec 14 19:47:00 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 17:47:00 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <6ED59F80-2FD1-4C09-8325-BD4ED7C90EBE@apple.com> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <6ED59F80-2FD1-4C09-8325-BD4ED7C90EBE@apple.com> Message-ID: On Dec 14, 2009, at 5:45 PM, Evan Cheng wrote: > > On Dec 14, 2009, at 5:00 PM, Chris Lattner wrote: > >> >> On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: >> >>> Author: 
evancheng >>> Date: Mon Dec 14 18:41:36 2009 >>> New Revision: 91378 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >>> Log: >>> Propagate zest through logical shift. >> >> Is this really profitable in the general case? It seems that this >> should only be done if at least one of the inputs to the shift will >> fold away. Adding a nice "WouldFoldAZExt" predicate that would >> return true for zext/trunc would make sense, no? > > Are there cases where (zext (zext)) would not fold into a single zext? No, I'm worried that you're turning something like: zext(shr(call, call)) -> shr(zext(call), zext(call)) which introduces two zexts and deletes one. -Chris From evan.cheng at apple.com Mon Dec 14 19:49:43 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 17:49:43 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <6ED59F80-2FD1-4C09-8325-BD4ED7C90EBE@apple.com> Message-ID: On Dec 14, 2009, at 5:47 PM, Chris Lattner wrote: > > On Dec 14, 2009, at 5:45 PM, Evan Cheng wrote: > >> >> On Dec 14, 2009, at 5:00 PM, Chris Lattner wrote: >> >>> >>> On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: >>> >>>> Author: evancheng >>>> Date: Mon Dec 14 18:41:36 2009 >>>> New Revision: 91378 >>>> >>>> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >>>> Log: >>>> Propagate zest through logical shift. >>> >>> Is this really profitable in the general case? It seems that this should only be done if at least one of the inputs to the shift will fold away. Adding a nice "WouldFoldAZExt" predicate that would return true for zext/trunc would make sense, no? >> >> Are there cases where (zext (zext)) would not fold into a single zext? 
> No, I'm worried that you're turning something like: > > zext(shr(call, call)) -> shr(zext(call), zext(call)) > > which introduces two zexts and deletes one. No. The transformation only happens when one of the shift's operands is a zext. Evan > > -Chris From evan.cheng at apple.com Mon Dec 14 19:50:28 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 17:50:28 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <4C43CF46-A38E-4B9E-9AEB-5A0D20BED56C@apple.com> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <4C43CF46-A38E-4B9E-9AEB-5A0D20BED56C@apple.com> Message-ID: On Dec 14, 2009, at 5:05 PM, Bill Wendling wrote: > On Dec 14, 2009, at 4:41 PM, Evan Cheng wrote: > >> Author: evancheng >> Date: Mon Dec 14 18:41:36 2009 >> New Revision: 91378 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >> Log: >> Propagate zext through logical shift. >> >> Added: >> llvm/trunk/test/CodeGen/X86/setcc.ll >> llvm/trunk/test/CodeGen/X86/zext-shl.ll >> Modified: >> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 >> @@ -3278,6 +3278,16 @@ >> if (SCC.getNode()) return SCC; >> } >> >> + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) >> + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && >> + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && > > Is it necessary that op 0 be zext? Not necessary, but more likely. I can change it.
(After I figure out which of my patches broke self-host). Evan > > -bw > From eli.friedman at gmail.com Mon Dec 14 19:54:06 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 14 Dec 2009 17:54:06 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: On Mon, Dec 14, 2009 at 4:41 PM, Evan Cheng wrote: > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 > @@ -3278,6 +3278,16 @@ > if (SCC.getNode()) return SCC; > } > > + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) > + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && > + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && > + N0.hasOneUse()) { > + DebugLoc dl = N->getDebugLoc(); > + return DAG.getNode(N0.getOpcode(), dl, VT, > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); > + } > + > return SDValue(); > } I think you need a check here to make sure the original shift never shifts out any set bits. (Only happens with unusual widths, but I think it's possible before legalization.)
-Eli From isanbard at gmail.com Mon Dec 14 19:54:51 2009 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 15 Dec 2009 01:54:51 -0000 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> Author: void Date: Mon Dec 14 19:54:51 2009 New Revision: 91392 URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev Log: Initial work on disabling the scheduler. This is a work in progress, and this stuff isn't used just yet. We want to model the GCC `-fno-schedule-insns' and `-fno-schedule-insns2' flags. The hypothesis is that the people who use these flags know what they are doing, and have hand-optimized the C code to reduce latencies and other conflicts. The idea behind our scheme to turn off scheduling is to create a map "on the side" during DAG generation. It will order the nodes by how they appeared in the code. This map is then used during scheduling to get the ordering. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91392&r1=91391&r2=91392&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 @@ -110,6 +110,46 @@ /// SelectionDAG. BumpPtrAllocator Allocator; + /// NodeOrdering - Assigns a "line number" value to each SDNode that + /// corresponds to the "line number" of the original LLVM instruction. 
This + /// used for turning off scheduling, because we'll forgo the normal scheduling + /// algorithm and output the instructions according to this ordering. + class NodeOrdering { + /// LineNo - The line of the instruction the node corresponds to. A value of + /// `0' means it's not assigned. + unsigned LineNo; + std::map Order; + + void operator=(const NodeOrdering&); // Do not implement. + NodeOrdering(const NodeOrdering&); // Do not implement. + public: + NodeOrdering() : LineNo(0) {} + + void add(const SDNode *Node) { + assert(LineNo && "Invalid line number!"); + Order[Node] = LineNo; + } + void remove(const SDNode *Node) { + std::map::iterator Itr = Order.find(Node); + if (Itr != Order.end()) + Order.erase(Itr); + } + void clear() { + Order.clear(); + LineNo = 1; + } + unsigned getLineNo(const SDNode *Node) { + unsigned LN = Order[Node]; + assert(LN && "Node isn't in ordering map!"); + return LN; + } + void newInst() { + ++LineNo; + } + + void dump() const; + } *Ordering; + /// VerifyNode - Sanity check the given node. Aborts if it is invalid. void VerifyNode(SDNode *N); @@ -120,6 +160,9 @@ DenseSet &visited, int level, bool &printed); + void operator=(const SelectionDAG&); // Do not implement. + SelectionDAG(const SelectionDAG&); // Do not implement. + public: SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); ~SelectionDAG(); @@ -199,6 +242,13 @@ return Root = N; } + /// NewInst - Tell the ordering object that we're processing a new + /// instruction. + void NewInst() { + if (Ordering) + Ordering->newInst(); + } + /// Combine - This iterates over the nodes in the SelectionDAG, folding /// certain types of nodes together, or eliminating superfluous nodes. 
The /// Level argument controls whether Combine is allowed to produce nodes and Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91392&r1=91391&r2=91392&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 @@ -20,10 +20,16 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +cl::opt +DisableInstScheduling("disable-inst-scheduling", + cl::init(false), + cl::desc("Disable instruction scheduling")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf) { } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91392&r1=91391&r2=91392&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009 @@ -48,6 +48,8 @@ #include using namespace llvm; +extern cl::opt DisableInstScheduling; + /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { @@ -552,6 +554,9 @@ } DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } } @@ -577,6 +582,9 @@ N->DropOperands(); DeallocateNode(N); + + // Remove the ordering of this node. 
+ if (Ordering) Ordering->remove(N); } void SelectionDAG::DeallocateNode(SDNode *N) { @@ -588,6 +596,9 @@ N->NodeType = ISD::DELETED_NODE; NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -691,7 +702,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -708,7 +721,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } @@ -725,7 +740,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
@@ -778,8 +795,13 @@ SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) : TLI(tli), FLI(fli), DW(0), EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), - getVTList(MVT::Other)), Root(getEntryNode()) { + getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, @@ -792,6 +814,7 @@ SelectionDAG::~SelectionDAG() { allnodes_clear(); + delete Ordering; } void SelectionDAG::allnodes_clear() { @@ -817,6 +840,10 @@ EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -877,14 +904,17 @@ ID.AddPointer(&Val); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -921,14 +951,17 @@ ID.AddPointer(&V); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -983,12 +1016,15 @@ ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) 
Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -998,12 +1034,15 @@ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(FI); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1017,12 +1056,15 @@ ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1042,12 +1084,15 @@ ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1068,12 +1113,15 @@ C->AddSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) ConstantPoolSDNode(isTarget, C, VT, 
Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1082,12 +1130,15 @@ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); ID.AddPointer(MBB); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1103,6 +1154,7 @@ N = NodeAllocator.Allocate(); new (N) VTSDNode(VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1112,6 +1164,7 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1124,6 +1177,7 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1136,6 +1190,7 @@ new (N) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } return SDValue(CondCodeNodes[Cond], 0); } @@ -1228,8 +1283,10 @@ ID.AddInteger(MaskVec[i]); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. 
This memory will be "leaked" when @@ -1241,6 +1298,7 @@ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1258,12 +1316,15 @@ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } CvtRndSatSDNode *N = NodeAllocator.Allocate(); new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1272,12 +1333,15 @@ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1289,12 +1353,15 @@ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); ID.AddInteger(LabelID); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) LabelSDNode(Opcode, dl, Root, LabelID); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1308,12 +1375,15 @@ ID.AddPointer(BA); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) 
BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1326,13 +1396,16 @@ ID.AddPointer(V); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -2243,13 +2316,16 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2473,8 +2549,10 @@ SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); @@ -2484,6 +2562,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2891,8 +2970,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); @@ -2902,6 +2983,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG 
VerifyNode(N); #endif @@ -2968,8 +3050,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); @@ -2977,7 +3061,9 @@ N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } + AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -3573,12 +3659,14 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3636,12 +3724,14 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3714,6 +3804,7 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } @@ -3725,6 +3816,7 @@ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3789,12 +3881,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) LoadSDNode(Ops, dl, 
VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3865,12 +3959,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3925,12 +4021,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3947,14 +4045,17 @@ ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -4020,8 +4121,10 @@ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); @@ -4032,6 +4135,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4087,8 +4191,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, 
NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } if (NumOps == 1) { N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); @@ -4119,6 +4225,7 @@ } } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4581,8 +4688,10 @@ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(ON); return ON; + } } if (!RemoveNodeFromCSEMaps(N)) @@ -4646,6 +4755,7 @@ if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. + if (Ordering) Ordering->add(N); return N; } @@ -4784,8 +4894,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return cast(E); + } } // Allocate a new MachineSDNode. 
@@ -4807,6 +4919,7 @@ CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4843,8 +4956,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return E; + } } return NULL; } @@ -6011,6 +6126,9 @@ errs() << "\n\n"; } +void SelectionDAG::NodeOrdering::dump() const { +} + void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); print_details(OS, G); @@ -6151,4 +6269,3 @@ return false; return true; } - Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91392&r1=91391&r2=91392&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Dec 14 19:54:51 2009 @@ -583,6 +583,9 @@ } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { + // Tell the DAG that we're processing a new instruction. + DAG.NewInst(); + // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { From rjmccall at apple.com Mon Dec 14 20:35:25 2009 From: rjmccall at apple.com (John McCall) Date: Tue, 15 Dec 2009 02:35:25 -0000 Subject: [llvm-commits] [llvm] r91397 - /llvm/trunk/lib/Analysis/ProfileInfo.cpp Message-ID: <200912150235.nBF2ZPTE029607@zion.cs.uiuc.edu> Author: rjmccall Date: Mon Dec 14 20:35:24 2009 New Revision: 91397 URL: http://llvm.org/viewvc/llvm-project?rev=91397&view=rev Log: You can't use typedefs to declare template member specializations, and clang enforces it. 
Modified: llvm/trunk/lib/Analysis/ProfileInfo.cpp Modified: llvm/trunk/lib/Analysis/ProfileInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileInfo.cpp?rev=91397&r1=91396&r2=91397&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ProfileInfo.cpp (original) +++ llvm/trunk/lib/Analysis/ProfileInfo.cpp Mon Dec 14 20:35:24 2009 @@ -44,19 +44,19 @@ } template<> -char ProfileInfo::ID = 0; +char ProfileInfoT::ID = 0; template<> -char MachineProfileInfo::ID = 0; +char ProfileInfoT::ID = 0; template<> -const double ProfileInfo::MissingValue = -1; +const double ProfileInfoT::MissingValue = -1; -template<> -const double MachineProfileInfo::MissingValue = -1; +template<> const +double ProfileInfoT::MissingValue = -1; -template<> -double ProfileInfo::getExecutionCount(const BasicBlock *BB) { +template<> double +ProfileInfoT::getExecutionCount(const BasicBlock *BB) { std::map::iterator J = BlockInformation.find(BB->getParent()); if (J != BlockInformation.end()) { @@ -118,7 +118,8 @@ } template<> -double MachineProfileInfo::getExecutionCount(const MachineBasicBlock *MBB) { +double ProfileInfoT:: + getExecutionCount(const MachineBasicBlock *MBB) { std::map::iterator J = BlockInformation.find(MBB->getParent()); if (J != BlockInformation.end()) { @@ -131,7 +132,7 @@ } template<> -double ProfileInfo::getExecutionCount(const Function *F) { +double ProfileInfoT::getExecutionCount(const Function *F) { std::map::iterator J = FunctionInformation.find(F); if (J != FunctionInformation.end()) @@ -147,7 +148,8 @@ } template<> -double MachineProfileInfo::getExecutionCount(const MachineFunction *MF) { +double ProfileInfoT:: + getExecutionCount(const MachineFunction *MF) { std::map::iterator J = FunctionInformation.find(MF); if (J != FunctionInformation.end()) @@ -159,21 +161,23 @@ } template<> -void ProfileInfo::setExecutionCount(const BasicBlock *BB, double w) { +void ProfileInfoT:: + 
setExecutionCount(const BasicBlock *BB, double w) { DEBUG(errs() << "Creating Block " << BB->getName() << " (weight: " << format("%.20g",w) << ")\n"); BlockInformation[BB->getParent()][BB] = w; } template<> -void MachineProfileInfo::setExecutionCount(const MachineBasicBlock *MBB, double w) { +void ProfileInfoT:: + setExecutionCount(const MachineBasicBlock *MBB, double w) { DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName() << " (weight: " << format("%.20g",w) << ")\n"); BlockInformation[MBB->getParent()][MBB] = w; } template<> -void ProfileInfo::addEdgeWeight(Edge e, double w) { +void ProfileInfoT::addEdgeWeight(Edge e, double w) { double oldw = getEdgeWeight(e); assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); DEBUG(errs() << "Adding to Edge " << e @@ -182,7 +186,8 @@ } template<> -void ProfileInfo::addExecutionCount(const BasicBlock *BB, double w) { +void ProfileInfoT:: + addExecutionCount(const BasicBlock *BB, double w) { double oldw = getExecutionCount(BB); assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); DEBUG(errs() << "Adding to Block " << BB->getName() @@ -191,7 +196,7 @@ } template<> -void ProfileInfo::removeBlock(const BasicBlock *BB) { +void ProfileInfoT::removeBlock(const BasicBlock *BB) { std::map::iterator J = BlockInformation.find(BB->getParent()); if (J == BlockInformation.end()) return; @@ -201,7 +206,7 @@ } template<> -void ProfileInfo::removeEdge(Edge e) { +void ProfileInfoT::removeEdge(Edge e) { std::map::iterator J = EdgeInformation.find(getFunction(e)); if (J == EdgeInformation.end()) return; @@ -211,7 +216,8 @@ } template<> -void ProfileInfo::replaceEdge(const Edge &oldedge, const Edge &newedge) { +void ProfileInfoT:: + replaceEdge(const Edge &oldedge, const Edge &newedge) { double w; if ((w = getEdgeWeight(newedge)) == MissingValue) { w = getEdgeWeight(oldedge); @@ -225,8 +231,9 @@ } template<> -const BasicBlock *ProfileInfo::GetPath(const BasicBlock *Src, 
const BasicBlock *Dest, - Path &P, unsigned Mode) { +const BasicBlock *ProfileInfoT:: + GetPath(const BasicBlock *Src, const BasicBlock *Dest, + Path &P, unsigned Mode) { const BasicBlock *BB = 0; bool hasFoundPath = false; @@ -268,7 +275,8 @@ } template<> -void ProfileInfo::divertFlow(const Edge &oldedge, const Edge &newedge) { +void ProfileInfoT:: + divertFlow(const Edge &oldedge, const Edge &newedge) { DEBUG(errs() << "Diverting " << oldedge << " via " << newedge ); // First check if the old edge was taken, if not, just delete it... @@ -302,8 +310,8 @@ /// This checks all edges of the function the blocks reside in and replaces the /// occurences of RmBB with DestBB. template<> -void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, - const BasicBlock *DestBB) { +void ProfileInfoT:: + replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { DEBUG(errs() << "Replacing " << RmBB->getName() << " with " << DestBB->getName() << "\n"); const Function *F = DestBB->getParent(); @@ -352,10 +360,10 @@ /// Since its possible that there is more than one edge in the CFG from FristBB /// to SecondBB its necessary to redirect the flow proporionally. 
template<> -void ProfileInfo::splitEdge(const BasicBlock *FirstBB, - const BasicBlock *SecondBB, - const BasicBlock *NewBB, - bool MergeIdenticalEdges) { +void ProfileInfoT::splitEdge(const BasicBlock *FirstBB, + const BasicBlock *SecondBB, + const BasicBlock *NewBB, + bool MergeIdenticalEdges) { const Function *F = FirstBB->getParent(); std::map::iterator J = EdgeInformation.find(F); @@ -398,7 +406,8 @@ } template<> -void ProfileInfo::splitBlock(const BasicBlock *Old, const BasicBlock* New) { +void ProfileInfoT::splitBlock(const BasicBlock *Old, + const BasicBlock* New) { const Function *F = Old->getParent(); std::map::iterator J = EdgeInformation.find(F); @@ -426,8 +435,10 @@ } template<> -void ProfileInfo::splitBlock(const BasicBlock *BB, const BasicBlock* NewBB, - BasicBlock *const *Preds, unsigned NumPreds) { +void ProfileInfoT::splitBlock(const BasicBlock *BB, + const BasicBlock* NewBB, + BasicBlock *const *Preds, + unsigned NumPreds) { const Function *F = BB->getParent(); std::map::iterator J = EdgeInformation.find(F); @@ -461,7 +472,8 @@ } template<> -void ProfileInfo::transfer(const Function *Old, const Function *New) { +void ProfileInfoT::transfer(const Function *Old, + const Function *New) { DEBUG(errs() << "Replacing Function " << Old->getName() << " with " << New->getName() << "\n"); std::map::iterator J = @@ -474,8 +486,8 @@ FunctionInformation.erase(Old); } -static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, ProfileInfo::Edge &tocalc, - unsigned &uncalc) { +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, + ProfileInfo::Edge &tocalc, unsigned &uncalc) { if (w == ProfileInfo::MissingValue) { tocalc = edge; uncalc++; @@ -486,7 +498,9 @@ } template<> -bool ProfileInfo::CalculateMissingEdge(const BasicBlock *BB, Edge &removed, bool assumeEmptySelf) { +bool ProfileInfoT:: + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, + bool assumeEmptySelf) { Edge edgetocalc; unsigned uncalculated = 0; @@ -562,7 +576,7 @@ } 
template<> -bool ProfileInfo::EstimateMissingEdges(const BasicBlock *BB) { +bool ProfileInfoT::EstimateMissingEdges(const BasicBlock *BB) { bool hasNoSuccessors = false; double inWeight = 0; @@ -619,7 +633,7 @@ } template<> -void ProfileInfo::repair(const Function *F) { +void ProfileInfoT::repair(const Function *F) { // if (getExecutionCount(&(F->getEntryBlock())) == 0) { // for (Function::const_iterator FI = F->begin(), FE = F->end(); // FI != FE; ++FI) { From lhames at gmail.com Mon Dec 14 20:36:17 2009 From: lhames at gmail.com (Lang Hames) Date: Tue, 15 Dec 2009 13:36:17 +1100 Subject: [llvm-commits] [llvm] r91273 - in /llvm/trunk: include/llvm/CodeGen/CalcSpillWeights.h lib/CodeGen/CalcSpillWeights.cpp lib/CodeGen/PreAllocSplitting.cpp lib/CodeGen/RegAllocLinearScan.cpp lib/CodeGen/RegAllocPBQP.cpp lib/CodeGen/SimpleRegisterCo Message-ID: <728927c70912141836h5af5f1ey1aeb6359216ea85c@mail.gmail.com> > I think you can do without the header file. Just put a > CalculateSpillWeightsID in Passes.h instead. > I'm planning to add functionality to the pass to compute the spill weight of individual intervals. Passes using this (such as the new spiller) will need access to the interface. > > +/// Returns true if the given live interval is zero length. > > +bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const > { > > Should this be a method on LiveInterval instead? > Probably. I'll move it over. :) - Lang. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091215/a0696aec/attachment.html From evan.cheng at apple.com Mon Dec 14 21:00:32 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 03:00:32 -0000 Subject: [llvm-commits] [llvm] r91399 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/zext-shl.ll Message-ID: <200912150300.nBF30Wsv030497@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 21:00:32 2009 New Revision: 91399 URL: http://llvm.org/viewvc/llvm-project?rev=91399&view=rev Log: Make 91378 more conservative. 1. Only perform (zext (shl (zext x), y)) -> (shl (zext x), y) when y is a constant. This makes sure it removes at least one zext. 2. If the shift is a left shift, make sure the original shift cannot shift out bits. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/test/CodeGen/X86/zext-shl.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91399&r1=91398&r2=91399&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 21:00:32 2009 @@ -3291,10 +3291,20 @@ if (SCC.getNode()) return SCC; } - // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SHL) { + // If the original shl may be shifting out bits, do not perform this + // transformation. 
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - + N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); + if (ShAmt > KnownZeroBits) + return SDValue(); + } DebugLoc dl = N->getDebugLoc(); return DAG.getNode(N0.getOpcode(), dl, VT, DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), Modified: llvm/trunk/test/CodeGen/X86/zext-shl.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91399&r1=91398&r2=91399&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (original) +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 21:00:32 2009 @@ -23,16 +23,3 @@ %2 = zext i16 %1 to i32 ret i32 %2 } - -define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { -entry: -; CHECK: t3: -; CHECK: shll -; CHECK-NOT: movzwl -; CHECK: ret - %0 = zext i8 %x to i16 - %1 = zext i8 %y to i16 - %2 = shl i16 %0, %1 - %3 = zext i16 %2 to i32 - ret i32 %3 -} From evan.cheng at apple.com Mon Dec 14 21:00:51 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 19:00:51 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: Good idea. See 91399. Thanks. 
Evan On Dec 14, 2009, at 5:54 PM, Eli Friedman wrote: > On Mon, Dec 14, 2009 at 4:41 PM, Evan Cheng wrote: >> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 >> @@ -3278,6 +3278,16 @@ >> if (SCC.getNode()) return SCC; >> } >> >> + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) >> + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && >> + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && >> + N0.hasOneUse()) { >> + DebugLoc dl = N->getDebugLoc(); >> + return DAG.getNode(N0.getOpcode(), dl, VT, >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); >> + } >> + >> return SDValue(); >> } > > I think you need a check here to make sure the original shift never > shifts out any set bits. (Only happens with unusual widths, but I > think it's possible before legalization.) > > -Eli From foldr at codedgers.com Mon Dec 14 21:03:37 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 03:03:37 -0000 Subject: [llvm-commits] [llvm] r91401 - /llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst Message-ID: <200912150303.nBF33bCv030653@zion.cs.uiuc.edu> Author: foldr Date: Mon Dec 14 21:03:37 2009 New Revision: 91401 URL: http://llvm.org/viewvc/llvm-project?rev=91401&view=rev Log: Small documentation update. Modified: llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst Modified: llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst?rev=91401&r1=91400&r2=91401&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst (original) +++ llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst Mon Dec 14 21:03:37 2009 @@ -360,10 +360,11 @@ and ``required`` properties. 
- ``init`` - this option has a default value, either a string (if it is a - parameter), or a boolean (if it is a switch; boolean constants are called - ``true`` and ``false``). List options can't have this attribute. Usage - examples: ``(switch_option "foo", (init true))``; ``(prefix_option "bar", - (init "baz"))``. + parameter), or a boolean (if it is a switch; as in C++, boolean constants + are called ``true`` and ``false``). List options can't have ``init`` + attribute. + Usage examples: ``(switch_option "foo", (init true))``; ``(prefix_option + "bar", (init "baz"))``. - ``extern`` - this option is defined in some other plugin, see `below`__. From foldr at codedgers.com Mon Dec 14 21:04:02 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 03:04:02 -0000 Subject: [llvm-commits] [llvm] r91402 - in /llvm/trunk: test/LLVMC/HookWithInFile.td utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912150304.nBF343NP030677@zion.cs.uiuc.edu> Author: foldr Date: Mon Dec 14 21:04:02 2009 New Revision: 91402 URL: http://llvm.org/viewvc/llvm-project?rev=91402&view=rev Log: Allow $CALL(Hook, '$INFILE') for non-join tools. Added: llvm/trunk/test/LLVMC/HookWithInFile.td Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Added: llvm/trunk/test/LLVMC/HookWithInFile.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/HookWithInFile.td?rev=91402&view=auto ============================================================================== --- llvm/trunk/test/LLVMC/HookWithInFile.td (added) +++ llvm/trunk/test/LLVMC/HookWithInFile.td Mon Dec 14 21:04:02 2009 @@ -0,0 +1,13 @@ +// Check that a hook can be given $INFILE as an argument. 
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: grep Hook\\(inFile.c_str\\(\\)\\) %t | count 1 + +include "llvm/CompilerDriver/Common.td" + +def dummy_tool : Tool<[ +(cmd_line "$CALL(Hook, '$INFILE')/path $INFILE"), +(in_language "dummy"), +(out_language "dummy") +]>; + +def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91402&r1=91401&r2=91402&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Mon Dec 14 21:04:02 2009 @@ -1537,62 +1537,99 @@ } } -/// SubstituteSpecialCommands - Perform string substitution for $CALL -/// and $ENV. Helper function used by EmitCmdLineVecFill(). -StrVector::const_iterator SubstituteSpecialCommands -(StrVector::const_iterator Pos, StrVector::const_iterator End, raw_ostream& O) +/// SubstituteCall - Given "$CALL(HookName, [Arg1 [, Arg2 [...]]])", output +/// "hooks::HookName([Arg1 [, Arg2 [, ...]]])". Helper function used by +/// SubstituteSpecialCommands(). 
+StrVector::const_iterator +SubstituteCall (StrVector::const_iterator Pos, + StrVector::const_iterator End, + bool IsJoin, raw_ostream& O) { + const char* errorMessage = "Syntax error in $CALL invocation!"; + checkedIncrement(Pos, End, errorMessage); + const std::string& CmdName = *Pos; - const std::string& cmd = *Pos; - - if (cmd == "$CALL") { - checkedIncrement(Pos, End, "Syntax error in $CALL invocation!"); - const std::string& CmdName = *Pos; + if (CmdName == ")") + throw "$CALL invocation: empty argument list!"; - if (CmdName == ")") - throw "$CALL invocation: empty argument list!"; + O << "hooks::"; + O << CmdName << "("; - O << "hooks::"; - O << CmdName << "("; + bool firstIteration = true; + while (true) { + checkedIncrement(Pos, End, errorMessage); + const std::string& Arg = *Pos; + assert(Arg.size() != 0); - bool firstIteration = true; - while (true) { - checkedIncrement(Pos, End, "Syntax error in $CALL invocation!"); - const std::string& Arg = *Pos; - assert(Arg.size() != 0); + if (Arg[0] == ')') + break; - if (Arg[0] == ')') - break; + if (firstIteration) + firstIteration = false; + else + O << ", "; - if (firstIteration) - firstIteration = false; + if (Arg == "$INFILE") { + if (IsJoin) + throw "$CALL(Hook, $INFILE) can't be used with a Join tool!"; else - O << ", "; - + O << "inFile.c_str()"; + } + else { O << '"' << Arg << '"'; } + } - O << ')'; + O << ')'; - } - else if (cmd == "$ENV") { - checkedIncrement(Pos, End, "Syntax error in $ENV invocation!"); - const std::string& EnvName = *Pos; + return Pos; +} - if (EnvName == ")") - throw "$ENV invocation: empty argument list!"; +/// SubstituteEnv - Given '$ENV(VAR_NAME)', output 'getenv("VAR_NAME")'. Helper +/// function used by SubstituteSpecialCommands(). 
+StrVector::const_iterator +SubstituteEnv (StrVector::const_iterator Pos, + StrVector::const_iterator End, raw_ostream& O) +{ + const char* errorMessage = "Syntax error in $ENV invocation!"; + checkedIncrement(Pos, End, errorMessage); + const std::string& EnvName = *Pos; + + if (EnvName == ")") + throw "$ENV invocation: empty argument list!"; + + O << "checkCString(std::getenv(\""; + O << EnvName; + O << "\"))"; + + checkedIncrement(Pos, End, errorMessage); + + return Pos; +} - O << "checkCString(std::getenv(\""; - O << EnvName; - O << "\"))"; +/// SubstituteSpecialCommands - Given an invocation of $CALL or $ENV, output +/// handler code. Helper function used by EmitCmdLineVecFill(). +StrVector::const_iterator +SubstituteSpecialCommands (StrVector::const_iterator Pos, + StrVector::const_iterator End, + bool IsJoin, raw_ostream& O) +{ - checkedIncrement(Pos, End, "Syntax error in $ENV invocation!"); + const std::string& cmd = *Pos; + + // Perform substitution. + if (cmd == "$CALL") { + Pos = SubstituteCall(Pos, End, IsJoin, O); + } + else if (cmd == "$ENV") { + Pos = SubstituteEnv(Pos, End, O); } else { throw "Unknown special command: " + cmd; } + // Handle '$CMD(ARG)/additional/text'. 
const std::string& Leftover = *Pos; assert(Leftover.at(0) == ')'); if (Leftover.size() != 1) @@ -1652,7 +1689,7 @@ } else { O << "vec.push_back("; - I = SubstituteSpecialCommands(I, E, O); + I = SubstituteSpecialCommands(I, E, IsJoin, O); O << ");\n"; } } @@ -1665,7 +1702,7 @@ O.indent(IndentLevel) << "cmd = "; if (StrVec[0][0] == '$') - SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), O); + SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), IsJoin, O); else O << '"' << StrVec[0] << '"'; O << ";\n"; From foldr at codedgers.com Mon Dec 14 21:04:14 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 03:04:14 -0000 Subject: [llvm-commits] [llvm] r91403 - /llvm/trunk/test/LLVMC/Init.td Message-ID: <200912150304.nBF34EB2030697@zion.cs.uiuc.edu> Author: foldr Date: Mon Dec 14 21:04:14 2009 New Revision: 91403 URL: http://llvm.org/viewvc/llvm-project?rev=91403&view=rev Log: Pipe 'grep' output to 'count'. Modified: llvm/trunk/test/LLVMC/Init.td Modified: llvm/trunk/test/LLVMC/Init.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/Init.td?rev=91403&r1=91402&r2=91403&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/Init.td (original) +++ llvm/trunk/test/LLVMC/Init.td Mon Dec 14 21:04:14 2009 @@ -1,7 +1,7 @@ // Check that (init true/false) and (init "str") work. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep cl::init("some-string") %t -// RUN: grep cl::init(true) %t +// RUN: grep cl::init(\\"some-string\\") %t | count 1 +// RUN: grep cl::init(true) %t | count 1 include "llvm/CompilerDriver/Common.td" From foldr at codedgers.com Mon Dec 14 21:04:52 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 03:04:52 -0000 Subject: [llvm-commits] [llvm] r91404 - in /llvm/trunk: include/llvm/CompilerDriver/ test/LLVMC/ tools/llvmc/doc/ utils/TableGen/ Message-ID: <200912150304.nBF34rPa030741@zion.cs.uiuc.edu> Author: foldr Date: Mon Dec 14 21:04:52 2009 New Revision: 91404 URL: http://llvm.org/viewvc/llvm-project?rev=91404&view=rev Log: Validate the generated C++ code in llvmc tests. Checks that the code generated by 'tblgen --emit-llvmc' can be actually compiled. Also fixes two bugs found in this way: - forward_transformed_value didn't work with non-list arguments - cl::ZeroOrOne is now called cl::Optional Modified: llvm/trunk/include/llvm/CompilerDriver/Common.td llvm/trunk/test/LLVMC/EmptyCompilationGraph.td llvm/trunk/test/LLVMC/EnvParentheses.td llvm/trunk/test/LLVMC/ExternOptions.td llvm/trunk/test/LLVMC/ForwardAs.td llvm/trunk/test/LLVMC/ForwardTransformedValue.td llvm/trunk/test/LLVMC/ForwardValue.td llvm/trunk/test/LLVMC/HookWithArguments.td llvm/trunk/test/LLVMC/HookWithInFile.td llvm/trunk/test/LLVMC/Init.td llvm/trunk/test/LLVMC/MultiValuedOption.td llvm/trunk/test/LLVMC/MultipleCompilationGraphs.td llvm/trunk/test/LLVMC/NoActions.td llvm/trunk/test/LLVMC/NoCompilationGraph.td llvm/trunk/test/LLVMC/OneOrMore.td llvm/trunk/test/LLVMC/OptionPreprocessor.td llvm/trunk/test/LLVMC/TestWarnings.td llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/include/llvm/CompilerDriver/Common.td URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CompilerDriver/Common.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/include/llvm/CompilerDriver/Common.td (original) +++ llvm/trunk/include/llvm/CompilerDriver/Common.td Mon Dec 14 21:04:52 2009 @@ -42,9 +42,9 @@ def init; def multi_val; def one_or_more; +def optional; def really_hidden; def required; -def zero_or_one; def comma_separated; // The 'case' construct. Modified: llvm/trunk/test/LLVMC/EmptyCompilationGraph.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/EmptyCompilationGraph.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/EmptyCompilationGraph.td (original) +++ llvm/trunk/test/LLVMC/EmptyCompilationGraph.td Mon Dec 14 21:04:52 2009 @@ -1,5 +1,6 @@ // Check that the compilation graph can be empty. -// RUN: tblgen -I %p/../../include --gen-llvmc %s +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/EnvParentheses.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/EnvParentheses.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/EnvParentheses.td (original) +++ llvm/trunk/test/LLVMC/EnvParentheses.td Mon Dec 14 21:04:52 2009 @@ -2,6 +2,7 @@ // http://llvm.org/bugs/show_bug.cgi?id=4157 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: not grep {)));} %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/ExternOptions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ExternOptions.td?rev=91404&r1=91403&r2=91404&view=diff 
============================================================================== --- llvm/trunk/test/LLVMC/ExternOptions.td (original) +++ llvm/trunk/test/LLVMC/ExternOptions.td Mon Dec 14 21:04:52 2009 @@ -2,6 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep {extern .* AutoGeneratedSwitch_Wall} %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/ForwardAs.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardAs.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardAs.td (original) +++ llvm/trunk/test/LLVMC/ForwardAs.td Mon Dec 14 21:04:52 2009 @@ -2,6 +2,7 @@ // http://llvm.org/bugs/show_bug.cgi?id=4159 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep unique_name %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/ForwardTransformedValue.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardTransformedValue.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardTransformedValue.td (original) +++ llvm/trunk/test/LLVMC/ForwardTransformedValue.td Mon Dec 14 21:04:52 2009 @@ -1,8 +1,9 @@ // Check that forward_transformed_value works. // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep HookA %t -// RUN: grep HookB %t +// RUN: grep HookA %t | count 2 +// RUN: grep HookB %t | count 2 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/ForwardValue.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardValue.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardValue.td (original) +++ llvm/trunk/test/LLVMC/ForwardValue.td Mon Dec 14 21:04:52 2009 @@ -3,6 +3,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep {vec.push_back\(AutoGeneratedParameter_a\)} %t // RUN: grep {std::copy\(AutoGeneratedList_b.begin\(\)} %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/HookWithArguments.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/HookWithArguments.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/HookWithArguments.td (original) +++ llvm/trunk/test/LLVMC/HookWithArguments.td Mon Dec 14 21:04:52 2009 @@ -4,6 +4,7 @@ // RUN: grep "/path" %t | count 1 // RUN: grep "VARIABLE" %t | count 1 // RUN: grep "/2path" %t | count 1 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/HookWithInFile.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/HookWithInFile.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/HookWithInFile.td (original) +++ llvm/trunk/test/LLVMC/HookWithInFile.td Mon Dec 14 21:04:52 2009 @@ -1,6 +1,7 @@ // Check that a hook can be given $INFILE as an argument. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep Hook\\(inFile.c_str\\(\\)\\) %t | count 1 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/Init.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/Init.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/Init.td (original) +++ llvm/trunk/test/LLVMC/Init.td Mon Dec 14 21:04:52 2009 @@ -2,6 +2,7 @@ // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep cl::init(\\"some-string\\") %t | count 1 // RUN: grep cl::init(true) %t | count 1 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/MultiValuedOption.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/MultiValuedOption.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/MultiValuedOption.td (original) +++ llvm/trunk/test/LLVMC/MultiValuedOption.td Mon Dec 14 21:04:52 2009 @@ -2,6 +2,7 @@ // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t // RUN: grep cl::multi_val(2) %t | count 1 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/MultipleCompilationGraphs.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/MultipleCompilationGraphs.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/MultipleCompilationGraphs.td (original) +++ llvm/trunk/test/LLVMC/MultipleCompilationGraphs.td Mon Dec 14 21:04:52 2009 @@ -1,5 +1,6 @@ // Check that multiple compilation graphs are allowed. 
-// RUN: tblgen -I %p/../../include --gen-llvmc %s +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/NoActions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/NoActions.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/NoActions.td (original) +++ llvm/trunk/test/LLVMC/NoActions.td Mon Dec 14 21:04:52 2009 @@ -1,5 +1,7 @@ // Check that tools without associated actions are accepted. -// RUN: tblgen -I %p/../../include --gen-llvmc %s | grep dummy_tool +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: grep dummy_tool %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/NoCompilationGraph.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/NoCompilationGraph.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/NoCompilationGraph.td (original) +++ llvm/trunk/test/LLVMC/NoCompilationGraph.td Mon Dec 14 21:04:52 2009 @@ -1,4 +1,5 @@ // Check that the compilation graph is not required. -// RUN: tblgen -I %p/../../include --gen-llvmc %s +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/OneOrMore.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OneOrMore.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OneOrMore.td (original) +++ llvm/trunk/test/LLVMC/OneOrMore.td Mon Dec 14 21:04:52 2009 @@ -1,14 +1,15 @@ // Check that (one_or_more) and (zero_or_one) properties work. 
// The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep cl::ZeroOrOne %t | count 1 +// RUN: grep cl::Optional %t | count 1 // RUN: grep cl::OneOrMore %t | count 1 +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ (prefix_list_option "foo", (one_or_more)), - (parameter_list_option "baz", (zero_or_one))]>; + (parameter_list_option "baz", (optional))]>; def dummy_tool : Tool<[ (cmd_line "dummy_cmd $INFILE"), Modified: llvm/trunk/test/LLVMC/OptionPreprocessor.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OptionPreprocessor.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OptionPreprocessor.td (original) +++ llvm/trunk/test/LLVMC/OptionPreprocessor.td Mon Dec 14 21:04:52 2009 @@ -3,6 +3,7 @@ // RUN: grep W1 %t // RUN: grep W2 %t // RUN: grep W3 %t +// RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/TestWarnings.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/TestWarnings.td?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/TestWarnings.td (original) +++ llvm/trunk/test/LLVMC/TestWarnings.td Mon Dec 14 21:04:52 2009 @@ -1,4 +1,4 @@ -// Check that the compiler warns about unused options. +// Check that warnings about unused options are really emitted. // This should fail because the output is printed on stderr. // RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!" 
Modified: llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst (original) +++ llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst Mon Dec 14 21:04:52 2009 @@ -336,8 +336,8 @@ it is synonymous with ``required``. Incompatible with ``required`` and ``zero_or_one``. - - ``zero_or_one`` - the option can be specified zero or one times. Useful - only for list options in conjunction with ``multi_val``. Incompatible with + - ``optional`` - the option can be specified zero or one times. Useful only + for list options in conjunction with ``multi_val``. Incompatible with ``required`` and ``one_or_more``. - ``hidden`` - the description of this option will not appear in @@ -356,7 +356,7 @@ - ``multi_val n`` - this option takes *n* arguments (can be useful in some special cases). Usage example: ``(parameter_list_option "foo", (multi_val 3))``; the command-line syntax is '-foo a b c'. Only list options can have - this attribute; you can, however, use the ``one_or_more``, ``zero_or_one`` + this attribute; you can, however, use the ``one_or_more``, ``optional`` and ``required`` properties. 
- ``init`` - this option has a default value, either a string (if it is a Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91404&r1=91403&r2=91404&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Mon Dec 14 21:04:52 2009 @@ -211,7 +211,7 @@ namespace OptionDescriptionFlags { enum OptionDescriptionFlags { Required = 0x1, Hidden = 0x2, ReallyHidden = 0x4, Extern = 0x8, - OneOrMore = 0x10, ZeroOrOne = 0x20, + OneOrMore = 0x10, Optional = 0x20, CommaSeparated = 0x40 }; } @@ -260,8 +260,8 @@ bool isOneOrMore() const; void setOneOrMore(); - bool isZeroOrOne() const; - void setZeroOrOne(); + bool isOptional() const; + void setOptional(); bool isHidden() const; void setHidden(); @@ -331,11 +331,11 @@ Flags |= OptionDescriptionFlags::OneOrMore; } -bool OptionDescription::isZeroOrOne() const { - return Flags & OptionDescriptionFlags::ZeroOrOne; +bool OptionDescription::isOptional() const { + return Flags & OptionDescriptionFlags::Optional; } -void OptionDescription::setZeroOrOne() { - Flags |= OptionDescriptionFlags::ZeroOrOne; +void OptionDescription::setOptional() { + Flags |= OptionDescriptionFlags::Optional; } bool OptionDescription::isHidden() const { @@ -548,7 +548,7 @@ AddHandler("one_or_more", &CollectOptionProperties::onOneOrMore); AddHandler("really_hidden", &CollectOptionProperties::onReallyHidden); AddHandler("required", &CollectOptionProperties::onRequired); - AddHandler("zero_or_one", &CollectOptionProperties::onZeroOrOne); + AddHandler("optional", &CollectOptionProperties::onOptional); AddHandler("comma_separated", &CollectOptionProperties::onCommaSeparated); staticMembersInitialized_ = true; @@ -595,8 +595,8 @@ void onRequired (const DagInit* d) { checkNumberOfArguments(d, 
0); - if (optDesc_.isOneOrMore() || optDesc_.isZeroOrOne()) - throw "Only one of (required), (zero_or_one) or " + if (optDesc_.isOneOrMore() || optDesc_.isOptional()) + throw "Only one of (required), (optional) or " "(one_or_more) properties is allowed!"; optDesc_.setRequired(); } @@ -617,8 +617,8 @@ void onOneOrMore (const DagInit* d) { checkNumberOfArguments(d, 0); - if (optDesc_.isRequired() || optDesc_.isZeroOrOne()) - throw "Only one of (required), (zero_or_one) or " + if (optDesc_.isRequired() || optDesc_.isOptional()) + throw "Only one of (required), (optional) or " "(one_or_more) properties is allowed!"; if (!OptionType::IsList(optDesc_.Type)) llvm::errs() << "Warning: specifying the 'one_or_more' property " @@ -626,15 +626,15 @@ optDesc_.setOneOrMore(); } - void onZeroOrOne (const DagInit* d) { + void onOptional (const DagInit* d) { checkNumberOfArguments(d, 0); if (optDesc_.isRequired() || optDesc_.isOneOrMore()) - throw "Only one of (required), (zero_or_one) or " + throw "Only one of (required), (optional) or " "(one_or_more) properties is allowed!"; if (!OptionType::IsList(optDesc_.Type)) - llvm::errs() << "Warning: specifying the 'zero_or_one' property" + llvm::errs() << "Warning: specifying the 'optional' property" "on a non-list option will have no effect.\n"; - optDesc_.setZeroOrOne(); + optDesc_.setOptional(); } void onMultiVal (const DagInit* d) { @@ -1882,7 +1882,8 @@ const OptionDescription& D = OptDescs.FindListOrParameter(Name); O.indent(IndentLevel) << "vec.push_back(" << "hooks::" - << Hook << "(" << D.GenVariableName() << "));\n"; + << Hook << "(" << D.GenVariableName() + << (D.isParameter() ? 
".c_str()" : "") << "));\n"; } @@ -2211,8 +2212,8 @@ else if (val.isOneOrMore() && val.isList()) { O << ", cl::OneOrMore"; } - else if (val.isZeroOrOne() && val.isList()) { - O << ", cl::ZeroOrOne"; + else if (val.isOptional() && val.isList()) { + O << ", cl::Optional"; } if (val.isReallyHidden()) From evan.cheng at apple.com Mon Dec 14 21:07:11 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 03:07:11 -0000 Subject: [llvm-commits] [llvm] r91405 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/setcc.ll Message-ID: <200912150307.nBF37Btj030812@zion.cs.uiuc.edu> Author: evancheng Date: Mon Dec 14 21:07:11 2009 New Revision: 91405 URL: http://llvm.org/viewvc/llvm-project?rev=91405&view=rev Log: Disable 91381 for now. It's miscompiling ARMISelDAG2DAG.cpp. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/test/CodeGen/X86/setcc.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91405&r1=91404&r2=91405&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 14 21:07:11 2009 @@ -5752,7 +5752,9 @@ SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); // Use sbb x, x to materialize carry bit into a GPR. - if (X86CC == X86::COND_B) { + // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently + // miscompiling ARMISelDAGToDAG.cpp. 
+ if (0 && !isFP && X86CC == X86::COND_B) { return DAG.getNode(ISD::AND, dl, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond), Modified: llvm/trunk/test/CodeGen/X86/setcc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91405&r1=91404&r2=91405&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/setcc.ll (original) +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 21:07:11 2009 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; XFAIL: * ; rdar://7329206 ; Use sbb x, x to materialize carry bit in a GPR. The value is either From rjmccall at apple.com Mon Dec 14 21:10:26 2009 From: rjmccall at apple.com (John McCall) Date: Tue, 15 Dec 2009 03:10:26 -0000 Subject: [llvm-commits] [llvm] r91407 - /llvm/trunk/include/llvm/ADT/ilist.h Message-ID: <200912150310.nBF3AR5P030923@zion.cs.uiuc.edu> Author: rjmccall Date: Mon Dec 14 21:10:26 2009 New Revision: 91407 URL: http://llvm.org/viewvc/llvm-project?rev=91407&view=rev Log: Names from dependent base classes are not found by unqualified lookup. Modified: llvm/trunk/include/llvm/ADT/ilist.h Modified: llvm/trunk/include/llvm/ADT/ilist.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ilist.h?rev=91407&r1=91406&r2=91407&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/ilist.h (original) +++ llvm/trunk/include/llvm/ADT/ilist.h Mon Dec 14 21:10:26 2009 @@ -643,7 +643,7 @@ // Main implementation here - Insert for a node passed by value... 
iterator insert(iterator where, const NodeTy &val) { - return insert(where, createNode(val)); + return insert(where, this->createNode(val)); } From kennethuil at gmail.com Mon Dec 14 21:27:52 2009 From: kennethuil at gmail.com (Kenneth Uildriks) Date: Tue, 15 Dec 2009 03:27:52 -0000 Subject: [llvm-commits] [llvm] r91410 - in /llvm/trunk: lib/Target/X86/X86CallingConv.td test/CodeGen/X86/fastcc3struct.ll Message-ID: <200912150327.nBF3RqRc031581@zion.cs.uiuc.edu> Author: kennethuil Date: Mon Dec 14 21:27:52 2009 New Revision: 91410 URL: http://llvm.org/viewvc/llvm-project?rev=91410&view=rev Log: For fastcc on x86, let ECX be used as a return register after EAX and EDX Added: llvm/trunk/test/CodeGen/X86/fastcc3struct.ll Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=91410&r1=91409&r2=91410&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.td (original) +++ llvm/trunk/lib/Target/X86/X86CallingConv.td Mon Dec 14 21:27:52 2009 @@ -64,11 +64,18 @@ // X86-32 FastCC return-value convention. def RetCC_X86_32_Fast : CallingConv<[ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has - // SSE2, otherwise it is the the C calling conventions. + // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + + // For integers, ECX can be used as an extra return register + CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, + CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, + CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>, + + // Otherwise, it is the same as the common X86 calling convention. 
CCDelegateTo<RetCC_X86Common> ]>; Added: llvm/trunk/test/CodeGen/X86/fastcc3struct.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fastcc3struct.ll?rev=91410&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/fastcc3struct.ll (added) +++ llvm/trunk/test/CodeGen/X86/fastcc3struct.ll Mon Dec 14 21:27:52 2009 @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -o %t +; RUN: grep "movl .48, %ecx" %t +; RUN: grep "movl .24, %edx" %t +; RUN: grep "movl .12, %eax" %t + +%0 = type { i32, i32, i32 } + +define internal fastcc %0 @ReturnBigStruct() nounwind readnone { +entry: + %0 = insertvalue %0 zeroinitializer, i32 12, 0 + %1 = insertvalue %0 %0, i32 24, 1 + %2 = insertvalue %0 %1, i32 48, 2 + ret %0 %2 +} + From clattner at apple.com Mon Dec 14 23:45:35 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 21:45:35 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <6ED59F80-2FD1-4C09-8325-BD4ED7C90EBE@apple.com> Message-ID: <36B43602-B224-444F-AF93-AFA3CF30E539@apple.com> On Dec 14, 2009, at 5:49 PM, Evan Cheng wrote: >>>> >>>> Is this really profitable in the general case? It seems that this should only be done if at least one of the inputs to the shift will fold away. Adding a nice "WouldFoldAZExt" predicate that would return true for zext/trunc would make sense, no? >>> >>> Are there cases where (zext (zext)) would not fold into a single zext? >> >> No, I'm worried that you're turning something like: >> >> zext(shr(call, call)) -> shr(zext(call), zext(call)) >> >> which introduces two zexts and deletes one. > > No. The transformation only happens when one of the shift's operand is a zext. Oh ok!
-Chris From clattner at apple.com Mon Dec 14 23:46:11 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 21:46:11 -0800 Subject: [llvm-commits] [llvm] r91397 - /llvm/trunk/lib/Analysis/ProfileInfo.cpp In-Reply-To: <200912150235.nBF2ZPTE029607@zion.cs.uiuc.edu> References: <200912150235.nBF2ZPTE029607@zion.cs.uiuc.edu> Message-ID: <9367CAC4-5D67-4A50-A4F2-C4BE12F88A4C@apple.com> On Dec 14, 2009, at 6:35 PM, John McCall wrote: > Author: rjmccall > Date: Mon Dec 14 20:35:24 2009 > New Revision: 91397 > > URL: http://llvm.org/viewvc/llvm-project?rev=91397&view=rev > Log: > You can't use typedefs to declare template member specializations, and > clang enforces it. Does clang produce a good diagnostic (with a fixit hint) for this error? -Chris > > > Modified: > llvm/trunk/lib/Analysis/ProfileInfo.cpp > > Modified: llvm/trunk/lib/Analysis/ProfileInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileInfo.cpp?rev=91397&r1=91396&r2=91397&view=diff > > ============================================================================== > --- llvm/trunk/lib/Analysis/ProfileInfo.cpp (original) > +++ llvm/trunk/lib/Analysis/ProfileInfo.cpp Mon Dec 14 20:35:24 2009 > @@ -44,19 +44,19 @@ > } > > template<> > -char ProfileInfo::ID = 0; > +char ProfileInfoT<Function,BasicBlock>::ID = 0; > > template<> > -char MachineProfileInfo::ID = 0; > +char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; > > template<> > -const double ProfileInfo::MissingValue = -1; > +const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; > > -template<> > -const double MachineProfileInfo::MissingValue = -1; > +template<> const > +double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; > > -template<> > -double ProfileInfo::getExecutionCount(const BasicBlock *BB) { > +template<> double > +ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { > std::map<const Function*, BlockCounts>::iterator J = > BlockInformation.find(BB->getParent()); > if (J != BlockInformation.end()) { > @@ -118,7 +118,8 @@ > } > > template<> > -double MachineProfileInfo::getExecutionCount(const MachineBasicBlock *MBB) { > +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: > + getExecutionCount(const
MachineBasicBlock *MBB) { > +double ProfileInfoT:: > + getExecutionCount(const MachineBasicBlock *MBB) { > std::map::iterator J = > BlockInformation.find(MBB->getParent()); > if (J != BlockInformation.end()) { > @@ -131,7 +132,7 @@ > } > > template<> > -double ProfileInfo::getExecutionCount(const Function *F) { > +double ProfileInfoT::getExecutionCount(const Function *F) { > std::map::iterator J = > FunctionInformation.find(F); > if (J != FunctionInformation.end()) > @@ -147,7 +148,8 @@ > } > > template<> > -double MachineProfileInfo::getExecutionCount(const MachineFunction *MF) { > +double ProfileInfoT:: > + getExecutionCount(const MachineFunction *MF) { > std::map::iterator J = > FunctionInformation.find(MF); > if (J != FunctionInformation.end()) > @@ -159,21 +161,23 @@ > } > > template<> > -void ProfileInfo::setExecutionCount(const BasicBlock *BB, double w) { > +void ProfileInfoT:: > + setExecutionCount(const BasicBlock *BB, double w) { > DEBUG(errs() << "Creating Block " << BB->getName() > << " (weight: " << format("%.20g",w) << ")\n"); > BlockInformation[BB->getParent()][BB] = w; > } > > template<> > -void MachineProfileInfo::setExecutionCount(const MachineBasicBlock *MBB, double w) { > +void ProfileInfoT:: > + setExecutionCount(const MachineBasicBlock *MBB, double w) { > DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName() > << " (weight: " << format("%.20g",w) << ")\n"); > BlockInformation[MBB->getParent()][MBB] = w; > } > > template<> > -void ProfileInfo::addEdgeWeight(Edge e, double w) { > +void ProfileInfoT::addEdgeWeight(Edge e, double w) { > double oldw = getEdgeWeight(e); > assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); > DEBUG(errs() << "Adding to Edge " << e > @@ -182,7 +186,8 @@ > } > > template<> > -void ProfileInfo::addExecutionCount(const BasicBlock *BB, double w) { > +void ProfileInfoT:: > + addExecutionCount(const BasicBlock *BB, double w) { > double oldw = getExecutionCount(BB); > assert (oldw 
!= MissingValue && "Adding weight to Block with no previous weight"); > DEBUG(errs() << "Adding to Block " << BB->getName() > @@ -191,7 +196,7 @@ > } > > template<> > -void ProfileInfo::removeBlock(const BasicBlock *BB) { > +void ProfileInfoT::removeBlock(const BasicBlock *BB) { > std::map::iterator J = > BlockInformation.find(BB->getParent()); > if (J == BlockInformation.end()) return; > @@ -201,7 +206,7 @@ > } > > template<> > -void ProfileInfo::removeEdge(Edge e) { > +void ProfileInfoT::removeEdge(Edge e) { > std::map::iterator J = > EdgeInformation.find(getFunction(e)); > if (J == EdgeInformation.end()) return; > @@ -211,7 +216,8 @@ > } > > template<> > -void ProfileInfo::replaceEdge(const Edge &oldedge, const Edge &newedge) { > +void ProfileInfoT:: > + replaceEdge(const Edge &oldedge, const Edge &newedge) { > double w; > if ((w = getEdgeWeight(newedge)) == MissingValue) { > w = getEdgeWeight(oldedge); > @@ -225,8 +231,9 @@ > } > > template<> > -const BasicBlock *ProfileInfo::GetPath(const BasicBlock *Src, const BasicBlock *Dest, > - Path &P, unsigned Mode) { > +const BasicBlock *ProfileInfoT:: > + GetPath(const BasicBlock *Src, const BasicBlock *Dest, > + Path &P, unsigned Mode) { > const BasicBlock *BB = 0; > bool hasFoundPath = false; > > @@ -268,7 +275,8 @@ > } > > template<> > -void ProfileInfo::divertFlow(const Edge &oldedge, const Edge &newedge) { > +void ProfileInfoT:: > + divertFlow(const Edge &oldedge, const Edge &newedge) { > DEBUG(errs() << "Diverting " << oldedge << " via " << newedge ); > > // First check if the old edge was taken, if not, just delete it... > @@ -302,8 +310,8 @@ > /// This checks all edges of the function the blocks reside in and replaces the > /// occurences of RmBB with DestBB. 
> template<> > -void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, > - const BasicBlock *DestBB) { > +void ProfileInfoT:: > + replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { > DEBUG(errs() << "Replacing " << RmBB->getName() > << " with " << DestBB->getName() << "\n"); > const Function *F = DestBB->getParent(); > @@ -352,10 +360,10 @@ > /// Since its possible that there is more than one edge in the CFG from FristBB > /// to SecondBB its necessary to redirect the flow proporionally. > template<> > -void ProfileInfo::splitEdge(const BasicBlock *FirstBB, > - const BasicBlock *SecondBB, > - const BasicBlock *NewBB, > - bool MergeIdenticalEdges) { > +void ProfileInfoT::splitEdge(const BasicBlock *FirstBB, > + const BasicBlock *SecondBB, > + const BasicBlock *NewBB, > + bool MergeIdenticalEdges) { > const Function *F = FirstBB->getParent(); > std::map::iterator J = > EdgeInformation.find(F); > @@ -398,7 +406,8 @@ > } > > template<> > -void ProfileInfo::splitBlock(const BasicBlock *Old, const BasicBlock* New) { > +void ProfileInfoT::splitBlock(const BasicBlock *Old, > + const BasicBlock* New) { > const Function *F = Old->getParent(); > std::map::iterator J = > EdgeInformation.find(F); > @@ -426,8 +435,10 @@ > } > > template<> > -void ProfileInfo::splitBlock(const BasicBlock *BB, const BasicBlock* NewBB, > - BasicBlock *const *Preds, unsigned NumPreds) { > +void ProfileInfoT::splitBlock(const BasicBlock *BB, > + const BasicBlock* NewBB, > + BasicBlock *const *Preds, > + unsigned NumPreds) { > const Function *F = BB->getParent(); > std::map::iterator J = > EdgeInformation.find(F); > @@ -461,7 +472,8 @@ > } > > template<> > -void ProfileInfo::transfer(const Function *Old, const Function *New) { > +void ProfileInfoT::transfer(const Function *Old, > + const Function *New) { > DEBUG(errs() << "Replacing Function " << Old->getName() << " with " > << New->getName() << "\n"); > std::map::iterator J = > @@ -474,8 +486,8 @@ > FunctionInformation.erase(Old); > 
} > > -static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, ProfileInfo::Edge &tocalc, > - unsigned &uncalc) { > +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, > + ProfileInfo::Edge &tocalc, unsigned &uncalc) { > if (w == ProfileInfo::MissingValue) { > tocalc = edge; > uncalc++; > @@ -486,7 +498,9 @@ > } > > template<> > -bool ProfileInfo::CalculateMissingEdge(const BasicBlock *BB, Edge &removed, bool assumeEmptySelf) { > +bool ProfileInfoT:: > + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, > + bool assumeEmptySelf) { > Edge edgetocalc; > unsigned uncalculated = 0; > > @@ -562,7 +576,7 @@ > } > > template<> > -bool ProfileInfo::EstimateMissingEdges(const BasicBlock *BB) { > +bool ProfileInfoT::EstimateMissingEdges(const BasicBlock *BB) { > bool hasNoSuccessors = false; > > double inWeight = 0; > @@ -619,7 +633,7 @@ > } > > template<> > -void ProfileInfo::repair(const Function *F) { > +void ProfileInfoT::repair(const Function *F) { > // if (getExecutionCount(&(F->getEntryBlock())) == 0) { > // for (Function::const_iterator FI = F->begin(), FE = F->end(); > // FI != FE; ++FI) { > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Mon Dec 14 23:47:38 2009 From: clattner at apple.com (Chris Lattner) Date: Mon, 14 Dec 2009 21:47:38 -0800 Subject: [llvm-commits] [llvm] r91403 - /llvm/trunk/test/LLVMC/Init.td In-Reply-To: <200912150304.nBF34EB2030697@zion.cs.uiuc.edu> References: <200912150304.nBF34EB2030697@zion.cs.uiuc.edu> Message-ID: On Dec 14, 2009, at 7:04 PM, Mikhail Glushenkov wrote: > Author: foldr > Date: Mon Dec 14 21:04:14 2009 > New Revision: 91403 > > URL: http://llvm.org/viewvc/llvm-project?rev=91403&view=rev > Log: > Pipe 'grep' output to 'count'. Hi Mikhail, Please convert this to use FileCheck. 
Not all systems have "nice" grep implementations that can handle all the escapes etc. FileCheck is documented here: http://llvm.org/docs/TestingGuide.html#FileCheck -Chris > > Modified: > llvm/trunk/test/LLVMC/Init.td > > Modified: llvm/trunk/test/LLVMC/Init.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/Init.td?rev=91403&r1=91402&r2=91403&view=diff > > ============================================================================== > --- llvm/trunk/test/LLVMC/Init.td (original) > +++ llvm/trunk/test/LLVMC/Init.td Mon Dec 14 21:04:14 2009 > @@ -1,7 +1,7 @@ > // Check that (init true/false) and (init "str") work. > // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t > -// RUN: grep cl::init("some-string") %t > -// RUN: grep cl::init(true) %t > +// RUN: grep cl::init(\\"some-string\\") %t | count 1 > +// RUN: grep cl::init(true) %t | count 1 > > include "llvm/CompilerDriver/Common.td" > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From nicholas at mxc.ca Tue Dec 15 00:05:14 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 14 Dec 2009 22:05:14 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> Message-ID: <4B27271A.7040105@mxc.ca> Evan Cheng wrote: > Author: evancheng > Date: Mon Dec 14 18:41:36 2009 > New Revision: 91378 > > URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev > Log: > Propagate zest through logical shift. Evan, should instcombine be doing this? 
> > Added: > llvm/trunk/test/CodeGen/X86/setcc.ll > llvm/trunk/test/CodeGen/X86/zext-shl.ll > Modified: > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 > @@ -3278,6 +3278,16 @@ > if (SCC.getNode()) return SCC; > } > > + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) > + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL)&& > + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND&& > + N0.hasOneUse()) { > + DebugLoc dl = N->getDebugLoc(); > + return DAG.getNode(N0.getOpcode(), dl, VT, > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), > + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); > + } > + > return SDValue(); > } > > > Added: llvm/trunk/test/CodeGen/X86/setcc.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto > > ============================================================================== > --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) > +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 > @@ -0,0 +1,13 @@ > +; RUN: llc< %s -mtriple=x86_64-apple-darwin | FileCheck %s > + > +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t1: > +; CHECK: seta %al > +; CHECK: movzbl %al, %eax > +; CHECK: shll $5, %eax > + %0 = icmp ugt i16 %x, 26 ; [#uses=1] > + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] > + ret i16 %iftmp.1.0 > +} > + > > Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto > > 
============================================================================== > --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) > +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 > @@ -0,0 +1,38 @@ > +; RUN: llc< %s -march=x86 | FileCheck %s > + > +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t1: > +; CHECK: shll > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = shl i16 %0, 5 > + %2 = zext i16 %1 to i32 > + ret i32 %2 > +} > + > +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { > +entry: > +; CHECK: t2: > +; CHECK: shrl > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = lshr i16 %0, 3 > + %2 = zext i16 %1 to i32 > + ret i32 %2 > +} > + > +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { > +entry: > +; CHECK: t3: > +; CHECK: shll > +; CHECK-NOT: movzwl > +; CHECK: ret > + %0 = zext i8 %x to i16 > + %1 = zext i8 %y to i16 > + %2 = shl i16 %0, %1 > + %3 = zext i16 %2 to i32 > + ret i32 %3 > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From nicholas at mxc.ca Tue Dec 15 00:08:25 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 14 Dec 2009 22:08:25 -0800 Subject: [llvm-commits] [llvm] r91380 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/and_ops.ll In-Reply-To: <200912150052.nBF0qBhN025917@zion.cs.uiuc.edu> References: <200912150052.nBF0qBhN025917@zion.cs.uiuc.edu> Message-ID: <4B2727D9.5090203@mxc.ca> Evan Cheng wrote: > Author: evancheng > Date: Mon Dec 14 18:52:11 2009 > New Revision: 91380 > > URL: http://llvm.org/viewvc/llvm-project?rev=91380&view=rev > Log: > Fold (zext (and x, cst)) -> (and (zext x), cst). Again, this looks like something instcombine ought to be doing instead? 
Nick > > Modified: > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > llvm/trunk/test/CodeGen/CellSPU/and_ops.ll > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91380&r1=91379&r2=91380&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:52:11 2009 > @@ -3202,6 +3202,19 @@ > X, DAG.getConstant(Mask, VT)); > } > > + // Fold (zext (and x, cst)) -> (and (zext x), cst) > + if (N0.getOpcode() == ISD::AND&& > + N0.getOperand(1).getOpcode() == ISD::Constant&& > + N0.getOperand(0).getOpcode() != ISD::TRUNCATE&& > + N0.getOperand(0).hasOneUse()) { > + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); > + Mask.zext(VT.getSizeInBits()); > + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, > + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, > + N0.getOperand(0)), > + DAG.getConstant(Mask, VT)); > + } > + > // fold (zext (load x)) -> (zext (truncate (zextload x))) > if (ISD::isNON_EXTLoad(N0.getNode())&& > ((!LegalOperations&& !cast(N0)->isVolatile()) || > > Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=91380&r1=91379&r2=91380&view=diff > > ============================================================================== > --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original) > +++ llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Mon Dec 14 18:52:11 2009 > @@ -1,9 +1,9 @@ > ; RUN: llc< %s -march=cellspu> %t1.s > -; RUN: grep and %t1.s | count 234 > +; RUN: grep and %t1.s | count 230 > ; RUN: grep andc %t1.s | count 85 > -; RUN: grep andi %t1.s | count 37 > -; RUN: grep andhi %t1.s | count 30 > -; RUN: grep andbi %t1.s | count 4 > +; RUN: grep andi %t1.s | count 39 > +; RUN: grep 
andhi %t1.s | count 28 > +; RUN: grep andbi %t1.s | count 2 > > target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" > target triple = "spu" > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From sabre at nondot.org Tue Dec 15 00:14:33 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 06:14:33 -0000 Subject: [llvm-commits] [llvm] r91416 - /llvm/trunk/include/llvm/Support/Compiler.h Message-ID: <200912150614.nBF6EXLV005148@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 00:14:33 2009 New Revision: 91416 URL: http://llvm.org/viewvc/llvm-project?rev=91416&view=rev Log: add an ALWAYS_INLINE macro, which does the obvious thing. Modified: llvm/trunk/include/llvm/Support/Compiler.h Modified: llvm/trunk/include/llvm/Support/Compiler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Compiler.h?rev=91416&r1=91415&r2=91416&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/Compiler.h (original) +++ llvm/trunk/include/llvm/Support/Compiler.h Tue Dec 15 00:14:33 2009 @@ -70,6 +70,16 @@ #define DISABLE_INLINE #endif +// ALWAYS_INLINE - On compilers where we have a directive to do so, mark a +// method "always inline" because it is performance sensitive. +#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +#define ALWAYS_INLINE __attribute__((always_inline)) +#else +// TODO: No idea how to do this with MSVC. 
+#define ALWAYS_INLINE +#endif + + #ifdef __GNUC__ #define NORETURN __attribute__((noreturn)) #elif defined(_MSC_VER) From evan.cheng at apple.com Tue Dec 15 00:49:02 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 06:49:02 -0000 Subject: [llvm-commits] [llvm] r91417 - /llvm/trunk/lib/Target/X86/X86Instr64bit.td Message-ID: <200912150649.nBF6n3V1006606@zion.cs.uiuc.edu> Author: evancheng Date: Tue Dec 15 00:49:02 2009 New Revision: 91417 URL: http://llvm.org/viewvc/llvm-project?rev=91417&view=rev Log: Fix an encoding bug. Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=91417&r1=91416&r2=91417&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Tue Dec 15 00:49:02 2009 @@ -1335,7 +1335,7 @@ // Use sbb to materialize carry flag into a GPR. 
let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "sbb{q}\t$dst, $dst", [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; From evan.cheng at apple.com Tue Dec 15 00:56:28 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 22:56:28 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <4B27271A.7040105@mxc.ca> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <4B27271A.7040105@mxc.ca> Message-ID: <0C6C95C0-C677-4131-9485-8FA8D5C10C33@apple.com> On Dec 14, 2009, at 10:05 PM, Nick Lewycky wrote: > Evan Cheng wrote: >> Author: evancheng >> Date: Mon Dec 14 18:41:36 2009 >> New Revision: 91378 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >> Log: >> Propagate zest through logical shift. > > Evan, should instcombine be doing this? I am not sure. This is one of those transforms that can go into either pass. Now that dag combine does it, I am not sure if there is benefit to adding it to instcombine. 
Evan > >> >> Added: >> llvm/trunk/test/CodeGen/X86/setcc.ll >> llvm/trunk/test/CodeGen/X86/zext-shl.ll >> Modified: >> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 >> @@ -3278,6 +3278,16 @@ >> if (SCC.getNode()) return SCC; >> } >> >> + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) >> + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL)&& >> + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND&& >> + N0.hasOneUse()) { >> + DebugLoc dl = N->getDebugLoc(); >> + return DAG.getNode(N0.getOpcode(), dl, VT, >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), >> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); >> + } >> + >> return SDValue(); >> } >> >> >> Added: llvm/trunk/test/CodeGen/X86/setcc.ll >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto >> >> ============================================================================== >> --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) >> +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 >> @@ -0,0 +1,13 @@ >> +; RUN: llc< %s -mtriple=x86_64-apple-darwin | FileCheck %s >> + >> +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t1: >> +; CHECK: seta %al >> +; CHECK: movzbl %al, %eax >> +; CHECK: shll $5, %eax >> + %0 = icmp ugt i16 %x, 26 ; [#uses=1] >> + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] >> + ret i16 %iftmp.1.0 >> +} >> + >> >> Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll >> URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto >> >> ============================================================================== >> --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) >> +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 >> @@ -0,0 +1,38 @@ >> +; RUN: llc< %s -march=x86 | FileCheck %s >> + >> +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t1: >> +; CHECK: shll >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = shl i16 %0, 5 >> + %2 = zext i16 %1 to i32 >> + ret i32 %2 >> +} >> + >> +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { >> +entry: >> +; CHECK: t2: >> +; CHECK: shrl >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = lshr i16 %0, 3 >> + %2 = zext i16 %1 to i32 >> + ret i32 %2 >> +} >> + >> +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { >> +entry: >> +; CHECK: t3: >> +; CHECK: shll >> +; CHECK-NOT: movzwl >> +; CHECK: ret >> + %0 = zext i8 %x to i16 >> + %1 = zext i8 %y to i16 >> + %2 = shl i16 %0, %1 >> + %3 = zext i16 %2 to i32 >> + ret i32 %3 >> +} >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From evan.cheng at apple.com Tue Dec 15 00:58:30 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 14 Dec 2009 22:58:30 -0800 Subject: [llvm-commits] [llvm] r91380 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/and_ops.ll In-Reply-To: <4B2727D9.5090203@mxc.ca> References: <200912150052.nBF0qBhN025917@zion.cs.uiuc.edu> <4B2727D9.5090203@mxc.ca> Message-ID: <26DE8EB3-26F1-44B2-B019-B43B1C0BAE30@apple.com> On Dec 14, 2009, at 10:08 PM, Nick Lewycky wrote: > Evan Cheng wrote: >> Author: evancheng >> Date: Mon Dec 14 18:52:11 2009 >> New Revision: 91380 >> >> URL: 
http://llvm.org/viewvc/llvm-project?rev=91380&view=rev >> Log: >> Fold (zext (and x, cst)) -> (and (zext x), cst). > > Again, this looks like something instcombine ought to be doing instead? Why? See my patch that uses sbb to put the carry bit into a GPR. Target lowering can create opportunities for this transformation. Evan > > Nick > >> >> Modified: >> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> llvm/trunk/test/CodeGen/CellSPU/and_ops.ll >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91380&r1=91379&r2=91380&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:52:11 2009 >> @@ -3202,6 +3202,19 @@ >> X, DAG.getConstant(Mask, VT)); >> } >> >> + // Fold (zext (and x, cst)) -> (and (zext x), cst) >> + if (N0.getOpcode() == ISD::AND&& >> + N0.getOperand(1).getOpcode() == ISD::Constant&& >> + N0.getOperand(0).getOpcode() != ISD::TRUNCATE&& >> + N0.getOperand(0).hasOneUse()) { >> + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); >> + Mask.zext(VT.getSizeInBits()); >> + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, >> + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, >> + N0.getOperand(0)), >> + DAG.getConstant(Mask, VT)); >> + } >> + >> // fold (zext (load x)) -> (zext (truncate (zextload x))) >> if (ISD::isNON_EXTLoad(N0.getNode())&& >> ((!LegalOperations&& !cast(N0)->isVolatile()) || >> >> Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=91380&r1=91379&r2=91380&view=diff >> >> ============================================================================== >> --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original) >> +++
llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Mon Dec 14 18:52:11 2009 >> @@ -1,9 +1,9 @@ >> ; RUN: llc< %s -march=cellspu> %t1.s >> -; RUN: grep and %t1.s | count 234 >> +; RUN: grep and %t1.s | count 230 >> ; RUN: grep andc %t1.s | count 85 >> -; RUN: grep andi %t1.s | count 37 >> -; RUN: grep andhi %t1.s | count 30 >> -; RUN: grep andbi %t1.s | count 4 >> +; RUN: grep andi %t1.s | count 39 >> +; RUN: grep andhi %t1.s | count 28 >> +; RUN: grep andbi %t1.s | count 2 >> >> target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" >> target triple = "spu" >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > From nicholas at mxc.ca Tue Dec 15 01:11:11 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 14 Dec 2009 23:11:11 -0800 Subject: [llvm-commits] [llvm] r91378 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/setcc.ll test/CodeGen/X86/zext-shl.ll In-Reply-To: <0C6C95C0-C677-4131-9485-8FA8D5C10C33@apple.com> References: <200912150041.nBF0faC8025493@zion.cs.uiuc.edu> <4B27271A.7040105@mxc.ca> <0C6C95C0-C677-4131-9485-8FA8D5C10C33@apple.com> Message-ID: <4B27368F.1010504@mxc.ca> Evan Cheng wrote: > > On Dec 14, 2009, at 10:05 PM, Nick Lewycky wrote: > >> Evan Cheng wrote: >>> Author: evancheng >>> Date: Mon Dec 14 18:41:36 2009 >>> New Revision: 91378 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91378&view=rev >>> Log: >>> Propagate zest through logical shift. >> >> Evan, should instcombine be doing this? > > I am not sure. This is one of those transforms that can go into either pass. Now that dag combine does it, I am not sure if there is benefit to adding it to instcombine. Something like this, which can lead to further optimizations, probably belongs in instcombine.
It might make the difference between being too large for inlining and not, for example. It would need to be in the DAG combiner if the backend internally produces this sort of code. It could be that it belongs in both places, which is why I'm asking. > Evan > >> >>> >>> Added: >>> llvm/trunk/test/CodeGen/X86/setcc.ll >>> llvm/trunk/test/CodeGen/X86/zext-shl.ll >>> Modified: >>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >>> >>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91378&r1=91377&r2=91378&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:41:36 2009 >>> @@ -3278,6 +3278,16 @@ >>> if (SCC.getNode()) return SCC; >>> } >>> >>> + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) >>> + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL)&& >>> + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND&& >>> + N0.hasOneUse()) { >>> + DebugLoc dl = N->getDebugLoc(); >>> + return DAG.getNode(N0.getOpcode(), dl, VT, >>> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), >>> + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); >>> + } >>> + >>> return SDValue(); >>> } >>> >>> >>> Added: llvm/trunk/test/CodeGen/X86/setcc.ll >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91378&view=auto >>> >>> ============================================================================== >>> --- llvm/trunk/test/CodeGen/X86/setcc.ll (added) >>> +++ llvm/trunk/test/CodeGen/X86/setcc.ll Mon Dec 14 18:41:36 2009 >>> @@ -0,0 +1,13 @@ >>> +; RUN: llc< %s -mtriple=x86_64-apple-darwin | FileCheck %s >>> + >>> +define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { >>> +entry: >>> +; CHECK: t1: >>> +; CHECK: seta 
%al >>> +; CHECK: movzbl %al, %eax >>> +; CHECK: shll $5, %eax >>> + %0 = icmp ugt i16 %x, 26 ; [#uses=1] >>> + %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] >>> + ret i16 %iftmp.1.0 >>> +} >>> + >>> >>> Added: llvm/trunk/test/CodeGen/X86/zext-shl.ll >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-shl.ll?rev=91378&view=auto >>> >>> ============================================================================== >>> --- llvm/trunk/test/CodeGen/X86/zext-shl.ll (added) >>> +++ llvm/trunk/test/CodeGen/X86/zext-shl.ll Mon Dec 14 18:41:36 2009 >>> @@ -0,0 +1,38 @@ >>> +; RUN: llc< %s -march=x86 | FileCheck %s >>> + >>> +define i32 @t1(i8 zeroext %x) nounwind readnone ssp { >>> +entry: >>> +; CHECK: t1: >>> +; CHECK: shll >>> +; CHECK-NOT: movzwl >>> +; CHECK: ret >>> + %0 = zext i8 %x to i16 >>> + %1 = shl i16 %0, 5 >>> + %2 = zext i16 %1 to i32 >>> + ret i32 %2 >>> +} >>> + >>> +define i32 @t2(i8 zeroext %x) nounwind readnone ssp { >>> +entry: >>> +; CHECK: t2: >>> +; CHECK: shrl >>> +; CHECK-NOT: movzwl >>> +; CHECK: ret >>> + %0 = zext i8 %x to i16 >>> + %1 = lshr i16 %0, 3 >>> + %2 = zext i16 %1 to i32 >>> + ret i32 %2 >>> +} >>> + >>> +define i32 @t3(i8 zeroext %x, i8 zeroext %y) nounwind readnone ssp { >>> +entry: >>> +; CHECK: t3: >>> +; CHECK: shll >>> +; CHECK-NOT: movzwl >>> +; CHECK: ret >>> + %0 = zext i8 %x to i16 >>> + %1 = zext i8 %y to i16 >>> + %2 = shl i16 %0, %1 >>> + %3 = zext i16 %2 to i32 >>> + ret i32 %3 >>> +} >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> > > From nicholas at mxc.ca Tue Dec 15 01:15:20 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 14 Dec 2009 23:15:20 -0800 Subject: [llvm-commits] [llvm] r91380 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/and_ops.ll In-Reply-To: <26DE8EB3-26F1-44B2-B019-B43B1C0BAE30@apple.com> 
References: <200912150052.nBF0qBhN025917@zion.cs.uiuc.edu> <4B2727D9.5090203@mxc.ca> <26DE8EB3-26F1-44B2-B019-B43B1C0BAE30@apple.com> Message-ID: <4B273788.7070609@mxc.ca> Evan Cheng wrote: > > On Dec 14, 2009, at 10:08 PM, Nick Lewycky wrote: > >> Evan Cheng wrote: >>> Author: evancheng >>> Date: Mon Dec 14 18:52:11 2009 >>> New Revision: 91380 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91380&view=rev >>> Log: >>> Fold (zext (and x, cst)) -> (and (zext x), cst). >> >> Again, this looks like something instcombine ought to be doing instead? > > Why? See my patch that use sbb to put carry bit into GPR. Target lowering can create opportunities for this transformation. I was asking because instcombine doesn't pick one. That means that anyone who wants to optimize it further down the line has to check for both of these patterns. Instcombine should just canonicalize it down to one -- but it doesn't seem like either one is particularly better in this case, so I thought I'd see if you had an opinion from the backend point of view. No biggie. This doesn't strike me as a very important transformation to do in the middle-end. 
Nick > Evan > >> >> Nick >> >>> >>> Modified: >>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >>> llvm/trunk/test/CodeGen/CellSPU/and_ops.ll >>> >>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91380&r1=91379&r2=91380&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Dec 14 18:52:11 2009 >>> @@ -3202,6 +3202,19 @@ >>> X, DAG.getConstant(Mask, VT)); >>> } >>> >>> + // Fold (zext (and x, cst)) -> (and (zext x), cst) >>> + if (N0.getOpcode() == ISD::AND&& >>> + N0.getOperand(1).getOpcode() == ISD::Constant&& >>> + N0.getOperand(0).getOpcode() != ISD::TRUNCATE&& >>> + N0.getOperand(0).hasOneUse()) { >>> + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); >>> + Mask.zext(VT.getSizeInBits()); >>> + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, >>> + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, >>> + N0.getOperand(0)), >>> + DAG.getConstant(Mask, VT)); >>> + } >>> + >>> // fold (zext (load x)) -> (zext (truncate (zextload x))) >>> if (ISD::isNON_EXTLoad(N0.getNode())&& >>> ((!LegalOperations&& !cast(N0)->isVolatile()) || >>> >>> Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=91380&r1=91379&r2=91380&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original) >>> +++ llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Mon Dec 14 18:52:11 2009 >>> @@ -1,9 +1,9 @@ >>> ; RUN: llc< %s -march=cellspu> %t1.s >>> -; RUN: grep and %t1.s | count 234 >>> +; RUN: grep and %t1.s | count 230 >>> ; RUN: grep andc %t1.s | count 85 >>> -; RUN: grep andi %t1.s | count 37 >>> -; RUN: grep 
andhi %t1.s | count 30 >>> -; RUN: grep andbi %t1.s | count 4 >>> +; RUN: grep andi %t1.s | count 39 >>> +; RUN: grep andhi %t1.s | count 28 >>> +; RUN: grep andbi %t1.s | count 2 >>> >>> target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" >>> target triple = "spu" >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> > > From foldr at codedgers.com Tue Dec 15 01:20:50 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 07:20:50 -0000 Subject: [llvm-commits] [llvm] r91419 - in /llvm/trunk: test/LLVMC/AppendCmdHook.td test/LLVMC/OutputSuffixHook.td utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912150720.nBF7KoB8007577@zion.cs.uiuc.edu> Author: foldr Date: Tue Dec 15 01:20:50 2009 New Revision: 91419 URL: http://llvm.org/viewvc/llvm-project?rev=91419&view=rev Log: Support hook invocation from 'append_cmd'. Added: llvm/trunk/test/LLVMC/AppendCmdHook.td llvm/trunk/test/LLVMC/OutputSuffixHook.td Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Added: llvm/trunk/test/LLVMC/AppendCmdHook.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/AppendCmdHook.td?rev=91419&view=auto ============================================================================== --- llvm/trunk/test/LLVMC/AppendCmdHook.td (added) +++ llvm/trunk/test/LLVMC/AppendCmdHook.td Tue Dec 15 01:20:50 2009 @@ -0,0 +1,28 @@ +// Check that hooks can be invoked from 'append_cmd'. 
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file=%t %s +// RUN: %compile_cxx -fexceptions -x c++ %t + +include "llvm/CompilerDriver/Common.td" + +// CHECK: std::string MyHook() + +def OptList : OptionList<[ +(switch_option "dummy1", (help "none")), +(switch_option "dummy2", (help "none")) +]>; + +def dummy_tool : Tool<[ +(cmd_line "dummy_cmd $INFILE"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + // CHECK: push_back("-arg1") + // CHECK: push_back("-arg2") + (switch_on "dummy1"), (append_cmd "-arg1 -arg2"), + // CHECK: push_back("-arg3") + // CHECK: hooks::MyHook() + (switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)"))) +]>; + +def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; Added: llvm/trunk/test/LLVMC/OutputSuffixHook.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OutputSuffixHook.td?rev=91419&view=auto ============================================================================== --- llvm/trunk/test/LLVMC/OutputSuffixHook.td (added) +++ llvm/trunk/test/LLVMC/OutputSuffixHook.td Tue Dec 15 01:20:50 2009 @@ -0,0 +1,24 @@ +// Check that hooks can be invoked from 'output_suffix'. 
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file=%t %s +// RUN: %compile_cxx -fexceptions -x c++ %t +// XFAIL: * + +include "llvm/CompilerDriver/Common.td" + +// CHECK: std::string MyHook() + +def OptList : OptionList<[ +(switch_option "dummy1", (help "none")) +]>; + +def dummy_tool : Tool<[ +(cmd_line "dummy_cmd $INFILE"), +(in_language "dummy_lang"), +(out_language "dummy_lang"), +(actions (case + // CHECK: hooks::MyHook() + (switch_on "dummy1"), (output_suffix "$CALL(MyHook)"))) +]>; + +def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>; Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91419&r1=91418&r2=91419&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Tue Dec 15 01:20:50 2009 @@ -15,8 +15,6 @@ #include "Record.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include @@ -1454,9 +1452,9 @@ EmitCaseStatementCallback(Callback, O), IndentLevel); } -/// TokenizeCmdline - converts from "$CALL(HookName, 'Arg1', 'Arg2')/path" to -/// ["$CALL(", "HookName", "Arg1", "Arg2", ")/path"] . -/// Helper function used by EmitCmdLineVecFill and. +/// TokenizeCmdline - converts from +/// "$CALL(HookName, 'Arg1', 'Arg2')/path -arg1 -arg2" to +/// ["$CALL(", "HookName", "Arg1", "Arg2", ")/path", "-arg1", "-arg2"]. 
void TokenizeCmdline(const std::string& CmdLine, StrVector& Out) { const char* Delimiters = " \t\n\v\f\r"; enum TokenizerState @@ -1823,17 +1821,36 @@ const OptionDescriptions& OptDescs; typedef EmitActionHandlersCallbackHandler Handler; - void onAppendCmd (const DagInit& Dag, - unsigned IndentLevel, raw_ostream& O) const + /// EmitHookInvocation - Common code for hook invocation from actions. Used by + /// onAppendCmd and onOutputSuffix. + void EmitHookInvocation(const std::string& Str, + const char* BlockOpen, const char* BlockClose, + unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 1); - const std::string& Cmd = InitPtrToString(Dag.getArg(0)); StrVector Out; - llvm::SplitString(Cmd, Out); + TokenizeCmdline(Str, Out); for (StrVector::const_iterator B = Out.begin(), E = Out.end(); - B != E; ++B) - O.indent(IndentLevel) << "vec.push_back(\"" << *B << "\");\n"; + B != E; ++B) { + const std::string& cmd = *B; + + O.indent(IndentLevel) << BlockOpen; + + if (cmd.at(0) == '$') + B = SubstituteSpecialCommands(B, E, /* IsJoin = */ true, O); + else + O << '"' << cmd << '"'; + + O << BlockClose; + } + } + + void onAppendCmd (const DagInit& Dag, + unsigned IndentLevel, raw_ostream& O) const + { + checkNumberOfArguments(&Dag, 1); + this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)), + "vec.push_back(", ");\n", IndentLevel, O); } void onForward (const DagInit& Dag, @@ -1886,13 +1903,12 @@ << (D.isParameter() ? 
".c_str()" : "") << "));\n"; } - void onOutputSuffix (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { checkNumberOfArguments(&Dag, 1); - const std::string& OutSuf = InitPtrToString(Dag.getArg(0)); - O.indent(IndentLevel) << "output_suffix = \"" << OutSuf << "\";\n"; + this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)), + "output_suffix = ", ";\n", IndentLevel, O); } void onStopCompilation (const DagInit& Dag, @@ -2521,7 +2537,9 @@ {} void onAction (const DagInit& Dag) { - if (GetOperatorName(Dag) == "forward_transformed_value") { + const std::string& Name = GetOperatorName(Dag); + + if (Name == "forward_transformed_value") { checkNumberOfArguments(Dag, 2); const std::string& OptName = InitPtrToString(Dag.getArg(0)); const std::string& HookName = InitPtrToString(Dag.getArg(1)); @@ -2530,29 +2548,16 @@ HookNames_[HookName] = HookInfo(D.isList() ? HookInfo::ListHook : HookInfo::ArgHook); } - } - - void operator()(const Init* Arg) { - - // We're invoked on an action (either a dag or a dag list). - if (typeid(*Arg) == typeid(DagInit)) { - const DagInit& Dag = InitPtrToDag(Arg); - this->onAction(Dag); - return; - } - else if (typeid(*Arg) == typeid(ListInit)) { - const ListInit& List = InitPtrToList(Arg); - for (ListInit::const_iterator B = List.begin(), E = List.end(); B != E; - ++B) { - const DagInit& Dag = InitPtrToDag(*B); - this->onAction(Dag); - } - return; + else if (Name == "append_cmd" || Name == "output_suffix") { + checkNumberOfArguments(Dag, 1); + this->onCmdLine(InitPtrToString(Dag.getArg(0))); } + } - // We're invoked on a command line. 
+ void onCmdLine(const std::string& Cmd) { StrVector cmds; - TokenizeCmdline(InitPtrToString(Arg), cmds); + TokenizeCmdline(Cmd, cmds); + for (StrVector::const_iterator B = cmds.begin(), E = cmds.end(); B != E; ++B) { const std::string& cmd = *B; @@ -2562,7 +2567,6 @@ checkedIncrement(B, E, "Syntax error in $CALL invocation!"); const std::string& HookName = *B; - if (HookName.at(0) == ')') throw "$CALL invoked with no arguments!"; @@ -2578,9 +2582,30 @@ + HookName; else HookNames_[HookName] = HookInfo(NumArgs); + } + } + } + + void operator()(const Init* Arg) { + // We're invoked on an action (either a dag or a dag list). + if (typeid(*Arg) == typeid(DagInit)) { + const DagInit& Dag = InitPtrToDag(Arg); + this->onAction(Dag); + return; + } + else if (typeid(*Arg) == typeid(ListInit)) { + const ListInit& List = InitPtrToList(Arg); + for (ListInit::const_iterator B = List.begin(), E = List.end(); B != E; + ++B) { + const DagInit& Dag = InitPtrToDag(*B); + this->onAction(Dag); } + return; } + + // We're invoked on a command line. + this->onCmdLine(InitPtrToString(Arg)); } void operator()(const DagInit* Test, unsigned, bool) { From foldr at codedgers.com Tue Dec 15 01:21:14 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Tue, 15 Dec 2009 07:21:14 -0000 Subject: [llvm-commits] [llvm] r91420 - in /llvm/trunk: test/LLVMC/AppendCmdHook.td test/LLVMC/ExternOptions.td test/LLVMC/ForwardAs.td test/LLVMC/ForwardTransformedValue.td test/LLVMC/ForwardValue.td test/LLVMC/HookWithArguments.td test/LLVMC/HookWithInFile.td test/LLVMC/Init.td test/LLVMC/MultiValuedOption.td test/LLVMC/NoActions.td test/LLVMC/OneOrMore.td test/LLVMC/OptionPreprocessor.td test/LLVMC/OutputSuffixHook.td utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912150721.nBF7LFsl007611@zion.cs.uiuc.edu> Author: foldr Date: Tue Dec 15 01:21:14 2009 New Revision: 91420 URL: http://llvm.org/viewvc/llvm-project?rev=91420&view=rev Log: Convert llvmc tests to FileCheck. 
Modified: llvm/trunk/test/LLVMC/AppendCmdHook.td llvm/trunk/test/LLVMC/ExternOptions.td llvm/trunk/test/LLVMC/ForwardAs.td llvm/trunk/test/LLVMC/ForwardTransformedValue.td llvm/trunk/test/LLVMC/ForwardValue.td llvm/trunk/test/LLVMC/HookWithArguments.td llvm/trunk/test/LLVMC/HookWithInFile.td llvm/trunk/test/LLVMC/Init.td llvm/trunk/test/LLVMC/MultiValuedOption.td llvm/trunk/test/LLVMC/NoActions.td llvm/trunk/test/LLVMC/OneOrMore.td llvm/trunk/test/LLVMC/OptionPreprocessor.td llvm/trunk/test/LLVMC/OutputSuffixHook.td llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/test/LLVMC/AppendCmdHook.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/AppendCmdHook.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/AppendCmdHook.td (original) +++ llvm/trunk/test/LLVMC/AppendCmdHook.td Tue Dec 15 01:21:14 2009 @@ -1,6 +1,6 @@ // Check that hooks can be invoked from 'append_cmd'. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: FileCheck -input-file=%t %s +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" Modified: llvm/trunk/test/LLVMC/ExternOptions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ExternOptions.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ExternOptions.td (original) +++ llvm/trunk/test/LLVMC/ExternOptions.td Tue Dec 15 01:21:14 2009 @@ -1,11 +1,13 @@ // Check that extern options work. // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep {extern .* AutoGeneratedSwitch_Wall} %t +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" +// CHECK: extern cl::opt AutoGeneratedSwitch_Wall + def OptList : OptionList<[(switch_option "Wall", (extern)), (parameter_option "std", (extern)), (prefix_list_option "L", (extern))]>; Modified: llvm/trunk/test/LLVMC/ForwardAs.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardAs.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardAs.td (original) +++ llvm/trunk/test/LLVMC/ForwardAs.td Tue Dec 15 01:21:14 2009 @@ -1,7 +1,7 @@ // Check the fix for PR4159. // http://llvm.org/bugs/show_bug.cgi?id=4159 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep unique_name %t +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" @@ -13,6 +13,7 @@ (in_language "dummy"), (out_language "dummy"), (actions (case + // CHECK: vec.push_back("unique_name") (not_empty "dummy"), (forward_as "dummy", "unique_name"))) ]>; Modified: llvm/trunk/test/LLVMC/ForwardTransformedValue.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardTransformedValue.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardTransformedValue.td (original) +++ llvm/trunk/test/LLVMC/ForwardTransformedValue.td Tue Dec 15 01:21:14 2009 @@ -1,8 +1,7 @@ // Check that forward_transformed_value works. // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep HookA %t | count 2 -// RUN: grep HookB %t | count 2 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" @@ -10,12 +9,17 @@ def OptList : OptionList<[(parameter_option "a", (extern)), (prefix_list_option "b", (extern))]>; +// CHECK: std::string HookA +// CHECK: std::string HookB + def dummy_tool : Tool<[ (cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), (out_language "dummy"), (actions (case + // CHECK: HookA(AutoGeneratedParameter_a (not_empty "a"), (forward_transformed_value "a", "HookA"), + // CHECK: HookB(AutoGeneratedList_b (not_empty "b"), (forward_transformed_value "b", "HookB"))) ]>; Modified: llvm/trunk/test/LLVMC/ForwardValue.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/ForwardValue.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/ForwardValue.td (original) +++ llvm/trunk/test/LLVMC/ForwardValue.td Tue Dec 15 01:21:14 2009 @@ -1,8 +1,7 @@ // Check that forward_value works. // The dummy tool and graph are required to silence warnings. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep {vec.push_back\(AutoGeneratedParameter_a\)} %t -// RUN: grep {std::copy\(AutoGeneratedList_b.begin\(\)} %t +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" @@ -15,7 +14,9 @@ (in_language "dummy"), (out_language "dummy"), (actions (case + // CHECK: vec.push_back(AutoGeneratedParameter_a) (not_empty "a"), (forward_value "a"), + // CHECK: std::copy(AutoGeneratedList_b.begin() (not_empty "b"), (forward_value "b"))) ]>; Modified: llvm/trunk/test/LLVMC/HookWithArguments.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/HookWithArguments.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/HookWithArguments.td (original) +++ llvm/trunk/test/LLVMC/HookWithArguments.td Tue Dec 15 01:21:14 2009 @@ -1,13 +1,15 @@ // Check that hooks with arguments work. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep {Hook(const char\\* Arg0, const char\\* Arg1, const char\\* Arg2);} %t | count 1 -// RUN: grep "/path" %t | count 1 -// RUN: grep "VARIABLE" %t | count 1 -// RUN: grep "/2path" %t | count 1 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" +// CHECK: Hook(const char* Arg0, const char* Arg1, const char* Arg2); +// CHECK: std::getenv("VARIABLE") +// CHECK: "/2path" +// CHECK: "/path" + def dummy_tool : Tool<[ (cmd_line "$CALL(Hook, 'Arg1', 'Arg2', 'Arg3 Arg3Cont')/path arg1 $ENV(VARIABLE)/2path arg2 $INFILE"), (in_language "dummy"), Modified: llvm/trunk/test/LLVMC/HookWithInFile.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/HookWithInFile.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/HookWithInFile.td (original) +++ llvm/trunk/test/LLVMC/HookWithInFile.td Tue Dec 15 01:21:14 2009 @@ -1,11 +1,12 @@ // Check that a hook can be given $INFILE as an argument. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep Hook\\(inFile.c_str\\(\\)\\) %t | count 1 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" def dummy_tool : Tool<[ +// CHECK: Hook(inFile.c_str()) (cmd_line "$CALL(Hook, '$INFILE')/path $INFILE"), (in_language "dummy"), (out_language "dummy") Modified: llvm/trunk/test/LLVMC/Init.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/Init.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/Init.td (original) +++ llvm/trunk/test/LLVMC/Init.td Tue Dec 15 01:21:14 2009 @@ -1,13 +1,14 @@ // Check that (init true/false) and (init "str") work. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep cl::init(\\"some-string\\") %t | count 1 -// RUN: grep cl::init(true) %t | count 1 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ +// CHECK: cl::init(true) (switch_option "dummy1", (help "none"), (init true)), +// CHECK: cl::init("some-string") (parameter_option "dummy2", (help "none"), (init "some-string")) ]>; Modified: llvm/trunk/test/LLVMC/MultiValuedOption.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/MultiValuedOption.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/MultiValuedOption.td (original) +++ llvm/trunk/test/LLVMC/MultiValuedOption.td Tue Dec 15 01:21:14 2009 @@ -1,12 +1,13 @@ // Check that multivalued options work. // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep cl::multi_val(2) %t | count 1 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ + // CHECK: cl::multi_val(2) (prefix_list_option "foo", (multi_val 2)), (parameter_list_option "baz", (multi_val 2), (extern))]>; Modified: llvm/trunk/test/LLVMC/NoActions.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/NoActions.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/NoActions.td (original) +++ llvm/trunk/test/LLVMC/NoActions.td Tue Dec 15 01:21:14 2009 @@ -1,10 +1,11 @@ // Check that tools without associated actions are accepted. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep dummy_tool %t +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" +// CHECK: class dummy_tool : public Tool { def dummy_tool : Tool<[ (cmd_line "dummy_cmd $INFILE"), (in_language "dummy"), Modified: llvm/trunk/test/LLVMC/OneOrMore.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OneOrMore.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OneOrMore.td (original) +++ llvm/trunk/test/LLVMC/OneOrMore.td Tue Dec 15 01:21:14 2009 @@ -1,14 +1,15 @@ // Check that (one_or_more) and (zero_or_one) properties work. // The dummy tool and graph are required to silence warnings. // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep cl::Optional %t | count 1 -// RUN: grep cl::OneOrMore %t | count 1 +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" def OptList : OptionList<[ + // CHECK: cl::OneOrMore (prefix_list_option "foo", (one_or_more)), + // CHECK: cl::Optional (parameter_list_option "baz", (optional))]>; def dummy_tool : Tool<[ Modified: llvm/trunk/test/LLVMC/OptionPreprocessor.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OptionPreprocessor.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OptionPreprocessor.td (original) +++ llvm/trunk/test/LLVMC/OptionPreprocessor.td Tue Dec 15 01:21:14 2009 @@ -1,8 +1,6 @@ // Test for the OptionPreprocessor and any*. 
-// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: grep W1 %t -// RUN: grep W2 %t -// RUN: grep W3 %t +// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t include "llvm/CompilerDriver/Common.td" @@ -17,10 +15,14 @@ ]>; def Preprocess : OptionPreprocessor< -(case (and (switch_on "foo"), (any_switch_on ["bar", "baz"])), +(case + // CHECK: W1 + (and (switch_on "foo"), (any_switch_on ["bar", "baz"])), (warning "W1"), + // CHECK: W2 (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])), (warning "W2"), + // CHECK: W3 (and (empty ["foo_p", "bar_p"]), (any_not_empty ["baz_p"])), (warning "W3")) >; Modified: llvm/trunk/test/LLVMC/OutputSuffixHook.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OutputSuffixHook.td?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OutputSuffixHook.td (original) +++ llvm/trunk/test/LLVMC/OutputSuffixHook.td Tue Dec 15 01:21:14 2009 @@ -1,6 +1,6 @@ // Check that hooks can be invoked from 'output_suffix'. 
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t -// RUN: FileCheck -input-file=%t %s +// RUN: FileCheck -input-file %t %s // RUN: %compile_cxx -fexceptions -x c++ %t // XFAIL: * Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91420&r1=91419&r2=91420&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Tue Dec 15 01:21:14 2009 @@ -2169,7 +2169,7 @@ else O << "Tool"; - O << "{\nprivate:\n"; + O << " {\nprivate:\n"; O.indent(Indent1) << "static const char* InputLanguages_[];\n\n"; O << "public:\n"; From sabre at nondot.org Tue Dec 15 01:26:43 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 07:26:43 -0000 Subject: [llvm-commits] [llvm] r91421 - in /llvm/trunk: include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Bitcode/ include/llvm/CodeGen/ include/llvm/Support/ lib/Analysis/IPA/ lib/Bitcode/Reader/ lib/CodeGen/AsmPrinter/ lib/ExecutionEngine/JIT/ lib/Transforms/Scalar/ lib/Transforms/Utils/ lib/VMCore/ Message-ID: <200912150726.nBF7Qipr007844@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 01:26:43 2009 New Revision: 91421 URL: http://llvm.org/viewvc/llvm-project?rev=91421&view=rev Log: Remove isPod() from DenseMapInfo, splitting it out to its own isPodLike type trait. This is a generally useful type trait for more than just DenseMap, and we really care about whether something acts like a pod, not whether it really is a pod. 
Modified: llvm/trunk/include/llvm/ADT/DenseMap.h llvm/trunk/include/llvm/ADT/DenseMapInfo.h llvm/trunk/include/llvm/ADT/ImmutableList.h llvm/trunk/include/llvm/ADT/PointerIntPair.h llvm/trunk/include/llvm/ADT/ValueMap.h llvm/trunk/include/llvm/Analysis/AliasSetTracker.h llvm/trunk/include/llvm/Bitcode/Deserialize.h llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h llvm/trunk/include/llvm/CodeGen/SlotIndexes.h llvm/trunk/include/llvm/Support/DebugLoc.h llvm/trunk/include/llvm/Support/ValueHandle.h llvm/trunk/include/llvm/Support/type_traits.h llvm/trunk/lib/Analysis/IPA/Andersens.cpp llvm/trunk/lib/Bitcode/Reader/Deserialize.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h llvm/trunk/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp llvm/trunk/lib/Transforms/Scalar/GVN.cpp llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp llvm/trunk/lib/VMCore/LLVMContextImpl.h Modified: llvm/trunk/include/llvm/ADT/DenseMap.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/DenseMap.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/DenseMap.h (original) +++ llvm/trunk/include/llvm/ADT/DenseMap.h Tue Dec 15 01:26:43 2009 @@ -217,7 +217,8 @@ private: void CopyFrom(const DenseMap& other) { - if (NumBuckets != 0 && (!KeyInfoT::isPod() || !ValueInfoT::isPod())) { + if (NumBuckets != 0 && + (!isPodLike::value || !isPodLike::value)) { const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) { if (!KeyInfoT::isEqual(P->first, EmptyKey) && @@ -239,7 +240,7 @@ Buckets = static_cast(operator new(sizeof(BucketT) * other.NumBuckets)); - if (KeyInfoT::isPod() && ValueInfoT::isPod()) + if (isPodLike::value && isPodLike::value) memcpy(Buckets, other.Buckets, other.NumBuckets * 
sizeof(BucketT)); else for (size_t i = 0; i < other.NumBuckets; ++i) { Modified: llvm/trunk/include/llvm/ADT/DenseMapInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/DenseMapInfo.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/DenseMapInfo.h (original) +++ llvm/trunk/include/llvm/ADT/DenseMapInfo.h Tue Dec 15 01:26:43 2009 @@ -15,7 +15,7 @@ #define LLVM_ADT_DENSEMAPINFO_H #include "llvm/Support/PointerLikeTypeTraits.h" -#include +#include "llvm/Support/type_traits.h" namespace llvm { @@ -25,7 +25,6 @@ //static inline T getTombstoneKey(); //static unsigned getHashValue(const T &Val); //static bool isEqual(const T &LHS, const T &RHS); - //static bool isPod() }; // Provide DenseMapInfo for all pointers. @@ -46,7 +45,6 @@ (unsigned((uintptr_t)PtrVal) >> 9); } static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; // Provide DenseMapInfo for chars. 
@@ -54,7 +52,6 @@ static inline char getEmptyKey() { return ~0; } static inline char getTombstoneKey() { return ~0 - 1; } static unsigned getHashValue(const char& Val) { return Val * 37; } - static bool isPod() { return true; } static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; } @@ -65,7 +62,6 @@ static inline unsigned getEmptyKey() { return ~0; } static inline unsigned getTombstoneKey() { return ~0U - 1; } static unsigned getHashValue(const unsigned& Val) { return Val * 37; } - static bool isPod() { return true; } static bool isEqual(const unsigned& LHS, const unsigned& RHS) { return LHS == RHS; } @@ -78,7 +74,6 @@ static unsigned getHashValue(const unsigned long& Val) { return (unsigned)(Val * 37UL); } - static bool isPod() { return true; } static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { return LHS == RHS; } @@ -91,7 +86,6 @@ static unsigned getHashValue(const unsigned long long& Val) { return (unsigned)(Val * 37ULL); } - static bool isPod() { return true; } static bool isEqual(const unsigned long long& LHS, const unsigned long long& RHS) { return LHS == RHS; @@ -127,7 +121,6 @@ return (unsigned)key; } static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; } - static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); } }; } // end namespace llvm Modified: llvm/trunk/include/llvm/ADT/ImmutableList.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ImmutableList.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/ImmutableList.h (original) +++ llvm/trunk/include/llvm/ADT/ImmutableList.h Tue Dec 15 01:26:43 2009 @@ -211,9 +211,12 @@ static bool isEqual(ImmutableList X1, ImmutableList X2) { return X1 == X2; } - static bool isPod() { return true; } }; +template struct isPodLike; +template +struct isPodLike > { static const bool value = true; }; + } // end llvm 
namespace #endif Modified: llvm/trunk/include/llvm/ADT/PointerIntPair.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/PointerIntPair.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/PointerIntPair.h (original) +++ llvm/trunk/include/llvm/ADT/PointerIntPair.h Tue Dec 15 01:26:43 2009 @@ -106,6 +106,12 @@ bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;} }; +template struct isPodLike; +template +struct isPodLike > { + static const bool value = true; +}; + // Provide specialization of DenseMapInfo for PointerIntPair. template struct DenseMapInfo > { @@ -125,7 +131,6 @@ return unsigned(IV) ^ unsigned(IV >> 9); } static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; // Teach SmallPtrSet that PointerIntPair is "basically a pointer". Modified: llvm/trunk/include/llvm/ADT/ValueMap.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ValueMap.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/ValueMap.h (original) +++ llvm/trunk/include/llvm/ADT/ValueMap.h Tue Dec 15 01:26:43 2009 @@ -250,6 +250,12 @@ } }; + +template +struct isPodLike > { + static const bool value = true; +}; + template struct DenseMapInfo > { typedef ValueMapCallbackVH VH; @@ -267,7 +273,6 @@ static bool isEqual(const VH &LHS, const VH &RHS) { return LHS == RHS; } - static bool isPod() { return false; } }; Modified: llvm/trunk/include/llvm/Analysis/AliasSetTracker.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/AliasSetTracker.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/AliasSetTracker.h (original) +++ 
llvm/trunk/include/llvm/Analysis/AliasSetTracker.h Tue Dec 15 01:26:43 2009 @@ -259,11 +259,9 @@ ASTCallbackVH(Value *V, AliasSetTracker *AST = 0); ASTCallbackVH &operator=(Value *V); }; - /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that ASTCallbackVH - /// is not a POD (it needs its destructor called). - struct ASTCallbackVHDenseMapInfo : public DenseMapInfo { - static bool isPod() { return false; } - }; + /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that tell us how to + /// compare and hash the value handle. + struct ASTCallbackVHDenseMapInfo : public DenseMapInfo {}; AliasAnalysis &AA; ilist AliasSets; Modified: llvm/trunk/include/llvm/Bitcode/Deserialize.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/Deserialize.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/Bitcode/Deserialize.h (original) +++ llvm/trunk/include/llvm/Bitcode/Deserialize.h Tue Dec 15 01:26:43 2009 @@ -25,53 +25,52 @@ namespace llvm { +struct BPNode { + BPNode* Next; + uintptr_t& PtrRef; + + BPNode(BPNode* n, uintptr_t& pref) + : Next(n), PtrRef(pref) { + PtrRef = 0; + } +}; + +struct BPEntry { + union { BPNode* Head; void* Ptr; }; + BPEntry() : Head(NULL) {} + void SetPtr(BPNode*& FreeList, void* P); +}; + +class BPKey { + unsigned Raw; +public: + BPKey(SerializedPtrID PtrId) : Raw(PtrId << 1) { assert (PtrId > 0); } + BPKey(unsigned code, unsigned) : Raw(code) {} + + void MarkFinal() { Raw |= 0x1; } + bool hasFinalPtr() const { return Raw & 0x1 ? true : false; } + SerializedPtrID getID() const { return Raw >> 1; } + + static inline BPKey getEmptyKey() { return BPKey(0,0); } + static inline BPKey getTombstoneKey() { return BPKey(1,0); } + static inline unsigned getHashValue(const BPKey& K) { return K.Raw & ~0x1; } + + static bool isEqual(const BPKey& K1, const BPKey& K2) { + return (K1.Raw ^ K2.Raw) & ~0x1 ? 
false : true; + } +}; + +template <> +struct isPodLike { static const bool value = true; }; +template <> +struct isPodLike { static const bool value = true; }; + class Deserializer { //===----------------------------------------------------------===// // Internal type definitions. //===----------------------------------------------------------===// - struct BPNode { - BPNode* Next; - uintptr_t& PtrRef; - - BPNode(BPNode* n, uintptr_t& pref) - : Next(n), PtrRef(pref) { - PtrRef = 0; - } - }; - - struct BPEntry { - union { BPNode* Head; void* Ptr; }; - - BPEntry() : Head(NULL) {} - - static inline bool isPod() { return true; } - - void SetPtr(BPNode*& FreeList, void* P); - }; - - class BPKey { - unsigned Raw; - - public: - BPKey(SerializedPtrID PtrId) : Raw(PtrId << 1) { assert (PtrId > 0); } - BPKey(unsigned code, unsigned) : Raw(code) {} - - void MarkFinal() { Raw |= 0x1; } - bool hasFinalPtr() const { return Raw & 0x1 ? true : false; } - SerializedPtrID getID() const { return Raw >> 1; } - - static inline BPKey getEmptyKey() { return BPKey(0,0); } - static inline BPKey getTombstoneKey() { return BPKey(1,0); } - static inline unsigned getHashValue(const BPKey& K) { return K.Raw & ~0x1; } - - static bool isEqual(const BPKey& K1, const BPKey& K2) { - return (K1.Raw ^ K2.Raw) & ~0x1 ? 
false : true; - } - - static bool isPod() { return true; } - }; typedef llvm::DenseMap MapTy; Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Tue Dec 15 01:26:43 2009 @@ -891,8 +891,9 @@ static bool isEqual(const SDValue &LHS, const SDValue &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; +template <> struct isPodLike { static const bool value = true; }; + /// simplify_type specializations - Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. Modified: llvm/trunk/include/llvm/CodeGen/SlotIndexes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SlotIndexes.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SlotIndexes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SlotIndexes.h Tue Dec 15 01:26:43 2009 @@ -329,6 +329,7 @@ }; /// DenseMapInfo specialization for SlotIndex. + /// TODO: Not a POD? 
template <> struct DenseMapInfo { static inline SlotIndex getEmptyKey() { @@ -343,7 +344,6 @@ static inline bool isEqual(const SlotIndex &LHS, const SlotIndex &RHS) { return (LHS == RHS); } - static inline bool isPod() { return false; } }; inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) { Modified: llvm/trunk/include/llvm/Support/DebugLoc.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/DebugLoc.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/DebugLoc.h (original) +++ llvm/trunk/include/llvm/Support/DebugLoc.h Tue Dec 15 01:26:43 2009 @@ -66,7 +66,7 @@ }; // Specialize DenseMapInfo for DebugLocTuple. - template<> struct DenseMapInfo { + template<> struct DenseMapInfo { static inline DebugLocTuple getEmptyKey() { return DebugLocTuple(0, 0, ~0U, ~0U); } @@ -85,9 +85,9 @@ LHS.Line == RHS.Line && LHS.Col == RHS.Col; } - - static bool isPod() { return true; } }; + template <> struct isPodLike {static const bool value = true;}; + /// DebugLocTracker - This class tracks debug location information. /// Modified: llvm/trunk/include/llvm/Support/ValueHandle.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ValueHandle.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/ValueHandle.h (original) +++ llvm/trunk/include/llvm/Support/ValueHandle.h Tue Dec 15 01:26:43 2009 @@ -254,15 +254,18 @@ static bool isEqual(const AssertingVH &LHS, const AssertingVH &RHS) { return LHS == RHS; } - static bool isPod() { +}; + +template +struct isPodLike > { #ifdef NDEBUG - return true; + static const bool value = true; #else - return false; + static const bool value = false; #endif - } }; + /// TrackingVH - This is a value handle that tracks a Value (or Value subclass), /// even across RAUW operations. 
/// Modified: llvm/trunk/include/llvm/Support/type_traits.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/type_traits.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/type_traits.h (original) +++ llvm/trunk/include/llvm/Support/type_traits.h Tue Dec 15 01:26:43 2009 @@ -17,6 +17,8 @@ #ifndef LLVM_SUPPORT_TYPE_TRAITS_H #define LLVM_SUPPORT_TYPE_TRAITS_H +#include + // This is actually the conforming implementation which works with abstract // classes. However, enough compilers have trouble with it that most will use // the one in boost/type_traits/object_traits.hpp. This implementation actually @@ -24,6 +26,33 @@ namespace llvm { +/// isPodLike - This is a type trait that is used to determine whether a given +/// type can be copied around with memcpy instead of running ctors etc. +template +struct isPodLike { + static const bool value = false; +}; + +// pointers are all pod-like. +template +struct isPodLike { static const bool value = true; }; + +// builtin types are pod-like as well. +// There is probably a much better way to do this. +template <> struct isPodLike { static const bool value = true; }; +template <> struct isPodLike { static const bool value = true; }; +template <> struct isPodLike { static const bool value = true; }; +template <> struct isPodLike { + static const bool value = true; +}; + + +// pairs are pod-like if their elements are. +template +struct isPodLike > { + static const bool value = isPodLike::value & isPodLike::value; +}; + namespace dont_use { // These two functions should never be used. 
They are helpers to Modified: llvm/trunk/lib/Analysis/IPA/Andersens.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IPA/Andersens.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IPA/Andersens.cpp (original) +++ llvm/trunk/lib/Analysis/IPA/Andersens.cpp Tue Dec 15 01:26:43 2009 @@ -121,8 +121,6 @@ return *LHS == *RHS; } - - static bool isPod() { return true; } }; class Andersens : public ModulePass, public AliasAnalysis, Modified: llvm/trunk/lib/Bitcode/Reader/Deserialize.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/Deserialize.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Bitcode/Reader/Deserialize.cpp (original) +++ llvm/trunk/lib/Bitcode/Reader/Deserialize.cpp Tue Dec 15 01:26:43 2009 @@ -413,7 +413,7 @@ return GetFinalPtr(E); } -void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) { +void BPEntry::SetPtr(BPNode*& FreeList, void* P) { BPNode* Last = NULL; for (BPNode* N = Head; N != NULL; N=N->Next) { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Tue Dec 15 01:26:43 2009 @@ -119,7 +119,6 @@ static inline unsigned getTombstoneKey() { return -2U; } static unsigned getHashValue(const unsigned &Key) { return Key; } static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; /// PadRange - Structure holding a try-range and the associated landing pad. 
Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp Tue Dec 15 01:26:43 2009 @@ -175,7 +175,6 @@ static inline unsigned getTombstoneKey() { return -2U; } static unsigned getHashValue(const unsigned &Key) { return Key; } static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; /// ActionEntry - Structure describing an entry in the actions table. Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Tue Dec 15 01:26:43 2009 @@ -190,8 +190,11 @@ static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; + +template <> +struct isPodLike { static const bool value = true; }; + } //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Tue Dec 15 01:26:43 2009 @@ -11200,8 +11200,9 @@ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width; } - static 
bool isPod() { return true; } }; + template <> + struct isPodLike { static const bool value = true; }; } Modified: llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp Tue Dec 15 01:26:43 2009 @@ -154,8 +154,10 @@ static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; +template <> +struct isPodLike { static const bool value = true; }; + } //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Tue Dec 15 01:26:43 2009 @@ -55,7 +55,6 @@ static bool isEqual(const EltTy &LHS, const EltTy &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; } Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=91421&r1=91420&r2=91421&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Tue Dec 15 01:26:43 2009 @@ -62,7 +62,6 @@ static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) { return LHS == RHS; } - static bool isPod() { return false; } }; struct DenseMapAPFloatKeyInfo { @@ -89,7 +88,6 @@ static bool isEqual(const KeyTy &LHS, const KeyTy 
&RHS) { return LHS == RHS; } - static bool isPod() { return false; } }; class LLVMContextImpl { From sabre at nondot.org Tue Dec 15 01:27:58 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 07:27:58 -0000 Subject: [llvm-commits] [llvm] r91423 - /llvm/trunk/include/llvm/CodeGen/SlotIndexes.h Message-ID: <200912150727.nBF7Rwru007913@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 01:27:58 2009 New Revision: 91423 URL: http://llvm.org/viewvc/llvm-project?rev=91423&view=rev Log: Lang verified that SlotIndex is "pod like" even though it isn't a pod. Modified: llvm/trunk/include/llvm/CodeGen/SlotIndexes.h Modified: llvm/trunk/include/llvm/CodeGen/SlotIndexes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SlotIndexes.h?rev=91423&r1=91422&r2=91423&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SlotIndexes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SlotIndexes.h Tue Dec 15 01:27:58 2009 @@ -329,7 +329,6 @@ }; /// DenseMapInfo specialization for SlotIndex. - /// TODO: Not a POD? template <> struct DenseMapInfo<SlotIndex> { static inline SlotIndex getEmptyKey() { @@ -345,6 +344,9 @@ return (LHS == RHS); } }; + + template <> struct isPodLike<SlotIndex> { static const bool value = true; }; + inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) { li.print(os); From sabre at nondot.org Tue Dec 15 01:40:44 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 07:40:44 -0000 Subject: [llvm-commits] [llvm] r91425 - in /llvm/trunk/include/llvm: ADT/SmallVector.h Support/type_traits.h Message-ID: <200912150740.nBF7eipD008398@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 01:40:44 2009 New Revision: 91425 URL: http://llvm.org/viewvc/llvm-project?rev=91425&view=rev Log: improve isPodLike to know that all non-class types are pod.
Modified: llvm/trunk/include/llvm/ADT/SmallVector.h llvm/trunk/include/llvm/Support/type_traits.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91425&r1=91424&r2=91425&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Tue Dec 15 01:40:44 2009 @@ -46,6 +46,13 @@ namespace llvm { +/// SmallVectorBase - This is all the non-templated stuff common to all +/// SmallVectors. +class SmallVectorBase { + +}; + + /// SmallVectorImpl - This class consists of common code factored out of the /// SmallVector class to reduce code duplication based on the SmallVector 'N' /// template parameter. Modified: llvm/trunk/include/llvm/Support/type_traits.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/type_traits.h?rev=91425&r1=91424&r2=91425&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/type_traits.h (original) +++ llvm/trunk/include/llvm/Support/type_traits.h Tue Dec 15 01:40:44 2009 @@ -25,33 +25,6 @@ // works with VC7.0, but other interactions seem to fail when we use it. namespace llvm { - -/// isPodLike - This is a type trait that is used to determine whether a given -/// type can be copied around with memcpy instead of running ctors etc. -template <typename T> -struct isPodLike { - static const bool value = false; -}; - -// pointers are all pod-like. -template <typename T> -struct isPodLike<T*> { static const bool value = true; }; - -// builtin types are pod-like as well. -// There is probably a much better way to do this.
-template <> struct isPodLike { static const bool value = true; }; -template <> struct isPodLike { static const bool value = true; }; -template <> struct isPodLike { static const bool value = true; }; -template <> struct isPodLike { - static const bool value = true; -}; - - -// pairs are pod-like if their elements are. -template -struct isPodLike > { - static const bool value = isPodLike::value & isPodLike::value; -}; namespace dont_use { @@ -77,6 +50,23 @@ public: enum { value = sizeof(char) == sizeof(dont_use::is_class_helper(0)) }; }; + + +/// isPodLike - This is a type trait that is used to determine whether a given +/// type can be copied around with memcpy instead of running ctors etc. +template +struct isPodLike { + // If we don't know anything else, we can (at least) assume that all non-class + // types are PODs. + static const bool value = !is_class::value; +}; + +// std::pair's are pod-like if their elements are. +template +struct isPodLike > { + static const bool value = isPodLike::value & isPodLike::value; +}; + /// \brief Metafunction that determines whether the two given types are /// equivalent. From anton at korobeynikov.info Tue Dec 15 02:03:34 2009 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 15 Dec 2009 11:03:34 +0300 Subject: [llvm-commits] [llvm] r91416 - /llvm/trunk/include/llvm/Support/Compiler.h In-Reply-To: <200912150614.nBF6EXLV005148@zion.cs.uiuc.edu> References: <200912150614.nBF6EXLV005148@zion.cs.uiuc.edu> Message-ID: > +#else > +// TODO: No idea how to do this with MSVC. 
> +#define ALWAYS_INLINE __forceinline iirc -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From sabre at nondot.org Tue Dec 15 02:16:40 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 00:16:40 -0800 Subject: [llvm-commits] [llvm] r91416 - /llvm/trunk/include/llvm/Support/Compiler.h In-Reply-To: References: <200912150614.nBF6EXLV005148@zion.cs.uiuc.edu> Message-ID: <800B636E-F0C5-4CD0-BAB3-27856352E386@nondot.org> On Dec 15, 2009, at 12:03 AM, Anton Korobeynikov wrote: >> +#else >> +// TODO: No idea how to do this with MSVC. >> +#define ALWAYS_INLINE > __forceinline iirc I'll let someone who can test it do it if they care. This is just an optimization hint, so it doesn't hurt not to. -Chris From sabre at nondot.org Tue Dec 15 02:29:22 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 08:29:22 -0000 Subject: [llvm-commits] [llvm] r91426 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912150829.nBF8TMtU010096@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 02:29:22 2009 New Revision: 91426 URL: http://llvm.org/viewvc/llvm-project?rev=91426&view=rev Log: hoist the begin/end/capacity members and a few trivial methods up into the non-templated SmallVectorBase class. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91426&r1=91425&r2=91426&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Tue Dec 15 02:29:22 2009 @@ -49,24 +49,14 @@ /// SmallVectorBase - This is all the non-templated stuff common to all /// SmallVectors. 
class SmallVectorBase { - -}; - - -/// SmallVectorImpl - This class consists of common code factored out of the -/// SmallVector class to reduce code duplication based on the SmallVector 'N' -/// template parameter. -template -class SmallVectorImpl { protected: - T *Begin, *End, *Capacity; + void *BeginX, *EndX, *CapacityX; // Allocate raw space for N elements of type T. If T has a ctor or dtor, we // don't want it to be automatically run, so we need to represent the space as // something else. An array of char would work great, but might not be // aligned sufficiently. Instead, we either use GCC extensions, or some // number of union instances for the space, which guarantee maximal alignment. -protected: #ifdef __GNUC__ typedef char U; U FirstEl __attribute__((aligned)); @@ -79,46 +69,65 @@ } FirstEl; #endif // Space after 'FirstEl' is clobbered, do not add any instance vars after it. + +protected: + SmallVectorBase(size_t Size) + : BeginX(&FirstEl), EndX(&FirstEl), CapacityX((char*)&FirstEl+Size) {} + + /// isSmall - Return true if this is a smallvector which has not had dynamic + /// memory allocated for it. + bool isSmall() const { + return BeginX == static_cast(&FirstEl); + } + + +public: + bool empty() const { return BeginX == EndX; } +}; + +/// SmallVectorImpl - This class consists of common code factored out of the +/// SmallVector class to reduce code duplication based on the SmallVector 'N' +/// template parameter. +template +class SmallVectorImpl : public SmallVectorBase { + void setEnd(T *P) { EndX = P; } public: // Default ctor - Initialize to empty. - explicit SmallVectorImpl(unsigned N) - : Begin(reinterpret_cast(&FirstEl)), - End(reinterpret_cast(&FirstEl)), - Capacity(reinterpret_cast(&FirstEl)+N) { + explicit SmallVectorImpl(unsigned N) : SmallVectorBase(N*sizeof(T)) { } ~SmallVectorImpl() { // Destroy the constructed elements in the vector. 
- destroy_range(Begin, End); + destroy_range(begin(), end()); // If this wasn't grown from the inline copy, deallocate the old space. if (!isSmall()) - operator delete(Begin); + operator delete(begin()); } typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T value_type; - typedef T* iterator; - typedef const T* const_iterator; - - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; + typedef T *iterator; + typedef const T *const_iterator; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; - bool empty() const { return Begin == End; } - size_type size() const { return End-Begin; } - size_type max_size() const { return size_type(-1) / sizeof(T); } + typedef T &reference; + typedef const T &const_reference; + typedef T *pointer; + typedef const T *const_pointer; // forward iterator creation methods. - iterator begin() { return Begin; } - const_iterator begin() const { return Begin; } - iterator end() { return End; } - const_iterator end() const { return End; } + iterator begin() { return (iterator)BeginX; } + const_iterator begin() const { return (const_iterator)BeginX; } + iterator end() { return (iterator)EndX; } + const_iterator end() const { return (const_iterator)EndX; } +private: + iterator capacity_ptr() { return (iterator)CapacityX; } + const_iterator capacity_ptr() const { return (const_iterator)CapacityX; } +public: // reverse iterator creation methods. 
reverse_iterator rbegin() { return reverse_iterator(end()); } @@ -126,14 +135,25 @@ reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin());} - + size_type size() const { return end()-begin(); } + size_type max_size() const { return size_type(-1) / sizeof(T); } + + /// capacity - Return the total number of elements in the currently allocated + /// buffer. + size_t capacity() const { return capacity_ptr() - begin(); } + + /// data - Return a pointer to the vector's buffer, even if empty(). + pointer data() { return pointer(begin()); } + /// data - Return a pointer to the vector's buffer, even if empty(). + const_pointer data() const { return const_pointer(begin()); } + reference operator[](unsigned idx) { - assert(Begin + idx < End); - return Begin[idx]; + assert(begin() + idx < end()); + return begin()[idx]; } const_reference operator[](unsigned idx) const { - assert(Begin + idx < End); - return Begin[idx]; + assert(begin() + idx < end()); + return begin()[idx]; } reference front() { @@ -151,10 +171,10 @@ } void push_back(const_reference Elt) { - if (End < Capacity) { + if (EndX < CapacityX) { Retry: - new (End) T(Elt); - ++End; + new (end()) T(Elt); + setEnd(end()+1); return; } grow(); @@ -162,8 +182,8 @@ } void pop_back() { - --End; - End->~T(); + setEnd(end()-1); + end()->~T(); } T pop_back_val() { @@ -173,36 +193,36 @@ } void clear() { - destroy_range(Begin, End); - End = Begin; + destroy_range(begin(), end()); + EndX = BeginX; } void resize(unsigned N) { if (N < size()) { - destroy_range(Begin+N, End); - End = Begin+N; + destroy_range(begin()+N, end()); + setEnd(begin()+N); } else if (N > size()) { - if (unsigned(Capacity-Begin) < N) + if (capacity() < N) grow(N); - construct_range(End, Begin+N, T()); - End = Begin+N; + construct_range(end(), begin()+N, T()); + setEnd(begin()+N); } } void resize(unsigned N, const T &NV) { if (N < size()) { - destroy_range(Begin+N, End); - End = 
Begin+N; + destroy_range(begin()+N, end()); + setEnd(begin()+N); } else if (N > size()) { - if (unsigned(Capacity-Begin) < N) + if (capacity() < N) grow(N); - construct_range(End, Begin+N, NV); - End = Begin+N; + construct_range(end(), begin()+N, NV); + setEnd(begin()+N); } } void reserve(unsigned N) { - if (unsigned(Capacity-Begin) < N) + if (capacity() < N) grow(N); } @@ -214,38 +234,38 @@ void append(in_iter in_start, in_iter in_end) { size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. - if (NumInputs > size_type(Capacity-End)) + if (NumInputs > size_type(capacity_ptr()-end())) grow(size()+NumInputs); // Copy the new elements over. - std::uninitialized_copy(in_start, in_end, End); - End += NumInputs; + std::uninitialized_copy(in_start, in_end, end()); + setEnd(end() + NumInputs); } /// append - Add the specified range to the end of the SmallVector. /// void append(size_type NumInputs, const T &Elt) { // Grow allocated space if needed. - if (NumInputs > size_type(Capacity-End)) + if (NumInputs > size_type(capacity_ptr()-end())) grow(size()+NumInputs); // Copy the new elements over. - std::uninitialized_fill_n(End, NumInputs, Elt); - End += NumInputs; + std::uninitialized_fill_n(end(), NumInputs, Elt); + setEnd(end() + NumInputs); } void assign(unsigned NumElts, const T &Elt) { clear(); - if (unsigned(Capacity-Begin) < NumElts) + if (capacity() < NumElts) grow(NumElts); - End = Begin+NumElts; - construct_range(Begin, End, Elt); + setEnd(begin()+NumElts); + construct_range(begin(), end(), Elt); } iterator erase(iterator I) { iterator N = I; // Shift all elts down one. - std::copy(I+1, End, I); + std::copy(I+1, end(), I); // Drop the last elt. pop_back(); return(N); @@ -254,36 +274,36 @@ iterator erase(iterator S, iterator E) { iterator N = S; // Shift all elts down. - iterator I = std::copy(E, End, S); + iterator I = std::copy(E, end(), S); // Drop the last elts. 
- destroy_range(I, End); - End = I; + destroy_range(I, end()); + setEnd(I); return(N); } iterator insert(iterator I, const T &Elt) { - if (I == End) { // Important special case for empty vector. + if (I == end()) { // Important special case for empty vector. push_back(Elt); return end()-1; } - if (End < Capacity) { + if (EndX < CapacityX) { Retry: - new (End) T(back()); - ++End; + new (end()) T(back()); + setEnd(end()+1); // Push everything else over. - std::copy_backward(I, End-1, End); + std::copy_backward(I, end()-1, end()); *I = Elt; return I; } - size_t EltNo = I-Begin; + size_t EltNo = I-begin(); grow(); - I = Begin+EltNo; + I = begin()+EltNo; goto Retry; } iterator insert(iterator I, size_type NumToInsert, const T &Elt) { - if (I == End) { // Important special case for empty vector. + if (I == end()) { // Important special case for empty vector. append(NumToInsert, Elt); return end()-1; } @@ -302,8 +322,8 @@ // insertion. Since we already reserved space, we know that this won't // reallocate the vector. if (size_t(end()-I) >= NumToInsert) { - T *OldEnd = End; - append(End-NumToInsert, End); + T *OldEnd = end(); + append(end()-NumToInsert, end()); // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); @@ -316,10 +336,10 @@ // not inserting at the end. // Copy over the elements that we're about to overwrite. - T *OldEnd = End; - End += NumToInsert; + T *OldEnd = end(); + setEnd(end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - std::uninitialized_copy(I, OldEnd, End-NumOverwritten); + std::uninitialized_copy(I, OldEnd, end()-NumOverwritten); // Replace the overwritten part. std::fill_n(I, NumOverwritten, Elt); @@ -331,7 +351,7 @@ template iterator insert(iterator I, ItTy From, ItTy To) { - if (I == End) { // Important special case for empty vector. + if (I == end()) { // Important special case for empty vector. append(From, To); return end()-1; } @@ -351,8 +371,8 @@ // insertion. 
Since we already reserved space, we know that this won't // reallocate the vector. if (size_t(end()-I) >= NumToInsert) { - T *OldEnd = End; - append(End-NumToInsert, End); + T *OldEnd = end(); + append(end()-NumToInsert, end()); // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); @@ -365,10 +385,10 @@ // not inserting at the end. // Copy over the elements that we're about to overwrite. - T *OldEnd = End; - End += NumToInsert; + T *OldEnd = end(); + setEnd(end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - std::uninitialized_copy(I, OldEnd, End-NumOverwritten); + std::uninitialized_copy(I, OldEnd, end()-NumOverwritten); // Replace the overwritten part. std::copy(From, From+NumOverwritten, I); @@ -378,21 +398,11 @@ return I; } - /// data - Return a pointer to the vector's buffer, even if empty(). - pointer data() { - return pointer(Begin); - } - - /// data - Return a pointer to the vector's buffer, even if empty(). - const_pointer data() const { - return const_pointer(Begin); - } - const SmallVectorImpl &operator=(const SmallVectorImpl &RHS); bool operator==(const SmallVectorImpl &RHS) const { if (size() != RHS.size()) return false; - for (T *This = Begin, *That = RHS.Begin, *E = Begin+size(); + for (const T *This = begin(), *That = RHS.begin(), *E = end(); This != E; ++This, ++That) if (*This != *That) return false; @@ -405,10 +415,6 @@ RHS.begin(), RHS.end()); } - /// capacity - Return the total number of elements in the currently allocated - /// buffer. - size_t capacity() const { return Capacity - Begin; } - /// set_size - Set the array size to \arg N, which the current array must have /// enough capacity for. /// @@ -420,17 +426,10 @@ /// which will only be overwritten. void set_size(unsigned N) { assert(N <= capacity()); - End = Begin + N; + setEnd(begin() + N); } private: - /// isSmall - Return true if this is a smallvector which has not had dynamic - /// memory allocated for it. 
- bool isSmall() const { - return static_cast<const void*>(Begin) == - static_cast<const void*>(&FirstEl); - } - /// grow - double the size of the allocated memory, guaranteeing space for at /// least one more element or MinSize if specified. void grow(size_type MinSize = 0); @@ -441,6 +440,7 @@ } void destroy_range(T *S, T *E) { + // TODO: POD while (S != E) { --E; E->~T(); @@ -451,7 +451,7 @@ // Define this out-of-line to dissuade the C++ compiler from inlining it. template <typename T> void SmallVectorImpl<T>::grow(size_t MinSize) { - size_t CurCapacity = Capacity-Begin; + size_t CurCapacity = capacity(); size_t CurSize = size(); size_t NewCapacity = 2*CurCapacity; if (NewCapacity < MinSize) @@ -460,21 +460,21 @@ // Copy the elements over. if (is_class<T>::value) - std::uninitialized_copy(Begin, End, NewElts); + std::uninitialized_copy(begin(), end(), NewElts); else // Use memcpy for PODs (std::uninitialized_copy optimizes to memmove). - memcpy(NewElts, Begin, CurSize * sizeof(T)); + memcpy(NewElts, begin(), CurSize * sizeof(T)); // Destroy the original elements. - destroy_range(Begin, End); + destroy_range(begin(), end()); // If this wasn't grown from the inline copy, deallocate the old space. if (!isSmall()) - operator delete(Begin); + operator delete(begin()); - Begin = NewElts; - End = NewElts+CurSize; - Capacity = Begin+NewCapacity; + setEnd(NewElts+CurSize); + BeginX = NewElts; + CapacityX = begin()+NewCapacity; } template <typename T> @@ -483,35 +483,35 @@ // We can only avoid copying elements if neither vector is small. if (!isSmall() && !RHS.isSmall()) { - std::swap(Begin, RHS.Begin); - std::swap(End, RHS.End); - std::swap(Capacity, RHS.Capacity); + std::swap(BeginX, RHS.BeginX); + std::swap(EndX, RHS.EndX); + std::swap(CapacityX, RHS.CapacityX); return; } - if (RHS.size() > size_type(Capacity-Begin)) + if (RHS.size() > capacity()) grow(RHS.size()); - if (size() > size_type(RHS.Capacity-RHS.begin())) + if (size() > RHS.capacity()) RHS.grow(size()); // Swap the shared elements. 
size_t NumShared = size(); if (NumShared > RHS.size()) NumShared = RHS.size(); for (unsigned i = 0; i != static_cast<unsigned>(NumShared); ++i) - std::swap(Begin[i], RHS[i]); + std::swap((*this)[i], RHS[i]); // Copy over the extra elts. if (size() > RHS.size()) { size_t EltDiff = size() - RHS.size(); - std::uninitialized_copy(Begin+NumShared, End, RHS.End); - RHS.End += EltDiff; - destroy_range(Begin+NumShared, End); - End = Begin+NumShared; + std::uninitialized_copy(begin()+NumShared, end(), RHS.end()); + RHS.setEnd(RHS.end()+EltDiff); + destroy_range(begin()+NumShared, end()); + setEnd(begin()+NumShared); } else if (RHS.size() > size()) { size_t EltDiff = RHS.size() - size(); - std::uninitialized_copy(RHS.Begin+NumShared, RHS.End, End); - End += EltDiff; - destroy_range(RHS.Begin+NumShared, RHS.End); - RHS.End = RHS.Begin+NumShared; + std::uninitialized_copy(RHS.begin()+NumShared, RHS.end(), end()); + setEnd(end() + EltDiff); + destroy_range(RHS.begin()+NumShared, RHS.end()); + RHS.setEnd(RHS.begin()+NumShared); } } @@ -523,42 +523,42 @@ // If we already have sufficient space, assign the common elements, then // destroy any excess. - unsigned RHSSize = unsigned(RHS.size()); - unsigned CurSize = unsigned(size()); + size_t RHSSize = RHS.size(); + size_t CurSize = size(); if (CurSize >= RHSSize) { // Assign common elements. iterator NewEnd; if (RHSSize) - NewEnd = std::copy(RHS.Begin, RHS.Begin+RHSSize, Begin); + NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, begin()); else - NewEnd = Begin; + NewEnd = begin(); // Destroy excess elements. - destroy_range(NewEnd, End); + destroy_range(NewEnd, end()); // Trim. - End = NewEnd; + setEnd(NewEnd); return *this; } // If we have to grow to have enough elements, destroy the current elements. // This allows us to avoid copying them during the grow. - if (unsigned(Capacity-Begin) < RHSSize) { + if (capacity() < RHSSize) { // Destroy current elements. 
- destroy_range(Begin, End); - End = Begin; + destroy_range(begin(), end()); + setEnd(begin()); CurSize = 0; grow(RHSSize); } else if (CurSize) { // Otherwise, use assignment for the already-constructed elements. - std::copy(RHS.Begin, RHS.Begin+CurSize, Begin); + std::copy(RHS.begin(), RHS.begin()+CurSize, begin()); } // Copy construct the new elements in place. - std::uninitialized_copy(RHS.Begin+CurSize, RHS.End, Begin+CurSize); + std::uninitialized_copy(RHS.begin()+CurSize, RHS.end(), begin()+CurSize); // Set end. - End = Begin+RHSSize; + setEnd(begin()+RHSSize); return *this; } From sabre at nondot.org Tue Dec 15 02:34:01 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 08:34:01 -0000 Subject: [llvm-commits] [llvm] r91427 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912150834.nBF8Y16A010437@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 02:34:01 2009 New Revision: 91427 URL: http://llvm.org/viewvc/llvm-project?rev=91427&view=rev Log: a few improvements: 1. Use std::equal instead of reinventing it. 2. don't run dtors in destroy_range if element is pod-like. 3. Use isPodLike to decide between memcpy/uninitialized_copy instead of is_class. isPodLike is more generous in some cases. 
Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91427&r1=91426&r2=91427&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Tue Dec 15 02:34:01 2009 @@ -402,11 +402,7 @@ bool operator==(const SmallVectorImpl &RHS) const { if (size() != RHS.size()) return false; - for (const T *This = begin(), *That = RHS.begin(), *E = end(); - This != E; ++This, ++That) - if (*This != *That) - return false; - return true; + return std::equal(begin(), end(), RHS.begin()); } bool operator!=(const SmallVectorImpl &RHS) const { return !(*this == RHS); } @@ -440,7 +436,9 @@ } void destroy_range(T *S, T *E) { - // TODO: POD + // No need to do a destroy loop for POD's. + if (isPodLike<T>::value) return; + while (S != E) { --E; E->~T(); @@ -459,11 +457,11 @@ T *NewElts = static_cast<T*>(operator new(NewCapacity*sizeof(T))); // Copy the elements over. - if (is_class<T>::value) - std::uninitialized_copy(begin(), end(), NewElts); - else - // Use memcpy for PODs (std::uninitialized_copy optimizes to memmove). + if (isPodLike<T>::value) + // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove. memcpy(NewElts, begin(), CurSize * sizeof(T)); + else + std::uninitialized_copy(begin(), end(), NewElts); // Destroy the original elements. 
destroy_range(begin(), end()); From sabre at nondot.org Tue Dec 15 03:05:16 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 09:05:16 -0000 Subject: [llvm-commits] [llvm] r91428 - /llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Message-ID: <200912150905.nBF95HBc025336@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 03:05:13 2009 New Revision: 91428 URL: http://llvm.org/viewvc/llvm-project?rev=91428&view=rev Log: add some other xforms that should be done as part of PR5783 Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=91428&r1=91427&r2=91428&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Dec 15 03:05:13 2009 @@ -2644,10 +2644,11 @@ // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // -// strstr: +// strstr: (PR5783) // * strstr(x,x) -> x -// * strstr(s1,s2) -> offset_of_s2_in(s1) -// (if s1 and s2 are constant strings) +// * strstr(x, "") -> x +// * strstr(x, "a") -> strchr(x, 'a') +// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) // // tan, tanf, tanl: // * tan(atan(x)) -> x From gohman at apple.com Tue Dec 15 10:30:10 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 15 Dec 2009 16:30:10 -0000 Subject: [llvm-commits] [llvm] r91432 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912151630.nBFGUAgT009603@zion.cs.uiuc.edu> Author: djg Date: Tue Dec 15 10:30:09 2009 New Revision: 91432 URL: http://llvm.org/viewvc/llvm-project?rev=91432&view=rev Log: Delete an unused function. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91432&r1=91431&r2=91432&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue Dec 15 10:30:09 2009 @@ -232,44 +232,6 @@ } } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop -/// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { - // This is very common, put it first. - if (isa(S)) - return false; - if (const SCEVCommutativeExpr *AE = dyn_cast(S)) { - for (unsigned int i=0; i< AE->getNumOperands(); i++) - if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) - return true; - return false; - } - if (const SCEVAddRecExpr *AE = dyn_cast(S)) { - if (const Loop *newLoop = AE->getLoop()) { - if (newLoop == L) - return false; - // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L->getHeader())) - return false; - } - return true; - } - if (const SCEVUDivExpr *DE = dyn_cast(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll - // need this when it is. 
- if (const SCEVSDivExpr *DE = dyn_cast(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#endif - if (const SCEVCastExpr *CE = dyn_cast(S)) - return containsAddRecFromDifferentLoop(CE->getOperand(), L); - return false; -} - /// isAddressUse - Returns true if the specified instruction is using the /// specified value as an address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { From ken at linux.vnet.ibm.com Tue Dec 15 05:54:54 2009 From: ken at linux.vnet.ibm.com (Ken Werner) Date: Tue, 15 Dec 2009 12:54:54 +0100 Subject: [llvm-commits] [patch] disable biarch for llvm-gcc on PPC64 Message-ID: <200912151254.54223.ken@linux.vnet.ibm.com> Hi, This patch disables biarch on powerpc64-*-linux* as LLVM doesn't seem to support it. -ken -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-disable-biarch.patch Type: text/x-patch Size: 585 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091215/32131787/attachment.bin From ken at linux.vnet.ibm.com Tue Dec 15 05:55:05 2009 From: ken at linux.vnet.ibm.com (Ken Werner) Date: Tue, 15 Dec 2009 12:55:05 +0100 Subject: [llvm-commits] [patch] frame pointer offset PPC64 Message-ID: <200912151255.05230.ken@linux.vnet.ibm.com> Hi, Small fix for the frame pointer offset on PPC64. -ken -------------- next part -------------- A non-text attachment was scrubbed... 
Name: ppc64-FPSaveOffset.patch Type: text/x-patch Size: 468 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091215/13c9cea3/attachment.bin From ken at linux.vnet.ibm.com Tue Dec 15 05:55:29 2009 From: ken at linux.vnet.ibm.com (Ken Werner) Date: Tue, 15 Dec 2009 12:55:29 +0100 Subject: [llvm-commits] [patch] fptr support for PPC64 Message-ID: <200912151255.29701.ken@linux.vnet.ibm.com> Hi, The attached patch adds support for indirect calls (through function pointer) according to the ABI (http://refspecs.linuxfoundation.org/ELF/ppc64/PPC- elf64abi-1.9.html#FUNC-CALLS). The patch was made against revision 91275. -ken -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-fptr.patch Type: text/x-patch Size: 8130 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091215/e4c3f871/attachment.bin From johnny.chen at apple.com Tue Dec 15 11:24:15 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Tue, 15 Dec 2009 17:24:15 -0000 Subject: [llvm-commits] [llvm] r91434 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrThumb.td ARMInstrThumb2.td Message-ID: <200912151724.nBFHOF6k011394@zion.cs.uiuc.edu> Author: johnny Date: Tue Dec 15 11:24:14 2009 New Revision: 91434 URL: http://llvm.org/viewvc/llvm-project?rev=91434&view=rev Log: Added encoding bits for the Thumb ISA. Initial checkin. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrThumb.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=91434&r1=91433&r2=91434&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Dec 15 11:24:14 2009 @@ -146,11 +146,9 @@ // ARM Instruction templates. // -class InstARM +class InstTemplate : Instruction { - field bits<32> Inst; - let Namespace = "ARM"; // TSFlagsFields @@ -179,6 +177,20 @@ let Itinerary = itin; } +class Encoding { + field bits<32> Inst; +} + +class InstARM + : InstTemplate, Encoding; + +// This Encoding-less class is used by Thumb1 to specify the encoding bits later +// on by adding flavors to specific instructions. +class InstThumb + : InstTemplate; + class PseudoInst pattern> : InstARM pattern> - : InstARM { + : InstThumb { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -876,9 +888,14 @@ class TIt pattern> : ThumbI; -// tBL, tBX instructions -class TIx2 pattern> - : ThumbI; +// tBL, tBX 32-bit instructions +class TIx2 opcod1, bits<2> opcod2, bit opcod3, + dag oops, dag iops, InstrItinClass itin, string asm, list pattern> + : ThumbI, Encoding { + let Inst{31-27} = opcod1; + let Inst{15-14} = opcod2; + let Inst{12} = opcod3; +} // BR_JT instructions class TJTI pattern> @@ -887,7 +904,7 @@ // Thumb1 only class Thumb1I pattern> - : InstARM { + : InstThumb { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -903,7 +920,8 @@ : Thumb1I; class T1JTI pattern> - : Thumb1I; + : Thumb1I, + Encoding; // Two-address instructions class T1It pattern> - : InstARM { + : InstThumb { let OutOperandList = !con(oops, (ops s_cc_out:$s)); let InOperandList = !con(iops, (ops pred:$p)); let 
AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -937,7 +955,7 @@ class Thumb1pI pattern> - : InstARM { + : InstThumb { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -968,6 +986,46 @@ InstrItinClass itin, string opc, string asm, list pattern> : Thumb1pI; +// A6.2 16-bit Thumb instruction encoding +class T1Encoding opcode> : Encoding { + let Inst{15-10} = opcode; +} + +// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding. +class T1General opcode> : Encoding { + let Inst{15-14} = 0b00; + let Inst{13-9} = opcode; +} + +// A6.2.2 Data-processing encoding. +class T1DataProcessing opcode> : Encoding { + let Inst{15-10} = 0b010000; + let Inst{9-6} = opcode; +} + +// A6.2.3 Special data instructions and branch and exchange encoding. +class T1Special opcode> : Encoding { + let Inst{15-10} = 0b010001; + let Inst{9-6} = opcode; +} + +// A6.2.4 Load/store single data item encoding. +class T1LoadStore opA, bits<3> opB> : Encoding { + let Inst{15-12} = opA; + let Inst{11-9} = opB; +} +class T1LdSt opB> : T1LoadStore<0b0101, opB>; +class T1LdSt4Imm opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes +class T1LdSt1Imm opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte +class T1LdSt2Imm opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes +class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP relative + +// A6.2.5 Miscellaneous 16-bit instructions encoding. +class T1Misc opcode> : Encoding { + let Inst{15-12} = 0b1011; + let Inst{11-5} = opcode; +} + // Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable. class Thumb2I pattern> : Thumb2I; -class T2Ii8s4 pattern> - : Thumb2I; + : Thumb2I { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24} = P; + let Inst{23} = ?; // The U bit. 
+ let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = load; +} class T2sI pattern> @@ -1055,8 +1122,9 @@ // T2Iidxldst - Thumb2 indexed load / store instructions. -class T2Iidxldst opcod, bit load, bit pre, + dag oops, dag iops, + AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list pattern> : InstARM { let OutOperandList = oops; @@ -1064,6 +1132,16 @@ let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list Predicates = [IsThumb2]; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = load; + let Inst{11} = 1; + // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed + let Inst{10} = pre; // The P bit. + let Inst{8} = 1; // The W bit. } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=91434&r1=91433&r2=91434&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue Dec 15 11:24:14 2009 @@ -136,31 +136,46 @@ let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "\n$cp:\n\tadd\t$dst, pc", - [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>; + [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>, + T1Special<{0,0,?,?}> { + let Inst{6-3} = 0b1111; // A8.6.6 Rm = pc +} // PC relative add. 
def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs", []>; + "add\t$dst, pc, $rhs", []>, + T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 // ADD rd, sp, #imm8 def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs", []>; + "add\t$dst, $sp, $rhs", []>, + T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8 // ADD sp, sp, #imm7 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $rhs", []>; + "add\t$dst, $rhs", []>, + T1Misc<{0,0,0,0,0,?,?}>; // A6.2.5 & A8.6.8 // SUB sp, sp, #imm7 def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, - "sub\t$dst, $rhs", []>; + "sub\t$dst, $rhs", []>, + T1Misc<{0,0,0,0,1,?,?}>; // A6.2.5 & A8.6.215 // ADD rm, sp def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>; + "add\t$dst, $rhs", []>, + T1Special<{0,0,?,?}> { + let Inst{6-3} = 0b1101; // A8.6.9 Encoding T1 +} // ADD sp, rm def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add\t$dst, $rhs", []>; + "add\t$dst, $rhs", []>, + T1Special<{0,0,?,?}> { + // A8.6.9 Encoding T2 + let Inst{7} = 1; + let Inst{2-0} = 0b101; +} // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. @@ -180,22 +195,32 @@ // let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>; + def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>, + T1Special<{1,1,0,?}> { // A6.2.3 & A8.6.25 + let Inst{6-3} = 0b1110; // Rm = lr + } // Alternative return instruction used by vararg functions. 
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>; + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>, + T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25 } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst", - [(brind GPR:$dst)]>; + [(brind GPR:$dst)]>, + T1Special<{1,0,?,?}> { + // = pc + let Inst{7} = 1; + let Inst{2-0} = 0b111; + } } // FIXME: remove when we have a way to marking a MI with these properties. let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p}\t$wb", []>; + "pop${p}\t$wb", []>, + T1Misc<{1,1,0,?,?,?,?}>; let isCall = 1, Defs = [R0, R1, R2, R3, R12, LR, @@ -203,25 +228,29 @@ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 - def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "bl\t${func:call}", - [(ARMtcall tglobaladdr:$func)]>, + def tBL : TIx2<0b11110, 0b11, 1, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, + "bl\t${func:call}", + [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]>; // ARMv5T and above, also used for Thumb2 - def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "blx\t${func:call}", - [(ARMcall tglobaladdr:$func)]>, + def tBLXi : TIx2<0b11110, 0b11, 0, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, + "blx\t${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // Also used for Thumb2 def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotDarwin]>, + T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24; // ARMv4T - def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), 
IIC_Br, + def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsNotDarwin]>; @@ -234,27 +263,31 @@ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 - def tBLr9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, + def tBLr9 : TIx2<0b11110, 0b11, 1, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, + def tBLXi_r9 : TIx2<0b11110, 0b11, 0, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx\t$func", - [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + "blx\t$func", + [(ARMtcall GPR:$func)]>, + Requires<[IsThumb, HasV5T, IsDarwin]>, + T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24 // ARMv4T - def tBXr9 : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, - "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink tGPR:$func)]>, + def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, + "mov\tlr, pc\n\tbx\t$func", + [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsDarwin]>; } @@ -262,11 +295,12 @@ let isBarrier = 1 in { let isPredicable = 1 in def tB : T1I<(outs), (ins brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]>; + "b\t$target", [(br bb:$target)]>, + T1Encoding<{1,1,1,0,0,?}>; // Far jump let Defs = [LR] in - def tBfar : TIx2<(outs), (ins brtarget:$target), IIC_Br, + def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, "bl\t$target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), @@ -281,15 +315,18 @@ let 
isBranch = 1, isTerminator = 1 in def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br, "b$cc\t$target", - [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + [/*(ARMbrcond bb:$target, imm:$cc)*/]>, + T1Encoding<{1,1,0,1,?,?}>; // Compare and branch on zero / non-zero let isBranch = 1, isTerminator = 1 in { def tCBZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, - "cbz\t$cmp, $target", []>; + "cbz\t$cmp, $target", []>, + T1Misc<{0,0,?,1,?,?,?}>; def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, - "cbnz\t$cmp, $target", []>; + "cbnz\t$cmp, $target", []>, + T1Misc<{1,0,?,1,?,?,?}>; } //===----------------------------------------------------------------------===// @@ -299,71 +336,85 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", - [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; + [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, + T1LdSt<0b100>; def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, "ldrb", "\t$dst, $addr", - [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; + [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>, + T1LdSt<0b110>; def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, "ldrh", "\t$dst, $addr", - [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; + [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>, + T1LdSt<0b101>; let AddedComplexity = 10 in def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, "ldrsb", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>, + T1LdSt<0b011>; let AddedComplexity = 10 in def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, "ldrsh", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>, + T1LdSt<0b111>; let 
canFoldAsLoad = 1 in def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", - [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; + [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}>; // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1 in def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, - "ldr", "\t$dst, $addr", []>; + "ldr", "\t$dst, $addr", []>, + T1LdStSP<{1,?,?}>; // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", - [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; + [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools. 
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - "ldr", "\t$dst, $addr", []>; + "ldr", "\t$dst, $addr", []>, + T1LdStSP<{1,?,?}>; def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, "str", "\t$src, $addr", - [(store tGPR:$src, t_addrmode_s4:$addr)]>; + [(store tGPR:$src, t_addrmode_s4:$addr)]>, + T1LdSt<0b000>; def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, "strb", "\t$src, $addr", - [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; + [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>, + T1LdSt<0b010>; def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, "strh", "\t$src, $addr", - [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; + [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>, + T1LdSt<0b001>; def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, "str", "\t$src, $addr", - [(store tGPR:$src, t_addrmode_sp:$addr)]>; + [(store tGPR:$src, t_addrmode_sp:$addr)]>, + T1LdStSP<{0,?,?}>; let mayStore = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). 
def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, - "str", "\t$src, $addr", []>; + "str", "\t$src, $addr", []>, + T1LdStSP<{0,?,?}>; } //===----------------------------------------------------------------------===// @@ -375,21 +426,25 @@ def tLDM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $wb", []>; + "ldm${addr:submode}${p}\t$addr, $wb", []>, + T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $wb", []>; + "stm${addr:submode}${p}\t$addr, $wb", []>, + T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p}\t$wb", []>; + "pop${p}\t$wb", []>, + T1Misc<{1,1,0,?,?,?,?}>; let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "push${p}\t$wb", []>; + "push${p}\t$wb", []>, + T1Misc<{0,1,0,?,?,?,?}>; //===----------------------------------------------------------------------===// // Arithmetic Instructions. 
@@ -399,82 +454,98 @@ let isCommutable = 1, Uses = [CPSR] in def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "adc", "\t$dst, $rhs", - [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0101>; // Add immediate def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, "add", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>, + T1General<0b01110>; def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, "add", "\t$dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>, + T1General<{1,1,0,?,?}>; // Add register let isCommutable = 1 in def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "add", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>, + T1General<0b01100>; let neverHasSideEffects = 1 in def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add", "\t$dst, $rhs", []>; + "add", "\t$dst, $rhs", []>, + T1Special<{0,0,?,?}>; // And register let isCommutable = 1 in def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "and", "\t$dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0000>; // ASR immediate def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, "asr", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>; + [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>, + T1General<{0,1,0,?,?}>; // ASR register def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, "asr", "\t$dst, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0100>; // 
BIC register def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "bic", "\t$dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; + [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>, + T1DataProcessing<0b1110>; // CMN register let Defs = [CPSR] in { def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "cmn", "\t$lhs, $rhs", - [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; + [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>, + T1DataProcessing<0b1011>; def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "cmn", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; + [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>, + T1DataProcessing<0b1011>; } // CMP immediate let Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, "cmp", "\t$lhs, $rhs", - [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; + [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>, + T1General<{1,0,1,?,?}>; def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, "cmp", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; - + [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>, + T1General<{1,0,1,?,?}>; } // CMP register let Defs = [CPSR] in { def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "cmp", "\t$lhs, $rhs", - [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; + [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>, + T1DataProcessing<0b1010>; def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "cmp", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; + [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>, + T1DataProcessing<0b1010>; def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", []>; + "cmp", "\t$lhs, $rhs", []>, + T1Special<{0,1,?,?}>; def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", []>; + "cmp", "\t$lhs, $rhs", []>, + T1Special<{0,1,?,?}>; } @@ -482,32 +553,38 @@ let isCommutable = 1 in def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "eor", "\t$dst, $rhs", - [(set 
tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0001>; // LSL immediate def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, "lsl", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>; + [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>, + T1General<{0,0,0,?,?}>; // LSL register def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, "lsl", "\t$dst, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0010>; // LSR immediate def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, "lsr", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>; + [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>, + T1General<{0,0,1,?,?}>; // LSR register def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, "lsr", "\t$dst, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0011>; // move register def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "mov", "\t$dst, $src", - [(set tGPR:$dst, imm0_255:$src)]>; + [(set tGPR:$dst, imm0_255:$src)]>, + T1General<{1,0,0,?,?}>; // TODO: A7-73: MOV(2) - mov setting flag. @@ -515,42 +592,52 @@ let neverHasSideEffects = 1 in { // FIXME: Make this predicable. def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>; + "mov\t$dst, $src", []>, + T1Special<0b1000>; let Defs = [CPSR] in def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "movs\t$dst, $src", []>; + "movs\t$dst, $src", []>, Encoding { + let Inst{15-6} = 0b0000000000; +} // FIXME: Make these predicable. 
def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>; + "mov\t$dst, $src", []>, + T1Special<{1,0,0,1}>; def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>; + "mov\t$dst, $src", []>, + T1Special<{1,0,1,0}>; def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>; + "mov\t$dst, $src", []>, + T1Special<{1,0,1,1}>; } // neverHasSideEffects // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, "mul", "\t$dst, $rhs", - [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b1101>; // move inverse register def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, "mvn", "\t$dst, $src", - [(set tGPR:$dst, (not tGPR:$src))]>; + [(set tGPR:$dst, (not tGPR:$src))]>, + T1DataProcessing<0b1111>; // bitwise or register let isCommutable = 1 in def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "orr", "\t$dst, $rhs", - [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b1100>; // swaps def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "rev", "\t$dst, $src", [(set tGPR:$dst, (bswap tGPR:$src))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{1,0,1,0,0,0,?}>; def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "rev16", "\t$dst, $src", @@ -559,7 +646,8 @@ (or (and (shl tGPR:$src, (i32 8)), 0xFF00), (or (and (srl tGPR:$src, (i32 8)), 0xFF0000), (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{1,0,1,0,0,1,?}>; def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "revsh", "\t$dst, $src", @@ -567,37 +655,44 @@ (sext_inreg (or (srl (and tGPR:$src, 0xFF00), (i32 8)), (shl tGPR:$src, (i32 8))), i16))]>, - 
Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{1,0,1,0,1,1,?}>; // rotate right register def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, "ror", "\t$dst, $rhs", - [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0111>; // negate register def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi, "rsb", "\t$dst, $src, #0", - [(set tGPR:$dst, (ineg tGPR:$src))]>; + [(set tGPR:$dst, (ineg tGPR:$src))]>, + T1DataProcessing<0b1001>; // Subtract with carry register let Uses = [CPSR] in def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "sbc", "\t$dst, $rhs", - [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>, + T1DataProcessing<0b0110>; // Subtract immediate def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, "sub", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>, + T1General<0b01111>; def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, "sub", "\t$dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>, + T1General<{1,1,1,?,?}>; // subtract register def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, "sub", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; + [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>, + T1General<0b01101>; // TODO: A7-96: STMIA - store multiple. 
@@ -605,31 +700,36 @@ def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "sxtb", "\t$dst, $src", [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{0,0,1,0,0,1,?}>; // sign-extend short def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "sxth", "\t$dst, $src", [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{0,0,1,0,0,0,?}>; // test let isCommutable = 1, Defs = [CPSR] in def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "tst", "\t$lhs, $rhs", - [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; + [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>, + T1DataProcessing<0b1000>; // zero-extend byte def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "uxtb", "\t$dst, $src", [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{0,0,1,0,1,1,?}>; // zero-extend short def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, "uxth", "\t$dst, $src", [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, - Requires<[IsThumb1Only, HasV6]>; + Requires<[IsThumb1Only, HasV6]>, + T1Misc<{0,0,1,0,1,0,?}>; // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. @@ -643,19 +743,23 @@ // 16-bit movcc in IT blocks for Thumb2. def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, - "mov", "\t$dst, $rhs", []>; + "mov", "\t$dst, $rhs", []>, + T1Special<{1,0,?,?}>; def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, - "mov", "\t$dst, $rhs", []>; + "mov", "\t$dst, $rhs", []>, + T1General<{1,0,0,?,?}>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p\t$dst, #$label", []>; + "adr$p\t$dst, #$label", []>, + T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), - IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>; + IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, + T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 //===----------------------------------------------------------------------===// // TLS Instructions @@ -664,9 +768,9 @@ // __aeabi_read_tp preserves the registers r1-r3. let isCall = 1, Defs = [R0, LR] in { - def tTPsoft : TIx2<(outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]>; + def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, + "bl\t__aeabi_read_tp", + [(set R0, ARMthread_pointer)]>; } // SJLJ Exception handling intrinsics Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91434&r1=91433&r2=91434&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Dec 15 11:24:14 2009 @@ -165,234 +165,465 @@ /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{ +multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Cheap = 0, bit ReMat = 0> { // shifted imm def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, opc, "\t$dst, $src", [(set GPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; } // register def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]>; + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } // shifted register def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]>; + [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1111; // Rn + } } /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a // binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_bin_irs<string opc, PatFrag opnode, +multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. + let Inst{15} = 0; + } // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. + } } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need // the ".w" prefix to indicate that they are wide. -multiclass T2I_bin_w_irs<string opc, PatFrag opnode, bit Commutable = 0> : - T2I_bin_irs<opc, opnode, Commutable, ".w">; +multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> : + T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. It doesn't define the 'rr' form since it's handled by its /// T2I_bin_irs counterpart. -multiclass T2I_rbin_is<string opc, PatFrag opnode> { +multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 0; // The S bit. + let Inst{15} = 0; + } // shifted register def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 0; // The S bit. + } } /// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the /// instruction modifies the CPSR register.
let Defs = [CPSR] in { -multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{15} = 0; + } // register def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type } // shifted register def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + } } } /// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg}) /// patterns for a binary operation that produces a value. -multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24} = 1; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0; + } // 12-bit imm def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24} = 0; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit. + let Inst{15} = 0; + } // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24} = 1; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit. + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{24} = 1; + let Inst{26-25} = 0b01; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit. + } } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns /// for a binary operation that produces a value and use and define the carry /// bit. It's not predicable. let Uses = [CPSR] in { -multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0; + } // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]> { let isCommutable = Commutable; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 0; // The S bit. + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]>; + Requires<[IsThumb2, CarryDefIsUnused]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 0; // The S bit. + } // Carry setting variants // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; - } + let Defs = [CPSR]; + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{15} = 0; + } // register def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; - let isCommutable = Commutable; + let Defs = [CPSR]; + let isCommutable = Commutable; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. 
+ let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type } // shifted register def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + let Defs = [CPSR]; + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. } } } /// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. let Defs = [CPSR] in { -multiclass T2I_rbin_s_is<string opc, PatFrag opnode> { +multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), IIC_iALUi, !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{15} = 0; + } // shifted register def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), IIC_iALUsi, !strconcat(opc, "${s}\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + } } } /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value.
-multiclass T2I_sh_ir<string opc, PatFrag opnode> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { // 5-bit imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> { + let Inst{31-27} = 0b11101; + let Inst{26-21} = 0b010010; + let Inst{19-16} = 0b1111; // Rn + let Inst{5-4} = opcod; + } // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>; + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-21} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0000; + } } -/// T2I_cmp_is - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. let Defs = [CPSR] in { -multiclass T2I_cmp_is<string opc, PatFrag opnode> { +multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, t2_so_imm:$rhs)]>; + [(opnode GPR:$lhs, t2_so_imm:$rhs)]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{15} = 0; + let Inst{11-8} = 0b1111; // Rd + } // register def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, GPR:$rhs)]>; + [(opnode GPR:$lhs, GPR:$rhs)]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit.
+ let Inst{14-12} = 0b000; // imm3 + let Inst{11-8} = 0b1111; // Rd + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } // shifted register def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi, opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, t2_so_reg:$rhs)]>; + [(opnode GPR:$lhs, t2_so_reg:$rhs)]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = 1; // The S bit. + let Inst{11-8} = 0b1111; // Rd + } } } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. -multiclass T2I_ld<string opc, PatFrag opnode> { +multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, ".w\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>; + [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 1; + let Inst{22-21} = opcod; + let Inst{20} = 1; // load + } def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, "\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>; + [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = 1; // load + let Inst{11} = 1; + // Offset: index==TRUE, wback==FALSE + let Inst{10} = 1; // The P bit. + let Inst{8} = 0; // The W bit.
+ } def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr, opc, ".w\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>; + [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = opcod; + let Inst{20} = 1; // load + let Inst{11-6} = 0b000000; + } def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = ?; // add = (U == '1') + let Inst{22-21} = opcod; + let Inst{20} = 1; // load + let Inst{19-16} = 0b1111; // Rn } } /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. -multiclass T2I_st<string opc, PatFrag opnode> { +multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei, opc, ".w\t$src, $addr", - [(opnode GPR:$src, t2addrmode_imm12:$addr)]>; + [(opnode GPR:$src, t2addrmode_imm12:$addr)]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0001; + let Inst{22-21} = opcod; + let Inst{20} = 0; // !load + } def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei, opc, "\t$src, $addr", - [(opnode GPR:$src, t2addrmode_imm8:$addr)]>; + [(opnode GPR:$src, t2addrmode_imm8:$addr)]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0000; + let Inst{22-21} = opcod; + let Inst{20} = 0; // !load + let Inst{11} = 1; + // Offset: index==TRUE, wback==FALSE + let Inst{10} = 1; // The P bit. + let Inst{8} = 0; // The W bit.
+ } def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer, opc, ".w\t$src, $addr", - [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>; + [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0000; + let Inst{22-21} = opcod; + let Inst{20} = 0; // !load + let Inst{11-6} = 0b000000; + } } /// T2I_picld - Defines the PIC load pattern. @@ -410,25 +641,55 @@ /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass T2I_unary_rrot<string opc, PatFrag opnode> { +multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]>; + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>; + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } } /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_bin_rrot<string opc, PatFrag opnode> { +multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>; + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]>; + (rotr GPR:$RHS, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } } //===----------------------------------------------------------------------===// @@ -442,33 +703,89 @@ // LEApcrel - Load a pc-relative address into a register without offending the // assembler.
def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #$label", []>; - + "adr$p.w\t$dst, #$label", []> { + let Inst{31-27} = 0b11110; + let Inst{25-24} = 0b10; + // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) + let Inst{22} = 0; + let Inst{20} = 0; + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #${label}_${id}", []>; + "adr$p.w\t$dst, #${label}_${id}", []> { + let Inst{31-27} = 0b11110; + let Inst{25-24} = 0b10; + // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) + let Inst{22} = 0; + let Inst{20} = 0; + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []>; + IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b1000; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "addw", "\t$dst, $sp, $imm", []>; + IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-21} = 0b0000; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} // ADD r, sp, so_reg def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []>; + IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b1000; + let Inst{20} = ?; // The S bit. 
+ let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} // SUB r, sp, {so_imm|i12} def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []>; + IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b1101; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "subw", "\t$dst, $sp, $imm", []>; + IIC_iALUi, "subw", "\t$dst, $sp, $imm", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-21} = 0b0101; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} // SUB r, sp, so_reg def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALUsi, - "sub", "\t$dst, $sp, $rhs", []>; - + "sub", "\t$dst, $sp, $rhs", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b1101; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1101; // Rn = sp + let Inst{15} = 0; +} // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. 
@@ -487,24 +804,26 @@ // Load let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in -defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; +defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension -defm t2LDRH : T2I_ld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; -defm t2LDRB : T2I_ld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; +defm t2LDRH : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; +defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension -defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; -defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; +defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; +defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), +def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), (ins t2addrmode_imm8s4:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; -def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), +def t2LDRDpci : T2Ii8s4; + "ldrd", "\t$dst1, $addr", []> { + let Inst{19-16} = 0b1111; // Rn +} } // zextload i1 -> zextload i8 @@ -549,57 +868,57 @@ // Indexed loads let mayLoad = 1 in { -def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>; -def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, 
(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; -def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; -def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; -def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; -def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), +def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), 
AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", @@ -607,53 +926,53 @@ } // Store -defm t2STR : T2I_st<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in -def t2STRDi8 : T2Ii8s4<(outs), +def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), IIC_iStorer, "strd", "\t$src1, $addr", []>; // Indexed stores -def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), +def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), +def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), +def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb), +def t2STRH_POST : T2Iidxldst<0, 
0b01, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb), +def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), +def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "strb", "\t$src, [$base], $offset", "$base = $base_wb", @@ -670,12 +989,26 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>; + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' + let Inst{22} = 0; + let Inst{21} = ?; // The W bit. + let Inst{20} = 1; // Load +} let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>; + IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' + let Inst{22} = 0; + let Inst{21} = ?; // The W bit. + let Inst{20} = 0; // Store +} //===----------------------------------------------------------------------===// // Move Instructions. 
@@ -683,24 +1016,51 @@ let neverHasSideEffects = 1 in def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov", ".w\t$dst, $src", []>; + "mov", ".w\t$dst, $src", []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; + let Inst{7-4} = 0b0000; +} // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, "mov", ".w\t$dst, $src", - [(set GPR:$dst, t2_so_imm:$src)]>; + [(set GPR:$dst, t2_so_imm:$src)]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b0010; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} let isReMaterializable = 1, isAsCheapAsAMove = 1 in def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, $src", - [(set GPR:$dst, imm0_65535:$src)]>; + [(set GPR:$dst, imm0_65535:$src)]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-21} = 0b0010; + let Inst{20} = 0; // The S bit. + let Inst{15} = 0; +} let Constraints = "$src = $dst" in def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, "movt", "\t$dst, $imm", [(set GPR:$dst, - (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-21} = 0b0110; + let Inst{20} = 0; // The S bit. 
+ let Inst{15} = 0; +} def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; @@ -710,12 +1070,14 @@ // Sign extenders -defm t2SXTB : T2I_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm t2SXTH : T2I_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; +defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", + UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm t2SXTH : T2I_unary_rrot<0b000, "sxth", + UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTAB : T2I_bin_rrot<"sxtab", +defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm t2SXTAH : T2I_bin_rrot<"sxtah", +defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; // TODO: SXT(A){B|H}16 @@ -723,18 +1085,21 @@ // Zero extenders let AddedComplexity = 16 in { -defm t2UXTB : T2I_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm t2UXTH : T2I_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; +defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", + UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm t2UXTH : T2I_unary_rrot<0b001, "uxth", + UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", + UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16r_rot GPR:$Src, 24)>; def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16r_rot GPR:$Src, 8)>; -defm t2UXTAB : T2I_bin_rrot<"uxtab", +defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm t2UXTAH : T2I_bin_rrot<"uxtah", +defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } @@ -742,19 +1107,27 @@ // Arithmetic Instructions. 
// -defm t2ADD : T2I_bin_ii12rs<"add", BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; -defm t2SUB : T2I_bin_ii12rs<"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; +defm t2ADD : T2I_bin_ii12rs<0b000, "add", + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; +defm t2SUB : T2I_bin_ii12rs<0b101, "sub", + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. -defm t2ADDS : T2I_bin_s_irs <"add", BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; -defm t2SUBS : T2I_bin_s_irs <"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; - -defm t2ADC : T2I_adde_sube_irs<"adc",BinOpFrag<(adde node:$LHS, node:$RHS)>,1>; -defm t2SBC : T2I_adde_sube_irs<"sbc",BinOpFrag<(sube node:$LHS, node:$RHS)>>; +defm t2ADDS : T2I_bin_s_irs <0b1000, "add", + BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; +defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", + BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", + BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; +defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; // RSB -defm t2RSB : T2I_rbin_is <"rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; -defm t2RSBS : T2I_rbin_s_is <"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; +defm t2RSB : T2I_rbin_is <0b1110, "rsb", + BinOpFrag<(sub node:$LHS, node:$RHS)>>; +defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", + BinOpFrag<(subc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. let AddedComplexity = 1 in @@ -770,54 +1143,103 @@ // Shift and rotate Instructions. 
// -defm t2LSL : T2I_sh_ir<"lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>; -defm t2LSR : T2I_sh_ir<"lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; -defm t2ASR : T2I_sh_ir<"asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; -defm t2ROR : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>; +defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; +defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; +defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, "rrx", "\t$dst, $src", - [(set GPR:$dst, (ARMrrx GPR:$src))]>; + [(set GPR:$dst, (ARMrrx GPR:$src))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = ?; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; + let Inst{7-4} = 0b0011; +} } let Defs = [CPSR] in { def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, "lsrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>; + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = 1; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{5-4} = 0b01; // Shift type. + // Shift amount = Inst{14-12:7-6} = 1. + let Inst{14-12} = 0b000; + let Inst{7-6} = 0b01; +} def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, "asrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]>; + [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = 1; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{5-4} = 0b10; // Shift type. + // Shift amount = Inst{14-12:7-6} = 1. 
+ let Inst{14-12} = 0b000; + let Inst{7-6} = 0b01; +} } //===----------------------------------------------------------------------===// // Bitwise Instructions. // -defm t2AND : T2I_bin_w_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; -defm t2ORR : T2I_bin_w_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; -defm t2EOR : T2I_bin_w_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; +defm t2AND : T2I_bin_w_irs<0b0000, "and", + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm t2ORR : T2I_bin_w_irs<0b0010, "orr", + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; +defm t2EOR : T2I_bin_w_irs<0b0100, "eor", + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; -defm t2BIC : T2I_bin_w_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm t2BIC : T2I_bin_w_irs<0b0001, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; let Constraints = "$src = $dst" in def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$dst, $imm", - [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>; + [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10110; + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []>; + IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10100; + let Inst{15} = 0; +} def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []>; + IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b11100; + let Inst{15} = 0; +} // FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) -defm t2ORN : T2I_bin_irs<"orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm t2ORN : T2I_bin_irs<0b0011, "orn", 
BinOpFrag<(or node:$LHS, + (not node:$RHS))>>; // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in -defm t2MVN : T2I_un_irs <"mvn", UnOpFrag<(not node:$Src)>, 1, 1>; +defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>; def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm), @@ -837,81 +1259,184 @@ let isCommutable = 1 in def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "mul", "\t$dst, $a, $b", - [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; + [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b000; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-4} = 0b0000; // Multiply +} def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "mla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; + [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b000; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0000; // Multiply +} def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "mls", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; + [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b000; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Multiply and Subtract +} // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - "smull", "\t$ldst, $hdst, $a, $b", []>; + "smull", "\t$ldst, $hdst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0111; + let Inst{22-20} = 0b000; + let Inst{7-4} = 0b0000; +} def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - 
"umull", "\t$ldst, $hdst, $a, $b", []>; + "umull", "\t$ldst, $hdst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0111; + let Inst{22-20} = 0b010; + let Inst{7-4} = 0b0000; } +} // isCommutable // Multiply + accumulate def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "smlal", "\t$ldst, $hdst, $a, $b", []>; + "smlal", "\t$ldst, $hdst, $a, $b", []>{ + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0111; + let Inst{22-20} = 0b100; + let Inst{7-4} = 0b0000; +} def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umlal", "\t$ldst, $hdst, $a, $b", []>; + "umlal", "\t$ldst, $hdst, $a, $b", []>{ + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0111; + let Inst{22-20} = 0b110; + let Inst{7-4} = 0b0000; +} def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umaal", "\t$ldst, $hdst, $a, $b", []>; + "umaal", "\t$ldst, $hdst, $a, $b", []>{ + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0111; + let Inst{22-20} = 0b110; + let Inst{7-4} = 0b0110; +} } // neverHasSideEffects // Most significant word multiply def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", - [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) +} def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) +} def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), 
IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b110; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) +} multiclass T2I_smul { def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16)))]>; + (sext_inreg GPR:$b, i16)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b00; + } def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16))))]>; + (sra GPR:$b, (i32 16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b01; + } def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16)))]>; + (sext_inreg GPR:$b, i16)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b10; + } def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16))))]>; + (sra GPR:$b, (i32 16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let 
Inst{5-4} = 0b11; + } def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16)))]>; + (sext_inreg GPR:$b, i16)), (i32 16)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b011; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b00; + } def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16)))]>; + (sra GPR:$b, (i32 16))), (i32 16)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b011; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b01; + } } @@ -920,32 +1445,74 @@ !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16))))]>; + (sext_inreg GPR:$b, i16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b00; + } def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]>; + (sra GPR:$b, (i32 16)))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b01; + } def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16))))]>; + (sext_inreg GPR:$b, i16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 
0b001; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b10; + } def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]>; + (sra GPR:$b, (i32 16)))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b001; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b11; + } def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]>; + (sext_inreg GPR:$b, i16)), (i32 16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b011; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b00; + } def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16))))]>; + (sra GPR:$b, (i32 16))), (i32 16))))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b011; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-6} = 0b00; + let Inst{5-4} = 0b01; + } } defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; @@ -959,24 +1526,33 @@ // Misc. Arithmetic Instructions. 
// -def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", "\t$dst, $src", - [(set GPR:$dst, (ctlz GPR:$src))]>; - -def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", ".w\t$dst, $src", - [(set GPR:$dst, (bswap GPR:$src))]>; +class T2I_misc op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : T2I { + let Inst{31-27} = 0b11111; + let Inst{26-22} = 0b01010; + let Inst{21-20} = op1; + let Inst{15-12} = 0b1111; + let Inst{7-6} = 0b10; + let Inst{5-4} = op2; +} + +def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>; -def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev16", ".w\t$dst, $src", +def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; + +def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "rev16", ".w\t$dst, $src", [(set GPR:$dst, (or (and (srl GPR:$src, (i32 8)), 0xFF), (or (and (shl GPR:$src, (i32 8)), 0xFF00), (or (and (srl GPR:$src, (i32 8)), 0xFF0000), (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; -def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "revsh", ".w\t$dst, $src", +def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "revsh", ".w\t$dst, $src", [(set GPR:$dst, (sext_inreg (or (srl (and GPR:$src, 0xFF00), (i32 8)), @@ -986,7 +1562,13 @@ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), - 0xFFFF0000)))]>; + 0xFFFF0000)))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-20} = 0b01100; + let Inst{5} = 0; // BT form + let Inst{4} = 0; +} // Alternate cases for PKHBT where identities eliminate some nodes. 
def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), @@ -998,7 +1580,13 @@ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>; + 0xFFFF)))]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-20} = 0b01100; + let Inst{5} = 1; // TB form + let Inst{4} = 0; +} // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. @@ -1012,15 +1600,15 @@ // Comparison Instructions... // -defm t2CMP : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -defm t2CMPz : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; - -defm t2CMN : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; -defm t2CMNz : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; +defm t2CMP : T2I_cmp_irs<0b1101, "cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", + BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; + +defm t2CMN : T2I_cmp_irs<0b1000, "cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; +defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", + BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; @@ -1028,10 +1616,10 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; -defm t2TST : T2I_cmp_is<"tst", - BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; -defm t2TEQ : T2I_cmp_is<"teq", - BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; +defm t2TST : T2I_cmp_irs<0b0000, "tst", + BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; +defm t2TEQ : T2I_cmp_irs<0b0100, "teq", + BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; // A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. // Short range conditional branch. Looks awesome for loops. 
Need to figure @@ -1044,25 +1632,54 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst">; + RegConstraint<"$false = $dst"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{14-12} = 0b000; + let Inst{7-4} = 0b0000; +} def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst">; - -def t2MOVCClsl : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCClsr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCCasr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; + RegConstraint<"$false = $dst"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b0010; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} + +class T2I_movcc_sh opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : T2I { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b0010; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{5-4} = opcod; // Shift type. 
+} +def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst), + (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst), + (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst), + (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), + (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -1075,7 +1692,9 @@ "dmb", "", [(ARMMemBarrierV7)]>, Requires<[IsThumb2]> { + let Inst{31-4} = 0xF3BF8F5; // FIXME: add support for options other than a full system DMB + let Inst{3-0} = 0b1111; } def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), @@ -1083,47 +1702,76 @@ "dsb", "", [(ARMSyncBarrierV7)]>, Requires<[IsThumb2]> { + let Inst{31-4} = 0xF3BF8F4; // FIXME: add support for options other than a full system DSB + let Inst{3-0} = 0b1111; +} } + +class T2I_ldrex opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, + InstrItinClass itin, string opc, string asm, string cstr, + list pattern, bits<4> rt2 = 0b1111> + : Thumb2I { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{11-8} = rt2; + let Inst{7-6} = 0b01; + let Inst{5-4} = opcod; + let Inst{3-0} = 0b1111; +} +class T2I_strex opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, + InstrItinClass itin, string opc, string asm, string cstr, + list pattern, bits<4> rt2 = 0b1111> + : Thumb2I { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001100; + let Inst{11-8} = rt2; + let Inst{7-6} = 0b01; + let Inst{5-4} = opcod; } let mayLoad = 1 in { -def 
t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, - Size4Bytes, NoItinerary, - "ldrexb", "\t$dest, [$ptr]", "", - []>; -def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, - Size4Bytes, NoItinerary, - "ldrexh", "\t$dest, [$ptr]", "", - []>; +def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]", + "", []>; +def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]", + "", []>; def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, - Size4Bytes, NoItinerary, - "ldrex", "\t$dest, [$ptr]", "", - []>; -def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), - AddrModeNone, Size4Bytes, NoItinerary, - "ldrexd", "\t$dest, $dest2, [$ptr]", "", - []>; + Size4Bytes, NoItinerary, + "ldrex", "\t$dest, [$ptr]", "", + []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000101; + let Inst{11-8} = 0b1111; + let Inst{7-0} = 0b00000000; // imm8 = 0 +} +def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", "", + [], {?, ?, ?, ?}>; } let mayStore = 1 in { -def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), - AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$success, $src, [$ptr]", "", - []>; -def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), - AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$success, $src, [$ptr]", "", - []>; +def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$success, $src, [$ptr]", "", []>; +def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$success, $src, [$ptr]", "", []>; def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, 
GPR:$ptr), - AddrModeNone, Size4Bytes, NoItinerary, - "strex", "\t$success, $src, [$ptr]", "", - []>; -def t2STREXD : Thumb2I<(outs GPR:$success), - (ins GPR:$src, GPR:$src2, GPR:$ptr), - AddrModeNone, Size4Bytes, NoItinerary, - "strexd", "\t$success, $src, $src2, [$ptr]", "", - []>; + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$success, $src, [$ptr]", "", + []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000100; + let Inst{7-0} = 0b00000000; // imm8 = 0 +} +def t2STREXD : T2I_strex<0b11, (outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", "", [], + {?, ?, ?, ?}>; } //===----------------------------------------------------------------------===// @@ -1135,7 +1783,11 @@ Defs = [R0, R12, LR, CPSR] in { def t2TPsoft : T2XI<(outs), (ins), IIC_Br, "bl\t__aeabi_read_tp", - [(set R0, ARMthread_pointer)]>; + [(set R0, ARMthread_pointer)]> { + let Inst{31-27} = 0b11110; + let Inst{15-14} = 0b11; + let Inst{12} = 1; + } } //===----------------------------------------------------------------------===// @@ -1183,31 +1835,61 @@ def t2LDM_RET : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), IIC_Br, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", - []>; + []> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' + let Inst{22} = 0; + let Inst{21} = ?; // The W bit. 
+ let Inst{20} = 1; // Load +} let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br, "b.w\t$target", - [(br bb:$target)]>; + [(br bb:$target)]> { + let Inst{31-27} = 0b11110; + let Inst{15-14} = 0b10; + let Inst{12} = 1; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : T2JTI<(outs), (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target\n$jt", - [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0100100; + let Inst{19-16} = 0b1111; + let Inst{14-12} = 0b000; + let Inst{11-8} = 0b1111; // Rd = pc + let Inst{7-4} = 0b0000; +} // FIXME: Add a non-pc based case that can be predicated. def t2TBB : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb\t$index\n$jt", []>; + IIC_Br, "tbb\t$index\n$jt", []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0000; // B form +} def t2TBH : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh\t$index\n$jt", []>; + IIC_Br, "tbh\t$index\n$jt", []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0001; // H form +} } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier @@ -1217,13 +1899,20 @@ let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, "b", ".w\t$target", - [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + [/*(ARMbrcond bb:$target, imm:$cc)*/]> { + let Inst{31-27} = 0b11110; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} // IT block def t2IT : Thumb2XI<(outs), (ins 
it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, - "it$mask\t$cc", "", []>; + "it$mask\t$cc", "", []> { + // 16-bit instruction. + let Inst{15-8} = 0b10111111; +} //===----------------------------------------------------------------------===// // Non-Instruction Patterns From gohman at apple.com Tue Dec 15 12:30:57 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 15 Dec 2009 10:30:57 -0800 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> References: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> Message-ID: On Dec 14, 2009, at 4:53 PM, Evan Cheng wrote: > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) > +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 > @@ -1333,6 +1333,15 @@ > X86_COND_NO, EFLAGS))]>, TB; > } // isTwoAddress > > +// Use sbb to materialize carry flag into a GPR. > +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in > +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), > + "sbb{q}\t$dst, $dst", > + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; An sbbq will set all 64 bits. Why does the pattern here have a zext in it? 
Dan From gohman at apple.com Tue Dec 15 12:33:22 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 15 Dec 2009 10:33:22 -0800 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> References: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> Message-ID: On Dec 14, 2009, at 4:53 PM, Evan Cheng wrote: > > --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) > +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 > @@ -1333,6 +1333,15 @@ > X86_COND_NO, EFLAGS))]>, TB; > } // isTwoAddress > > +// Use sbb to materialize carry flag into a GPR. > +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in > +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), > + "sbb{q}\t$dst, $dst", > + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; An sbbq will set all 64 bits. Why does the pattern here have a zext in it? Dan From rjmccall at apple.com Tue Dec 15 12:34:45 2009 From: rjmccall at apple.com (John McCall) Date: Tue, 15 Dec 2009 10:34:45 -0800 Subject: [llvm-commits] [llvm] r91397 - /llvm/trunk/lib/Analysis/ProfileInfo.cpp In-Reply-To: <9367CAC4-5D67-4A50-A4F2-C4BE12F88A4C@apple.com> References: <200912150235.nBF2ZPTE029607@zion.cs.uiuc.edu> <9367CAC4-5D67-4A50-A4F2-C4BE12F88A4C@apple.com> Message-ID: <7585234D-3D83-4288-B718-8B8092FD6C7E@apple.com> On Dec 14, 2009, at 9:46 PM, Chris Lattner wrote: > > On Dec 14, 2009, at 6:35 PM, John McCall wrote: > >> Author: rjmccall >> Date: Mon Dec 14 20:35:24 2009 >> New Revision: 91397 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91397&view=rev >> Log: >> You can't use typedefs to declare template member specializations, and >> clang enforces it. > > Does clang produce a good diagnostic (with a fixit hint) for this error? 
We can't do the fixit hint yet because we don't track locations well enough in nested name specifiers. This also means the caret is not necessarily helpful. The diagnostic is error: cannot use typedef 'ProfileInfo' (a.k.a. 'ProfileInfoT< llvm:: Function,llvm::BasicBlock>') in scope specifier for out-of-line declaration which conveys the idea well enough, but is not exactly my finest diagnostic ever. John. From evan.cheng at apple.com Tue Dec 15 12:36:24 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 10:36:24 -0800 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: References: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> Message-ID: <0838237D-D73A-4A62-B10D-A86012B247BD@apple.com> On Dec 15, 2009, at 10:30 AM, Dan Gohman wrote: > > On Dec 14, 2009, at 4:53 PM, Evan Cheng wrote: >> >> ============================================================================== >> --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) >> +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 >> @@ -1333,6 +1333,15 @@ >> X86_COND_NO, EFLAGS))]>, TB; >> } // isTwoAddress >> >> +// Use sbb to materialize carry flag into a GPR. >> +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in >> +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), >> + "sbb{q}\t$dst, $dst", >> + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; > > An sbbq will set all 64 bits. Why does the pattern here have a zext in it? X86setcc_c, like X86setcc, returns an i8 value. Yes, the semantics of it is all zeros or all ones. That's why X86ISelLowering will AND the result with 1.
Evan > > Dan > From clattner at apple.com Tue Dec 15 12:36:54 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 15 Dec 2009 10:36:54 -0800 Subject: [llvm-commits] [llvm] r91397 - /llvm/trunk/lib/Analysis/ProfileInfo.cpp In-Reply-To: <7585234D-3D83-4288-B718-8B8092FD6C7E@apple.com> References: <200912150235.nBF2ZPTE029607@zion.cs.uiuc.edu> <9367CAC4-5D67-4A50-A4F2-C4BE12F88A4C@apple.com> <7585234D-3D83-4288-B718-8B8092FD6C7E@apple.com> Message-ID: <9B8F37B7-70AB-4E82-9E5A-ED45ADD64979@apple.com> On Dec 15, 2009, at 10:34 AM, John McCall wrote: >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91397&view=rev >>> Log: >>> You can't use typedefs to declare template member specializations, >>> and >>> clang enforces it. >> >> Does clang produce a good diagnostic (with a fixit hint) for this >> error? > > We can't do the fixit hint yet because we don't track locations well > enough in nested name specifiers. This also means the caret is not > necessarily helpful. > > The diagnostic is > error: cannot use typedef 'ProfileInfo' (a.k.a. 'ProfileInfoT< > llvm:: Function,llvm::BasicBlock>') in scope specifier for out-of- > line declaration > which conveys the idea well enough, but is not exactly my finest > diagnostic ever. Ok, please file a bugzilla about this. This is something that new clang++ users are likely to hit if they've been using GCC, so it would be very nice to give a really pretty diagnostic to explain what is wrong. 
Thanks John, -Chris From gohman at apple.com Tue Dec 15 12:39:18 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 15 Dec 2009 10:39:18 -0800 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <0838237D-D73A-4A62-B10D-A86012B247BD@apple.com> References: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> <0838237D-D73A-4A62-B10D-A86012B247BD@apple.com> Message-ID: <90B7DF90-ACF4-499E-BB73-69AEFDAB14FC@apple.com> On Dec 15, 2009, at 10:36 AM, Evan Cheng wrote: > > On Dec 15, 2009, at 10:30 AM, Dan Gohman wrote: > >> >> On Dec 14, 2009, at 4:53 PM, Evan Cheng wrote: >>> >>> ============================================================================== >>> --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) >>> +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 >>> @@ -1333,6 +1333,15 @@ >>> X86_COND_NO, EFLAGS))]>, TB; >>> } // isTwoAddress >>> >>> +// Use sbb to materialize carry flag into a GPR. >>> +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in >>> +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), >>> + "sbb{q}\t$dst, $dst", >>> + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; >> >> An sbbq will set all 64 bits. Why does the pattern here have a zext in it? > > X86setcc_c like X86setcc returns a i8 value. Yes, the semantics of is all zero or all one's. That's why x86isellowering will and the result with 1. Whatever X86setcc_c is, the current pattern doesn't describe the instruction. The pattern says the instruction will always set the most significant bits of the output to zero, and that's not what the instruction really does. 
Dan From dalej at apple.com Tue Dec 15 12:43:49 2009 From: dalej at apple.com (Dale Johannesen) Date: Tue, 15 Dec 2009 10:43:49 -0800 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: <200912151255.29701.ken@linux.vnet.ibm.com> References: <200912151255.29701.ken@linux.vnet.ibm.com> Message-ID: <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> On Dec 15, 2009, at 3:55 AM PST, Ken Werner wrote: > Hi, > The attached patch adds support for indirect calls (through function > pointer) > according to the ABI (http://refspecs.linuxfoundation.org/ELF/ppc64/PPC- > elf64abi-1.9.html#FUNC-CALLS). The patch was made against revision > 91275. > -ken -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-fptr.patch Type: text/x-patch Size: 8130 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091215/022f9da0/attachment.bin -------------- next part -------------- > _______________________________________________ I can't speak for SVR4 ppc64, but I'm confident these 3 patches won't break any other target. It's preferable to get isPPC64 from PPCSubTarget, as elsewhere (inconsistently). Here that seems to mean an extra parameter. Why did you add a Chain to PPCISD::NOP? Do you have write access?
From evan.cheng at apple.com Tue Dec 15 13:05:40 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 15 Dec 2009 11:05:40 -0800 Subject: [llvm-commits] [llvm] r91381 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <90B7DF90-ACF4-499E-BB73-69AEFDAB14FC@apple.com> References: <200912150053.nBF0rhfK025969@zion.cs.uiuc.edu> <0838237D-D73A-4A62-B10D-A86012B247BD@apple.com> <90B7DF90-ACF4-499E-BB73-69AEFDAB14FC@apple.com> Message-ID: <0B8D3D2F-9D9B-41A1-B102-68EA9727F767@apple.com> On Dec 15, 2009, at 10:39 AM, Dan Gohman wrote: > > On Dec 15, 2009, at 10:36 AM, Evan Cheng wrote: > >> >> On Dec 15, 2009, at 10:30 AM, Dan Gohman wrote: >> >>> >>> On Dec 14, 2009, at 4:53 PM, Evan Cheng wrote: >>>> >>>> ============================================================================== >>>> --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) >>>> +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Mon Dec 14 18:53:42 2009 >>>> @@ -1333,6 +1333,15 @@ >>>> X86_COND_NO, EFLAGS))]>, TB; >>>> } // isTwoAddress >>>> >>>> +// Use sbb to materialize carry flag into a GPR. >>>> +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in >>>> +def SETB_C64r : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins), >>>> + "sbb{q}\t$dst, $dst", >>>> + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; >>> >>> An sbbq will set all 64 bits. Why does the pattern here have a zext in it? >> >> X86setcc_c like X86setcc returns a i8 value. Yes, the semantics of is all zero or all one's. That's why x86isellowering will and the result with 1. > > Whatever X86setcc_c is, the current pattern doesn't describe the instruction. > The pattern says the instruction will always set the most significant bits of > the output to zero, and that's not what the instruction really does. You're right. I am going to have to re-implement this. 
Evan > > Dan > From sabre at nondot.org Tue Dec 15 13:14:40 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 19:14:40 -0000 Subject: [llvm-commits] [llvm] r91438 - in /llvm/trunk: lib/Transforms/Scalar/SimplifyLibCalls.cpp test/Transforms/SimplifyLibCalls/StrStr.ll Message-ID: <200912151914.nBFJEeJp015299@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 13:14:40 2009 New Revision: 91438 URL: http://llvm.org/viewvc/llvm-project?rev=91438&view=rev Log: optimize strstr, PR5783 Added: llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=91438&r1=91437&r2=91438&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Dec 15 13:14:40 2009 @@ -76,6 +76,11 @@ /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); + /// EmitStrChr - Emit a call to the strchr function to the builder, for the + /// specified pointer and character. Ptr is required to be some pointer type, + /// and the return value has 'i8*' type. + Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -151,6 +156,26 @@ return CI; } +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. 
+Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + const Type *I8Ptr = Type::getInt8PtrTy(*Context); + const Type *I32Ty = Type::getInt32Ty(*Context); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -890,7 +915,7 @@ }; //===---------------------------------------===// -// 'strto*' Optimizations +// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -910,6 +935,52 @@ } }; +//===---------------------------------------===// +// 'strstr' Optimizations + +struct StrStrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !isa(FT->getParamType(0)) || + !isa(FT->getParamType(1)) || + !isa(FT->getReturnType())) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getOperand(1) == CI->getOperand(2)) + return CI->getOperand(1); + + // See if either input string is a constant string. + std::string SearchStr, ToFindStr; + bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + + // fold strstr(x, "") -> x. 
+ if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getOperand(1), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == std::string::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 2) + Value *Result = CastToCStr(CI->getOperand(1), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + CI->getType()); + return 0; + } +}; + //===---------------------------------------===// // 'memcmp' Optimizations @@ -1675,8 +1746,8 @@ // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; - StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; - MemSetOpt MemSet; + StrToOpt StrTo; StrStrOpt StrStr; + MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; // Integer Optimizations @@ -1738,6 +1809,7 @@ Optimizations["strtoll"] = &StrTo; Optimizations["strtold"] = &StrTo; Optimizations["strtoull"] = &StrTo; + Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; @@ -2644,12 +2716,6 @@ // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // -// strstr: (PR5783) -// * strstr(x,x) -> x -// * strstr(x, "") -> x -// * strstr(x, "a") -> strchr(x, 'a') -// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) -// // tan, tanf, tanl: // * tan(atan(x)) -> x // Added: 
llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll?rev=91438&view=auto ============================================================================== --- llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll (added) +++ llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Tue Dec 15 13:14:40 2009 @@ -0,0 +1,48 @@ +; RUN: opt < %s -simplify-libcalls -S | FileCheck %s +; PR5783 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin9.0" + + at .str = private constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] + at .str1 = private constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1] + at .str2 = private constant [6 x i8] c"abcde\00" ; <[6 x i8]*> [#uses=1] + at .str3 = private constant [4 x i8] c"bcd\00" ; <[4 x i8]*> [#uses=1] + +define i8* @test1(i8* %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, "") -> P +; CHECK: @test1 +; CHECK: ret i8* %P +} + +declare i8* @strstr(i8*, i8* nocapture) nounwind readonly + +define i8* @test2(i8* %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, "a") -> strchr(P, 'a') +; CHECK: @test2 +; CHECK: @strchr(i8* %P, i32 97) +} + +define i8* @test3(i8* nocapture %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* getelementptr inbounds ([6 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr("abcde", "bcd") -> "abcde"+1 +; CHECK: @test3 +; CHECK: getelementptr inbounds ([6 x i8]* @.str2, i32 0, i64 1) +} + +define i8* @test4(i8* %P) nounwind 
readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* %P) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, P) -> P +; CHECK: @test4 +; CHECK: ret i8* %P +} From dpatel at apple.com Tue Dec 15 13:16:48 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 15 Dec 2009 19:16:48 -0000 Subject: [llvm-commits] [llvm] r91440 - in /llvm/trunk: include/llvm/Analysis/DebugInfo.h lib/Analysis/DebugInfo.cpp lib/CodeGen/AsmPrinter/DIE.h lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h Message-ID: <200912151916.nBFJGnXo015403@zion.cs.uiuc.edu> Author: dpatel Date: Tue Dec 15 13:16:48 2009 New Revision: 91440 URL: http://llvm.org/viewvc/llvm-project?rev=91440&view=rev Log: Add support to emit debug info for C++ namespaces. Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h llvm/trunk/lib/Analysis/DebugInfo.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/DebugInfo.h?rev=91440&r1=91439&r2=91440&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/DebugInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/DebugInfo.h Tue Dec 15 13:16:48 2009 @@ -99,6 +99,7 @@ bool isGlobalVariable() const; bool isScope() const; bool isCompileUnit() const; + bool isNameSpace() const; bool isLexicalBlock() const; bool isSubrange() const; bool isEnumerator() const; @@ -218,7 +219,7 @@ virtual ~DIType() {} DIDescriptor getContext() const { return getDescriptorField(1); } - StringRef getName() const { return getStringField(2); } + StringRef getName() const { return getStringField(2); } DICompileUnit getCompileUnit() const{ return getFieldAs(3); } unsigned getLineNumber() const { return getUnsignedField(4); } uint64_t getSizeInBits() const { return getUInt64Field(5); } @@ 
-470,6 +471,22 @@ StringRef getFilename() const { return getContext().getFilename(); } }; + /// DINameSpace - A wrapper for a C++ style name space. + class DINameSpace : public DIScope { + public: + explicit DINameSpace(MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isNameSpace()) + DbgNode = 0; + } + + DIScope getContext() const { return getFieldAs(1); } + StringRef getName() const { return getStringField(2); } + StringRef getDirectory() const { return getContext().getDirectory(); } + StringRef getFilename() const { return getContext().getFilename(); } + DICompileUnit getCompileUnit() const { return getFieldAs(3); } + unsigned getLineNumber() const { return getUnsignedField(4); } + }; + /// DILocation - This object holds location information. This object /// is not associated with any DWARF tag. class DILocation : public DIDescriptor { @@ -624,6 +641,11 @@ /// with the specified parent context. DILexicalBlock CreateLexicalBlock(DIDescriptor Context); + /// CreateNameSpace - This creates new descriptor for a namespace + /// with the specified parent context. + DINameSpace CreateNameSpace(DIDescriptor Context, StringRef Name, + DICompileUnit CU, unsigned LineNo); + /// CreateLocation - Creates a debug info location. 
DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, DIScope S, DILocation OrigLoc); Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=91440&r1=91439&r2=91440&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Tue Dec 15 13:16:48 2009 @@ -227,6 +227,7 @@ case dwarf::DW_TAG_compile_unit: case dwarf::DW_TAG_lexical_block: case dwarf::DW_TAG_subprogram: + case dwarf::DW_TAG_namespace: return true; default: break; @@ -242,6 +243,14 @@ return Tag == dwarf::DW_TAG_compile_unit; } +/// isNameSpace - Return true if the specified tag is DW_TAG_namespace. +bool DIDescriptor::isNameSpace() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_namespace; +} + /// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. bool DIDescriptor::isLexicalBlock() const { assert (!isNull() && "Invalid descriptor!"); @@ -438,6 +447,8 @@ return DISubprogram(DbgNode).getFilename(); else if (isCompileUnit()) return DICompileUnit(DbgNode).getFilename(); + else if (isNameSpace()) + return DINameSpace(DbgNode).getFilename(); else assert (0 && "Invalid DIScope!"); return StringRef(); @@ -450,6 +461,8 @@ return DISubprogram(DbgNode).getDirectory(); else if (isCompileUnit()) return DICompileUnit(DbgNode).getDirectory(); + else if (isNameSpace()) + return DINameSpace(DbgNode).getDirectory(); else assert (0 && "Invalid DIScope!"); return StringRef(); @@ -996,6 +1009,21 @@ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2)); } +/// CreateNameSpace - This creates new descriptor for a namespace +/// with the specified parent context. 
+DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name, + DICompileUnit CompileUnit, + unsigned LineNo) { + Value *Elts[] = { + GetTagConstant(dwarf::DW_TAG_namespace), + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) + }; + return DINameSpace(MDNode::get(VMContext, &Elts[0], 5)); +} + /// CreateLocation - Creates a debug info location. DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, DIScope S, DILocation OrigLoc) { Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h?rev=91440&r1=91439&r2=91440&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DIE.h Tue Dec 15 13:16:48 2009 @@ -68,6 +68,7 @@ /// Data - Raw data bytes for abbreviation. /// SmallVector Data; + public: DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} virtual ~DIEAbbrev() {} @@ -131,19 +132,18 @@ /// std::vector Children; + DIE *Parent; + /// Attributes values. /// SmallVector Values; - /// Abstract compile unit. - CompileUnit *AbstractCU; - // Private data for print() mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), - Size(0), IndentCount(0) {} + Size(0), Parent (0), IndentCount(0) {} virtual ~DIE(); // Accessors. 
@@ -154,13 +154,12 @@ unsigned getSize() const { return Size; } const std::vector &getChildren() const { return Children; } SmallVector &getValues() { return Values; } - CompileUnit *getAbstractCompileUnit() const { return AbstractCU; } - + DIE *getParent() const { return Parent; } void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } - void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; } - + void setParent(DIE *P) { Parent = P; } + /// addValue - Add a value and attributes to a DIE. /// void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { @@ -179,8 +178,13 @@ /// addChild - Add a child to the DIE. /// void addChild(DIE *Child) { + if (Child->getParent()) { + assert (Child->getParent() == this && "Unexpected DIE Parent!"); + return; + } Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); Children.push_back(Child); + Child->setParent(this); } #ifndef NDEBUG Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=91440&r1=91439&r2=91440&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Tue Dec 15 13:16:48 2009 @@ -112,7 +112,12 @@ /// getDIEEntry - Returns the debug information entry for the speciefied /// debug variable. - DIEEntry *getDIEEntry(MDNode *N) { return GVToDIEEntryMap.lookup(N); } + DIEEntry *getDIEEntry(MDNode *N) { + ValueMap::iterator I = GVToDIEEntryMap.find(N); + if (I == GVToDIEEntryMap.end()) + return NULL; + return I->second; + } /// insertDIEEntry - Insert debug information entry into the map. void insertDIEEntry(MDNode *N, DIEEntry *E) { @@ -446,6 +451,23 @@ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } +/// addSourceLine - Add location information to specified debug information +/// entry. 
+void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) { + // If there is no compile unit specified, don't add a line #. + if (NS->getCompileUnit().isNull()) + return; + + unsigned Line = NS->getLineNumber(); + StringRef FN = NS->getFilename(); + StringRef Dir = NS->getDirectory(); + + unsigned FileID = GetOrCreateSourceID(Dir, FN); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -745,6 +767,9 @@ else if (Context.isType()) { DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode())); ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode())); + ContextDIE->addChild(Die); } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) ContextDIE->addChild(Die); else @@ -781,7 +806,6 @@ // Check for pre-existence. DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode()); - // If it exists then use the existing value. if (Entry) { Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); @@ -1030,13 +1054,6 @@ addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(GVDie, &GV); - // Add address. - DIEBlock *Block = new DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addObjectLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getMangledName(GV.getGlobal())); - addBlock(GVDie, dwarf::DW_AT_location, 0, Block); - return GVDie; } @@ -1285,7 +1302,6 @@ SPDie = new DIE(dwarf::DW_TAG_subprogram); addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, SPDeclDie); - ModuleCU->addDie(SPDie); } @@ -1559,6 +1575,20 @@ return SrcId; } +/// getOrCreateNameSpace - Create a DIE for DINameSpace. 
+DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { + DIE *NDie = ModuleCU->getDIE(NS.getNode()); + if (NDie) + return NDie; + NDie = new DIE(dwarf::DW_TAG_namespace); + ModuleCU->insertDIE(NS.getNode(), NDie); + if (!NS.getName().empty()) + addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); + addSourceLine(NDie, &NS); + addToContextOwner(NDie, NS.getContext()); + return NDie; +} + CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); @@ -1620,6 +1650,25 @@ ModuleCU->insertDIE(N, VariableDie); // Add to context owner. + if (DI_GV.isDefinition() + && !DI_GV.getContext().isCompileUnit()) { + // Create specification DIE. + DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, VariableDie); + DIEBlock *Block = new DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(DI_GV.getGlobal())); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + ModuleCU->addDie(VariableSpecDIE); + } else { + DIEBlock *Block = new DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(DI_GV.getGlobal())); + addBlock(VariableDie, dwarf::DW_AT_location, 0, Block); + } addToContextOwner(VariableDie, DI_GV.getContext()); // Expose as global. FIXME - need to check external flag. @@ -1652,9 +1701,7 @@ ModuleCU->insertDIE(N, SubprogramDie); // Add to context owner. - if (SP.getContext().getNode() == SP.getCompileUnit().getNode()) - if (TopLevelDIEs.insert(SubprogramDie)) - TopLevelDIEsVector.push_back(SubprogramDie); + addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. 
ModuleCU->addGlobal(SP.getName(), SubprogramDie); @@ -2365,7 +2412,6 @@ EmitLabel("info_end", ModuleCU->getID()); Asm->EOL(); - } /// emitAbbreviations - Emit the abbreviation section. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=91440&r1=91439&r2=91440&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Tue Dec 15 13:16:48 2009 @@ -285,6 +285,7 @@ void addSourceLine(DIE *Die, const DIGlobal *G); void addSourceLine(DIE *Die, const DISubprogram *SP); void addSourceLine(DIE *Die, const DIType *Ty); + void addSourceLine(DIE *Die, const DINameSpace *NS); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -315,6 +316,10 @@ /// addType - Add a new type attribute to the specified entity. void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *getOrCreateTypeDIE(DIType Ty); From dpatel at apple.com Tue Dec 15 13:17:30 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 15 Dec 2009 19:17:30 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91441 - in /llvm-gcc-4.2/trunk/gcc: llvm-debug.cpp llvm-debug.h Message-ID: <200912151917.nBFJHUR6015432@zion.cs.uiuc.edu> Author: dpatel Date: Tue Dec 15 13:17:30 2009 New Revision: 91441 URL: http://llvm.org/viewvc/llvm-project?rev=91441&view=rev Log: Emit debug info for C++ namespaces. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp llvm-gcc-4.2/trunk/gcc/llvm-debug.h Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=91441&r1=91440&r2=91441&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Tue Dec 15 13:17:30 2009 @@ -263,10 +263,20 @@ Virtuality = dwarf::DW_VIRTUALITY_virtual; ContainingType = getOrCreateType(DECL_CONTEXT (FnDecl)); } + bool UseModuleContext = false; + // If this artificial function has abstract origin then put this function + // at module scope. The abstract copy will be placed in appropriate region. + if (DECL_ABSTRACT_ORIGIN (FnDecl) != FnDecl + && DECL_ARTIFICIAL(FnDecl)) + UseModuleContext = true; + + const char *FnName = lang_hooks.dwarf_name(FnDecl, 0); DISubprogram SP = - DebugFactory.CreateSubprogram(findRegion(DECL_CONTEXT(FnDecl)), - lang_hooks.dwarf_name(FnDecl, 0), - lang_hooks.dwarf_name(FnDecl, 0), + DebugFactory.CreateSubprogram((UseModuleContext ? + getOrCreateCompileUnit(main_input_filename) : + findRegion(DECL_CONTEXT(FnDecl))), + (UseModuleContext ? FnName : StringRef()), + (UseModuleContext ? FnName : StringRef()), LinkageName, getOrCreateCompileUnit(Loc.file), lineno, FNType, @@ -282,7 +292,23 @@ RegionMap[FnDecl] = WeakVH(SP.getNode()); } - /// findRegion - Find tree_node N's region. +/// getOrCreateNameSpace - Get name space descriptor for the tree node. 
+DINameSpace DebugInfo::getOrCreateNameSpace(tree Node, DIDescriptor Context) { + std::map::iterator I = + NameSpaceCache.find(Node); + if (I != NameSpaceCache.end()) + return DINameSpace(cast(I->second)); + + expanded_location Loc = GetNodeLocation(Node, false); + DINameSpace DNS = + DebugFactory.CreateNameSpace(Context, GetNodeName(Node), + getOrCreateCompileUnit(Loc.file), Loc.line); + + NameSpaceCache[Node] = WeakVH(DNS.getNode()); + return DNS; +} + +/// findRegion - Find tree_node N's region. DIDescriptor DebugInfo::findRegion(tree Node) { if (Node == NULL_TREE) return getOrCreateCompileUnit(main_input_filename); @@ -295,8 +321,14 @@ if (TYPE_P (Node)) { DIType Ty = getOrCreateType(Node); return DIDescriptor(Ty.getNode()); - } else if (DECL_P (Node)) + } else if (DECL_P (Node)) { + if (TREE_CODE (Node) == NAMESPACE_DECL) { + DIDescriptor NSContext = findRegion(DECL_CONTEXT(Node)); + DINameSpace NS = getOrCreateNameSpace(Node, NSContext); + return DIDescriptor(NS.getNode()); + } return findRegion (DECL_CONTEXT (Node)); + } // Otherwise main compile unit covers everything. return getOrCreateCompileUnit(main_input_filename); Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.h?rev=91441&r1=91440&r2=91441&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.h Tue Dec 15 13:17:30 2009 @@ -68,6 +68,9 @@ std::map SPCache; // Cache of previously constructed // Subprograms. + std::map NameSpaceCache; + // Cache of previously constructed name + // spaces. SmallVector RegionStack; // Stack to track declarative scopes. @@ -136,6 +139,9 @@ /// findRegion - Find tree_node N's region. DIDescriptor findRegion(tree_node *n); + + /// getOrCreateNameSpace - Get name space descriptor for the tree node. 
+ DINameSpace getOrCreateNameSpace(tree_node *Node, DIDescriptor Context); }; } // end namespace llvm From grosbach at apple.com Tue Dec 15 13:28:13 2009 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 15 Dec 2009 19:28:13 -0000 Subject: [llvm-commits] [llvm] r91442 - /llvm/trunk/utils/TableGen/CodeGenInstruction.cpp Message-ID: <200912151928.nBFJSDqN015936@zion.cs.uiuc.edu> Author: grosbach Date: Tue Dec 15 13:28:13 2009 New Revision: 91442 URL: http://llvm.org/viewvc/llvm-project?rev=91442&view=rev Log: whitespace Modified: llvm/trunk/utils/TableGen/CodeGenInstruction.cpp Modified: llvm/trunk/utils/TableGen/CodeGenInstruction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenInstruction.cpp?rev=91442&r1=91441&r2=91442&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenInstruction.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenInstruction.cpp Tue Dec 15 13:28:13 2009 @@ -23,31 +23,31 @@ assert(pos != std::string::npos && "Unrecognized constraint"); std::string::size_type start = CStr.find_first_not_of(" \t"); std::string Name = CStr.substr(start, pos - start); - + // TIED_TO: $src1 = $dst std::string::size_type wpos = Name.find_first_of(" \t"); if (wpos == std::string::npos) throw "Illegal format for tied-to constraint: '" + CStr + "'"; std::string DestOpName = Name.substr(0, wpos); std::pair DestOp = I->ParseOperandName(DestOpName, false); - + Name = CStr.substr(pos+1); wpos = Name.find_first_not_of(" \t"); if (wpos == std::string::npos) throw "Illegal format for tied-to constraint: '" + CStr + "'"; - + std::pair SrcOp = I->ParseOperandName(Name.substr(wpos), false); if (SrcOp > DestOp) throw "Illegal tied-to operand constraint '" + CStr + "'"; - - + + unsigned FlatOpNo = I->getFlattenedOperandNumber(SrcOp); // Build the string for the operand. 
std::string OpConstraint = "((" + utostr(FlatOpNo) + " << 16) | (1 << TOI::TIED_TO))"; - - + + if (!I->OperandList[DestOp.first].Constraints[DestOp.second].empty()) throw "Operand '" + DestOpName + "' cannot have multiple constraints!"; I->OperandList[DestOp.first].Constraints[DestOp.second] = OpConstraint; @@ -56,20 +56,20 @@ static void ParseConstraints(const std::string &CStr, CodeGenInstruction *I) { // Make sure the constraints list for each operand is large enough to hold // constraint info, even if none is present. - for (unsigned i = 0, e = I->OperandList.size(); i != e; ++i) + for (unsigned i = 0, e = I->OperandList.size(); i != e; ++i) I->OperandList[i].Constraints.resize(I->OperandList[i].MINumOperands); - + if (CStr.empty()) return; - + const std::string delims(","); std::string::size_type bidx, eidx; - + bidx = CStr.find_first_not_of(delims); while (bidx != std::string::npos) { eidx = CStr.find_first_of(delims, bidx); if (eidx == std::string::npos) eidx = CStr.length(); - + ParseConstraint(CStr.substr(bidx, eidx - bidx), I); bidx = CStr.find_first_not_of(delims, eidx); } @@ -145,7 +145,7 @@ if (Rec->isSubClassOf("Operand")) { PrintMethod = Rec->getValueAsString("PrintMethod"); MIOpInfo = Rec->getValueAsDag("MIOperandInfo"); - + // Verify that MIOpInfo has an 'ops' root value. 
if (!dynamic_cast(MIOpInfo->getOperator()) || dynamic_cast(MIOpInfo->getOperator()) @@ -165,7 +165,7 @@ } else if (Rec->getName() == "variable_ops") { isVariadic = true; continue; - } else if (!Rec->isSubClassOf("RegisterClass") && + } else if (!Rec->isSubClassOf("RegisterClass") && Rec->getName() != "ptr_rc" && Rec->getName() != "unknown") throw "Unknown operand class '" + Rec->getName() + "' in '" + R->getName() + "' instruction!"; @@ -177,15 +177,15 @@ if (!OperandNames.insert(DI->getArgName(i)).second) throw "In instruction '" + R->getName() + "', operand #" + utostr(i) + " has the same name as a previous operand!"; - - OperandList.push_back(OperandInfo(Rec, DI->getArgName(i), PrintMethod, + + OperandList.push_back(OperandInfo(Rec, DI->getArgName(i), PrintMethod, MIOperandNo, NumOps, MIOpInfo)); MIOperandNo += NumOps; } // Parse Constraints. ParseConstraints(R->getValueAsString("Constraints"), this); - + // For backward compatibility: isTwoAddress means operand 1 is tied to // operand 0. if (isTwoAddress) { @@ -194,13 +194,13 @@ "already has constraint set!"; OperandList[1].Constraints[0] = "((0 << 16) | (1 << TOI::TIED_TO))"; } - + // Any operands with unset constraints get 0 as their constraint. for (unsigned op = 0, e = OperandList.size(); op != e; ++op) for (unsigned j = 0, e = OperandList[op].MINumOperands; j != e; ++j) if (OperandList[op].Constraints[j].empty()) OperandList[op].Constraints[j] = "0"; - + // Parse the DisableEncoding field. std::string DisableEncoding = R->getValueAsString("DisableEncoding"); while (1) { @@ -229,15 +229,15 @@ "' does not have an operand named '$" + Name + "'!"; } -std::pair +std::pair CodeGenInstruction::ParseOperandName(const std::string &Op, bool AllowWholeOp) { if (Op.empty() || Op[0] != '$') throw TheDef->getName() + ": Illegal operand name: '" + Op + "'"; - + std::string OpName = Op.substr(1); std::string SubOpName; - + // Check to see if this is $foo.bar. 
std::string::size_type DotIdx = OpName.find_first_of("."); if (DotIdx != std::string::npos) { @@ -246,7 +246,7 @@ throw TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'"; OpName = OpName.substr(0, DotIdx); } - + unsigned OpIdx = getOperandNamed(OpName); if (SubOpName.empty()) { // If no suboperand name was specified: @@ -255,16 +255,16 @@ SubOpName.empty()) throw TheDef->getName() + ": Illegal to refer to" " whole operand part of complex operand '" + Op + "'"; - + // Otherwise, return the operand. return std::make_pair(OpIdx, 0U); } - + // Find the suboperand number involved. DagInit *MIOpInfo = OperandList[OpIdx].MIOperandInfo; if (MIOpInfo == 0) throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'"; - + // Find the operand with the right name. for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i) if (MIOpInfo->getArgName(i) == SubOpName) From sabre at nondot.org Tue Dec 15 13:34:21 2009 From: sabre at nondot.org (Chris Lattner) Date: Tue, 15 Dec 2009 19:34:21 -0000 Subject: [llvm-commits] [llvm] r91444 - in /llvm/trunk/lib/Analysis: DbgInfoPrinter.cpp ValueTracking.cpp Message-ID: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 13:34:20 2009 New Revision: 91444 URL: http://llvm.org/viewvc/llvm-project?rev=91444&view=rev Log: Fix GetConstantStringInfo to not look into MDString (it works on real data, not metadata) and fix DbgInfoPrinter to not abuse GetConstantStringInfo. 
Modified: llvm/trunk/lib/Analysis/DbgInfoPrinter.cpp llvm/trunk/lib/Analysis/ValueTracking.cpp Modified: llvm/trunk/lib/Analysis/DbgInfoPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DbgInfoPrinter.cpp?rev=91444&r1=91443&r2=91444&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DbgInfoPrinter.cpp (original) +++ llvm/trunk/lib/Analysis/DbgInfoPrinter.cpp Tue Dec 15 13:34:20 2009 @@ -22,7 +22,6 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -75,18 +74,16 @@ } void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) { - if (PrintDirectory) { - std::string dir; - GetConstantStringInfo(DSI->getDirectory(), dir); - Out << dir << "/"; - } + if (PrintDirectory) + if (MDString *Str = dyn_cast(DSI->getDirectory())) + Out << Str->getString() << '/'; - std::string file; - GetConstantStringInfo(DSI->getFileName(), file); - Out << file << ":" << DSI->getLine(); + if (MDString *Str = dyn_cast(DSI->getFileName())) + Out << Str->getString(); + Out << ':' << DSI->getLine(); if (unsigned Col = DSI->getColumn()) - Out << ":" << Col; + Out << ':' << Col; } void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) { Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=91444&r1=91443&r2=91444&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp (original) +++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Dec 15 13:34:20 2009 @@ -1369,11 +1369,6 @@ StopAtNul); } - if (MDString *MDStr = dyn_cast(V)) { - Str = MDStr->getString(); - return true; - } - // The GEP instruction, constant or instruction, must 
reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. From edwintorok at gmail.com Tue Dec 15 13:39:32 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Tue, 15 Dec 2009 21:39:32 +0200 Subject: [llvm-commits] [llvm] r91444 - in /llvm/trunk/lib/Analysis: DbgInfoPrinter.cpp ValueTracking.cpp In-Reply-To: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> References: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> Message-ID: <4B27E5F4.6070104@gmail.com> On 2009-12-15 21:34, Chris Lattner wrote: > Author: lattner > Date: Tue Dec 15 13:34:20 2009 > New Revision: 91444 > > URL: http://llvm.org/viewvc/llvm-project?rev=91444&view=rev > Log: > Fix GetConstantStringInfo to not look into MDString (it works on > real data, not metadata) and fix DbgInfoPrinter to not abuse > GetConstantStringInfo. > > Thanks. The old way of representing debug info is gone now right? Best regards, --Edwin From clattner at apple.com Tue Dec 15 13:44:50 2009 From: clattner at apple.com (Chris Lattner) Date: Tue, 15 Dec 2009 11:44:50 -0800 Subject: [llvm-commits] [llvm] r91444 - in /llvm/trunk/lib/Analysis: DbgInfoPrinter.cpp ValueTracking.cpp In-Reply-To: <4B27E5F4.6070104@gmail.com> References: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> <4B27E5F4.6070104@gmail.com> Message-ID: On Dec 15, 2009, at 11:39 AM, Török Edwin wrote: >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91444&view=rev >> Log: >> Fix GetConstantStringInfo to not look into MDString (it works on >> real data, not metadata) and fix DbgInfoPrinter to not abuse >> GetConstantStringInfo. >> >> > > Thanks. The old way of representing debug info is gone now right? I'm not sure what you mean? 
-Chris From edwintorok at gmail.com Tue Dec 15 13:47:54 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Tue, 15 Dec 2009 21:47:54 +0200 Subject: [llvm-commits] [llvm] r91444 - in /llvm/trunk/lib/Analysis: DbgInfoPrinter.cpp ValueTracking.cpp In-Reply-To: References: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> <4B27E5F4.6070104@gmail.com> Message-ID: <4B27E7EA.5030005@gmail.com> On 2009-12-15 21:44, Chris Lattner wrote: > > On Dec 15, 2009, at 11:39 AM, Török Edwin wrote: > >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91444&view=rev >>> Log: >>> Fix GetConstantStringInfo to not look into MDString (it works on >>> real data, not metadata) and fix DbgInfoPrinter to not abuse >>> GetConstantStringInfo. >>> >>> >> >> Thanks. The old way of representing debug info is gone now right? > > I'm not sure what you mean? Debuginfo is only represented using metadata now, and the old way of representing debuginfo (not using metadata) is not supported at all now. Am I right? DbgInfoPrinter calling GetConstantStringInfo was probably the last piece of the old way. Best regards, --Edwin From dpatel at apple.com Tue Dec 15 14:18:58 2009 From: dpatel at apple.com (Devang Patel) Date: Tue, 15 Dec 2009 12:18:58 -0800 Subject: [llvm-commits] [llvm] r91444 - in /llvm/trunk/lib/Analysis: DbgInfoPrinter.cpp ValueTracking.cpp In-Reply-To: <4B27E7EA.5030005@gmail.com> References: <200912151934.nBFJYLlW016130@zion.cs.uiuc.edu> <4B27E5F4.6070104@gmail.com> <4B27E7EA.5030005@gmail.com> Message-ID: <6F41F94E-671C-435E-A1C2-089B343422C8@apple.com> On Dec 15, 2009, at 11:47 AM, Török Edwin wrote: > On 2009-12-15 21:44, Chris Lattner wrote: >> >> On Dec 15, 2009, at 11:39 AM, Török Edwin wrote: >> >>>> >>>> URL: http://llvm.org/viewvc/llvm-project?rev=91444&view=rev >>>> Log: >>>> Fix GetConstantStringInfo to not look into MDString (it works on >>>> real data, not metadata) and fix DbgInfoPrinter to not abuse >>>> GetConstantStringInfo. >>>> >>>> >>> >>> Thanks. 
The old way of representing debug info is gone now right? >> >> I'm not sure what you mean? > > Debuginfo is only represented using metadata now, and the old > way of representing debuginfo (not using metadata) is not supported at > all now. > Am I right? Yes. - Devang From gohman at apple.com Tue Dec 15 14:21:45 2009 From: gohman at apple.com (Dan Gohman) Date: Tue, 15 Dec 2009 20:21:45 -0000 Subject: [llvm-commits] [llvm] r91448 - in /llvm/trunk/utils/TableGen: CodeEmitterGen.cpp CodeEmitterGen.h Message-ID: <200912152021.nBFKLjPL019720@zion.cs.uiuc.edu> Author: djg Date: Tue Dec 15 14:21:44 2009 New Revision: 91448 URL: http://llvm.org/viewvc/llvm-project?rev=91448&view=rev Log: Revert 90628, which was incorrect. Modified: llvm/trunk/utils/TableGen/CodeEmitterGen.cpp llvm/trunk/utils/TableGen/CodeEmitterGen.h Modified: llvm/trunk/utils/TableGen/CodeEmitterGen.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeEmitterGen.cpp?rev=91448&r1=91447&r2=91448&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeEmitterGen.cpp (original) +++ llvm/trunk/utils/TableGen/CodeEmitterGen.cpp Tue Dec 15 14:21:44 2009 @@ -61,11 +61,14 @@ // If the VarBitInit at position 'bit' matches the specified variable then // return the variable bit position. Otherwise return -1. -int CodeEmitterGen::getVariableBit(const Init *VarVal, +int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI, int bit) { if (VarBitInit *VBI = dynamic_cast(BI->getBit(bit))) { TypedInit *TI = VBI->getVariable(); - if (TI == VarVal) return VBI->getBitNum(); + + if (VarInit *VI = dynamic_cast(TI)) { + if (VI->getName() == VarName) return VBI->getBitNum(); + } } return -1; @@ -159,11 +162,11 @@ if (!Vals[i].getPrefix() && !Vals[i].getValue()->isComplete()) { // Is the operand continuous? If so, we can just mask and OR it in // instead of doing it bit-by-bit, saving a lot in runtime cost. 
- const Init *VarVal = Vals[i].getValue(); + const std::string &VarName = Vals[i].getName(); bool gotOp = false; for (int bit = BI->getNumBits()-1; bit >= 0; ) { - int varBit = getVariableBit(VarVal, BI, bit); + int varBit = getVariableBit(VarName, BI, bit); if (varBit == -1) { --bit; @@ -173,7 +176,7 @@ int N = 1; for (--bit; bit >= 0;) { - varBit = getVariableBit(VarVal, BI, bit); + varBit = getVariableBit(VarName, BI, bit); if (varBit == -1 || varBit != (beginVarBit - N)) break; ++N; --bit; @@ -185,7 +188,7 @@ while (CGI.isFlatOperandNotEmitted(op)) ++op; - Case += " // op: " + Vals[i].getName() + "\n" + Case += " // op: " + VarName + "\n" + " op = getMachineOpValue(MI, MI.getOperand(" + utostr(op++) + "));\n"; gotOp = true; Modified: llvm/trunk/utils/TableGen/CodeEmitterGen.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeEmitterGen.h?rev=91448&r1=91447&r2=91448&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeEmitterGen.h (original) +++ llvm/trunk/utils/TableGen/CodeEmitterGen.h Tue Dec 15 14:21:44 2009 @@ -23,7 +23,6 @@ class RecordVal; class BitsInit; -struct Init; class CodeEmitterGen : public TableGenBackend { RecordKeeper &Records; @@ -36,7 +35,7 @@ void emitMachineOpEmitter(raw_ostream &o, const std::string &Namespace); void emitGetValueBit(raw_ostream &o, const std::string &Namespace); void reverseBits(std::vector &Insts); - int getVariableBit(const Init *VarVal, BitsInit *BI, int bit); + int getVariableBit(const std::string &VarName, BitsInit *BI, int bit); }; } // End llvm namespace From bob.wilson at apple.com Tue Dec 15 15:09:00 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 15 Dec 2009 21:09:00 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91452 - /llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h Message-ID: <200912152109.nBFL907l021380@zion.cs.uiuc.edu> Author: bwilson Date: Tue Dec 15 15:09:00 2009 New Revision: 91452 URL: 
http://llvm.org/viewvc/llvm-project?rev=91452&view=rev Log: ARM vectors bigger than 128bits cannot be returned in registers. Radar 7387206. Modified: llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h Modified: llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h?rev=91452&r1=91451&r2=91452&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/llvm-arm-target.h Tue Dec 15 15:09:00 2009 @@ -82,6 +82,10 @@ #define LLVM_SHOULD_NOT_USE_SHADOW_RETURN(X, CC) \ llvm_arm_should_pass_or_return_aggregate_in_regs((X), (CC)) +/* Vectors bigger than 128 are returned using sret. */ +#define LLVM_SHOULD_RETURN_VECTOR_AS_SHADOW(X, isBuiltin) \ + (TREE_INT_CST_LOW(TYPE_SIZE(X)) > 128) + #endif /* LLVM_ABI_H */ #endif /* ENABLE_LLVM */ /* LLVM LOCAL end (ENTIRE FILE!) */ From bob.wilson at apple.com Tue Dec 15 15:19:09 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 15 Dec 2009 21:19:09 -0000 Subject: [llvm-commits] [test-suite] r91454 - /test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c Message-ID: <200912152119.nBFLJ9mY021787@zion.cs.uiuc.edu> Author: bwilson Date: Tue Dec 15 15:19:09 2009 New Revision: 91454 URL: http://llvm.org/viewvc/llvm-project?rev=91454&view=rev Log: Fix this test to avoid using . 
Modified: test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c Modified: test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c?rev=91454&r1=91453&r2=91454&view=diff ============================================================================== --- test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c (original) +++ test-suite/trunk/SingleSource/Benchmarks/Misc/lowercase.c Tue Dec 15 15:19:09 2009 @@ -1,6 +1,7 @@ #include #include -#include + +void *malloc(size_t); typedef unsigned short UChar; From bob.wilson at apple.com Tue Dec 15 15:43:20 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 15 Dec 2009 21:43:20 -0000 Subject: [llvm-commits] [test-suite] r91456 - /test-suite/trunk/External/Makefile Message-ID: <200912152143.nBFLhKeX022615@zion.cs.uiuc.edu> Author: bwilson Date: Tue Dec 15 15:43:19 2009 New Revision: 91456 URL: http://llvm.org/viewvc/llvm-project?rev=91456&view=rev Log: Only run Nurbs on x86 and x86_64 architectures. Modified: test-suite/trunk/External/Makefile Modified: test-suite/trunk/External/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/External/Makefile?rev=91456&r1=91455&r2=91456&view=diff ============================================================================== --- test-suite/trunk/External/Makefile (original) +++ test-suite/trunk/External/Makefile Tue Dec 15 15:43:19 2009 @@ -8,7 +8,12 @@ # # Create the list of directories to compile # -PARALLEL_DIRS := SPEC Povray Namd FPGrowth BoxedSim Nurbs HMMER +PARALLEL_DIRS := SPEC Povray Namd FPGrowth BoxedSim HMMER + +ifneq (,$(findstring x86,$(ARCH))) +# Nurbs uses SSE and only works on x86 and x86_64. 
+PARALLEL_DIRS += Nurbs +endif ifndef USE_POVRAY PARALLEL_DIRS := $(filter-out Povray, $(PARALLEL_DIRS)) From daniel at zuster.org Tue Dec 15 16:00:37 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 15 Dec 2009 22:00:37 -0000 Subject: [llvm-commits] [llvm] r91458 - /llvm/trunk/utils/lit/TestFormats.py Message-ID: <200912152200.nBFM0b7U023223@zion.cs.uiuc.edu> Author: ddunbar Date: Tue Dec 15 16:00:37 2009 New Revision: 91458 URL: http://llvm.org/viewvc/llvm-project?rev=91458&view=rev Log: lit: Improve error when gtest discovery fails. Modified: llvm/trunk/utils/lit/TestFormats.py Modified: llvm/trunk/utils/lit/TestFormats.py URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/TestFormats.py?rev=91458&r1=91457&r2=91458&view=diff ============================================================================== --- llvm/trunk/utils/lit/TestFormats.py (original) +++ llvm/trunk/utils/lit/TestFormats.py Tue Dec 15 16:00:37 2009 @@ -9,12 +9,17 @@ self.test_sub_dir = str(test_sub_dir) self.test_suffix = str(test_suffix) - def getGTestTests(self, path): + def getGTestTests(self, path, litConfig): """getGTestTests(path) - [name] Return the tests available in gtest executable.""" - lines = Util.capture([path, '--gtest_list_tests']).split('\n') + try: + lines = Util.capture([path, '--gtest_list_tests']).split('\n') + except: + litConfig.error("unable to discover google-tests in %r" % path) + raise StopIteration + nested_tests = [] for ln in lines: if not ln.strip(): @@ -47,7 +52,7 @@ execpath = os.path.join(filepath, subfilename) # Discover the tests in this executable. 
- for name in self.getGTestTests(execpath): + for name in self.getGTestTests(execpath, litConfig): testPath = path_in_suite + (filename, subfilename, name) yield Test.Test(testSuite, testPath, localConfig) From bob.wilson at apple.com Tue Dec 15 16:00:51 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 15 Dec 2009 22:00:51 -0000 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Message-ID: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> Author: bwilson Date: Tue Dec 15 16:00:51 2009 New Revision: 91459 URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev Log: Reapply 91184 with fixes and an addition to the testcase to cover the problem found last time. Instead of trying to modify the IR while iterating over it, I've change it to keep a list of WeakVH references to dead instructions, and then delete those instructions later. I also added some special case code to detect and handle the situation when both operands of a memcpy intrinsic are referencing the same alloca. Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91459&r1=91458&r2=91459&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Dec 15 16:00:51 2009 @@ -74,6 +74,10 @@ private: TargetData *TD; + /// DeadInsts - Keep track of instructions we have made dead, so that + /// we can remove them after we are done working. + SmallVector DeadInsts; + /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. 
All these fields are initialized to false /// and set to true when something is learned. @@ -102,25 +106,30 @@ int isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info); - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info); - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, - AllocaInfo &Info); + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, + uint64_t &ArrayOffset, AllocaInfo &Info); + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, + uint64_t MemSize, const Type *MemOpType, bool isStore, + AllocaInfo &Info); + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); + unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); void DoScalarReplacement(AllocaInst *AI, std::vector &WorkList); + void DeleteDeadInstructions(); void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(AllocaInst *AI); + void CleanupAllocaUsers(Value *V); AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector &NewElts); - - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, 
AllocaInst *AI, @@ -360,176 +369,37 @@ } } - // Now that we have created the alloca instructions that we want to use, - // expand the getelementptr instructions to use them. - while (!AI->use_empty()) { - Instruction *User = cast(AI->use_back()); - if (BitCastInst *BCInst = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); - BCInst->eraseFromParent(); - continue; - } - - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - if (LoadInst *LI = dyn_cast(User)) { - Value *Insert = UndefValue::get(LI->getType()); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - continue; - } + // Now that we have created the new alloca instructions, rewrite all the + // uses of the old alloca. 
+ DeadInsts.push_back(AI); + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, ElementAllocas[i], SI); - } - SI->eraseFromParent(); - continue; - } - - GetElementPtrInst *GEPI = cast(User); - // We now know that the GEP is of the form: GEP , 0, - unsigned Idx = - (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); - - assert(Idx < ElementAllocas.size() && "Index out of range?"); - AllocaInst *AllocaToUse = ElementAllocas[Idx]; - - Value *RepValue; - if (GEPI->getNumOperands() == 3) { - // Do not insert a new getelementptr instruction with zero indices, only - // to have it optimized out later. - RepValue = AllocaToUse; - } else { - // We are indexing deeply into the structure, so we still need a - // getelement ptr instruction to finish the indexing. This may be - // expanded itself once the worklist is rerun. - // - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue( - Type::getInt32Ty(AI->getContext()))); - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), - NewArgs.end(), "", GEPI); - RepValue->takeName(GEPI); - } - - // If this GEP is to the start of the aggregate, check for memcpys. - if (Idx == 0 && GEPI->hasAllZeroIndices()) - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); - - // Move all of the users over to the new GEP. 
- GEPI->replaceAllUsesWith(RepValue); - // Delete the old GEP - GEPI->eraseFromParent(); - } + // Now erase any instructions that were made dead while rewriting the alloca. + DeleteDeadInstructions(); - // Finally, delete the Alloca instruction - AI->eraseFromParent(); NumReplaced++; } -/// isSafeElementUse - Check to see if this use is an allowed use for a -/// getelementptr instruction of an array aggregate allocation. isFirstElt -/// indicates whether Ptr is known to the start of the aggregate. -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) { - Instruction *User = cast(*I); - switch (User->getOpcode()) { - case Instruction::Load: break; - case Instruction::Store: - // Store is ok if storing INTO the pointer, not storing the pointer - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); - break; - case Instruction::GetElementPtr: { - GetElementPtrInst *GEP = cast(User); - bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1 && - (!isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero())) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - // Verify that any array subscripts are in range. - for (gep_type_iterator GEPIt = gep_type_begin(GEP), - E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { - // Ignore struct elements, no extra checking needed for these. - if (isa(*GEPIt)) - continue; - - // This GEP indexes an array. Verify that this is an in-range - // constant integer. Specifically, consider A[0][i]. We cannot know that - // the user isn't doing invalid things like allowing i to index an - // out-of-range subscript that accesses A[1]. Because of this, we have - // to reject SROA of any accesses into structs where any of the - // components are variables. 
- ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - - // Are all indices still zero? - AreAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*GEPIt)) { - if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*GEPIt)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); - } +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, +/// recursively including all their operands that become trivially dead. +void SROA::DeleteDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); + if (I == 0) + continue; + + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast(*OI)) { + // Zero out the operand and see if it becomes trivially dead. + *OI = 0; + if (isInstructionTriviallyDead(U)) + DeadInsts.push_back(U); } - - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); - if (Info.isUnsafe) return; - break; - } - case Instruction::BitCast: - if (isFirstElt) { - isSafeUseOfBitCastedAllocation(cast(User), AI, Info); - if (Info.isUnsafe) return; - break; - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - case Instruction::Call: - if (MemIntrinsic *MI = dyn_cast(User)) { - if (isFirstElt) { - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); - if (Info.isUnsafe) return; - break; - } - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - default: - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - } + + I->eraseFromParent(); } - return; // All users look ok :) } - + /// AllUsersAreLoads - Return true if all users of this value are loads. 
static bool AllUsersAreLoads(Value *Ptr) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -539,72 +409,116 @@ return true; } -/// isSafeUseOfAllocation - Check if this user is an allowed use for an -/// aggregate allocation. -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info) { - if (BitCastInst *C = dyn_cast(User)) - return isSafeUseOfBitCastedAllocation(C, AI, Info); - - if (LoadInst *LI = dyn_cast(User)) - if (!LI->isVolatile()) - return;// Loads (returning a first class aggregrate) are always rewritable - - if (StoreInst *SI = dyn_cast(User)) - if (!SI->isVolatile() && SI->getOperand(0) != AI) - return;// Store is ok if storing INTO the pointer, not storing the pointer - - GetElementPtrInst *GEPI = dyn_cast(User); - if (GEPI == 0) - return MarkUnsafe(Info); - - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to +/// performing scalar replacement of alloca AI. The results are flagged in +/// the Info parameter. Offset and ArrayOffset indicate the position within +/// AI that is referenced by this instruction. +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, AllocaInfo &Info) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); - // The GEP is not safe to transform if not of the form "GEP , 0, ". 
- if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { - return MarkUnsafe(Info); + if (BitCastInst *BC = dyn_cast(User)) { + isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + uint64_t GEPArrayOffset = ArrayOffset; + uint64_t GEPOffset = Offset; + isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); + if (!Info.isUnsafe) + isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + if (Length) + isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, + UI.getOperandNo() == 1, Info); + else + MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(User)) { + if (!LI->isVolatile()) { + const Type *LIType = LI->getType(); + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), + LIType, false, Info); + } else + MarkUnsafe(Info); + } else if (StoreInst *SI = dyn_cast(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isVolatile() && SI->getOperand(0) != I) { + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), + SIType, true, Info); + } else + MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(I)) { + Info.needsCleanup = true; + return; + } + MarkUnsafe(Info); + } else { + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } +} - ++I; - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? +/// isSafeGEP - Check if a GEP instruction can be handled for scalar +/// replacement. It is safe when all the indices are constant, in-bounds +/// references, and when the resulting offset corresponds to an element within +/// the alloca type. 
The results are flagged in the Info parameter. Upon +/// return, Offset is adjusted as specified by the GEP indices. For the +/// special case of a variable index to a 2-element array, ArrayOffset is set +/// to the array element size. +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, + uint64_t &Offset, uint64_t &ArrayOffset, + AllocaInfo &Info) { + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); + if (GEPIt == E) + return; + + // The first GEP index must be zero. + if (!isa(GEPIt.getOperand()) || + !cast(GEPIt.getOperand())->isZero()) + return MarkUnsafe(Info); + if (++GEPIt == E) + return; - bool IsAllZeroIndices = true; - // If the first index is a non-constant index into an array, see if we can // handle it as a special case. - if (const ArrayType *AT = dyn_cast(*I)) { - if (!isa(I.getOperand())) { - IsAllZeroIndices = 0; - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements in - // it, in which case we CAN promote it, but we have to canonicalize this - // out if this is the only problem. - if ((NumElements == 1 || NumElements == 2) && - AllUsersAreLoads(GEPI)) { + const Type *ArrayEltTy = 0; + if (ArrayOffset == 0 && Offset == 0) { + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (!isa(GEPIt.getOperand())) { + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements + // in it, in which case we CAN promote it, but we have to canonicalize + // this out if this is the only problem. + if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) + return MarkUnsafe(Info); Info.needsCleanup = true; - return; // Canonicalization required! 
+ ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); + ArrayEltTy = AT->getElementType(); + ++GEPIt; } - return MarkUnsafe(Info); } } - + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; I != E; ++I) { + for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa(*I)) + if (isa(*GEPIt)) continue; - - ConstantInt *IdxVal = dyn_cast(I.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - // Are all indices still zero? - IsAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*I)) { + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) + return MarkUnsafe(Info); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { // This GEP indexes an array. Verify that this is an in-range constant // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range @@ -612,147 +526,255 @@ // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*I)) { + } else { + const VectorType *VT = dyn_cast(*GEPIt); + assert(VT && "unexpected type in GEP type iterator"); if (IdxVal->getZExtValue() >= VT->getNumElements()) return MarkUnsafe(Info); } } - - // If there are any non-simple uses of this getelementptr, make sure to reject - // them. - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); + + // All the indices are safe. Now compute the offset due to this GEP and + // check if the alloca has a component element at that offset. + if (ArrayOffset == 0) { + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + } else { + // Both array elements have the same type, so it suffices to check one of + // them. 
Copy the GEP indices starting from the array index, but replace + // that variable index with a constant zero. + SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); + Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); + const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); + Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); + } + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info); +} + +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI +/// alloca or has an offset and size that corresponds to a component element +/// within it. The offset checked here may have been formed from a GEP with a +/// pointer bitcasted to a different type. +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, uint64_t MemSize, + const Type *MemOpType, bool isStore, + AllocaInfo &Info) { + // Check if this is a load/store of the entire alloca. + if (Offset == 0 && ArrayOffset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); + // This is safe for MemIntrinsics (where MemOpType is 0), integer types + // (which are essentially the same as the MemIntrinsics, especially with + // regard to copying padding between elements), or references using the + // aggregate type of the alloca. + if (!MemOpType || isa(MemOpType) || UsesAggregateType) { + if (!UsesAggregateType) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + } + return; + } + } + // Check if the offset/size correspond to a component within the alloca type. + const Type *T = AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize) && + (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) + return; + + return MarkUnsafe(Info); } -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory -/// intrinsic can be promoted by SROA. 
At this point, we know that the operand -/// of the memintrinsic is a pointer to the beginning of the allocation. -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info) { - // If not constant length, give up. - ConstantInt *Length = dyn_cast(MI->getLength()); - if (!Length) return MarkUnsafe(Info); - - // If not the whole aggregate, give up. - if (Length->getZExtValue() != - TD->getTypeAllocSize(AI->getType()->getElementType())) - return MarkUnsafe(Info); - - // We only know about memcpy/memset/memmove. - if (!isa(MI)) - return MarkUnsafe(Info); - - // Otherwise, we can transform it. Determine whether this is a memcpy/set - // into or out of the aggregate. - if (OpNo == 1) - Info.isMemCpyDst = true; - else { - assert(OpNo == 2); - Info.isMemCpySrc = true; +/// TypeHasComponent - Return true if T has a component type with the +/// specified offset and size. If Size is zero, do not check the size. +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { + const Type *EltTy; + uint64_t EltSize; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + unsigned EltIdx = Layout->getElementContainingOffset(Offset); + EltTy = ST->getContainedType(EltIdx); + EltSize = TD->getTypeAllocSize(EltTy); + Offset -= Layout->getElementOffset(EltIdx); + } else if (const ArrayType *AT = dyn_cast(T)) { + EltTy = AT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + Offset %= EltSize; + } else { + return false; } + if (Offset == 0 && (Size == 0 || EltSize == Size)) + return true; + // Check if the component spans multiple elements. + if (Offset + Size > EltSize) + return false; + return TypeHasComponent(EltTy, Offset, Size); } -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast -/// from an alloca are safe for SROA of that alloca. 
-void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCU = dyn_cast(UI)) { - isSafeUseOfBitCastedAllocation(BCU, AI, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); - } else if (StoreInst *SI = dyn_cast(UI)) { - if (SI->isVolatile()) - return MarkUnsafe(Info); - - // If storing the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and store through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa(SI->getOperand(0)->getType()) && - TD->getTypeAllocSize(SI->getOperand(0)->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpyDst = true; - continue; - } - return MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(UI)) { - if (LI->isVolatile()) - return MarkUnsafe(Info); +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite +/// the instruction I, which references it, to use the separate elements. +/// Offset indicates the position within AI that is referenced by this +/// instruction. +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); - // If loading the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and load through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
- if (isa(LI->getType()) && - TD->getTypeAllocSize(LI->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpySrc = true; - continue; + if (BitCastInst *BC = dyn_cast(User)) { + RewriteBitCast(BC, AI, Offset, NewElts); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + RewriteGEP(GEPI, AI, Offset, NewElts); + } else if (MemIntrinsic *MI = dyn_cast(User)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + uint64_t MemSize = Length->getZExtValue(); + if (Offset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); + } else if (LoadInst *LI = dyn_cast(User)) { + const Type *LIType = LI->getType(); + if (LIType == AI->getAllocatedType()) { + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + Value *Insert = UndefValue::get(LIType); + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Load = new LoadInst(NewElts[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + DeadInsts.push_back(LI); + } else if (isa(LIType) && + TD->getTypeAllocSize(LIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - return MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. 
- if (OnlyUsedByDbgInfoIntrinsics(BC)) { - Info.needsCleanup = true; - return; + } else if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + const Type *SIType = Val->getType(); + if (SIType == AI->getAllocatedType()) { + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, NewElts[i], SI); + } + DeadInsts.push_back(SI); + } else if (isa(SIType) && + TD->getTypeAllocSize(SIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } - else - MarkUnsafe(Info); } - else { - return MarkUnsafe(Info); - } - if (Info.isUnsafe) return; } } -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes -/// to its first element. Transform users of the cast to use the new values -/// instead. -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector &NewElts) { - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); - while (UI != UE) { - Instruction *User = cast(*UI++); - if (BitCastInst *BCU = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - if (BCU->use_empty()) BCU->eraseFromParent(); - continue; - } +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced +/// and recursively continue updating all of its uses. 
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + RewriteForScalarRepl(BC, AI, Offset, NewElts); + if (BC->getOperand(0) != AI) + return; - if (MemIntrinsic *MI = dyn_cast(User)) { - // This must be memcpy/memmove/memset of the entire aggregate. - // Split into one per element. - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - continue; - } + // The bitcast references the original alloca. Replace its uses with + // references to the first new element alloca. + Instruction *Val = NewElts[0]; + if (Val->getType() != BC->getDestTy()) { + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); + Val->takeName(BC); + } + BC->replaceAllUsesWith(Val); + DeadInsts.push_back(BC); +} + +/// FindElementAndOffset - Return the index of the element containing Offset +/// within the specified type, which must be either a struct or an array. +/// Sets T to the type of the element and Offset to the offset within that +/// element. +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { + unsigned Idx = 0; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + Idx = Layout->getElementContainingOffset(Offset); + T = ST->getContainedType(Idx); + Offset -= Layout->getElementOffset(Idx); + } else { + const ArrayType *AT = dyn_cast(T); + assert(AT && "unexpected type for scalar replacement"); + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = (unsigned)(Offset / EltSize); + Offset -= Idx * EltSize; + } + return Idx; +} + +/// RewriteGEP - Check if this GEP instruction moves the pointer across +/// elements of the alloca that are being split apart, and if so, rewrite +/// the GEP to be relative to the new element. 
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + uint64_t OldOffset = Offset; + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); + + const Type *T = AI->getAllocatedType(); + unsigned OldIdx = FindElementAndOffset(T, OldOffset); + if (GEPI->getOperand(0) == AI) + OldIdx = ~0U; // Force the GEP to be rewritten. + + T = AI->getAllocatedType(); + uint64_t EltOffset = Offset; + unsigned Idx = FindElementAndOffset(T, EltOffset); + + // If this GEP does not move the pointer across elements of the alloca + // being split, then it does not need to be rewritten. + if (Idx == OldIdx) + return; - if (LoadInst *LI = dyn_cast(User)) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - continue; - } - - // Otherwise it must be some other user of a gep of the first pointer. Just - // leave these alone. - continue; - } + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue(i32Ty)); + while (EltOffset != 0) { + unsigned EltIdx = FindElementAndOffset(T, EltOffset); + NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); + } + Instruction *Val = NewElts[Idx]; + if (NewArgs.size() > 1) { + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), + NewArgs.end(), "", GEPI); + Val->takeName(GEPI); + } + if (Val->getType() != GEPI->getType()) + Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); + GEPI->replaceAllUsesWith(Val); + DeadInsts.push_back(GEPI); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. 
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For @@ -761,28 +783,41 @@ LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (BCInst == MTI->getRawDest()) + if (Inst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(BCInst == MTI->getRawSource()); + assert(Inst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } - // Keep track of the other intrinsic argument, so it can be removed if it - // is dead when the intrinsic is replaced. - Value *PossiblyDead = OtherPtr; - // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - // It is likely that OtherPtr is a bitcast, if so, remove it. - if (BitCastInst *BC = dyn_cast(OtherPtr)) - OtherPtr = BC->getOperand(0); - // All zero GEPs are effectively bitcasts. - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) - if (GEP->hasAllZeroIndices()) - OtherPtr = GEP->getOperand(0); + + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an + // optimization, but it's also required to detect the corner case where + // both pointer operands are referencing the same memory, and where + // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This + // function is only called for mem intrinsics that access the whole + // aggregate, so non-zero GEPs are not an issue here.) 
+ while (1) { + if (BitCastInst *BC = dyn_cast(OtherPtr)) { + OtherPtr = BC->getOperand(0); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { + // All zero GEPs are effectively bitcasts. + if (GEP->hasAllZeroIndices()) { + OtherPtr = GEP->getOperand(0); + continue; + } + } + break; + } + // If OtherPtr has already been rewritten, this intrinsic will be dead. + if (OtherPtr == NewElts[0]) + return; if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -798,7 +833,7 @@ // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == BCInst; + bool SROADest = MI->getRawDest() == Inst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -807,12 +842,15 @@ Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - if (OtherPtr) { + if (OtherPtr == AI) { + OtherElt = NewElts[i]; + OtherEltAlign = 0; + } else if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -924,9 +962,7 @@ CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - MI->eraseFromParent(); - if (PossiblyDead) - RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); + DeadInsts.push_back(MI); } /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that @@ -937,15 +973,9 @@ // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. 
Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a store of an integer to the whole alloca, it may be a store - // to the first element. Just ignore the store in this case and normal SROA - // will handle it. - if (!isa(SrcVal->getType()) || - TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) - return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1050,7 +1080,7 @@ } } - SI->eraseFromParent(); + DeadInsts.push_back(SI); } /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to @@ -1059,16 +1089,9 @@ SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a load of the whole alloca to an integer, it may be a load - // of the first element. Just ignore the load in this case and normal SROA - // will handle it. - if (!isa(LI->getType()) || - TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) - return; - DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1139,10 +1162,9 @@ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); - LI->eraseFromParent(); + DeadInsts.push_back(LI); } - /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1192,14 +1214,10 @@ // the users are safe to transform. 
AllocaInfo Info; - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); - I != E; ++I) { - isSafeUseOfAllocation(cast(*I), AI, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " - << **I << '\n'); - return 0; - } + isSafeForScalarRepl(AI, AI, 0, 0, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << '\n'); + return 0; } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1208,7 +1226,7 @@ // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getType()->getElementType(), *TD)) + HasPadding(AI->getAllocatedType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. @@ -1245,15 +1263,15 @@ // Insert the new GEP instructions, which are properly indexed. SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0", GEPI); + Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".0",GEPI); Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); + Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".1", GEPI); // Replace all loads of the variable index GEP with loads from both // indexes and a select. 
while (!GEPI->use_empty()) { @@ -1264,22 +1282,24 @@ LI->replaceAllUsesWith(R); LI->eraseFromParent(); } - GEPI->eraseFromParent(); } - /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(AllocaInst *AI) { +void SROA::CleanupAllocaUsers(Value *V) { // At this point, we know that the end result will be SROA'd and promoted, so // we can insert ugly code if required so long as sroa+mem2reg will clean it // up. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { User *U = *UI++; - if (GetElementPtrInst *GEPI = dyn_cast(U)) + if (isa(U)) { + CleanupAllocaUsers(U); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { CleanupGEP(GEPI); - else { + CleanupAllocaUsers(GEPI); + if (GEPI->use_empty()) GEPI->eraseFromParent(); + } else { Instruction *I = cast(U); SmallVector DbgInUses; if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { @@ -1395,7 +1415,7 @@ // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1457,7 +1477,7 @@ if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91459&view=auto ============================================================================== --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added) +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Tue Dec 15 16:00:51 2009 @@ -0,0 +1,89 @@ +; RUN: opt < %s -scalarrepl -S | FileCheck %s +; Radar 7441282 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.int16x8_t = type { <8 x i16> } +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } +%union..0anon = type { %struct.int16x8x2_t } + +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { +; CHECK: @test +; CHECK-NOT: alloca +; CHECK: "alloca point" +entry: + %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] + %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] + %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] + %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] + %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] + %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] + %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> 
[#uses=1] + store <8 x i16> %tmp.0, <8 x i16>* %1 + store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr + %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] + store <8 x i16> %4, <8 x i16>* %2, align 16 + %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] + store <8 x i16> %7, <8 x i16>* %5, align 16 + %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] + %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] + %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] + %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] + %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] + %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + store <8 x i16> %14, <8 x i16>* %15 + %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] + %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] + store <8 x i16> %16, <8 x i16>* %17 + %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] + %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] + %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) + %tmp21 = bitcast 
%struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] + %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) + %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] + %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] + %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) + br label %return + +; CHECK: store <8 x i16> +; CHECK: store <8 x i16> + +return: ; preds = %entry + ret void +} + +; Radar 7466574 +%struct._NSRange = type { i64 } + +define arm_apcscc void @test_memcpy_self() nounwind { +; CHECK: @test_memcpy_self +; CHECK-NOT: alloca +; CHECK: br i1 +entry: + %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2] + br i1 undef, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] + %tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) + ret void + +cond.false: ; preds = %entry + ret void +} + +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind From isanbard at gmail.com Tue Dec 15 16:42:20 2009 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 15 Dec 2009 22:42:20 -0000 Subject: [llvm-commits] [llvm] r91463 - /llvm/trunk/utils/buildit/build_llvm Message-ID: <200912152242.nBFMgKCf025008@zion.cs.uiuc.edu> Author: void Date: Tue Dec 15 16:42:19 2009 New Revision: 91463 URL: http://llvm.org/viewvc/llvm-project?rev=91463&view=rev Log: Some command lines don't like numbers with leading zeros. Remove them. 
Modified: llvm/trunk/utils/buildit/build_llvm Modified: llvm/trunk/utils/buildit/build_llvm URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/buildit/build_llvm?rev=91463&r1=91462&r2=91463&view=diff ============================================================================== --- llvm/trunk/utils/buildit/build_llvm (original) +++ llvm/trunk/utils/buildit/build_llvm Tue Dec 15 16:42:19 2009 @@ -243,7 +243,11 @@ fi # Install Version.h -RC_ProjectSourceSubversion=`printf "%d" $LLVM_SUBMIT_SUBVERSION` +LLVM_MINOR_VERSION=`echo $LLVM_SUBMIT_SUBVERSION | sed -e 's,0*\([1-9][0-9]*\),\1,'` +if [ "x$LLVM_MINOR_VERSION" = "x" ]; then + LLVM_MINOR_VERSION=0 +fi +RC_ProjectSourceSubversion=`printf "%d" $LLVM_MINOR_VERSION` echo "#define LLVM_VERSION ${RC_ProjectSourceVersion}" > $DEST_DIR$DEST_ROOT/include/llvm/Version.h echo "#define LLVM_MINOR_VERSION ${RC_ProjectSourceSubversion}" >> $DEST_DIR$DEST_ROOT/include/llvm/Version.h From jyasskin at google.com Tue Dec 15 16:42:46 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 15 Dec 2009 22:42:46 -0000 Subject: [llvm-commits] [llvm] r91464 - in /llvm/trunk: include/llvm/CodeGen/JITCodeEmitter.h include/llvm/CodeGen/MachineCodeEmitter.h lib/ExecutionEngine/JIT/JITEmitter.cpp lib/Target/ARM/ARMJITInfo.cpp lib/Target/Alpha/AlphaJITInfo.cpp lib/Target/PowerPC/PPCJITInfo.cpp lib/Target/X86/X86JITInfo.cpp Message-ID: <200912152242.nBFMgkQt025040@zion.cs.uiuc.edu> Author: jyasskin Date: Tue Dec 15 16:42:46 2009 New Revision: 91464 URL: http://llvm.org/viewvc/llvm-project?rev=91464&view=rev Log: Change indirect-globals to use a dedicated allocIndirectGV. This lets us remove start/finishGVStub and the BufferState helper class from the MachineCodeEmitter interface. It has the side-effect of not setting the indirect global writable and then executable on ARM, but that shouldn't be necessary. 
Modified: llvm/trunk/include/llvm/CodeGen/JITCodeEmitter.h llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp llvm/trunk/lib/Target/ARM/ARMJITInfo.cpp llvm/trunk/lib/Target/Alpha/AlphaJITInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCJITInfo.cpp llvm/trunk/lib/Target/X86/X86JITInfo.cpp Modified: llvm/trunk/include/llvm/CodeGen/JITCodeEmitter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/JITCodeEmitter.h?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/JITCodeEmitter.h (original) +++ llvm/trunk/include/llvm/CodeGen/JITCodeEmitter.h Tue Dec 15 16:42:46 2009 @@ -68,29 +68,11 @@ /// virtual bool finishFunction(MachineFunction &F) = 0; - /// startGVStub - This callback is invoked when the JIT needs the address of a - /// GV (e.g. function) that has not been code generated yet. The StubSize - /// specifies the total size required by the stub. The BufferState must be - /// passed to finishGVStub, and start/finish pairs with the same BufferState - /// must be properly nested. - /// - virtual void startGVStub(BufferState &BS, const GlobalValue* GV, - unsigned StubSize, unsigned Alignment = 1) = 0; - - /// startGVStub - This callback is invoked when the JIT needs the address of a - /// GV (e.g. function) that has not been code generated yet. Buffer points to - /// memory already allocated for this stub. The BufferState must be passed to - /// finishGVStub, and start/finish pairs with the same BufferState must be - /// properly nested. - /// - virtual void startGVStub(BufferState &BS, void *Buffer, - unsigned StubSize) = 0; - - /// finishGVStub - This callback is invoked to terminate a GV stub and returns - /// the start address of the stub. The BufferState must first have been - /// passed to startGVStub. 
- /// - virtual void *finishGVStub(BufferState &BS) = 0; + /// allocIndirectGV - Allocates and fills storage for an indirect + /// GlobalValue, and returns the address. + virtual void *allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) = 0; /// emitByte - This callback is invoked when a byte needs to be written to the /// output stream. Modified: llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineCodeEmitter.h Tue Dec 15 16:42:46 2009 @@ -48,41 +48,16 @@ /// occurred, more memory is allocated, and we reemit the code into it. /// class MachineCodeEmitter { -public: - class BufferState { - friend class MachineCodeEmitter; - /// BufferBegin/BufferEnd - Pointers to the start and end of the memory - /// allocated for this code buffer. - uint8_t *BufferBegin, *BufferEnd; - - /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting - /// code. This is guranteed to be in the range [BufferBegin,BufferEnd]. If - /// this pointer is at BufferEnd, it will never move due to code emission, - /// and all code emission requests will be ignored (this is the buffer - /// overflow condition). - uint8_t *CurBufferPtr; - public: - BufferState() : BufferBegin(NULL), BufferEnd(NULL), CurBufferPtr(NULL) {} - }; - protected: - /// These have the same meanings as the fields in BufferState - uint8_t *BufferBegin, *BufferEnd, *CurBufferPtr; - - /// Save or restore the current buffer state. The BufferState objects must be - /// used as a stack. 
- void SaveStateTo(BufferState &BS) { - assert(BS.BufferBegin == NULL && - "Can't save state into the same BufferState twice."); - BS.BufferBegin = BufferBegin; - BS.BufferEnd = BufferEnd; - BS.CurBufferPtr = CurBufferPtr; - } - void RestoreStateFrom(BufferState &BS) { - BufferBegin = BS.BufferBegin; - BufferEnd = BS.BufferEnd; - CurBufferPtr = BS.CurBufferPtr; - } + /// BufferBegin/BufferEnd - Pointers to the start and end of the memory + /// allocated for this code buffer. + uint8_t *BufferBegin, *BufferEnd; + /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting + /// code. This is guaranteed to be in the range [BufferBegin,BufferEnd]. If + /// this pointer is at BufferEnd, it will never move due to code emission, and + /// all code emission requests will be ignored (this is the buffer overflow + /// condition). + uint8_t *CurBufferPtr; public: virtual ~MachineCodeEmitter() {} @@ -113,15 +88,23 @@ /// void emitWordLE(uint32_t W) { if (4 <= BufferEnd-CurBufferPtr) { - *CurBufferPtr++ = (uint8_t)(W >> 0); - *CurBufferPtr++ = (uint8_t)(W >> 8); - *CurBufferPtr++ = (uint8_t)(W >> 16); - *CurBufferPtr++ = (uint8_t)(W >> 24); + emitWordLEInto(CurBufferPtr, W); } else { CurBufferPtr = BufferEnd; } } - + + /// emitWordLEInto - This callback is invoked when a 32-bit word needs to be + /// written to an arbitrary buffer in little-endian format. Buf must have at + /// least 4 bytes of available space. + /// + static void emitWordLEInto(uint8_t *&Buf, uint32_t W) { + *Buf++ = (uint8_t)(W >> 0); + *Buf++ = (uint8_t)(W >> 8); + *Buf++ = (uint8_t)(W >> 16); + *Buf++ = (uint8_t)(W >> 24); + } + /// emitWordBE - This callback is invoked when a 32-bit word needs to be /// written to the output stream in big-endian format. 
/// Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Tue Dec 15 16:42:46 2009 @@ -271,6 +271,10 @@ class JITEmitter : public JITCodeEmitter { JITMemoryManager *MemMgr; + // When outputting a function stub in the context of some other function, we + // save BufferBegin/BufferEnd/CurBufferPtr here. + uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; + // When reattempting to JIT a function after running out of space, we store // the estimated size of the function we're trying to JIT here, so we can // ask the memory manager for at least this much space. When we @@ -396,11 +400,13 @@ void initJumpTableInfo(MachineJumpTableInfo *MJTI); void emitJumpTableInfo(MachineJumpTableInfo *MJTI); - virtual void startGVStub(BufferState &BS, const GlobalValue* GV, - unsigned StubSize, unsigned Alignment = 1); - virtual void startGVStub(BufferState &BS, void *Buffer, - unsigned StubSize); - virtual void* finishGVStub(BufferState &BS); + void startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1); + void startGVStub(void *Buffer, unsigned StubSize); + void finishGVStub(); + virtual void *allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment); /// allocateSpace - Reserves space in the current block if any, or /// allocate a new one of the given size. 
@@ -521,13 +527,12 @@ if (!Actual) return 0; } - MachineCodeEmitter::BufferState BS; TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); - JE.startGVStub(BS, F, SL.Size, SL.Alignment); + JE.startGVStub(F, SL.Size, SL.Alignment); // Codegen a new stub, calling the lazy resolver or the actual address of the // external function, if it was resolved. Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); - JE.finishGVStub(BS); + JE.finishGVStub(); if (Actual != (void*)(intptr_t)LazyResolverFn) { // If we are getting the stub for an external function, we really want the @@ -579,11 +584,10 @@ void *&Stub = ExternalFnToStubMap[FnAddr]; if (Stub) return Stub; - MachineCodeEmitter::BufferState BS; TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); - JE.startGVStub(BS, 0, SL.Size, SL.Alignment); + JE.startGVStub(0, SL.Size, SL.Alignment); Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); - JE.finishGVStub(BS); + JE.finishGVStub(); DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for external function at '" << FnAddr << "'\n"); @@ -1215,8 +1219,9 @@ if (DwarfExceptionHandling || JITEmitDebugInfo) { uintptr_t ActualSize = 0; - BufferState BS; - SaveStateTo(BS); + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; if (MemMgr->NeedsExactSize()) { ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); @@ -1232,7 +1237,9 @@ MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); uint8_t *EhEnd = CurBufferPtr; - RestoreStateFrom(BS); + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; if (DwarfExceptionHandling) { TheJIT->RegisterTable(FrameRegister); @@ -1438,27 +1445,39 @@ } } -void JITEmitter::startGVStub(BufferState &BS, const GlobalValue* GV, +void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize, unsigned Alignment) { - SaveStateTo(BS); + SavedBufferBegin = 
BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); BufferEnd = BufferBegin+StubSize+1; } -void JITEmitter::startGVStub(BufferState &BS, void *Buffer, unsigned StubSize) { - SaveStateTo(BS); +void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) { + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; BufferBegin = CurBufferPtr = (uint8_t *)Buffer; BufferEnd = BufferBegin+StubSize+1; } -void *JITEmitter::finishGVStub(BufferState &BS) { +void JITEmitter::finishGVStub() { assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space."); NumBytes += getCurrentPCOffset(); - void *Result = BufferBegin; - RestoreStateFrom(BS); - return Result; + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; +} + +void *JITEmitter::allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) { + uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment); + memcpy(IndGV, Buffer, Size); + return IndGV; } // getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry @@ -1546,11 +1565,10 @@ // Tell the target jit info to rewrite the stub at the specified address, // rather than creating a new one. - MachineCodeEmitter::BufferState BS; TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout(); - JE->startGVStub(BS, Stub, layout.Size); + JE->startGVStub(Stub, layout.Size); getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter()); - JE->finishGVStub(BS); + JE->finishGVStub(); } /// freeMachineCodeForFunction - release machine code memory for given Function. 
Modified: llvm/trunk/lib/Target/ARM/ARMJITInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMJITInfo.cpp?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMJITInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMJITInfo.cpp Tue Dec 15 16:42:46 2009 @@ -139,17 +139,11 @@ void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, JITCodeEmitter &JCE) { - MachineCodeEmitter::BufferState BS; - JCE.startGVStub(BS, GV, 4, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 4)) { - llvm_unreachable("ERROR: Unable to mark indirect symbol writable"); - } - JCE.emitWordLE((intptr_t)Ptr); - if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) { - llvm_unreachable("ERROR: Unable to mark indirect symbol executable"); - } - void *PtrAddr = JCE.finishGVStub(BS); + uint8_t Buffer[4]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr); + void *PtrAddr = JCE.allocIndirectGV( + GV, Buffer, sizeof(Buffer), /*Alignment=*/4); addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); return PtrAddr; } Modified: llvm/trunk/lib/Target/Alpha/AlphaJITInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaJITInfo.cpp?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaJITInfo.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AlphaJITInfo.cpp Tue Dec 15 16:42:46 2009 @@ -202,7 +202,6 @@ void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { - MachineCodeEmitter::BufferState BS; //assert(Fn == AlphaCompilationCallback && "Where are you going?\n"); //Do things in a stupid slow way! 
void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue(); Modified: llvm/trunk/lib/Target/PowerPC/PPCJITInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCJITInfo.cpp?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCJITInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCJITInfo.cpp Tue Dec 15 16:42:46 2009 @@ -339,7 +339,6 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { - MachineCodeEmitter::BufferState BS; // If this is just a call to an external function, emit a branch instead of a // call. The code is the same except for one bit of the last instruction. if (Fn != (void*)(intptr_t)PPC32CompilationCallback && Modified: llvm/trunk/lib/Target/X86/X86JITInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86JITInfo.cpp?rev=91464&r1=91463&r2=91464&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86JITInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86JITInfo.cpp Tue Dec 15 16:42:46 2009 @@ -426,16 +426,19 @@ void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE) { - MachineCodeEmitter::BufferState BS; #if defined (X86_64_JIT) - JCE.startGVStub(BS, GV, 8, 8); - JCE.emitWordLE((unsigned)(intptr_t)ptr); - JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32)); -#else - JCE.startGVStub(BS, GV, 4, 4); - JCE.emitWordLE((intptr_t)ptr); + const unsigned Alignment = 8; + uint8_t Buffer[8]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr); + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32)); +#else + const unsigned Alignment = 4; + uint8_t Buffer[4]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr); #endif - return JCE.finishGVStub(BS); + return 
JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment); } TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { @@ -451,7 +454,6 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { - MachineCodeEmitter::BufferState BS; // Note, we cast to intptr_t here to silence a -pedantic warning that // complains about casting a function pointer to a normal pointer. #if defined (X86_32_JIT) && !defined (_MSC_VER) From isanbard at gmail.com Tue Dec 15 18:00:19 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 00:00:19 -0000 Subject: [llvm-commits] [llvm] r91475 - /llvm/trunk/lib/CodeGen/BranchFolding.cpp Message-ID: <200912160000.nBG00JU2027930@zion.cs.uiuc.edu> Author: void Date: Tue Dec 15 18:00:18 2009 New Revision: 91475 URL: http://llvm.org/viewvc/llvm-project?rev=91475&view=rev Log: Initialize uninitialized variables. Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=91475&r1=91474&r2=91475&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original) +++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Tue Dec 15 18:00:18 2009 @@ -1222,7 +1222,7 @@ // Analyze the branch at the end of the pred. 
MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = 0, *PredFBB = 0; SmallVector PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) From isanbard at gmail.com Tue Dec 15 18:01:27 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 00:01:27 -0000 Subject: [llvm-commits] [llvm] r91477 - /llvm/trunk/lib/CodeGen/BranchFolding.cpp Message-ID: <200912160001.nBG01SRj027999@zion.cs.uiuc.edu> Author: void Date: Tue Dec 15 18:01:27 2009 New Revision: 91477 URL: http://llvm.org/viewvc/llvm-project?rev=91477&view=rev Log: Initialize uninitialized variables. Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=91477&r1=91476&r2=91477&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original) +++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Tue Dec 15 18:01:27 2009 @@ -1274,7 +1274,7 @@ // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. 
- MachineBasicBlock *PrevTBB, *PrevFBB; + MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; SmallVector PrevCond; if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && From isanbard at gmail.com Tue Dec 15 18:08:36 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 00:08:36 -0000 Subject: [llvm-commits] [llvm] r91479 - /llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Message-ID: <200912160008.nBG08bbj028247@zion.cs.uiuc.edu> Author: void Date: Tue Dec 15 18:08:36 2009 New Revision: 91479 URL: http://llvm.org/viewvc/llvm-project?rev=91479&view=rev Log: Helpful comment added. Some code cleanup. No functional change. Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=91479&r1=91478&r2=91479&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Tue Dec 15 18:08:36 2009 @@ -450,14 +450,29 @@ /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the /// CFG to be inserted. If we have proven that MBB can only branch to DestA and -/// DestB, remove any other MBB successors from the CFG. DestA and DestB can -/// be null. +/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be +/// null. +/// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, bool isCond) { + // The values of DestA and DestB frequently come from a call to the + // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial + // values from there. 
+ // + // 1. If both DestA and DestB are null, then the block ends with no branches + // (it falls through to its successor). + // 2. If DestA is set, DestB is null, and isCond is false, then the block ends + // with only an unconditional branch. + // 3. If DestA is set, DestB is null, and isCond is true, then the block ends + // with a conditional branch that falls through to a successor (DestB). + // 4. If DestA and DestB is set and isCond is true, then the block ends with a + // conditional branch followed by an unconditional branch. DestA is the + // 'true' destination and DestB is the 'false' destination. + bool MadeChange = false; bool AddedFallThrough = false; @@ -483,14 +498,15 @@ MachineBasicBlock::succ_iterator SI = succ_begin(); MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; while (SI != succ_end()) { - if (*SI == DestA) { + const MachineBasicBlock *MBB = *SI; + if (MBB == DestA) { DestA = 0; ++SI; - } else if (*SI == DestB) { + } else if (MBB == DestB) { DestB = 0; ++SI; - } else if ((*SI)->isLandingPad() && - *SI!=OrigDestA && *SI!=OrigDestB) { + } else if (MBB->isLandingPad() && + MBB != OrigDestA && MBB != OrigDestB) { ++SI; } else { // Otherwise, this is a superfluous edge, remove it. 
@@ -498,12 +514,12 @@ MadeChange = true; } } - if (!AddedFallThrough) { - assert(DestA == 0 && DestB == 0 && - "MachineCFG is missing edges!"); - } else if (isCond) { + + if (!AddedFallThrough) + assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!"); + else if (isCond) assert(DestA == 0 && "MachineCFG is missing edges!"); - } + return MadeChange; } From rjmccall at apple.com Tue Dec 15 18:13:25 2009 From: rjmccall at apple.com (John McCall) Date: Wed, 16 Dec 2009 00:13:25 -0000 Subject: [llvm-commits] [llvm] r91480 - in /llvm/trunk/lib/CodeGen: MachineDominators.cpp MachineLoopInfo.cpp Message-ID: <200912160013.nBG0DPm6028423@zion.cs.uiuc.edu> Author: rjmccall Date: Tue Dec 15 18:13:24 2009 New Revision: 91480 URL: http://llvm.org/viewvc/llvm-project?rev=91480&view=rev Log: Explicit template instantiations must happen in the template's immediately enclosing namespace. Caught by clang++. Modified: llvm/trunk/lib/CodeGen/MachineDominators.cpp llvm/trunk/lib/CodeGen/MachineLoopInfo.cpp Modified: llvm/trunk/lib/CodeGen/MachineDominators.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineDominators.cpp?rev=91480&r1=91479&r2=91480&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineDominators.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineDominators.cpp Tue Dec 15 18:13:24 2009 @@ -17,8 +17,10 @@ using namespace llvm; +namespace llvm { TEMPLATE_INSTANTIATION(class DomTreeNodeBase); TEMPLATE_INSTANTIATION(class DominatorTreeBase); +} char MachineDominatorTree::ID = 0; Modified: llvm/trunk/lib/CodeGen/MachineLoopInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLoopInfo.cpp?rev=91480&r1=91479&r2=91480&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLoopInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLoopInfo.cpp Tue Dec 15 18:13:24 2009 @@ -19,12 +19,14 @@ 
#include "llvm/CodeGen/Passes.h" using namespace llvm; +namespace llvm { #define MLB class LoopBase TEMPLATE_INSTANTIATION(MLB); #undef MLB #define MLIB class LoopInfoBase TEMPLATE_INSTANTIATION(MLIB); #undef MLIB +} char MachineLoopInfo::ID = 0; static RegisterPass From rjmccall at apple.com Tue Dec 15 18:15:28 2009 From: rjmccall at apple.com (John McCall) Date: Wed, 16 Dec 2009 00:15:28 -0000 Subject: [llvm-commits] [llvm] r91481 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <200912160015.nBG0FSRb028494@zion.cs.uiuc.edu> Author: rjmccall Date: Tue Dec 15 18:15:28 2009 New Revision: 91481 URL: http://llvm.org/viewvc/llvm-project?rev=91481&view=rev Log: Every anonymous namespace is different. Caught by clang++. Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=91481&r1=91480&r2=91481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Dec 15 18:15:28 2009 @@ -98,10 +98,6 @@ virtual bool ParseDirective(AsmToken DirectiveID); }; -} // end anonymous namespace - -namespace { - /// ARMOperand - Instances of this class represent a parsed ARM machine /// instruction. struct ARMOperand { From mrs at apple.com Tue Dec 15 18:24:48 2009 From: mrs at apple.com (Mike Stump) Date: Tue, 15 Dec 2009 16:24:48 -0800 Subject: [llvm-commits] [llvm] r91481 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp In-Reply-To: <200912160015.nBG0FSRb028494@zion.cs.uiuc.edu> References: <200912160015.nBG0FSRb028494@zion.cs.uiuc.edu> Message-ID: On Dec 15, 2009, at 4:15 PM, John McCall wrote: > Author: rjmccall > Date: Tue Dec 15 18:15:28 2009 > New Revision: 91481 > Every anonymous namespace is different. Caught by clang++. 
But each unnamed namespace in a translation unit is in fact the same namespace. From dalej at apple.com Tue Dec 15 18:29:42 2009 From: dalej at apple.com (Dale Johannesen) Date: Wed, 16 Dec 2009 00:29:42 -0000 Subject: [llvm-commits] [llvm] r91485 - in /llvm/trunk: lib/CodeGen/RegAllocLocal.cpp test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll test/CodeGen/X86/phys-reg-local-regalloc.ll Message-ID: <200912160029.nBG0TgcE028980@zion.cs.uiuc.edu> Author: johannes Date: Tue Dec 15 18:29:41 2009 New Revision: 91485 URL: http://llvm.org/viewvc/llvm-project?rev=91485&view=rev Log: Do better with physical reg operands (typically, from inline asm) in local register allocator. If a reg-reg copy has a phys reg input and a virt reg output, and this is the last use of the phys reg, assign the phys reg to the virt reg. If a reg-reg copy has a phys reg output and we need to reload its spilled input, reload it directly into the phys reg rather than passing it through another reg. Following 76208, there is sometimes no dependency between the def of a phys reg and its use; this creates a window where that phys reg can be used for spilling (this is true in linear scan also). This is bad and needs to be fixed a better way, although 76208 works too well in practice to be reverted. However, there should normally be no spilling within inline asm blocks. The patch here goes a long way towards making this actually be true.
Added: llvm/trunk/test/CodeGen/X86/phys-reg-local-regalloc.ll Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLocal.cpp?rev=91485&r1=91484&r2=91485&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLocal.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Tue Dec 15 18:29:41 2009 @@ -233,14 +233,17 @@ /// in one of several ways: if the register is available in a physical /// register already, it uses that physical register. If the value is not /// in a physical register, and if there are physical registers available, - /// it loads it into a register. If register pressure is high, and it is - /// possible, it tries to fold the load of the virtual register into the - /// instruction itself. It avoids doing this if register pressure is low to - /// improve the chance that subsequent instructions can use the reloaded - /// value. This method returns the modified instruction. + /// it loads it into a register: PhysReg if that is an available physical + /// register, otherwise any physical register of the right class. + /// If register pressure is high, and it is possible, it tries to fold the + /// load of the virtual register into the instruction itself. It avoids + /// doing this if register pressure is low to improve the chance that + /// subsequent instructions can use the reloaded value. This method + /// returns the modified instruction. /// MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet &RRegs); + unsigned OpNum, SmallSet &RRegs, + unsigned PhysReg); /// ComputeLocalLiveness - Computes liveness of registers within a basic /// block, setting the killed/dead flags as appropriate. 
@@ -471,15 +474,17 @@ /// one of several ways: if the register is available in a physical register /// already, it uses that physical register. If the value is not in a physical /// register, and if there are physical registers available, it loads it into a +/// register: PhysReg if that is an available physical register, otherwise any /// register. If register pressure is high, and it is possible, it tries to /// fold the load of the virtual register into the instruction itself. It /// avoids doing this if register pressure is low to improve the chance that -/// subsequent instructions can use the reloaded value. This method returns the -/// modified instruction. +/// subsequent instructions can use the reloaded value. This method returns +/// the modified instruction. /// MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned OpNum, - SmallSet &ReloadedRegs) { + SmallSet &ReloadedRegs, + unsigned PhysReg) { unsigned VirtReg = MI->getOperand(OpNum).getReg(); // If the virtual register is already available, just update the instruction @@ -494,7 +499,11 @@ // Otherwise, we need to fold it into the current instruction, or reload it. // If we have registers available to hold the value, use them. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - unsigned PhysReg = getFreeReg(RC); + // If we already have a PhysReg (this happens when the instruction is a + // reg-to-reg copy with a PhysReg destination) use that. + if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || + !isPhysRegAvailable(PhysReg)) + PhysReg = getFreeReg(RC); int FrameIndex = getStackSpaceFor(VirtReg, RC); if (PhysReg) { // Register is available, allocate it! @@ -752,6 +761,12 @@ errs() << '\n'; }); + // Determine whether this is a copy instruction. The cases where the + // source or destination are phys regs are handled specially. 
+ unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg); + // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. if (TID.ImplicitUses) { @@ -835,7 +850,8 @@ // here we are looking for only used operands (never def&use) if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs); + MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, + isCopy ? DstCopyReg : 0); } // If this instruction is the last user of this register, kill the @@ -948,8 +964,17 @@ unsigned DestPhysReg; // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy, the source reg is a phys reg, and + // that reg is available, use that phys reg for DestPhysReg. + if (isCopy && + TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && + isPhysRegAvailable(SrcCopyReg)) { + DestPhysReg = SrcCopyReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else + DestPhysReg = getReg(MBB, MI, DestVirtReg); + } MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); @@ -995,9 +1020,9 @@ // Finally, if this is a noop copy instruction, zap it. (Except that if // the copy is dead, it must be kept to avoid messing up liveness info for // the register scavenger. See pr4100.) 
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - SrcReg == DstReg && DeadDefs.empty()) + if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg) && + SrcCopyReg == DstCopyReg && DeadDefs.empty()) MBB.erase(MI); } Modified: llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll?rev=91485&r1=91484&r2=91485&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Tue Dec 15 18:29:41 2009 @@ -1,7 +1,7 @@ ; RUN: llc < %s | grep {subfc r3,r5,r4} ; RUN: llc < %s | grep {subfze r4,r2} -; RUN: llc < %s -regalloc=local | grep {subfc r5,r2,r4} -; RUN: llc < %s -regalloc=local | grep {subfze r2,r3} +; RUN: llc < %s -regalloc=local | grep {subfc r5,r4,r3} +; RUN: llc < %s -regalloc=local | grep {subfze r2,r2} ; The first argument of subfc must not be the same as any other register. 
; PR1357 Added: llvm/trunk/test/CodeGen/X86/phys-reg-local-regalloc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/phys-reg-local-regalloc.ll?rev=91485&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/phys-reg-local-regalloc.ll (added) +++ llvm/trunk/test/CodeGen/X86/phys-reg-local-regalloc.ll Tue Dec 15 18:29:41 2009 @@ -0,0 +1,49 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s + + at .str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] + +define i32 @main() nounwind { +entry: +; CHECK: movl 24(%esp), %eax +; CHECK-NOT: movl +; CHECK: movl %eax, 36(%esp) +; CHECK-NOT: movl +; CHECK: movl 28(%esp), %ebx +; CHECK-NOT: movl +; CHECK: movl %ebx, 40(%esp) +; CHECK-NOT: movl +; CHECK: addl %ebx, %eax + %retval = alloca i32 ; [#uses=2] + %"%ebx" = alloca i32 ; [#uses=1] + %"%eax" = alloca i32 ; [#uses=2] + %result = alloca i32 ; [#uses=2] + %y = alloca i32 ; [#uses=2] + %x = alloca i32 ; [#uses=2] + %0 = alloca i32 ; [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 1, i32* %x, align 4 + store i32 2, i32* %y, align 4 + call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind + %asmtmp = call i32 asm sideeffect alignstack "movl $1, $0", "=={eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %x) nounwind ; [#uses=1] + store i32 %asmtmp, i32* %"%eax" + %asmtmp1 = call i32 asm sideeffect alignstack "movl $1, $0", "=={ebx},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %y) nounwind ; [#uses=1] + store i32 %asmtmp1, i32* %"%ebx" + %1 = call i32 asm "", "={bx}"() nounwind ; [#uses=1] + %2 = call i32 asm "", "={ax}"() nounwind ; [#uses=1] + %asmtmp2 = call i32 asm sideeffect alignstack "addl $1, $0", "=={eax},{ebx},{eax},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %1, i32 %2) nounwind ; [#uses=1] + store i32 
%asmtmp2, i32* %"%eax" + %3 = call i32 asm "", "={ax}"() nounwind ; [#uses=1] + call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind + %4 = load i32* %result, align 4 ; [#uses=1] + %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; [#uses=0] + store i32 0, i32* %0, align 4 + %6 = load i32* %0, align 4 ; [#uses=1] + store i32 %6, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval3 = load i32* %retval ; [#uses=1] + ret i32 %retval3 +} + +declare i32 @printf(i8*, ...) nounwind From rjmccall at apple.com Tue Dec 15 18:37:39 2009 From: rjmccall at apple.com (John McCall) Date: Tue, 15 Dec 2009 16:37:39 -0800 Subject: [llvm-commits] [llvm] r91481 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp In-Reply-To: References: <200912160015.nBG0FSRb028494@zion.cs.uiuc.edu> Message-ID: <08E52D79-3953-4203-ABAC-41FB422FC3E4@apple.com> On Dec 15, 2009, at 4:24 PM, Mike Stump wrote: > On Dec 15, 2009, at 4:15 PM, John McCall wrote: >> Author: rjmccall >> Date: Tue Dec 15 18:15:28 2009 >> New Revision: 91481 > >> Every anonymous namespace is different. Caught by clang++. > > But each unnamed namespace in a translation unit is in fact the same namespace. You're right! We're enforcing the wrong rule in clang++. This is my mistake. The cleanup is harmless, though. John. From sabre at nondot.org Tue Dec 15 18:46:02 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 00:46:02 -0000 Subject: [llvm-commits] [llvm] r91486 - in /llvm/trunk: lib/Transforms/Scalar/SimplifyLibCalls.cpp test/Transforms/SimplifyLibCalls/StrStr.ll Message-ID: <200912160046.nBG0k2Uf029630@zion.cs.uiuc.edu> Author: lattner Date: Tue Dec 15 18:46:02 2009 New Revision: 91486 URL: http://llvm.org/viewvc/llvm-project?rev=91486&view=rev Log: revert my strstr optimization, I'm told it breaks x86-64 bootstrap. 
Will reapply with a fix when I get a chance. Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=91486&r1=91485&r2=91486&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Tue Dec 15 18:46:02 2009 @@ -76,11 +76,6 @@ /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - /// EmitStrChr - Emit a call to the strchr function to the builder, for the - /// specified pointer and character. Ptr is required to be some pointer type, - /// and the return value has 'i8*' type. - Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); - /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -156,26 +151,6 @@ return CI; } -/// EmitStrChr - Emit a call to the strchr function to the builder, for the -/// specified pointer and character. Ptr is required to be some pointer type, -/// and the return value has 'i8*' type. 
-Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - const Type *I32Ty = Type::getInt32Ty(*Context); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), - I8Ptr, I8Ptr, I32Ty, NULL); - CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), - ConstantInt::get(I32Ty, C), "strchr"); - if (const Function *F = dyn_cast(StrChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - - /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -915,7 +890,7 @@ }; //===---------------------------------------===// -// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. +// 'strto*' Optimizations struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -935,52 +910,6 @@ } }; -//===---------------------------------------===// -// 'strstr' Optimizations - -struct StrStrOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - !isa(FT->getParamType(0)) || - !isa(FT->getParamType(1)) || - !isa(FT->getReturnType())) - return 0; - - // fold strstr(x, x) -> x. - if (CI->getOperand(1) == CI->getOperand(2)) - return CI->getOperand(1); - - // See if either input string is a constant string. - std::string SearchStr, ToFindStr; - bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); - bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); - - // fold strstr(x, "") -> x. 
- if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); - - // If both strings are known, constant fold it. - if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); - - if (Offset == std::string::npos) // strstr("foo", "bar") -> null - return Constant::getNullValue(CI->getType()); - - // strstr("abcd", "bc") -> gep((char*)"abcd", 2) - Value *Result = CastToCStr(CI->getOperand(1), B); - Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); - return B.CreateBitCast(Result, CI->getType()); - } - - // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), - CI->getType()); - return 0; - } -}; - //===---------------------------------------===// // 'memcmp' Optimizations @@ -1746,8 +1675,8 @@ // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; - StrToOpt StrTo; StrStrOpt StrStr; - MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; + StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; + MemSetOpt MemSet; // Math Library Optimizations PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; // Integer Optimizations @@ -1809,7 +1738,6 @@ Optimizations["strtoll"] = &StrTo; Optimizations["strtold"] = &StrTo; Optimizations["strtoull"] = &StrTo; - Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; @@ -2716,6 +2644,12 @@ // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // +// strstr: (PR5783) +// * strstr(x,x) -> x +// * strstr(x, "") -> x +// * strstr(x, "a") -> strchr(x, 'a') +// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) +// // tan, tanf, tanl: // * tan(atan(x)) -> x // Modified: 
llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll?rev=91486&r1=91485&r2=91486&view=diff ============================================================================== --- llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll (original) +++ llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Tue Dec 15 18:46:02 2009 @@ -1,48 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s -; PR5783 - -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" -target triple = "i386-apple-darwin9.0" - -@.str = private constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] -@.str1 = private constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1] -@.str2 = private constant [6 x i8] c"abcde\00" ; <[6 x i8]*> [#uses=1] -@.str3 = private constant [4 x i8] c"bcd\00" ; <[4 x i8]*> [#uses=1] - -define i8* @test1(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0)) nounwind ; [#uses=1] - ret i8* %call -; strstr(P, "") -> P -; CHECK: @test1 -; CHECK: ret i8* %P -} - -declare i8* @strstr(i8*, i8* nocapture) nounwind readonly - -define i8* @test2(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) nounwind ; [#uses=1] - ret i8* %call -; strstr(P, "a") -> strchr(P, 'a') -; CHECK: @test2 -; CHECK: @strchr(i8* %P, i32 97) -} - -define i8* @test3(i8* nocapture %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* getelementptr inbounds ([6 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i32 0, i32 0)) nounwind ; [#uses=1] - ret i8* %call -; strstr("abcde", "bcd") -> "abcde"+1 -; CHECK: @test3 -; CHECK: getelementptr inbounds ([6 x i8]* @.str2, i32 0, i64 1) -} - -define i8*
@test4(i8* %P) nounwind readonly { -entry: - %call = tail call i8* @strstr(i8* %P, i8* %P) nounwind ; [#uses=1] - ret i8* %call -; strstr(P, P) -> P -; CHECK: @test4 -; CHECK: ret i8* %P -} From evan.cheng at apple.com Tue Dec 15 18:53:12 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 00:53:12 -0000 Subject: [llvm-commits] [llvm] r91489 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll Message-ID: <200912160053.nBG0rDDv029906@zion.cs.uiuc.edu> Author: evancheng Date: Tue Dec 15 18:53:11 2009 New Revision: 91489 URL: http://llvm.org/viewvc/llvm-project?rev=91489&view=rev Log: Re-enable 91381 with fixes. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/CodeGen/X86/setcc.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91489&r1=91488&r2=91489&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 15 18:53:11 2009 @@ -980,6 +980,7 @@ setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MEMBARRIER); + setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -5752,14 +5753,11 @@ SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); // Use sbb x, x to materialize carry bit into a GPR. - // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently - // miscompiling ARMISelDAGToDAG.cpp. 
- if (0 && !isFP && X86CC == X86::COND_B) { + if (X86CC == X86::COND_B) return DAG.getNode(ISD::AND, dl, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond), DAG.getConstant(1, MVT::i8)); - } return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); @@ -9349,6 +9347,32 @@ } } +static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { + // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> + // (and (i32 x86isd::setcc_carry), 1) + // This eliminates the zext. This transformation is necessary because + // ISD::SETCC is always legalized to i8. + DebugLoc dl = N->getDebugLoc(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.getOpcode() == ISD::AND && + N0.hasOneUse() && + N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() != X86ISD::SETCC_CARRY) + return SDValue(); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 1) + return SDValue(); + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, + N00.getOperand(0), N00.getOperand(1)), + DAG.getConstant(1, VT)); + } + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -9368,6 +9392,7 @@ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); + case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); } return SDValue(); Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=91489&r1=91488&r2=91489&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Tue Dec 15 18:53:11 2009 @@ -1337,9
+1337,9 @@ let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "sbb{q}\t$dst, $dst", - [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))), +def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=91489&r1=91488&r2=91489&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Tue Dec 15 18:53:11 2009 @@ -41,6 +41,9 @@ def SDTX86SetCC : SDTypeProfile<1, 2, [SDTCisVT<0, i8>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; +def SDTX86SetCC_C : SDTypeProfile<1, 2, + [SDTCisInt<0>, + SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; @@ -87,7 +90,7 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; -def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>; +def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, @@ -3068,11 +3071,11 @@ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "sbb{w}\t$dst, $dst", - [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>, + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, OpSize; def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "sbb{l}\t$dst, $dst", - [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; + [(set GR32:$dst, 
(X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly def SETEr : I<0x94, MRM0r, @@ -4185,10 +4188,10 @@ GR16:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; -// (anyext (setcc_carry)) -> (zext (setcc_carry)) -def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))), +// (anyext (setcc_carry)) -> (setcc_carry) +def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; -def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))), +def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; //===----------------------------------------------------------------------===// Modified: llvm/trunk/test/CodeGen/X86/setcc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=91489&r1=91488&r2=91489&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/setcc.ll (original) +++ llvm/trunk/test/CodeGen/X86/setcc.ll Tue Dec 15 18:53:11 2009 @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -; XFAIL: * ; rdar://7329206 ; Use sbb x, x to materialize carry bit in a GPR. The value is either From dpatel at apple.com Tue Dec 15 20:11:38 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 16 Dec 2009 02:11:38 -0000 Subject: [llvm-commits] [llvm] r91495 - /llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll Message-ID: <200912160211.nBG2Bc9I032637@zion.cs.uiuc.edu> Author: dpatel Date: Tue Dec 15 20:11:38 2009 New Revision: 91495 URL: http://llvm.org/viewvc/llvm-project?rev=91495&view=rev Log: XFAIL on ppc-darwin. 
Modified: llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll Modified: llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll?rev=91495&r1=91494&r2=91495&view=diff ============================================================================== --- llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll (original) +++ llvm/trunk/test/DebugInfo/2009-12-01-CurrentFn.ll Tue Dec 15 20:11:38 2009 @@ -1,5 +1,5 @@ ; RUN: llc < %s | grep "func_end1:" | count 1 - +; XFAIL: powerpc-apple-darwin declare void @foo() define void @bar(i32 %i) nounwind ssp { From johnny.chen at apple.com Tue Dec 15 20:32:54 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Wed, 16 Dec 2009 02:32:54 -0000 Subject: [llvm-commits] [llvm] r91496 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrThumb.td ARMInstrThumb2.td Message-ID: <200912160232.nBG2WsJC001077@zion.cs.uiuc.edu> Author: johnny Date: Tue Dec 15 20:32:54 2009 New Revision: 91496 URL: http://llvm.org/viewvc/llvm-project?rev=91496&view=rev Log: Add encoding bits for some Thumb instructions. Plus explicitly set the top two bytes of Inst to 0x0000 for the benefit of the Thumb decoder. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td llvm/trunk/lib/Target/ARM/ARMInstrThumb.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=91496&r1=91495&r2=91496&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Dec 15 20:32:54 2009 @@ -920,8 +920,7 @@ : Thumb1I; class T1JTI pattern> - : Thumb1I, - Encoding; + : Thumb1I; // Two-address instructions class T1It pattern> : Thumb1pI; +class Encoding16 : Encoding { + let Inst{31-16} = 0x0000; +} + // A6.2 16-bit Thumb instruction encoding -class T1Encoding opcode> : Encoding { +class T1Encoding opcode> : Encoding16 { let Inst{15-10} = opcode; } // A6.2.1 Shift (immediate), add, subtract, move, and compare encoding. -class T1General opcode> : Encoding { +class T1General opcode> : Encoding16 { let Inst{15-14} = 0b00; let Inst{13-9} = opcode; } // A6.2.2 Data-processing encoding. -class T1DataProcessing opcode> : Encoding { +class T1DataProcessing opcode> : Encoding16 { let Inst{15-10} = 0b010000; let Inst{9-6} = opcode; } // A6.2.3 Special data instructions and branch and exchange encoding. -class T1Special opcode> : Encoding { +class T1Special opcode> : Encoding16 { let Inst{15-10} = 0b010001; let Inst{9-6} = opcode; } // A6.2.4 Load/store single data item encoding. -class T1LoadStore opA, bits<3> opB> : Encoding { +class T1LoadStore opA, bits<3> opB> : Encoding16 { let Inst{15-12} = opA; let Inst{11-9} = opB; } @@ -1021,7 +1024,7 @@ class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP relative // A6.2.5 Miscellaneous 16-bit instructions encoding. 
-class T1Misc opcode> : Encoding { +class T1Misc opcode> : Encoding16 { let Inst{15-12} = 0b1011; let Inst{11-5} = opcode; } Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=91496&r1=91495&r2=91496&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue Dec 15 20:32:54 2009 @@ -306,7 +306,11 @@ def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, + Encoding16 { + let Inst{15-7} = 0b010001101; + let Inst{2-0} = 0b111; + } } } @@ -596,7 +600,7 @@ T1Special<0b1000>; let Defs = [CPSR] in def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "movs\t$dst, $src", []>, Encoding { + "movs\t$dst, $src", []>, Encoding16 { let Inst{15-6} = 0b0000000000; } Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91496&r1=91495&r2=91496&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Dec 15 20:32:54 2009 @@ -1911,6 +1911,7 @@ AddrModeNone, Size2Bytes, IIC_iALUx, "it$mask\t$cc", "", []> { // 16-bit instruction. 
+ let Inst{31-16} = 0x0000; let Inst{15-8} = 0b10111111; } From vhernandez at apple.com Tue Dec 15 20:52:09 2009 From: vhernandez at apple.com (Victor Hernandez) Date: Wed, 16 Dec 2009 02:52:09 -0000 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp Message-ID: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> Author: hernande Date: Tue Dec 15 20:52:09 2009 New Revision: 91497 URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev Log: MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to Modified: llvm/trunk/include/llvm/Metadata.h llvm/trunk/lib/VMCore/Metadata.cpp Modified: llvm/trunk/include/llvm/Metadata.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=91497&r1=91496&r2=91497&view=diff ============================================================================== --- llvm/trunk/include/llvm/Metadata.h (original) +++ llvm/trunk/include/llvm/Metadata.h Tue Dec 15 20:52:09 2009 @@ -111,13 +111,16 @@ ElementVH *Node; unsigned NodeSize; + Function *LocalFunction; protected: - explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals); + explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + Function *LocalFunction = NULL); public: // Constructors and destructors. static MDNode *get(LLVMContext &Context, - Value *const *Vals, unsigned NumVals); + Value *const *Vals, unsigned NumVals, + Function *LocalFunction = NULL); /// ~MDNode - Destroy MDNode. ~MDNode(); @@ -130,6 +133,9 @@ /// getNumElements - Return number of MDNode elements. unsigned getNumElements() const { return NodeSize; } + + /// isFunctionLocal - Return whether MDNode is local to a function. 
+ bool isFunctionLocal() const { return LocalFunction; } /// Profile - calculate a unique identifier for this MDNode to collapse /// duplicates Modified: llvm/trunk/lib/VMCore/Metadata.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=91497&r1=91496&r2=91497&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Metadata.cpp (original) +++ llvm/trunk/lib/VMCore/Metadata.cpp Tue Dec 15 20:52:09 2009 @@ -49,13 +49,15 @@ //===----------------------------------------------------------------------===// // MDNode implementation. // -MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals) +MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + Function *LocalFunction) : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { NodeSize = NumVals; Node = new ElementVH[NodeSize]; ElementVH *Ptr = Node; for (unsigned i = 0; i != NumVals; ++i) *Ptr++ = ElementVH(Vals[i], this); + LocalFunction = LocalFunction; } void MDNode::Profile(FoldingSetNodeID &ID) const { @@ -63,17 +65,20 @@ ID.AddPointer(getElement(i)); } -MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { +MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals, + Function *LocalFunction) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); + if (LocalFunction) + ID.AddPointer(LocalFunction); void *InsertPoint; MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); if (!N) { // InsertPoint will have been set by the FindNodeOrInsertPos call. 
- N = new MDNode(Context, Vals, NumVals); + N = new MDNode(Context, Vals, NumVals, LocalFunction); pImpl->MDNodeSet.InsertNode(N, InsertPoint); } return N; From nicholas at mxc.ca Wed Dec 16 00:47:45 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Tue, 15 Dec 2009 22:47:45 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> Message-ID: <4B288291.3090704@mxc.ca> Victor Hernandez wrote: > Author: hernande > Date: Tue Dec 15 20:52:09 2009 > New Revision: 91497 > > URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev > Log: > MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to So what's the semantic here? What if the MDNode refers to an Instruction that gets spliced from one Function to another? What happens if the MDNode is attached to an Instruction that's RAUW'd with a Constant? Is this new field copied by MetadataContextImpl::copyMD? You also need to assert that any Instructions passed in also belong to LocalFunction. > Modified: > llvm/trunk/include/llvm/Metadata.h > llvm/trunk/lib/VMCore/Metadata.cpp > > Modified: llvm/trunk/include/llvm/Metadata.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=91497&r1=91496&r2=91497&view=diff > > ============================================================================== > --- llvm/trunk/include/llvm/Metadata.h (original) > +++ llvm/trunk/include/llvm/Metadata.h Tue Dec 15 20:52:09 2009 > @@ -111,13 +111,16 @@ > > ElementVH *Node; > unsigned NodeSize; > + Function *LocalFunction; > > protected: > - explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals); > + explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals, > + Function *LocalFunction = NULL); > public: > // Constructors and destructors. 
> static MDNode *get(LLVMContext&Context, > - Value *const *Vals, unsigned NumVals); > + Value *const *Vals, unsigned NumVals, > + Function *LocalFunction = NULL); > > /// ~MDNode - Destroy MDNode. > ~MDNode(); > @@ -130,6 +133,9 @@ > > /// getNumElements - Return number of MDNode elements. > unsigned getNumElements() const { return NodeSize; } > + > + /// isFunctionLocal - Return whether MDNode is local to a function. > + bool isFunctionLocal() const { return LocalFunction; } > > /// Profile - calculate a unique identifier for this MDNode to collapse > /// duplicates > > Modified: llvm/trunk/lib/VMCore/Metadata.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=91497&r1=91496&r2=91497&view=diff > > ============================================================================== > --- llvm/trunk/lib/VMCore/Metadata.cpp (original) > +++ llvm/trunk/lib/VMCore/Metadata.cpp Tue Dec 15 20:52:09 2009 > @@ -49,13 +49,15 @@ > //===----------------------------------------------------------------------===// > // MDNode implementation. > // > -MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals) > +MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals, > + Function *LocalFunction) > : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { > NodeSize = NumVals; > Node = new ElementVH[NodeSize]; > ElementVH *Ptr = Node; > for (unsigned i = 0; i != NumVals; ++i) > *Ptr++ = ElementVH(Vals[i], this); > + LocalFunction = LocalFunction; Perhaps you meant this->LocalFunction = LocalFunction? 
Nick > } > > void MDNode::Profile(FoldingSetNodeID&ID) const { > @@ -63,17 +65,20 @@ > ID.AddPointer(getElement(i)); > } > > -MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, unsigned NumVals) { > +MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, unsigned NumVals, > + Function *LocalFunction) { > LLVMContextImpl *pImpl = Context.pImpl; > FoldingSetNodeID ID; > for (unsigned i = 0; i != NumVals; ++i) > ID.AddPointer(Vals[i]); > + if (LocalFunction) > + ID.AddPointer(LocalFunction); > > void *InsertPoint; > MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); > if (!N) { > // InsertPoint will have been set by the FindNodeOrInsertPos call. > - N = new MDNode(Context, Vals, NumVals); > + N = new MDNode(Context, Vals, NumVals, LocalFunction); > pImpl->MDNodeSet.InsertNode(N, InsertPoint); > } > return N; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From sabre at nondot.org Wed Dec 16 00:55:45 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 06:55:45 -0000 Subject: [llvm-commits] [llvm] r91518 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160655.nBG6tjcZ009938@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 00:55:45 2009 New Revision: 91518 URL: http://llvm.org/viewvc/llvm-project?rev=91518&view=rev Log: substantial refactoring of SmallVector, now most code is in SmallVectorTemplateCommon, and there is a new SmallVectorTemplateBase class in between it and SmallVectorImpl. SmallVectorTemplateBase can be specialized based on isPodLike. 
Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91518&r1=91517&r2=91518&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 00:55:45 2009 @@ -80,55 +80,49 @@ return BeginX == static_cast(&FirstEl); } - public: bool empty() const { return BeginX == EndX; } }; - -/// SmallVectorImpl - This class consists of common code factored out of the -/// SmallVector class to reduce code duplication based on the SmallVector 'N' -/// template parameter. + template -class SmallVectorImpl : public SmallVectorBase { - void setEnd(T *P) { EndX = P; } +class SmallVectorTemplateCommon : public SmallVectorBase { + void setEnd(T *P) { this->EndX = P; } public: - // Default ctor - Initialize to empty. - explicit SmallVectorImpl(unsigned N) : SmallVectorBase(N*sizeof(T)) { - } - - ~SmallVectorImpl() { + SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {} + + ~SmallVectorTemplateCommon() { // Destroy the constructed elements in the vector. destroy_range(begin(), end()); - + // If this wasn't grown from the inline copy, deallocate the old space. - if (!isSmall()) + if (!this->isSmall()) operator delete(begin()); } - + typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T value_type; typedef T *iterator; typedef const T *const_iterator; - + typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; - + typedef T &reference; typedef const T &const_reference; typedef T *pointer; typedef const T *const_pointer; - + // forward iterator creation methods. 
- iterator begin() { return (iterator)BeginX; } - const_iterator begin() const { return (const_iterator)BeginX; } - iterator end() { return (iterator)EndX; } - const_iterator end() const { return (const_iterator)EndX; } + iterator begin() { return (iterator)this->BeginX; } + const_iterator begin() const { return (const_iterator)this->BeginX; } + iterator end() { return (iterator)this->EndX; } + const_iterator end() const { return (const_iterator)this->EndX; } private: - iterator capacity_ptr() { return (iterator)CapacityX; } - const_iterator capacity_ptr() const { return (const_iterator)CapacityX; } + iterator capacity_ptr() { return (iterator)this->CapacityX; } + const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;} public: - + // reverse iterator creation methods. reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } @@ -171,7 +165,7 @@ } void push_back(const_reference Elt) { - if (EndX < CapacityX) { + if (this->EndX < this->CapacityX) { Retry: new (end()) T(Elt); setEnd(end()+1); @@ -194,7 +188,7 @@ void clear() { destroy_range(begin(), end()); - EndX = BeginX; + this->EndX = this->BeginX; } void resize(unsigned N) { @@ -226,7 +220,7 @@ grow(N); } - void swap(SmallVectorImpl &RHS); + void swap(SmallVectorTemplateCommon &RHS); /// append - Add the specified range to the end of the SmallVector. /// @@ -238,6 +232,8 @@ grow(size()+NumInputs); // Copy the new elements over. + // TODO: NEED To compile time dispatch on whether in_iter is a random access + // iterator to use the fast uninitialized_copy. 
std::uninitialized_copy(in_start, in_end, end()); setEnd(end() + NumInputs); } @@ -287,7 +283,7 @@ return end()-1; } - if (EndX < CapacityX) { + if (this->EndX < this->CapacityX) { Retry: new (end()) T(back()); setEnd(end()+1); @@ -339,7 +335,7 @@ T *OldEnd = end(); setEnd(end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - std::uninitialized_copy(I, OldEnd, end()-NumOverwritten); + uninitialized_copy(I, OldEnd, end()-NumOverwritten); // Replace the overwritten part. std::fill_n(I, NumOverwritten, Elt); @@ -388,25 +384,28 @@ T *OldEnd = end(); setEnd(end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - std::uninitialized_copy(I, OldEnd, end()-NumOverwritten); + uninitialized_copy(I, OldEnd, end()-NumOverwritten); // Replace the overwritten part. std::copy(From, From+NumOverwritten, I); // Insert the non-overwritten middle part. - std::uninitialized_copy(From+NumOverwritten, To, OldEnd); + uninitialized_copy(From+NumOverwritten, To, OldEnd); return I; } - const SmallVectorImpl &operator=(const SmallVectorImpl &RHS); + const SmallVectorTemplateCommon + &operator=(const SmallVectorTemplateCommon &RHS); - bool operator==(const SmallVectorImpl &RHS) const { + bool operator==(const SmallVectorTemplateCommon &RHS) const { if (size() != RHS.size()) return false; return std::equal(begin(), end(), RHS.begin()); } - bool operator!=(const SmallVectorImpl &RHS) const { return !(*this == RHS); } + bool operator!=(const SmallVectorTemplateCommon &RHS) const { + return !(*this == RHS); + } - bool operator<(const SmallVectorImpl &RHS) const { + bool operator<(const SmallVectorTemplateCommon &RHS) const { return std::lexicographical_compare(begin(), end(), RHS.begin(), RHS.end()); } @@ -430,12 +429,12 @@ /// least one more element or MinSize if specified. 
void grow(size_type MinSize = 0); - void construct_range(T *S, T *E, const T &Elt) { + static void construct_range(T *S, T *E, const T &Elt) { for (; S != E; ++S) new (S) T(Elt); } - void destroy_range(T *S, T *E) { + static void destroy_range(T *S, T *E) { // No need to do a destroy loop for POD's. if (isPodLike::value) return; @@ -444,11 +443,23 @@ E->~T(); } } + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove. + if (isPodLike::value) + memcpy(&*Dest, &*I, (E-I)*sizeof(T)); + else + std::uninitialized_copy(I, E, Dest); + } }; + // Define this out-of-line to dissuade the C++ compiler from inlining it. template -void SmallVectorImpl::grow(size_t MinSize) { +void SmallVectorTemplateCommon::grow(size_t MinSize) { size_t CurCapacity = capacity(); size_t CurSize = size(); size_t NewCapacity = 2*CurCapacity; @@ -457,33 +468,29 @@ T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); // Copy the elements over. - if (isPodLike::value) - // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove. - memcpy(NewElts, begin(), CurSize * sizeof(T)); - else - std::uninitialized_copy(begin(), end(), NewElts); + uninitialized_copy(begin(), end(), NewElts); // Destroy the original elements. destroy_range(begin(), end()); // If this wasn't grown from the inline copy, deallocate the old space. - if (!isSmall()) + if (!this->isSmall()) operator delete(begin()); setEnd(NewElts+CurSize); - BeginX = NewElts; - CapacityX = begin()+NewCapacity; + this->BeginX = NewElts; + this->CapacityX = begin()+NewCapacity; } template -void SmallVectorImpl::swap(SmallVectorImpl &RHS) { +void SmallVectorTemplateCommon::swap(SmallVectorTemplateCommon &RHS) { if (this == &RHS) return; // We can only avoid copying elements if neither vector is small. 
- if (!isSmall() && !RHS.isSmall()) { - std::swap(BeginX, RHS.BeginX); - std::swap(EndX, RHS.EndX); - std::swap(CapacityX, RHS.CapacityX); + if (!this->isSmall() && !RHS.isSmall()) { + std::swap(this->BeginX, RHS.BeginX); + std::swap(this->EndX, RHS.EndX); + std::swap(this->CapacityX, RHS.CapacityX); return; } if (RHS.size() > capacity()) @@ -500,13 +507,13 @@ // Copy over the extra elts. if (size() > RHS.size()) { size_t EltDiff = size() - RHS.size(); - std::uninitialized_copy(begin()+NumShared, end(), RHS.end()); + uninitialized_copy(begin()+NumShared, end(), RHS.end()); RHS.setEnd(RHS.end()+EltDiff); destroy_range(begin()+NumShared, end()); setEnd(begin()+NumShared); } else if (RHS.size() > size()) { size_t EltDiff = RHS.size() - size(); - std::uninitialized_copy(RHS.begin()+NumShared, RHS.end(), end()); + uninitialized_copy(RHS.begin()+NumShared, RHS.end(), end()); setEnd(end() + EltDiff); destroy_range(RHS.begin()+NumShared, RHS.end()); RHS.setEnd(RHS.begin()+NumShared); @@ -514,8 +521,9 @@ } template -const SmallVectorImpl & -SmallVectorImpl::operator=(const SmallVectorImpl &RHS) { +const SmallVectorTemplateCommon & +SmallVectorTemplateCommon:: + operator=(const SmallVectorTemplateCommon &RHS) { // Avoid self-assignment. if (this == &RHS) return *this; @@ -553,13 +561,41 @@ } // Copy construct the new elements in place. - std::uninitialized_copy(RHS.begin()+CurSize, RHS.end(), begin()+CurSize); + uninitialized_copy(RHS.begin()+CurSize, RHS.end(), begin()+CurSize); // Set end. 
setEnd(begin()+RHSSize); return *this; } + +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + +}; + +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + +}; + + +/// SmallVectorImpl - This class consists of common code factored out of the +/// SmallVector class to reduce code duplication based on the SmallVector 'N' +/// template parameter. +template +class SmallVectorImpl : public SmallVectorTemplateBase::value> { +public: + // Default ctor - Initialize to empty. + explicit SmallVectorImpl(unsigned N) + : SmallVectorTemplateBase::value>(N*sizeof(T)) { + } +}; + /// SmallVector - This is a 'vector' (really, a variable-sized array), optimized /// for the case when the array is small. It contains some number of elements /// in-place, which allows it to avoid heap allocation when the actual number of From nicholas at mxc.ca Wed Dec 16 01:35:25 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 16 Dec 2009 07:35:25 -0000 Subject: [llvm-commits] [llvm] r91521 - /llvm/trunk/test/CodeGen/Thumb2/large-stack.ll Message-ID: <200912160735.nBG7ZPtx011326@zion.cs.uiuc.edu> Author: nicholas Date: Wed Dec 16 01:35:25 2009 New Revision: 91521 URL: http://llvm.org/viewvc/llvm-project?rev=91521&view=rev Log: Make this test pass on Linux. 
Modified: llvm/trunk/test/CodeGen/Thumb2/large-stack.ll Modified: llvm/trunk/test/CodeGen/Thumb2/large-stack.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/large-stack.ll?rev=91521&r1=91520&r2=91521&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Thumb2/large-stack.ll (original) +++ llvm/trunk/test/CodeGen/Thumb2/large-stack.ll Wed Dec 16 01:35:25 2009 @@ -1,24 +1,35 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX define void @test1() { -; CHECK: test1: -; CHECK: sub sp, #256 +; DARWIN: test1: +; DARWIN: sub sp, #256 +; LINUX: test1: +; LINUX: sub sp, #256 %tmp = alloca [ 64 x i32 ] , align 4 ret void } define void @test2() { -; CHECK: test2: -; CHECK: sub.w sp, sp, #4160 -; CHECK: sub sp, #8 +; DARWIN: test2: +; DARWIN: sub.w sp, sp, #4160 +; DARWIN: sub sp, #8 +; LINUX: test2: +; LINUX: sub.w sp, sp, #4160 +; LINUX: sub sp, #8 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } define i32 @test3() { -; CHECK: test3: -; CHECK: sub.w sp, sp, #805306368 -; CHECK: sub sp, #20 +; DARWIN: test3: +; DARWIN: push {r4, r7, lr} +; DARWIN: sub.w sp, sp, #805306368 +; DARWIN: sub sp, #20 +; LINUX: test3: +; LINUX: stmfd sp!, {r4, r7, r11, lr} +; LINUX: sub.w sp, sp, #805306368 +; LINUX: sub sp, #16 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 From sabre at nondot.org Wed Dec 16 02:05:48 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:05:48 -0000 Subject: [llvm-commits] [llvm] r91522 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160805.nBG85mgl012563@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:05:48 2009 New Revision: 91522 URL: 
http://llvm.org/viewvc/llvm-project?rev=91522&view=rev Log: sink most of the meat in smallvector back from SmallVectorTemplateCommon down into SmallVectorImpl. This requires sprinkling a ton of this->'s in, but gives us a place to factor. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91522&r1=91521&r2=91522&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 02:05:48 2009 @@ -86,19 +86,11 @@ template class SmallVectorTemplateCommon : public SmallVectorBase { +protected: void setEnd(T *P) { this->EndX = P; } public: SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {} - ~SmallVectorTemplateCommon() { - // Destroy the constructed elements in the vector. - destroy_range(begin(), end()); - - // If this wasn't grown from the inline copy, deallocate the old space. 
- if (!this->isSmall()) - operator delete(begin()); - } - typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T value_type; @@ -118,7 +110,7 @@ const_iterator begin() const { return (const_iterator)this->BeginX; } iterator end() { return (iterator)this->EndX; } const_iterator end() const { return (const_iterator)this->EndX; } -private: +protected: iterator capacity_ptr() { return (iterator)this->CapacityX; } const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;} public: @@ -163,253 +155,297 @@ const_reference back() const { return end()[-1]; } +}; + + +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} - void push_back(const_reference Elt) { - if (this->EndX < this->CapacityX) { - Retry: - new (end()) T(Elt); - setEnd(end()+1); - return; - } - grow(); - goto Retry; - } +}; - void pop_back() { - setEnd(end()-1); - end()->~T(); +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + +}; + + +/// SmallVectorImpl - This class consists of common code factored out of the +/// SmallVector class to reduce code duplication based on the SmallVector 'N' +/// template parameter. +template +class SmallVectorImpl : public SmallVectorTemplateBase::value> { +public: + typedef typename SmallVectorTemplateBase::value >::iterator + iterator; + typedef typename SmallVectorTemplateBase::value >::size_type + size_type; + + // Default ctor - Initialize to empty. + explicit SmallVectorImpl(unsigned N) + : SmallVectorTemplateBase::value>(N*sizeof(T)) { } - - T pop_back_val() { - T Result = back(); - pop_back(); - return Result; + + ~SmallVectorImpl() { + // Destroy the constructed elements in the vector. + destroy_range(this->begin(), this->end()); + + // If this wasn't grown from the inline copy, deallocate the old space. 
+ if (!this->isSmall()) + operator delete(this->begin()); } - + + void clear() { - destroy_range(begin(), end()); + destroy_range(this->begin(), this->end()); this->EndX = this->BeginX; } void resize(unsigned N) { - if (N < size()) { - destroy_range(begin()+N, end()); - setEnd(begin()+N); - } else if (N > size()) { - if (capacity() < N) + if (N < this->size()) { + this->destroy_range(this->begin()+N, this->end()); + this->setEnd(this->begin()+N); + } else if (N > this->size()) { + if (this->capacity() < N) grow(N); - construct_range(end(), begin()+N, T()); - setEnd(begin()+N); + this->construct_range(this->end(), this->begin()+N, T()); + this->setEnd(this->begin()+N); } } void resize(unsigned N, const T &NV) { - if (N < size()) { - destroy_range(begin()+N, end()); - setEnd(begin()+N); - } else if (N > size()) { - if (capacity() < N) + if (N < this->size()) { + destroy_range(this->begin()+N, this->end()); + setEnd(this->begin()+N); + } else if (N > this->size()) { + if (this->capacity() < N) grow(N); - construct_range(end(), begin()+N, NV); - setEnd(begin()+N); + construct_range(this->end(), this->begin()+N, NV); + setEnd(this->begin()+N); } } void reserve(unsigned N) { - if (capacity() < N) + if (this->capacity() < N) grow(N); } - - void swap(SmallVectorTemplateCommon &RHS); - + + void push_back(const T &Elt) { + if (this->EndX < this->CapacityX) { + Retry: + new (this->end()) T(Elt); + setEnd(this->end()+1); + return; + } + this->grow(); + goto Retry; + } + + void pop_back() { + setEnd(this->end()-1); + this->end()->~T(); + } + + T pop_back_val() { + T Result = this->back(); + pop_back(); + return Result; + } + + + void swap(SmallVectorImpl &RHS); + /// append - Add the specified range to the end of the SmallVector. /// template void append(in_iter in_start, in_iter in_end) { size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. 
- if (NumInputs > size_type(capacity_ptr()-end())) - grow(size()+NumInputs); - + if (NumInputs > size_type(this->capacity_ptr()-this->end())) + grow(this->size()+NumInputs); + // Copy the new elements over. // TODO: NEED To compile time dispatch on whether in_iter is a random access // iterator to use the fast uninitialized_copy. - std::uninitialized_copy(in_start, in_end, end()); - setEnd(end() + NumInputs); + std::uninitialized_copy(in_start, in_end, this->end()); + setEnd(this->end() + NumInputs); } - + /// append - Add the specified range to the end of the SmallVector. /// void append(size_type NumInputs, const T &Elt) { // Grow allocated space if needed. - if (NumInputs > size_type(capacity_ptr()-end())) - grow(size()+NumInputs); - + if (NumInputs > size_type(this->capacity_ptr()-this->end())) + grow(this->size()+NumInputs); + // Copy the new elements over. - std::uninitialized_fill_n(end(), NumInputs, Elt); - setEnd(end() + NumInputs); + std::uninitialized_fill_n(this->end(), NumInputs, Elt); + setEnd(this->end() + NumInputs); } - + void assign(unsigned NumElts, const T &Elt) { clear(); - if (capacity() < NumElts) + if (this->capacity() < NumElts) grow(NumElts); - setEnd(begin()+NumElts); - construct_range(begin(), end(), Elt); + setEnd(this->begin()+NumElts); + construct_range(this->begin(), this->end(), Elt); } - + iterator erase(iterator I) { iterator N = I; // Shift all elts down one. - std::copy(I+1, end(), I); + std::copy(I+1, this->end(), I); // Drop the last elt. pop_back(); return(N); } - + iterator erase(iterator S, iterator E) { iterator N = S; // Shift all elts down. - iterator I = std::copy(E, end(), S); + iterator I = std::copy(E, this->end(), S); // Drop the last elts. - destroy_range(I, end()); + destroy_range(I, this->end()); setEnd(I); return(N); } - + iterator insert(iterator I, const T &Elt) { - if (I == end()) { // Important special case for empty vector. + if (I == this->end()) { // Important special case for empty vector. 
push_back(Elt); - return end()-1; + return this->end()-1; } - + if (this->EndX < this->CapacityX) { - Retry: - new (end()) T(back()); - setEnd(end()+1); + Retry: + new (this->end()) T(this->back()); + this->setEnd(this->end()+1); // Push everything else over. - std::copy_backward(I, end()-1, end()); + std::copy_backward(I, this->end()-1, this->end()); *I = Elt; return I; } - size_t EltNo = I-begin(); - grow(); - I = begin()+EltNo; + size_t EltNo = I-this->begin(); + this->grow(); + I = this->begin()+EltNo; goto Retry; } - + iterator insert(iterator I, size_type NumToInsert, const T &Elt) { - if (I == end()) { // Important special case for empty vector. + if (I == this->end()) { // Important special case for empty vector. append(NumToInsert, Elt); - return end()-1; + return this->end()-1; } - + // Convert iterator to elt# to avoid invalidating iterator when we reserve() - size_t InsertElt = I-begin(); - + size_t InsertElt = I - this->begin(); + // Ensure there is enough space. - reserve(static_cast(size() + NumToInsert)); - + reserve(static_cast(this->size() + NumToInsert)); + // Uninvalidate the iterator. - I = begin()+InsertElt; - + I = this->begin()+InsertElt; + // If there are more elements between the insertion point and the end of the // range than there are being inserted, we can use a simple approach to // insertion. Since we already reserved space, we know that this won't // reallocate the vector. - if (size_t(end()-I) >= NumToInsert) { - T *OldEnd = end(); - append(end()-NumToInsert, end()); - + if (size_t(this->end()-I) >= NumToInsert) { + T *OldEnd = this->end(); + append(this->end()-NumToInsert, this->end()); + // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); - + std::fill_n(I, NumToInsert, Elt); return I; } - + // Otherwise, we're inserting more elements than exist already, and we're // not inserting at the end. - + // Copy over the elements that we're about to overwrite. 
- T *OldEnd = end(); - setEnd(end() + NumToInsert); + T *OldEnd = this->end(); + setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - uninitialized_copy(I, OldEnd, end()-NumOverwritten); - + uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + // Replace the overwritten part. std::fill_n(I, NumOverwritten, Elt); - + // Insert the non-overwritten middle part. std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); return I; } - + template iterator insert(iterator I, ItTy From, ItTy To) { - if (I == end()) { // Important special case for empty vector. + if (I == this->end()) { // Important special case for empty vector. append(From, To); - return end()-1; + return this->end()-1; } - + size_t NumToInsert = std::distance(From, To); // Convert iterator to elt# to avoid invalidating iterator when we reserve() - size_t InsertElt = I-begin(); - + size_t InsertElt = I - this->begin(); + // Ensure there is enough space. - reserve(static_cast(size() + NumToInsert)); - + reserve(static_cast(this->size() + NumToInsert)); + // Uninvalidate the iterator. - I = begin()+InsertElt; - + I = this->begin()+InsertElt; + // If there are more elements between the insertion point and the end of the // range than there are being inserted, we can use a simple approach to // insertion. Since we already reserved space, we know that this won't // reallocate the vector. - if (size_t(end()-I) >= NumToInsert) { - T *OldEnd = end(); - append(end()-NumToInsert, end()); - + if (size_t(this->end()-I) >= NumToInsert) { + T *OldEnd = this->end(); + append(this->end()-NumToInsert, this->end()); + // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); - + std::copy(From, To, I); return I; } - + // Otherwise, we're inserting more elements than exist already, and we're // not inserting at the end. - + // Copy over the elements that we're about to overwrite. 
- T *OldEnd = end(); - setEnd(end() + NumToInsert); + T *OldEnd = this->end(); + setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - uninitialized_copy(I, OldEnd, end()-NumOverwritten); - + uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + // Replace the overwritten part. std::copy(From, From+NumOverwritten, I); - + // Insert the non-overwritten middle part. uninitialized_copy(From+NumOverwritten, To, OldEnd); return I; } - - const SmallVectorTemplateCommon - &operator=(const SmallVectorTemplateCommon &RHS); - - bool operator==(const SmallVectorTemplateCommon &RHS) const { - if (size() != RHS.size()) return false; - return std::equal(begin(), end(), RHS.begin()); + + const SmallVectorImpl + &operator=(const SmallVectorImpl &RHS); + + bool operator==(const SmallVectorImpl &RHS) const { + if (this->size() != RHS.size()) return false; + return std::equal(this->begin(), this->end(), RHS.begin()); } - bool operator!=(const SmallVectorTemplateCommon &RHS) const { + bool operator!=(const SmallVectorImpl &RHS) const { return !(*this == RHS); } - - bool operator<(const SmallVectorTemplateCommon &RHS) const { - return std::lexicographical_compare(begin(), end(), + + bool operator<(const SmallVectorImpl &RHS) const { + return std::lexicographical_compare(this->begin(), this->end(), RHS.begin(), RHS.end()); } - + /// set_size - Set the array size to \arg N, which the current array must have /// enough capacity for. /// @@ -420,20 +456,20 @@ /// update the size later. This avoids the cost of value initializing elements /// which will only be overwritten. void set_size(unsigned N) { - assert(N <= capacity()); - setEnd(begin() + N); + assert(N <= this->capacity()); + setEnd(this->begin() + N); } - + private: /// grow - double the size of the allocated memory, guaranteeing space for at /// least one more element or MinSize if specified. 
- void grow(size_type MinSize = 0); - + void grow(size_t MinSize = 0); + static void construct_range(T *S, T *E, const T &Elt) { for (; S != E; ++S) new (S) T(Elt); } - + static void destroy_range(T *S, T *E) { // No need to do a destroy loop for POD's. if (isPodLike::value) return; @@ -459,31 +495,31 @@ // Define this out-of-line to dissuade the C++ compiler from inlining it. template -void SmallVectorTemplateCommon::grow(size_t MinSize) { - size_t CurCapacity = capacity(); - size_t CurSize = size(); +void SmallVectorImpl::grow(size_t MinSize) { + size_t CurCapacity = this->capacity(); + size_t CurSize = this->size(); size_t NewCapacity = 2*CurCapacity; if (NewCapacity < MinSize) NewCapacity = MinSize; T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); // Copy the elements over. - uninitialized_copy(begin(), end(), NewElts); + uninitialized_copy(this->begin(), this->end(), NewElts); // Destroy the original elements. - destroy_range(begin(), end()); + destroy_range(this->begin(), this->end()); // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) - operator delete(begin()); + operator delete(this->begin()); setEnd(NewElts+CurSize); this->BeginX = NewElts; - this->CapacityX = begin()+NewCapacity; + this->CapacityX = this->begin()+NewCapacity; } template -void SmallVectorTemplateCommon::swap(SmallVectorTemplateCommon &RHS) { +void SmallVectorImpl::swap(SmallVectorImpl &RHS) { if (this == &RHS) return; // We can only avoid copying elements if neither vector is small. @@ -493,54 +529,53 @@ std::swap(this->CapacityX, RHS.CapacityX); return; } - if (RHS.size() > capacity()) + if (RHS.size() > this->capacity()) grow(RHS.size()); - if (size() > RHS.capacity()) - RHS.grow(size()); + if (this->size() > RHS.capacity()) + RHS.grow(this->size()); // Swap the shared elements. 
- size_t NumShared = size(); + size_t NumShared = this->size(); if (NumShared > RHS.size()) NumShared = RHS.size(); for (unsigned i = 0; i != static_cast(NumShared); ++i) std::swap((*this)[i], RHS[i]); // Copy over the extra elts. - if (size() > RHS.size()) { - size_t EltDiff = size() - RHS.size(); - uninitialized_copy(begin()+NumShared, end(), RHS.end()); + if (this->size() > RHS.size()) { + size_t EltDiff = this->size() - RHS.size(); + uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); RHS.setEnd(RHS.end()+EltDiff); - destroy_range(begin()+NumShared, end()); - setEnd(begin()+NumShared); - } else if (RHS.size() > size()) { - size_t EltDiff = RHS.size() - size(); - uninitialized_copy(RHS.begin()+NumShared, RHS.end(), end()); - setEnd(end() + EltDiff); + destroy_range(this->begin()+NumShared, this->end()); + setEnd(this->begin()+NumShared); + } else if (RHS.size() > this->size()) { + size_t EltDiff = RHS.size() - this->size(); + uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); + setEnd(this->end() + EltDiff); destroy_range(RHS.begin()+NumShared, RHS.end()); RHS.setEnd(RHS.begin()+NumShared); } } template -const SmallVectorTemplateCommon & -SmallVectorTemplateCommon:: - operator=(const SmallVectorTemplateCommon &RHS) { +const SmallVectorImpl &SmallVectorImpl:: + operator=(const SmallVectorImpl &RHS) { // Avoid self-assignment. if (this == &RHS) return *this; // If we already have sufficient space, assign the common elements, then // destroy any excess. size_t RHSSize = RHS.size(); - size_t CurSize = size(); + size_t CurSize = this->size(); if (CurSize >= RHSSize) { // Assign common elements. iterator NewEnd; if (RHSSize) - NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, begin()); + NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); else - NewEnd = begin(); + NewEnd = this->begin(); // Destroy excess elements. - destroy_range(NewEnd, end()); + destroy_range(NewEnd, this->end()); // Trim. 
setEnd(NewEnd); @@ -549,52 +584,25 @@ // If we have to grow to have enough elements, destroy the current elements. // This allows us to avoid copying them during the grow. - if (capacity() < RHSSize) { + if (this->capacity() < RHSSize) { // Destroy current elements. - destroy_range(begin(), end()); - setEnd(begin()); + destroy_range(this->begin(), this->end()); + setEnd(this->begin()); CurSize = 0; grow(RHSSize); } else if (CurSize) { // Otherwise, use assignment for the already-constructed elements. - std::copy(RHS.begin(), RHS.begin()+CurSize, begin()); + std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); } // Copy construct the new elements in place. - uninitialized_copy(RHS.begin()+CurSize, RHS.end(), begin()+CurSize); + uninitialized_copy(RHS.begin()+CurSize, RHS.end(), this->begin()+CurSize); // Set end. - setEnd(begin()+RHSSize); + setEnd(this->begin()+RHSSize); return *this; } - -template -class SmallVectorTemplateBase : public SmallVectorTemplateCommon { -public: - SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} - -}; - -template -class SmallVectorTemplateBase : public SmallVectorTemplateCommon { -public: - SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} - -}; - - -/// SmallVectorImpl - This class consists of common code factored out of the -/// SmallVector class to reduce code duplication based on the SmallVector 'N' -/// template parameter. -template -class SmallVectorImpl : public SmallVectorTemplateBase::value> { -public: - // Default ctor - Initialize to empty. - explicit SmallVectorImpl(unsigned N) - : SmallVectorTemplateBase::value>(N*sizeof(T)) { - } -}; /// SmallVector - This is a 'vector' (really, a variable-sized array), optimized /// for the case when the array is small. 
It contains some number of elements From sabre at nondot.org Wed Dec 16 02:09:23 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:09:23 -0000 Subject: [llvm-commits] [llvm] r91523 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160809.nBG89NOL012710@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:09:23 2009 New Revision: 91523 URL: http://llvm.org/viewvc/llvm-project?rev=91523&view=rev Log: pull destroy_range and uninitialized_copy up to the SmallVectorTemplateBase class, which allows us to statically dispatch on isPodLike instead of dynamically. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91523&r1=91522&r2=91523&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 02:09:23 2009 @@ -157,19 +157,47 @@ } }; - +/// SmallVectorTemplateBase - This is where we put method +/// implementations that are designed to work with non-POD-like T's. template class SmallVectorTemplateBase : public SmallVectorTemplateCommon { public: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + static void destroy_range(T *S, T *E) { + while (S != E) { + --E; + E->~T(); + } + } + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + std::uninitialized_copy(I, E, Dest); + } + }; +/// SmallVectorTemplateBase - This is where we put method +/// implementations that are designed to work with POD-like T's. 
template class SmallVectorTemplateBase : public SmallVectorTemplateCommon { public: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + // No need to do a destroy loop for POD's. + static void destroy_range(T *S, T *E) {} + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove, memcpy + // is better. + memcpy(&*Dest, &*I, (E-I)*sizeof(T)); + } }; @@ -178,11 +206,10 @@ /// template parameter. template class SmallVectorImpl : public SmallVectorTemplateBase::value> { + typedef SmallVectorTemplateBase::value > SuperClass; public: - typedef typename SmallVectorTemplateBase::value >::iterator - iterator; - typedef typename SmallVectorTemplateBase::value >::size_type - size_type; + typedef typename SuperClass::iterator iterator; + typedef typename SuperClass::size_type size_type; // Default ctor - Initialize to empty. explicit SmallVectorImpl(unsigned N) @@ -469,27 +496,6 @@ for (; S != E; ++S) new (S) T(Elt); } - - static void destroy_range(T *S, T *E) { - // No need to do a destroy loop for POD's. - if (isPodLike::value) return; - - while (S != E) { - --E; - E->~T(); - } - } - - /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory - /// starting with "Dest", constructing elements into it as needed. - template - static void uninitialized_copy(It1 I, It1 E, It2 Dest) { - // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove. 
- if (isPodLike::value) - memcpy(&*Dest, &*I, (E-I)*sizeof(T)); - else - std::uninitialized_copy(I, E, Dest); - } }; From vhernandez at apple.com Wed Dec 16 02:10:57 2009 From: vhernandez at apple.com (Victor Hernandez) Date: Wed, 16 Dec 2009 08:10:57 -0000 Subject: [llvm-commits] [llvm] r91524 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp Message-ID: <200912160810.nBG8AvmD012861@zion.cs.uiuc.edu> Author: hernande Date: Wed Dec 16 02:10:57 2009 New Revision: 91524 URL: http://llvm.org/viewvc/llvm-project?rev=91524&view=rev Log: Use different name for argument and field Modified: llvm/trunk/include/llvm/Metadata.h llvm/trunk/lib/VMCore/Metadata.cpp Modified: llvm/trunk/include/llvm/Metadata.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=91524&r1=91523&r2=91524&view=diff ============================================================================== --- llvm/trunk/include/llvm/Metadata.h (original) +++ llvm/trunk/include/llvm/Metadata.h Wed Dec 16 02:10:57 2009 @@ -115,7 +115,7 @@ protected: explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, - Function *LocalFunction = NULL); + Function *LocalFunc = NULL); public: // Constructors and destructors. static MDNode *get(LLVMContext &Context, Modified: llvm/trunk/lib/VMCore/Metadata.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=91524&r1=91523&r2=91524&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Metadata.cpp (original) +++ llvm/trunk/lib/VMCore/Metadata.cpp Wed Dec 16 02:10:57 2009 @@ -50,14 +50,14 @@ // MDNode implementation. 
// MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, - Function *LocalFunction) + Function *LocalFunc) : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { NodeSize = NumVals; Node = new ElementVH[NodeSize]; ElementVH *Ptr = Node; for (unsigned i = 0; i != NumVals; ++i) *Ptr++ = ElementVH(Vals[i], this); - LocalFunction = LocalFunction; + LocalFunction = LocalFunc; } void MDNode::Profile(FoldingSetNodeID &ID) const { From vhernandez at apple.com Wed Dec 16 02:13:19 2009 From: vhernandez at apple.com (Victor Hernandez) Date: Wed, 16 Dec 2009 00:13:19 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: <4B288291.3090704@mxc.ca> References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> Message-ID: Nick, Thanks for the review. On Dec 15, 2009, at 10:47 PM, Nick Lewycky wrote: > Victor Hernandez wrote: >> Author: hernande >> Date: Tue Dec 15 20:52:09 2009 >> New Revision: 91497 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev >> Log: >> MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to > > So what's the semantic here? What if the MDNode refers to an Instruction that gets spliced from one Function to another? What happens if the MDNode is attached to an Instruction that's RAUW'd with a Constant? > > Is this new field copied by MetadataContextImpl::copyMD? The semantic is that if an MDNode is created function-local, then it will continue to be function-local even if its operands are modified to no longer refer to any function-specific IR. I need to add that documentation to Metadata.h. If an instruction is copied from one Function to another, any function-local metadata that refers to it will have to point to the new instruction (and update its LocalFunction), or that operand can be replaced with null. 
The changes to the copying logic and the necessary asserts and verification are still in progress. > You also need to assert that any Instructions passed in also belong to LocalFunction. Good call. I will add that assert. I will also be adding Verification of function-local MDNodes that verifies the same. > >> Modified: >> llvm/trunk/include/llvm/Metadata.h >> llvm/trunk/lib/VMCore/Metadata.cpp >> >> Modified: llvm/trunk/include/llvm/Metadata.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=91497&r1=91496&r2=91497&view=diff >> >> ============================================================================== >> --- llvm/trunk/include/llvm/Metadata.h (original) >> +++ llvm/trunk/include/llvm/Metadata.h Tue Dec 15 20:52:09 2009 >> @@ -111,13 +111,16 @@ >> >> ElementVH *Node; >> unsigned NodeSize; >> + Function *LocalFunction; >> >> protected: >> - explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals); >> + explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals, >> + Function *LocalFunction = NULL); >> public: >> // Constructors and destructors. >> static MDNode *get(LLVMContext&Context, >> - Value *const *Vals, unsigned NumVals); >> + Value *const *Vals, unsigned NumVals, >> + Function *LocalFunction = NULL); >> >> /// ~MDNode - Destroy MDNode. >> ~MDNode(); >> @@ -130,6 +133,9 @@ >> >> /// getNumElements - Return number of MDNode elements. >> unsigned getNumElements() const { return NodeSize; } >> + >> + /// isFunctionLocal - Return whether MDNode is local to a function. 
>> + bool isFunctionLocal() const { return LocalFunction; } >> >> /// Profile - calculate a unique identifier for this MDNode to collapse >> /// duplicates >> >> Modified: llvm/trunk/lib/VMCore/Metadata.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=91497&r1=91496&r2=91497&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/VMCore/Metadata.cpp (original) >> +++ llvm/trunk/lib/VMCore/Metadata.cpp Tue Dec 15 20:52:09 2009 >> @@ -49,13 +49,15 @@ >> //===----------------------------------------------------------------------===// >> // MDNode implementation. >> // >> -MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals) >> +MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals, >> + Function *LocalFunction) >> : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { >> NodeSize = NumVals; >> Node = new ElementVH[NodeSize]; >> ElementVH *Ptr = Node; >> for (unsigned i = 0; i != NumVals; ++i) >> *Ptr++ = ElementVH(Vals[i], this); >> + LocalFunction = LocalFunction; > > Perhaps you meant this->LocalFunction = LocalFunction? Good catch. I fixed this in r91524. > > Nick > >> } >> >> void MDNode::Profile(FoldingSetNodeID&ID) const { >> @@ -63,17 +65,20 @@ >> ID.AddPointer(getElement(i)); >> } >> >> -MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, unsigned NumVals) { >> +MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, unsigned NumVals, >> + Function *LocalFunction) { >> LLVMContextImpl *pImpl = Context.pImpl; >> FoldingSetNodeID ID; >> for (unsigned i = 0; i != NumVals; ++i) >> ID.AddPointer(Vals[i]); >> + if (LocalFunction) >> + ID.AddPointer(LocalFunction); >> >> void *InsertPoint; >> MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); >> if (!N) { >> // InsertPoint will have been set by the FindNodeOrInsertPos call. 
>> - N = new MDNode(Context, Vals, NumVals); >> + N = new MDNode(Context, Vals, NumVals, LocalFunction); >> pImpl->MDNodeSet.InsertNode(N, InsertPoint); >> } >> return N; >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/3a807d4b/attachment.html From sabre at nondot.org Wed Dec 16 02:34:40 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:34:40 -0000 Subject: [llvm-commits] [llvm] r91526 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160834.nBG8YejB013845@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:34:40 2009 New Revision: 91526 URL: http://llvm.org/viewvc/llvm-project?rev=91526&view=rev Log: factor out the grow() method for all pod implementations into one common function. It is still an inline method, which will be fixed next. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91526&r1=91525&r2=91526&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 02:34:40 2009 @@ -80,9 +80,42 @@ return BeginX == static_cast(&FirstEl); } + /// size_in_bytes - This returns size()*sizeof(T). + size_t size_in_bytes() const { + return size_t((char*)EndX - (char*)BeginX); + } + + /// capacity_in_bytes - This returns capacity()*sizeof(T). 
+ size_t capacity_in_bytes() const { + return size_t((char*)CapacityX - (char*)BeginX); + } + + inline void grow_pod(size_t MinSizeInBytes, size_t TSize); + public: bool empty() const { return BeginX == EndX; } }; + +inline void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { + size_t CurSizeBytes = size_in_bytes(); + size_t NewCapacityInBytes = 2 * capacity_in_bytes(); + if (NewCapacityInBytes < MinSizeInBytes) + NewCapacityInBytes = MinSizeInBytes; + void *NewElts = operator new(NewCapacityInBytes); + + // Copy the elements over. + memcpy(NewElts, this->BeginX, CurSizeBytes); + + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + operator delete(this->BeginX); + + this->EndX = (char*)NewElts+CurSizeBytes; + this->BeginX = NewElts; + this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; +} + + template class SmallVectorTemplateCommon : public SmallVectorBase { @@ -178,8 +211,37 @@ std::uninitialized_copy(I, E, Dest); } + /// grow - double the size of the allocated memory, guaranteeing space for at + /// least one more element or MinSize if specified. + void grow(size_t MinSize = 0); }; +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +void SmallVectorTemplateBase::grow(size_t MinSize) { + size_t CurCapacity = this->capacity(); + size_t CurSize = this->size(); + size_t NewCapacity = 2*CurCapacity; + if (NewCapacity < MinSize) + NewCapacity = MinSize; + T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); + + // Copy the elements over. + uninitialized_copy(this->begin(), this->end(), NewElts); + + // Destroy the original elements. + destroy_range(this->begin(), this->end()); + + // If this wasn't grown from the inline copy, deallocate the old space. 
+ if (!this->isSmall()) + operator delete(this->begin()); + + setEnd(NewElts+CurSize); + this->BeginX = NewElts; + this->CapacityX = this->begin()+NewCapacity; +} + + /// SmallVectorTemplateBase - This is where we put method /// implementations that are designed to work with POD-like T's. template @@ -198,6 +260,12 @@ // is better. memcpy(&*Dest, &*I, (E-I)*sizeof(T)); } + + /// grow - double the size of the allocated memory, guaranteeing space for at + /// least one more element or MinSize if specified. + void grow(size_t MinSize = 0) { + this->grow_pod(MinSize*sizeof(T), sizeof(T)); + } }; @@ -237,7 +305,7 @@ this->setEnd(this->begin()+N); } else if (N > this->size()) { if (this->capacity() < N) - grow(N); + this->grow(N); this->construct_range(this->end(), this->begin()+N, T()); this->setEnd(this->begin()+N); } @@ -249,7 +317,7 @@ setEnd(this->begin()+N); } else if (N > this->size()) { if (this->capacity() < N) - grow(N); + this->grow(N); construct_range(this->end(), this->begin()+N, NV); setEnd(this->begin()+N); } @@ -257,7 +325,7 @@ void reserve(unsigned N) { if (this->capacity() < N) - grow(N); + this->grow(N); } void push_back(const T &Elt) { @@ -292,7 +360,7 @@ size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. if (NumInputs > size_type(this->capacity_ptr()-this->end())) - grow(this->size()+NumInputs); + this->grow(this->size()+NumInputs); // Copy the new elements over. // TODO: NEED To compile time dispatch on whether in_iter is a random access @@ -306,7 +374,7 @@ void append(size_type NumInputs, const T &Elt) { // Grow allocated space if needed. if (NumInputs > size_type(this->capacity_ptr()-this->end())) - grow(this->size()+NumInputs); + this->grow(this->size()+NumInputs); // Copy the new elements over. 
std::uninitialized_fill_n(this->end(), NumInputs, Elt); @@ -316,7 +384,7 @@ void assign(unsigned NumElts, const T &Elt) { clear(); if (this->capacity() < NumElts) - grow(NumElts); + this->grow(NumElts); setEnd(this->begin()+NumElts); construct_range(this->begin(), this->end(), Elt); } @@ -488,10 +556,6 @@ } private: - /// grow - double the size of the allocated memory, guaranteeing space for at - /// least one more element or MinSize if specified. - void grow(size_t MinSize = 0); - static void construct_range(T *S, T *E, const T &Elt) { for (; S != E; ++S) new (S) T(Elt); @@ -499,31 +563,6 @@ }; -// Define this out-of-line to dissuade the C++ compiler from inlining it. -template -void SmallVectorImpl::grow(size_t MinSize) { - size_t CurCapacity = this->capacity(); - size_t CurSize = this->size(); - size_t NewCapacity = 2*CurCapacity; - if (NewCapacity < MinSize) - NewCapacity = MinSize; - T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); - - // Copy the elements over. - uninitialized_copy(this->begin(), this->end(), NewElts); - - // Destroy the original elements. - destroy_range(this->begin(), this->end()); - - // If this wasn't grown from the inline copy, deallocate the old space. - if (!this->isSmall()) - operator delete(this->begin()); - - setEnd(NewElts+CurSize); - this->BeginX = NewElts; - this->CapacityX = this->begin()+NewCapacity; -} - template void SmallVectorImpl::swap(SmallVectorImpl &RHS) { if (this == &RHS) return; @@ -536,7 +575,7 @@ return; } if (RHS.size() > this->capacity()) - grow(RHS.size()); + this->grow(RHS.size()); if (this->size() > RHS.capacity()) RHS.grow(this->size()); @@ -595,7 +634,7 @@ destroy_range(this->begin(), this->end()); setEnd(this->begin()); CurSize = 0; - grow(RHSSize); + this->grow(RHSSize); } else if (CurSize) { // Otherwise, use assignment for the already-constructed elements. 
std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); From sabre at nondot.org Wed Dec 16 02:35:55 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:35:55 -0000 Subject: [llvm-commits] [llvm] r91527 - /llvm/trunk/lib/System/Unix/Path.inc Message-ID: <200912160835.nBG8ZtFq013885@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:35:54 2009 New Revision: 91527 URL: http://llvm.org/viewvc/llvm-project?rev=91527&view=rev Log: eliminate an extraneous use of SmallVector in a case where a fixed size buffer is perfectly fine. Modified: llvm/trunk/lib/System/Unix/Path.inc Modified: llvm/trunk/lib/System/Unix/Path.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Unix/Path.inc?rev=91527&r1=91526&r2=91527&view=diff ============================================================================== --- llvm/trunk/lib/System/Unix/Path.inc (original) +++ llvm/trunk/lib/System/Unix/Path.inc Wed Dec 16 02:35:54 2009 @@ -414,21 +414,19 @@ return path.substr(dot + 1); } -bool Path::getMagicNumber(std::string& Magic, unsigned len) const { +bool Path::getMagicNumber(std::string &Magic, unsigned len) const { assert(len < 1024 && "Request for magic string too long"); - SmallVector Buf; - Buf.resize(1 + len); - char* buf = Buf.data(); + char Buf[1025]; int fd = ::open(path.c_str(), O_RDONLY); if (fd < 0) return false; - ssize_t bytes_read = ::read(fd, buf, len); + ssize_t bytes_read = ::read(fd, Buf, len); ::close(fd); if (ssize_t(len) != bytes_read) { Magic.clear(); return false; } - Magic.assign(buf,len); + Magic.assign(Buf, len); return true; } From sabre at nondot.org Wed Dec 16 02:40:45 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:40:45 -0000 Subject: [llvm-commits] [llvm] r91528 - /llvm/trunk/lib/System/Unix/Path.inc Message-ID: <200912160840.nBG8ejwC016798@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:40:44 2009 New Revision: 91528 URL: http://llvm.org/viewvc/llvm-project?rev=91528&view=rev Log: remove 
use of SmallVector from Path::makeUnique. Path::makeUnique is not used by anything performance sensitive, so just use std::string. Modified: llvm/trunk/lib/System/Unix/Path.inc Modified: llvm/trunk/lib/System/Unix/Path.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Unix/Path.inc?rev=91528&r1=91527&r2=91528&view=diff ============================================================================== --- llvm/trunk/lib/System/Unix/Path.inc (original) +++ llvm/trunk/lib/System/Unix/Path.inc Wed Dec 16 02:40:44 2009 @@ -16,7 +16,6 @@ //=== is guaranteed to work on *all* UNIX variants. //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "Unix.h" #if HAVE_SYS_STAT_H #include @@ -859,18 +858,15 @@ // Append an XXXXXX pattern to the end of the file for use with mkstemp, // mktemp or our own implementation. - SmallVector Buf; - Buf.resize(path.size()+8); - char *FNBuffer = Buf.data(); - path.copy(FNBuffer,path.size()); + std::string Buf(path); if (isDirectory()) - strcpy(FNBuffer+path.size(), "/XXXXXX"); + Buf += "/XXXXXX"; else - strcpy(FNBuffer+path.size(), "-XXXXXX"); + Buf += "-XXXXXX"; #if defined(HAVE_MKSTEMP) int TempFD; - if ((TempFD = mkstemp(FNBuffer)) == -1) + if ((TempFD = mkstemp((char*)Buf.c_str())) == -1) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // We don't need to hold the temp file descriptor... we will trust that no one @@ -878,21 +874,21 @@ close(TempFD); // Save the name - path = FNBuffer; + path = Buf; #elif defined(HAVE_MKTEMP) // If we don't have mkstemp, use the old and obsolete mktemp function. - if (mktemp(FNBuffer) == 0) + if (mktemp(Buf.c_str()) == 0) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // Save the name - path = FNBuffer; + path = Buf; #else // Okay, looks like we have to do it all by our lonesome. 
static unsigned FCounter = 0; unsigned offset = path.size() + 1; - while ( FCounter < 999999 && exists()) { - sprintf(FNBuffer+offset,"%06u",++FCounter); - path = FNBuffer; + while (FCounter < 999999 && exists()) { + sprintf(Buf.data()+offset, "%06u", ++FCounter); + path = Buf; } if (FCounter > 999999) return MakeErrMsg(ErrMsg, From sabre at nondot.org Wed Dec 16 02:44:25 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 08:44:25 -0000 Subject: [llvm-commits] [llvm] r91529 - in /llvm/trunk: include/llvm/ADT/SmallVector.h lib/Support/CMakeLists.txt lib/Support/SmallVector.cpp Message-ID: <200912160844.nBG8iPJ2019156@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 02:44:24 2009 New Revision: 91529 URL: http://llvm.org/viewvc/llvm-project?rev=91529&view=rev Log: now that libsystem no longer uses SmallVector, we can move SmallVectorBase::grow_pod out of line, finally satisfying PR3758. Added: llvm/trunk/lib/Support/SmallVector.cpp Modified: llvm/trunk/include/llvm/ADT/SmallVector.h llvm/trunk/lib/Support/CMakeLists.txt Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91529&r1=91528&r2=91529&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 02:44:24 2009 @@ -90,32 +90,14 @@ return size_t((char*)CapacityX - (char*)BeginX); } - inline void grow_pod(size_t MinSizeInBytes, size_t TSize); + /// grow_pod - This is an implementation of the grow() method which only works + /// on POD-like datatypes and is out of line to reduce code duplication. 
+ void grow_pod(size_t MinSizeInBytes, size_t TSize); public: bool empty() const { return BeginX == EndX; } }; -inline void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { - size_t CurSizeBytes = size_in_bytes(); - size_t NewCapacityInBytes = 2 * capacity_in_bytes(); - if (NewCapacityInBytes < MinSizeInBytes) - NewCapacityInBytes = MinSizeInBytes; - void *NewElts = operator new(NewCapacityInBytes); - - // Copy the elements over. - memcpy(NewElts, this->BeginX, CurSizeBytes); - - // If this wasn't grown from the inline copy, deallocate the old space. - if (!this->isSmall()) - operator delete(this->BeginX); - - this->EndX = (char*)NewElts+CurSizeBytes; - this->BeginX = NewElts; - this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; -} - - template class SmallVectorTemplateCommon : public SmallVectorBase { Modified: llvm/trunk/lib/Support/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=91529&r1=91528&r2=91529&view=diff ============================================================================== --- llvm/trunk/lib/Support/CMakeLists.txt (original) +++ llvm/trunk/lib/Support/CMakeLists.txt Wed Dec 16 02:44:24 2009 @@ -23,6 +23,7 @@ Regex.cpp SlowOperationInformer.cpp SmallPtrSet.cpp + SmallVector.cpp SourceMgr.cpp Statistic.cpp StringExtras.cpp Added: llvm/trunk/lib/Support/SmallVector.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SmallVector.cpp?rev=91529&view=auto ============================================================================== --- llvm/trunk/lib/Support/SmallVector.cpp (added) +++ llvm/trunk/lib/Support/SmallVector.cpp Wed Dec 16 02:44:24 2009 @@ -0,0 +1,37 @@ +//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallVector class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +/// grow_pod - This is an implementation of the grow() method which only works +/// on POD-like datatypes and is out of line to reduce code duplication. +void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { + size_t CurSizeBytes = size_in_bytes(); + size_t NewCapacityInBytes = 2 * capacity_in_bytes(); + if (NewCapacityInBytes < MinSizeInBytes) + NewCapacityInBytes = MinSizeInBytes; + void *NewElts = operator new(NewCapacityInBytes); + + // Copy the elements over. No need to run dtors on PODs. + memcpy(NewElts, this->BeginX, CurSizeBytes); + + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + operator delete(this->BeginX); + + this->EndX = (char*)NewElts+CurSizeBytes; + this->BeginX = NewElts; + this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; +} + From sabre at nondot.org Wed Dec 16 03:05:34 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 03:05:34 -0600 Subject: [llvm-commits] CVS: llvm-www/OpenProjects.html Message-ID: <200912160905.nBG95YP7028981@zion.cs.uiuc.edu> Changes in directory llvm-www: OpenProjects.html updated: 1.56 -> 1.57 --- Log message: update a few entries, add super optimizer idea from John. --- Diffs of the changes: (+20 -28) OpenProjects.html | 48 ++++++++++++++++++++---------------------------- 1 files changed, 20 insertions(+), 28 deletions(-) Index: llvm-www/OpenProjects.html diff -u llvm-www/OpenProjects.html:1.56 llvm-www/OpenProjects.html:1.57 --- llvm-www/OpenProjects.html:1.56 Wed Nov 11 00:41:50 2009 +++ llvm-www/OpenProjects.html Wed Dec 16 03:03:23 2009 @@ -22,7 +22,6 @@
  • Code Compaction
  • New Transformations and Analyses
  • Code Generator Improvements
  • -
  • Emit objects directly
  • Miscellaneous Additions
  • @@ -409,19 +408,15 @@ analysis machinery from LiveIntervals. Making the stack smaller is good for cache use and very important on targets where loads have limited displacement like ppc, thumb, mips, sparc, etc. This should be done as - a pass before prolog epilog insertion. + a pass before prolog epilog insertion. This is now done for register + allocator temporaries, but not for allocas.
  • Implement 'shrink wrapping', which is the intelligent placement of callee saved register save/restores. Right now PrologEpilogInsertion always saves every (modified) callee save reg in the prolog and restores it in the epilog. However, some paths through a function (e.g. an early exit) may not use all regs. Sinking the save down the CFG avoids useless work on - these paths.
  • + these paths. Work has started on this, please inquire on llvmdev.
  • Rename ISD::BIT_CONVERT to ISD::BITCAST to match the llvm ir concept
  • -
  • Change the VECTOR_SHUFFLE to be variadic. Right now, a VECTOR_SHUFFLE - takes three operands: two vectors and a shuffle mask. The shuffle mask is - required to be a BUILD_VECTOR node of N indices. It would be much better - (for several reasons) to eliminate the BUILD_VECTOR and make VECTOR_SHUFFLE - take 2+N operands instead.
  • Finish adapting existing targets to use the calling convention description mechanism (see lib/Target/X86/X86CallingConv.td for an example).
  • Implement interprocedural register allocation. The CallGraphSCCPass can be @@ -431,25 +426,6 @@
  • Implement a verifier for codegen level instructions. To help track down malformed machineinstrs sooner and make debugging problems easier.
  • -
  • Write a new backend for a target - (VAX, i960, PA-RISC, - MMIX?)
  • - - - - - - - -
    - -
      -
    1. Implement MachOWriter and ELFWriter to allow LLVM-based compilers to - bypass an external assembler.
    2. -
    3. Implement an assembler for inline assembly. Reuse the tablegen files - from each backend.
    @@ -486,6 +462,22 @@ for gcc compiled code.
  • Write LLVM IR level debugger (extend Interpreter?)
  • +
  • Write an LLVM Superoptimizer. It would be interesting to take ideas from + this superoptimizer for x86: +paper #1 and paper #2 and adapt them to run on LLVM code.

    + +It would seem that operating on LLVM code would save a lot of time +because its semantics are much simpler than x86. The cost of operating +on LLVM is that target-specific tricks would be missed.

    + +The outcome would be a new LLVM pass that subsumes at least the +instruction combiner, and probably a few other passes as well. Benefits +would include not missing cases missed by the current combiner and also +more easily adapting to changes in the LLVM IR.

    + +All previous superoptimizers have worked on linear sequences of code. +It would seem much better to operate on small subgraphs of the program +dependency graph.

  • @@ -500,7 +492,7 @@ src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> LLVM Compiler Infrastructure
    - Last modified: $Date: 2009/11/11 06:41:50 $ + Last modified: $Date: 2009/12/16 09:03:23 $ From sabre at nondot.org Wed Dec 16 03:09:56 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 09:09:56 -0000 Subject: [llvm-commits] [llvm] r91530 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160909.nBG99umD029625@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 03:09:54 2009 New Revision: 91530 URL: http://llvm.org/viewvc/llvm-project?rev=91530&view=rev Log: Fix a missing this-> that clang++ notices. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91530&r1=91529&r2=91530&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 03:09:54 2009 @@ -314,7 +314,7 @@ if (this->EndX < this->CapacityX) { Retry: new (this->end()) T(Elt); - setEnd(this->end()+1); + this->setEnd(this->end()+1); return; } this->grow(); From sabre at nondot.org Wed Dec 16 03:17:12 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 09:17:12 -0000 Subject: [llvm-commits] [llvm] r91531 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912160917.nBG9HC4W029882@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 03:17:12 2009 New Revision: 91531 URL: http://llvm.org/viewvc/llvm-project?rev=91531&view=rev Log: fix more missing this->'s to placate clang++ Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91531&r1=91530&r2=91531&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h 
Wed Dec 16 03:17:12 2009 @@ -268,7 +268,7 @@ ~SmallVectorImpl() { // Destroy the constructed elements in the vector. - destroy_range(this->begin(), this->end()); + this->destroy_range(this->begin(), this->end()); // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) @@ -277,7 +277,7 @@ void clear() { - destroy_range(this->begin(), this->end()); + this->destroy_range(this->begin(), this->end()); this->EndX = this->BeginX; } @@ -295,13 +295,13 @@ void resize(unsigned N, const T &NV) { if (N < this->size()) { - destroy_range(this->begin()+N, this->end()); - setEnd(this->begin()+N); + this->destroy_range(this->begin()+N, this->end()); + this->setEnd(this->begin()+N); } else if (N > this->size()) { if (this->capacity() < N) this->grow(N); construct_range(this->end(), this->begin()+N, NV); - setEnd(this->begin()+N); + this->setEnd(this->begin()+N); } } @@ -322,7 +322,7 @@ } void pop_back() { - setEnd(this->end()-1); + this->setEnd(this->end()-1); this->end()->~T(); } @@ -348,7 +348,7 @@ // TODO: NEED To compile time dispatch on whether in_iter is a random access // iterator to use the fast uninitialized_copy. std::uninitialized_copy(in_start, in_end, this->end()); - setEnd(this->end() + NumInputs); + this->setEnd(this->end() + NumInputs); } /// append - Add the specified range to the end of the SmallVector. @@ -360,14 +360,14 @@ // Copy the new elements over. std::uninitialized_fill_n(this->end(), NumInputs, Elt); - setEnd(this->end() + NumInputs); + this->setEnd(this->end() + NumInputs); } void assign(unsigned NumElts, const T &Elt) { clear(); if (this->capacity() < NumElts) this->grow(NumElts); - setEnd(this->begin()+NumElts); + this->setEnd(this->begin()+NumElts); construct_range(this->begin(), this->end(), Elt); } @@ -385,8 +385,8 @@ // Shift all elts down. iterator I = std::copy(E, this->end(), S); // Drop the last elts. 
- destroy_range(I, this->end()); - setEnd(I); + this->destroy_range(I, this->end()); + this->setEnd(I); return(N); } @@ -446,9 +446,9 @@ // Copy over the elements that we're about to overwrite. T *OldEnd = this->end(); - setEnd(this->end() + NumToInsert); + this->setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); // Replace the overwritten part. std::fill_n(I, NumOverwritten, Elt); @@ -534,7 +534,7 @@ /// which will only be overwritten. void set_size(unsigned N) { assert(N <= this->capacity()); - setEnd(this->begin() + N); + this->setEnd(this->begin() + N); } private: @@ -570,15 +570,15 @@ // Copy over the extra elts. if (this->size() > RHS.size()) { size_t EltDiff = this->size() - RHS.size(); - uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); + this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); RHS.setEnd(RHS.end()+EltDiff); - destroy_range(this->begin()+NumShared, this->end()); - setEnd(this->begin()+NumShared); + this->destroy_range(this->begin()+NumShared, this->end()); + this->setEnd(this->begin()+NumShared); } else if (RHS.size() > this->size()) { size_t EltDiff = RHS.size() - this->size(); - uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); - setEnd(this->end() + EltDiff); - destroy_range(RHS.begin()+NumShared, RHS.end()); + this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); + this->setEnd(this->end() + EltDiff); + this->destroy_range(RHS.begin()+NumShared, RHS.end()); RHS.setEnd(RHS.begin()+NumShared); } } @@ -602,10 +602,10 @@ NewEnd = this->begin(); // Destroy excess elements. - destroy_range(NewEnd, this->end()); + this->destroy_range(NewEnd, this->end()); // Trim. - setEnd(NewEnd); + this->setEnd(NewEnd); return *this; } @@ -613,8 +613,8 @@ // This allows us to avoid copying them during the grow. 
if (this->capacity() < RHSSize) { // Destroy current elements. - destroy_range(this->begin(), this->end()); - setEnd(this->begin()); + this->destroy_range(this->begin(), this->end()); + this->setEnd(this->begin()); CurSize = 0; this->grow(RHSSize); } else if (CurSize) { @@ -623,10 +623,11 @@ } // Copy construct the new elements in place. - uninitialized_copy(RHS.begin()+CurSize, RHS.end(), this->begin()+CurSize); + this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), + this->begin()+CurSize); // Set end. - setEnd(this->begin()+RHSSize); + this->setEnd(this->begin()+RHSSize); return *this; } From sabre at nondot.org Wed Dec 16 03:32:05 2009 From: sabre at nondot.org (Chris Lattner) Date: Wed, 16 Dec 2009 09:32:05 -0000 Subject: [llvm-commits] [llvm] r91532 - in /llvm/trunk: lib/Transforms/Scalar/SimplifyLibCalls.cpp test/Transforms/SimplifyLibCalls/StrStr.ll Message-ID: <200912160932.nBG9W5a5030421@zion.cs.uiuc.edu> Author: lattner Date: Wed Dec 16 03:32:05 2009 New Revision: 91532 URL: http://llvm.org/viewvc/llvm-project?rev=91532&view=rev Log: reapply my strstr optimization. I have reproduced the x86-64 bootstrap miscompile (i386.o miscompares) but it happens both with and without this patch. Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=91532&r1=91531&r2=91532&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Wed Dec 16 03:32:05 2009 @@ -76,6 +76,11 @@ /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); + /// EmitStrChr - Emit a call to the strchr function to the builder, for the + /// specified pointer and character. 
Ptr is required to be some pointer type, + /// and the return value has 'i8*' type. + Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -151,6 +156,26 @@ return CI; } +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + const Type *I8Ptr = Type::getInt8PtrTy(*Context); + const Type *I32Ty = Type::getInt32Ty(*Context); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -890,7 +915,7 @@ }; //===---------------------------------------===// -// 'strto*' Optimizations +// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. 
struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -910,6 +935,52 @@ } }; +//===---------------------------------------===// +// 'strstr' Optimizations + +struct StrStrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !isa(FT->getParamType(0)) || + !isa(FT->getParamType(1)) || + !isa(FT->getReturnType())) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getOperand(1) == CI->getOperand(2)) + return B.CreateBitCast(CI->getOperand(1), CI->getType()); + + // See if either input string is a constant string. + std::string SearchStr, ToFindStr; + bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getOperand(1), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == std::string::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getOperand(1), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). 
+ if (HasStr2 && ToFindStr.size() == 1) + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + CI->getType()); + return 0; + } +}; + //===---------------------------------------===// // 'memcmp' Optimizations @@ -1675,8 +1746,8 @@ // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; - StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; - MemSetOpt MemSet; + StrToOpt StrTo; StrStrOpt StrStr; + MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; // Integer Optimizations @@ -1738,6 +1809,7 @@ Optimizations["strtoll"] = &StrTo; Optimizations["strtold"] = &StrTo; Optimizations["strtoull"] = &StrTo; + Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; @@ -2644,12 +2716,6 @@ // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // -// strstr: (PR5783) -// * strstr(x,x) -> x -// * strstr(x, "") -> x -// * strstr(x, "a") -> strchr(x, 'a') -// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) -// // tan, tanf, tanl: // * tan(atan(x)) -> x // Modified: llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll?rev=91532&r1=91531&r2=91532&view=diff ============================================================================== --- llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll (original) +++ llvm/trunk/test/Transforms/SimplifyLibCalls/StrStr.ll Wed Dec 16 03:32:05 2009 @@ -0,0 +1,48 @@ +; RUN: opt < %s -simplify-libcalls -S | FileCheck %s +; PR5783 + +target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin9.0" + + at .str = private constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] + at .str1 = private constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1] + at .str2 = private constant [6 x i8] c"abcde\00" ; <[6 x i8]*> [#uses=1] + at .str3 = private constant [4 x i8] c"bcd\00" ; <[4 x i8]*> [#uses=1] + +define i8* @test1(i8* %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, "") -> P +; CHECK: @test1 +; CHECK: ret i8* %P +} + +declare i8* @strstr(i8*, i8* nocapture) nounwind readonly + +define i8* @test2(i8* %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, "a") -> strchr(P, 'a') +; CHECK: @test2 +; CHECK: @strchr(i8* %P, i32 97) +} + +define i8* @test3(i8* nocapture %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* getelementptr inbounds ([6 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i32 0, i32 0)) nounwind ; [#uses=1] + ret i8* %call +; strstr("abcde", "bcd") -> "abcde"+1 +; CHECK: @test3 +; CHECK: getelementptr inbounds ([6 x i8]* @.str2, i32 0, i64 1) +} + +define i8* @test4(i8* %P) nounwind readonly { +entry: + %call = tail call i8* @strstr(i8* %P, i8* %P) nounwind ; [#uses=1] + ret i8* %call +; strstr(P, P) -> P +; CHECK: @test4 +; CHECK: ret i8* %P +} From clattner at apple.com Wed Dec 16 03:35:02 2009 From: clattner at apple.com (Chris Lattner) Date: Wed, 16 Dec 2009 01:35:02 -0800 Subject: [llvm-commits] x86-64 miscompile Message-ID: <4635215F-70B3-4414-9B9F-52CE43810B4C@apple.com> I suspect a scheduling nondeterminism. 
The file that miscompares is i386.o and the only difference is: 1c1 < stage2-gcc/i386.o: --- > stage3-gcc/i386.o: 22171,22172c22171,22172 < 0000000000014a87 movq %rax,%rbx < 0000000000014a8a movq %rbx,0xc8(%rbp) --- > 0000000000014a87 movq %rax,0xc8(%rbp) > 0000000000014a8b movq %rax,%rbx It could also be a copy elimination or folding thing I guess. -Chris From edwintorok at gmail.com Wed Dec 16 04:38:01 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Wed, 16 Dec 2009 12:38:01 +0200 Subject: [llvm-commits] [llvm] r91489 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <200912160053.nBG0rDDv029906@zion.cs.uiuc.edu> References: <200912160053.nBG0rDDv029906@zion.cs.uiuc.edu> Message-ID: <4B28B889.4060107@gmail.com> On 2009-12-16 02:53, Evan Cheng wrote: > Author: evancheng > Date: Tue Dec 15 18:53:11 2009 > New Revision: 91489 > > URL: http://llvm.org/viewvc/llvm-project?rev=91489&view=rev > Log: > Re-enable 91381 with fixes. > > Modified: > llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > llvm/trunk/lib/Target/X86/X86Instr64bit.td > llvm/trunk/lib/Target/X86/X86InstrInfo.td > llvm/trunk/test/CodeGen/X86/setcc.ll > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91489&r1=91488&r2=91489&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 15 18:53:11 2009 > @@ -980,6 +980,7 @@ > setTargetDAGCombine(ISD::SRL); > setTargetDAGCombine(ISD::STORE); > setTargetDAGCombine(ISD::MEMBARRIER); > + setTargetDAGCombine(ISD::ZERO_EXTEND); > Hi Evan, I noticed an infloop converting and->zero_extend->and, could this patch be the cause? 
http://llvm.org/bugs/show_bug.cgi?id=5802 Best regards, --Edwin From daniel at zuster.org Wed Dec 16 04:56:03 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 10:56:03 -0000 Subject: [llvm-commits] [llvm] r91533 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <200912161056.nBGAu3JU007956@zion.cs.uiuc.edu> Author: ddunbar Date: Wed Dec 16 04:56:02 2009 New Revision: 91533 URL: http://llvm.org/viewvc/llvm-project?rev=91533&view=rev Log: Revert "Initial work on disabling the scheduler. This is a work in progress, and this", this broke llvm-gcc bootstrap for release builds on x86_64-apple-darwin10. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91533&r1=91532&r2=91533&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Dec 16 04:56:02 2009 @@ -110,46 +110,6 @@ /// SelectionDAG. BumpPtrAllocator Allocator; - /// NodeOrdering - Assigns a "line number" value to each SDNode that - /// corresponds to the "line number" of the original LLVM instruction. This - /// used for turning off scheduling, because we'll forgo the normal scheduling - /// algorithm and output the instructions according to this ordering. - class NodeOrdering { - /// LineNo - The line of the instruction the node corresponds to. A value of - /// `0' means it's not assigned. 
- unsigned LineNo; - std::map Order; - - void operator=(const NodeOrdering&); // Do not implement. - NodeOrdering(const NodeOrdering&); // Do not implement. - public: - NodeOrdering() : LineNo(0) {} - - void add(const SDNode *Node) { - assert(LineNo && "Invalid line number!"); - Order[Node] = LineNo; - } - void remove(const SDNode *Node) { - std::map::iterator Itr = Order.find(Node); - if (Itr != Order.end()) - Order.erase(Itr); - } - void clear() { - Order.clear(); - LineNo = 1; - } - unsigned getLineNo(const SDNode *Node) { - unsigned LN = Order[Node]; - assert(LN && "Node isn't in ordering map!"); - return LN; - } - void newInst() { - ++LineNo; - } - - void dump() const; - } *Ordering; - /// VerifyNode - Sanity check the given node. Aborts if it is invalid. void VerifyNode(SDNode *N); @@ -160,9 +120,6 @@ DenseSet &visited, int level, bool &printed); - void operator=(const SelectionDAG&); // Do not implement. - SelectionDAG(const SelectionDAG&); // Do not implement. - public: SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); ~SelectionDAG(); @@ -242,13 +199,6 @@ return Root = N; } - /// NewInst - Tell the ordering object that we're processing a new - /// instruction. - void NewInst() { - if (Ordering) - Ordering->newInst(); - } - /// Combine - This iterates over the nodes in the SelectionDAG, folding /// certain types of nodes together, or eliminating superfluous nodes. 
The /// Level argument controls whether Combine is allowed to produce nodes and Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91533&r1=91532&r2=91533&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Wed Dec 16 04:56:02 2009 @@ -20,16 +20,10 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -cl::opt -DisableInstScheduling("disable-inst-scheduling", - cl::init(false), - cl::desc("Disable instruction scheduling")); - ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf) { } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91533&r1=91532&r2=91533&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Dec 16 04:56:02 2009 @@ -48,8 +48,6 @@ #include using namespace llvm; -extern cl::opt DisableInstScheduling; - /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { @@ -554,9 +552,6 @@ } DeallocateNode(N); - - // Remove the ordering of this node. - if (Ordering) Ordering->remove(N); } } @@ -582,9 +577,6 @@ N->DropOperands(); DeallocateNode(N); - - // Remove the ordering of this node. 
- if (Ordering) Ordering->remove(N); } void SelectionDAG::DeallocateNode(SDNode *N) { @@ -596,9 +588,6 @@ N->NodeType = ISD::DELETED_NODE; NodeAllocator.Deallocate(AllNodes.remove(N)); - - // Remove the ordering of this node. - if (Ordering) Ordering->remove(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -702,9 +691,7 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); - if (Ordering) Ordering->remove(Node); - return Node; + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -721,9 +708,7 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); - if (Ordering) Ordering->remove(Node); - return Node; + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); } @@ -740,9 +725,7 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); AddNodeIDCustom(ID, N); - SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); - if (Ordering) Ordering->remove(Node); - return Node; + return CSEMap.FindNodeOrInsertPos(ID, InsertPos); } /// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
@@ -795,13 +778,8 @@ SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) : TLI(tli), FLI(fli), DW(0), EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), - getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0) { + getVTList(MVT::Other)), Root(getEntryNode()) { AllNodes.push_back(&EntryNode); - if (DisableInstScheduling) { - Ordering = new NodeOrdering(); - Ordering->add(&EntryNode); - } } void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, @@ -814,7 +792,6 @@ SelectionDAG::~SelectionDAG() { allnodes_clear(); - delete Ordering; } void SelectionDAG::allnodes_clear() { @@ -840,10 +817,6 @@ EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - if (DisableInstScheduling) { - Ordering = new NodeOrdering(); - Ordering->add(&EntryNode); - } } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -904,17 +877,14 @@ ID.AddPointer(&Val); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { - if (Ordering) Ordering->add(N); + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); - } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -951,17 +921,14 @@ ID.AddPointer(&V); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { - if (Ordering) Ordering->add(N); + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); - } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -1016,15 +983,12 @@ ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = 
CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1034,15 +998,12 @@ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(FI); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1056,15 +1017,12 @@ ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1084,15 +1042,12 @@ ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1113,15 +1068,12 @@ C->AddSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) 
ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1130,15 +1082,12 @@ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); ID.AddPointer(MBB); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1154,7 +1103,6 @@ N = NodeAllocator.Allocate(); new (N) VTSDNode(VT); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1164,7 +1112,6 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1177,7 +1124,6 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1190,7 +1136,6 @@ new (N) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); - if (Ordering) Ordering->add(N); } return SDValue(CondCodeNodes[Cond], 0); } @@ -1283,10 +1228,8 @@ ID.AddInteger(MaskVec[i]); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. 
This memory will be "leaked" when @@ -1298,7 +1241,6 @@ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1316,15 +1258,12 @@ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } CvtRndSatSDNode *N = NodeAllocator.Allocate(); new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1333,15 +1272,12 @@ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1353,15 +1289,12 @@ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); ID.AddInteger(LabelID); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) LabelSDNode(Opcode, dl, Root, LabelID); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1375,15 +1308,12 @@ ID.AddPointer(BA); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) 
BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1396,16 +1326,13 @@ ID.AddPointer(V); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -2316,16 +2243,13 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2549,10 +2473,8 @@ SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); @@ -2562,7 +2484,6 @@ } AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2970,10 +2891,8 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } N = NodeAllocator.Allocate(); new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); @@ -2983,7 +2902,6 @@ } AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG 
VerifyNode(N); #endif @@ -3050,10 +2968,8 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); @@ -3061,9 +2977,7 @@ N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } - AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -3659,14 +3573,12 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3724,14 +3636,12 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3804,7 +3714,6 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } @@ -3816,7 +3725,6 @@ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3881,14 +3789,12 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) LoadSDNode(Ops, dl, 
VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3959,14 +3865,12 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -4021,14 +3925,12 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); - if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -4045,17 +3947,14 @@ ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -4121,10 +4020,8 @@ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); @@ -4135,7 +4032,6 @@ } AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4191,10 +4087,8 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, 
NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - } if (NumOps == 1) { N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); @@ -4225,7 +4119,6 @@ } } AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4688,10 +4581,8 @@ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(ON); + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) return ON; - } } if (!RemoveNodeFromCSEMaps(N)) @@ -4755,7 +4646,6 @@ if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. - if (Ordering) Ordering->add(N); return N; } @@ -4894,10 +4784,8 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return cast(E); - } } // Allocate a new MachineSDNode. 
@@ -4919,7 +4807,6 @@ CSEMap.InsertNode(N, IP); AllNodes.push_back(N); - if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4956,10 +4843,8 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - if (Ordering) Ordering->add(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; - } } return NULL; } @@ -6126,9 +6011,6 @@ errs() << "\n\n"; } -void SelectionDAG::NodeOrdering::dump() const { -} - void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); print_details(OS, G); @@ -6269,3 +6151,4 @@ return false; return true; } + Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91533&r1=91532&r2=91533&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Dec 16 04:56:02 2009 @@ -583,9 +583,6 @@ } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { - // Tell the DAG that we're processing a new instruction. - DAG.NewInst(); - // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. 
switch (Opcode) { From daniel at zuster.org Wed Dec 16 04:56:17 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 10:56:17 -0000 Subject: [llvm-commits] [llvm] r91534 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Message-ID: <200912161056.nBGAuHVk007975@zion.cs.uiuc.edu> Author: ddunbar Date: Wed Dec 16 04:56:17 2009 New Revision: 91534 URL: http://llvm.org/viewvc/llvm-project?rev=91534&view=rev Log: Revert "Reapply 91184 with fixes and an addition to the testcase to cover the problem", this broke llvm-gcc bootstrap for release builds on x86_64-apple-darwin10. This reverts commit db22309800b224a9f5f51baf76071d7a93ce59c9. Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91534&r1=91533&r2=91534&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Wed Dec 16 04:56:17 2009 @@ -74,10 +74,6 @@ private: TargetData *TD; - /// DeadInsts - Keep track of instructions we have made dead, so that - /// we can remove them after we are done working. - SmallVector DeadInsts; - /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. 
@@ -106,30 +102,25 @@ int isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, AllocaInfo &Info); - void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, - uint64_t &ArrayOffset, AllocaInfo &Info); - void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, - uint64_t MemSize, const Type *MemOpType, bool isStore, - AllocaInfo &Info); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); + void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, + AllocaInfo &Info); + void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, + AllocaInfo &Info); + void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, + unsigned OpNo, AllocaInfo &Info); + void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, + AllocaInfo &Info); void DoScalarReplacement(AllocaInst *AI, std::vector &WorkList); - void DeleteDeadInstructions(); void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(Value *V); + void CleanupAllocaUsers(AllocaInst *AI); AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, + SmallVector &NewElts); + + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocaInst *AI, SmallVector &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -369,37 +360,176 @@ } } - // Now that we have created the new alloca 
instructions, rewrite all the - // uses of the old alloca. - DeadInsts.push_back(AI); - RewriteForScalarRepl(AI, AI, 0, ElementAllocas); + // Now that we have created the alloca instructions that we want to use, + // expand the getelementptr instructions to use them. + while (!AI->use_empty()) { + Instruction *User = cast(AI->use_back()); + if (BitCastInst *BCInst = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); + BCInst->eraseFromParent(); + continue; + } + + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + if (LoadInst *LI = dyn_cast(User)) { + Value *Insert = UndefValue::get(LI->getType()); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Load = new LoadInst(ElementAllocas[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + LI->eraseFromParent(); + continue; + } - // Now erase any instructions that were made dead while rewriting the alloca. 
- DeleteDeadInstructions(); + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, ElementAllocas[i], SI); + } + SI->eraseFromParent(); + continue; + } + + GetElementPtrInst *GEPI = cast(User); + // We now know that the GEP is of the form: GEP , 0, + unsigned Idx = + (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); + + assert(Idx < ElementAllocas.size() && "Index out of range?"); + AllocaInst *AllocaToUse = ElementAllocas[Idx]; + + Value *RepValue; + if (GEPI->getNumOperands() == 3) { + // Do not insert a new getelementptr instruction with zero indices, only + // to have it optimized out later. + RepValue = AllocaToUse; + } else { + // We are indexing deeply into the structure, so we still need a + // getelement ptr instruction to finish the indexing. This may be + // expanded itself once the worklist is rerun. + // + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue( + Type::getInt32Ty(AI->getContext()))); + NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); + RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), + NewArgs.end(), "", GEPI); + RepValue->takeName(GEPI); + } + + // If this GEP is to the start of the aggregate, check for memcpys. + if (Idx == 0 && GEPI->hasAllZeroIndices()) + RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); + + // Move all of the users over to the new GEP. 
+ GEPI->replaceAllUsesWith(RepValue); + // Delete the old GEP + GEPI->eraseFromParent(); + } + // Finally, delete the Alloca instruction + AI->eraseFromParent(); NumReplaced++; } -/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, -/// recursively including all their operands that become trivially dead. -void SROA::DeleteDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); - if (I == 0) - continue; - - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast(*OI)) { - // Zero out the operand and see if it becomes trivially dead. - *OI = 0; - if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); - } +/// isSafeElementUse - Check to see if this use is an allowed use for a +/// getelementptr instruction of an array aggregate allocation. isFirstElt +/// indicates whether Ptr is known to the start of the aggregate. +void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); + I != E; ++I) { + Instruction *User = cast(*I); + switch (User->getOpcode()) { + case Instruction::Load: break; + case Instruction::Store: + // Store is ok if storing INTO the pointer, not storing the pointer + if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); + break; + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast(User); + bool AreAllZeroIndices = isFirstElt; + if (GEP->getNumOperands() > 1 && + (!isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero())) + // Using pointer arithmetic to navigate the array. + return MarkUnsafe(Info); + + // Verify that any array subscripts are in range. + for (gep_type_iterator GEPIt = gep_type_begin(GEP), + E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { + // Ignore struct elements, no extra checking needed for these. 
+ if (isa(*GEPIt)) + continue; - I->eraseFromParent(); + // This GEP indexes an array. Verify that this is an in-range + // constant integer. Specifically, consider A[0][i]. We cannot know that + // the user isn't doing invalid things like allowing i to index an + // out-of-range subscript that accesses A[1]. Because of this, we have + // to reject SROA of any accesses into structs where any of the + // components are variables. + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); + + // Are all indices still zero? + AreAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= AT->getNumElements()) + return MarkUnsafe(Info); + } else if (const VectorType *VT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= VT->getNumElements()) + return MarkUnsafe(Info); + } + } + + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); + if (Info.isUnsafe) return; + break; + } + case Instruction::BitCast: + if (isFirstElt) { + isSafeUseOfBitCastedAllocation(cast(User), AI, Info); + if (Info.isUnsafe) return; + break; + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + case Instruction::Call: + if (MemIntrinsic *MI = dyn_cast(User)) { + if (isFirstElt) { + isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); + if (Info.isUnsafe) return; + break; + } + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + default: + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + } } + return; // All users look ok :) } - + /// AllUsersAreLoads - Return true if all users of this value are loads. 
static bool AllUsersAreLoads(Value *Ptr) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -409,116 +539,72 @@ return true; } -/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to -/// performing scalar replacement of alloca AI. The results are flagged in -/// the Info parameter. Offset and ArrayOffset indicate the position within -/// AI that is referenced by this instruction. -void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, AllocaInfo &Info) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast(*UI); - - if (BitCastInst *BC = dyn_cast(User)) { - isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - uint64_t GEPArrayOffset = ArrayOffset; - uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - if (Length) - isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); - else - MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(User)) { - if (!LI->isVolatile()) { - const Type *LIType = LI->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), - LIType, false, Info); - } else - MarkUnsafe(Info); - } else if (StoreInst *SI = dyn_cast(User)) { - // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isVolatile() && SI->getOperand(0) != I) { - const Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), - SIType, true, Info); - } else - MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. 
- if (OnlyUsedByDbgInfoIntrinsics(I)) { - Info.needsCleanup = true; - return; - } - MarkUnsafe(Info); - } else { - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - MarkUnsafe(Info); - } - if (Info.isUnsafe) return; - } -} +/// isSafeUseOfAllocation - Check if this user is an allowed use for an +/// aggregate allocation. +void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, + AllocaInfo &Info) { + if (BitCastInst *C = dyn_cast(User)) + return isSafeUseOfBitCastedAllocation(C, AI, Info); + + if (LoadInst *LI = dyn_cast(User)) + if (!LI->isVolatile()) + return;// Loads (returning a first class aggregrate) are always rewritable + + if (StoreInst *SI = dyn_cast(User)) + if (!SI->isVolatile() && SI->getOperand(0) != AI) + return;// Store is ok if storing INTO the pointer, not storing the pointer + + GetElementPtrInst *GEPI = dyn_cast(User); + if (GEPI == 0) + return MarkUnsafe(Info); -/// isSafeGEP - Check if a GEP instruction can be handled for scalar -/// replacement. It is safe when all the indices are constant, in-bounds -/// references, and when the resulting offset corresponds to an element within -/// the alloca type. The results are flagged in the Info parameter. Upon -/// return, Offset is adjusted as specified by the GEP indices. For the -/// special case of a variable index to a 2-element array, ArrayOffset is set -/// to the array element size. -void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, - uint64_t &Offset, uint64_t &ArrayOffset, - AllocaInfo &Info) { - gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); - if (GEPIt == E) - return; + gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - // The first GEP index must be zero. - if (!isa(GEPIt.getOperand()) || - !cast(GEPIt.getOperand())->isZero()) + // The GEP is not safe to transform if not of the form "GEP , 0, ". 
+ if (I == E || + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); - if (++GEPIt == E) - return; + } + ++I; + if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? + + bool IsAllZeroIndices = true; + // If the first index is a non-constant index into an array, see if we can // handle it as a special case. - const Type *ArrayEltTy = 0; - if (ArrayOffset == 0 && Offset == 0) { - if (const ArrayType *AT = dyn_cast(*GEPIt)) { - if (!isa(GEPIt.getOperand())) { - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements - // in it, in which case we CAN promote it, but we have to canonicalize - // this out if this is the only problem. - if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) - return MarkUnsafe(Info); + if (const ArrayType *AT = dyn_cast(*I)) { + if (!isa(I.getOperand())) { + IsAllZeroIndices = 0; + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements in + // it, in which case we CAN promote it, but we have to canonicalize this + // out if this is the only problem. + if ((NumElements == 1 || NumElements == 2) && + AllUsersAreLoads(GEPI)) { Info.needsCleanup = true; - ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); - ArrayEltTy = AT->getElementType(); - ++GEPIt; + return; // Canonicalization required! } + return MarkUnsafe(Info); } } - + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; GEPIt != E; ++GEPIt) { + for (; I != E; ++I) { // Ignore struct elements, no extra checking needed for these. 
- if (isa(*GEPIt)) + if (isa(*I)) continue; + + ConstantInt *IdxVal = dyn_cast(I.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info); - - if (const ArrayType *AT = dyn_cast(*GEPIt)) { + // Are all indices still zero? + IsAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*I)) { // This GEP indexes an array. Verify that this is an in-range constant // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range @@ -526,255 +612,147 @@ // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); - } else { - const VectorType *VT = dyn_cast(*GEPIt); - assert(VT && "unexpected type in GEP type iterator"); + } else if (const VectorType *VT = dyn_cast(*I)) { if (IdxVal->getZExtValue() >= VT->getNumElements()) return MarkUnsafe(Info); } } - - // All the indices are safe. Now compute the offset due to this GEP and - // check if the alloca has a component element at that offset. - if (ArrayOffset == 0) { - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - } else { - // Both array elements have the same type, so it suffices to check one of - // them. Copy the GEP indices starting from the array index, but replace - // that variable index with a constant zero. 
- SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); - Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); - } - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) - MarkUnsafe(Info); -} - -/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI -/// alloca or has an offset and size that corresponds to a component element -/// within it. The offset checked here may have been formed from a GEP with a -/// pointer bitcasted to a different type. -void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, uint64_t MemSize, - const Type *MemOpType, bool isStore, - AllocaInfo &Info) { - // Check if this is a load/store of the entire alloca. - if (Offset == 0 && ArrayOffset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. - if (!MemOpType || isa(MemOpType) || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } - return; - } - } - // Check if the offset/size correspond to a component within the alloca type. - const Type *T = AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize) && - (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) - return; - - return MarkUnsafe(Info); + + // If there are any non-simple uses of this getelementptr, make sure to reject + // them. 
+ return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// TypeHasComponent - Return true if T has a component type with the -/// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; - uint64_t EltSize; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - unsigned EltIdx = Layout->getElementContainingOffset(Offset); - EltTy = ST->getContainedType(EltIdx); - EltSize = TD->getTypeAllocSize(EltTy); - Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast(T)) { - EltTy = AT->getElementType(); - EltSize = TD->getTypeAllocSize(EltTy); - Offset %= EltSize; - } else { - return false; +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory +/// intrinsic can be promoted by SROA. At this point, we know that the operand +/// of the memintrinsic is a pointer to the beginning of the allocation. +void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, + unsigned OpNo, AllocaInfo &Info) { + // If not constant length, give up. + ConstantInt *Length = dyn_cast(MI->getLength()); + if (!Length) return MarkUnsafe(Info); + + // If not the whole aggregate, give up. + if (Length->getZExtValue() != + TD->getTypeAllocSize(AI->getType()->getElementType())) + return MarkUnsafe(Info); + + // We only know about memcpy/memset/memmove. + if (!isa(MI)) + return MarkUnsafe(Info); + + // Otherwise, we can transform it. Determine whether this is a memcpy/set + // into or out of the aggregate. + if (OpNo == 1) + Info.isMemCpyDst = true; + else { + assert(OpNo == 2); + Info.isMemCpySrc = true; } - if (Offset == 0 && (Size == 0 || EltSize == Size)) - return true; - // Check if the component spans multiple elements. 
- if (Offset + Size > EltSize) - return false; - return TypeHasComponent(EltTy, Offset, Size); } -/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite -/// the instruction I, which references it, to use the separate elements. -/// Offset indicates the position within AI that is referenced by this -/// instruction. -void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast(*UI); +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. +void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCU = dyn_cast(UI)) { + isSafeUseOfBitCastedAllocation(BCU, AI, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); + } else if (StoreInst *SI = dyn_cast(UI)) { + if (SI->isVolatile()) + return MarkUnsafe(Info); + + // If storing the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and store through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. 
+ if (isa(SI->getOperand(0)->getType()) && + TD->getTypeAllocSize(SI->getOperand(0)->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpyDst = true; + continue; + } + return MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(UI)) { + if (LI->isVolatile()) + return MarkUnsafe(Info); - if (BitCastInst *BC = dyn_cast(User)) { - RewriteBitCast(BC, AI, Offset, NewElts); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - RewriteGEP(GEPI, AI, Offset, NewElts); - } else if (MemIntrinsic *MI = dyn_cast(User)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - uint64_t MemSize = Length->getZExtValue(); - if (Offset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) - RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); - } else if (LoadInst *LI = dyn_cast(User)) { - const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - Value *Insert = UndefValue::get(LIType); - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - DeadInsts.push_back(LI); - } else if (isa(LIType) && - TD->getTypeAllocSize(LIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + // If loading the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and load through it. 
This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa(LI->getType()) && + TD->getTypeAllocSize(LI->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpySrc = true; + continue; } - } else if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, NewElts[i], SI); - } - DeadInsts.push_back(SI); - } else if (isa(SIType) && - TD->getTypeAllocSize(SIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + return MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(BC)) { + Info.needsCleanup = true; + return; } + else + MarkUnsafe(Info); } + else { + return MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } } -/// RewriteBitCast - Update a bitcast reference to the alloca being replaced -/// and recursively continue updating all of its uses. -void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - RewriteForScalarRepl(BC, AI, Offset, NewElts); - if (BC->getOperand(0) != AI) - return; +/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes +/// to its first element. Transform users of the cast to use the new values +/// instead. 
+void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, + SmallVector &NewElts) { + Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); + while (UI != UE) { + Instruction *User = cast(*UI++); + if (BitCastInst *BCU = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCU, AI, NewElts); + if (BCU->use_empty()) BCU->eraseFromParent(); + continue; + } - // The bitcast references the original alloca. Replace its uses with - // references to the first new element alloca. - Instruction *Val = NewElts[0]; - if (Val->getType() != BC->getDestTy()) { - Val = new BitCastInst(Val, BC->getDestTy(), "", BC); - Val->takeName(BC); - } - BC->replaceAllUsesWith(Val); - DeadInsts.push_back(BC); -} - -/// FindElementAndOffset - Return the index of the element containing Offset -/// within the specified type, which must be either a struct or an array. -/// Sets T to the type of the element and Offset to the offset within that -/// element. -unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { - unsigned Idx = 0; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - Idx = Layout->getElementContainingOffset(Offset); - T = ST->getContainedType(Idx); - Offset -= Layout->getElementOffset(Idx); - } else { - const ArrayType *AT = dyn_cast(T); - assert(AT && "unexpected type for scalar replacement"); - T = AT->getElementType(); - uint64_t EltSize = TD->getTypeAllocSize(T); - Idx = (unsigned)(Offset / EltSize); - Offset -= Idx * EltSize; - } - return Idx; -} - -/// RewriteGEP - Check if this GEP instruction moves the pointer across -/// elements of the alloca that are being split apart, and if so, rewrite -/// the GEP to be relative to the new element. 
-void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - uint64_t OldOffset = Offset; - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - - RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - - const Type *T = AI->getAllocatedType(); - unsigned OldIdx = FindElementAndOffset(T, OldOffset); - if (GEPI->getOperand(0) == AI) - OldIdx = ~0U; // Force the GEP to be rewritten. - - T = AI->getAllocatedType(); - uint64_t EltOffset = Offset; - unsigned Idx = FindElementAndOffset(T, EltOffset); - - // If this GEP does not move the pointer across elements of the alloca - // being split, then it does not needs to be rewritten. - if (Idx == OldIdx) - return; + if (MemIntrinsic *MI = dyn_cast(User)) { + // This must be memcpy/memmove/memset of the entire aggregate. + // Split into one per element. + RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); + continue; + } + + if (StoreInst *SI = dyn_cast(User)) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + continue; + } - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(i32Ty)); - while (EltOffset != 0) { - unsigned EltIdx = FindElementAndOffset(T, EltOffset); - NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); - } - Instruction *Val = NewElts[Idx]; - if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), "", GEPI); - Val->takeName(GEPI); - } - if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); - GEPI->replaceAllUsesWith(Val); - DeadInsts.push_back(GEPI); + if (LoadInst *LI = dyn_cast(User)) { + // If this is a load of the entire alloca to an integer, rewrite it. 
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + continue; + } + + // Otherwise it must be some other user of a gep of the first pointer. Just + // leave these alone. + continue; + } } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocaInst *AI, SmallVector &NewElts) { + // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For @@ -783,41 +761,28 @@ LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (Inst == MTI->getRawDest()) + if (BCInst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(Inst == MTI->getRawSource()); + assert(BCInst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } + // Keep track of the other intrinsic argument, so it can be removed if it + // is dead when the intrinsic is replaced. + Value *PossiblyDead = OtherPtr; + // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - - // Remove bitcasts and all-zero GEPs from OtherPtr. This is an - // optimization, but it's also required to detect the corner case where - // both pointer operands are referencing the same memory, and where - // OtherPtr may be a bitcast or GEP that currently being rewritten. (This - // function is only called for mem intrinsics that access the whole - // aggregate, so non-zero GEPs are not an issue here.) 
- while (1) { - if (BitCastInst *BC = dyn_cast(OtherPtr)) { - OtherPtr = BC->getOperand(0); - continue; - } - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { - // All zero GEPs are effectively bitcasts. - if (GEP->hasAllZeroIndices()) { - OtherPtr = GEP->getOperand(0); - continue; - } - } - break; - } - // If OtherPtr has already been rewritten, this intrinsic will be dead. - if (OtherPtr == NewElts[0]) - return; + // It is likely that OtherPtr is a bitcast, if so, remove it. + if (BitCastInst *BC = dyn_cast(OtherPtr)) + OtherPtr = BC->getOperand(0); + // All zero GEPs are effectively bitcasts. + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) + if (GEP->hasAllZeroIndices()) + OtherPtr = GEP->getOperand(0); if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -833,7 +798,7 @@ // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == Inst; + bool SROADest = MI->getRawDest() == BCInst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -842,15 +807,12 @@ Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - if (OtherPtr == AI) { - OtherElt = NewElts[i]; - OtherEltAlign = 0; - } else if (OtherPtr) { + if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -962,7 +924,9 @@ CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - DeadInsts.push_back(MI); + MI->eraseFromParent(); + if (PossiblyDead) + RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); } /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that @@ -973,9 +937,15 @@ // 
Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a store of an integer to the whole alloca, it may be a store + // to the first element. Just ignore the store in this case and normal SROA + // will handle it. + if (!isa(SrcVal->getType()) || + TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) + return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1080,7 +1050,7 @@ } } - DeadInsts.push_back(SI); + SI->eraseFromParent(); } /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to @@ -1089,9 +1059,16 @@ SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a load of the whole alloca to an integer, it may be a load + // of the first element. Just ignore the load in this case and normal SROA + // will handle it. + if (!isa(LI->getType()) || + TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) + return; + DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1162,9 +1139,10 @@ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); - DeadInsts.push_back(LI); + LI->eraseFromParent(); } + /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. 
static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1214,10 +1192,14 @@ // the users are safe to transform. AllocaInfo Info; - isSafeForScalarRepl(AI, AI, 0, 0, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << '\n'); - return 0; + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); + I != E; ++I) { + isSafeUseOfAllocation(cast(*I), AI, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " + << **I << '\n'); + return 0; + } } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1226,7 +1208,7 @@ // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getAllocatedType(), *TD)) + HasPadding(AI->getType()->getElementType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. @@ -1263,15 +1245,15 @@ // Insert the new GEP instructions, which are properly indexed. SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0",GEPI); + Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".0", GEPI); Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); + Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".1", GEPI); // Replace all loads of the variable index GEP with loads from both // indexes and a select. 
while (!GEPI->use_empty()) { @@ -1282,24 +1264,22 @@ LI->replaceAllUsesWith(R); LI->eraseFromParent(); } + GEPI->eraseFromParent(); } + /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(Value *V) { +void SROA::CleanupAllocaUsers(AllocaInst *AI) { // At this point, we know that the end result will be SROA'd and promoted, so // we can insert ugly code if required so long as sroa+mem2reg will clean it // up. - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { User *U = *UI++; - if (isa(U)) { - CleanupAllocaUsers(U); - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GetElementPtrInst *GEPI = dyn_cast(U)) CleanupGEP(GEPI); - CleanupAllocaUsers(GEPI); - if (GEPI->use_empty()) GEPI->eraseFromParent(); - } else { + else { Instruction *I = cast(U); SmallVector DbgInUses; if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { @@ -1415,7 +1395,7 @@ // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1477,7 +1457,7 @@ if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91533&view=auto ============================================================================== --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (original) +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (removed) @@ -1,89 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; Radar 7441282 - -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" - -%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } -%struct.int16x8_t = type { <8 x i16> } -%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } -%union..0anon = type { %struct.int16x8x2_t } - -define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { -; CHECK: @test -; CHECK-NOT: alloca -; CHECK: "alloca point" -entry: - %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] - %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] - %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] - %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] 
- store <8 x i16> %tmp.0, <8 x i16>* %1 - store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr - %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] - store <8 x i16> %4, <8 x i16>* %2, align 16 - %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] - store <8 x i16> %7, <8 x i16>* %5, align 16 - %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] - %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] - %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] - %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] - %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] - %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - store <8 x i16> %14, <8 x i16>* %15 - %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] - %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] - store <8 x i16> %16, <8 x i16>* %17 - %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] - %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] - %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) - %tmp21 = bitcast 
%struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] - %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) - %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] - %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] - %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) - br label %return - -; CHECK: store <8 x i16> -; CHECK: store <8 x i16> - -return: ; preds = %entry - ret void -} - -; Radar 7466574 -%struct._NSRange = type { i64 } - -define arm_apcscc void @test_memcpy_self() nounwind { -; CHECK: @test_memcpy_self -; CHECK-NOT: alloca -; CHECK: br i1 -entry: - %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2] - br i1 undef, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] - %tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) - ret void - -cond.false: ; preds = %entry - ret void -} - -declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind From daniel at zuster.org Wed Dec 16 04:57:29 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 02:57:29 -0800 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp In-Reply-To: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> References: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> Message-ID: <6a8523d60912160257ia276cc5ya1e6a1437867ce79@mail.gmail.com> Hi Bill, I reverted this, it was one of the patches responsible for the failing x86_64-apple-darwin10 bootstrap in my testing. 
- Daniel On Mon, Dec 14, 2009 at 5:54 PM, Bill Wendling wrote: > Author: void > Date: Mon Dec 14 19:54:51 2009 > New Revision: 91392 > > URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev > Log: > Initial work on disabling the scheduler. This is a work in progress, and this > stuff isn't used just yet. > > We want to model the GCC `-fno-schedule-insns' and `-fno-schedule-insns2' > flags. The hypothesis is that the people who use these flags know what they are > doing, and have hand-optimized the C code to reduce latencies and other > conflicts. > > The idea behind our scheme to turn off scheduling is to create a map "on the > side" during DAG generation. It will order the nodes by how they appeared in the > code. This map is then used during scheduling to get the ordering. > > Modified: > ? ?llvm/trunk/include/llvm/CodeGen/SelectionDAG.h > ? ?llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp > ? ?llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp > ? ?llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp > > Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91392&r1=91391&r2=91392&view=diff > > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) > +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 > @@ -110,6 +110,46 @@ > ? /// SelectionDAG. > ? BumpPtrAllocator Allocator; > > + ?/// NodeOrdering - Assigns a "line number" value to each SDNode that > + ?/// corresponds to the "line number" of the original LLVM instruction. This > + ?/// used for turning off scheduling, because we'll forgo the normal scheduling > + ?/// algorithm and output the instructions according to this ordering. > + ?class NodeOrdering { > + ? ?/// LineNo - The line of the instruction the node corresponds to. A value of > + ? 
?/// `0' means it's not assigned. > + ? ?unsigned LineNo; > + ? ?std::map Order; > + > + ? ?void operator=(const NodeOrdering&); // Do not implement. > + ? ?NodeOrdering(const NodeOrdering&); ? // Do not implement. > + ?public: > + ? ?NodeOrdering() : LineNo(0) {} > + > + ? ?void add(const SDNode *Node) { > + ? ? ?assert(LineNo && "Invalid line number!"); > + ? ? ?Order[Node] = LineNo; > + ? ?} > + ? ?void remove(const SDNode *Node) { > + ? ? ?std::map::iterator Itr = Order.find(Node); > + ? ? ?if (Itr != Order.end()) > + ? ? ? ?Order.erase(Itr); > + ? ?} > + ? ?void clear() { > + ? ? ?Order.clear(); > + ? ? ?LineNo = 1; > + ? ?} > + ? ?unsigned getLineNo(const SDNode *Node) { > + ? ? ?unsigned LN = Order[Node]; > + ? ? ?assert(LN && "Node isn't in ordering map!"); > + ? ? ?return LN; > + ? ?} > + ? ?void newInst() { > + ? ? ?++LineNo; > + ? ?} > + > + ? ?void dump() const; > + ?} *Ordering; > + > ? /// VerifyNode - Sanity check the given node. ?Aborts if it is invalid. > ? void VerifyNode(SDNode *N); > > @@ -120,6 +160,9 @@ > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DenseSet &visited, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int level, bool &printed); > > + ?void operator=(const SelectionDAG&); // Do not implement. > + ?SelectionDAG(const SelectionDAG&); ? // Do not implement. > + > ?public: > ? SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); > ? ~SelectionDAG(); > @@ -199,6 +242,13 @@ > ? ? return Root = N; > ? } > > + ?/// NewInst - Tell the ordering object that we're processing a new > + ?/// instruction. > + ?void NewInst() { > + ? ?if (Ordering) > + ? ? ?Ordering->newInst(); > + ?} > + > ? /// Combine - This iterates over the nodes in the SelectionDAG, folding > ? /// certain types of nodes together, or eliminating superfluous nodes. ?The > ? 
/// Level argument controls whether Combine is allowed to produce nodes and > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91392&r1=91391&r2=91392&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 > @@ -20,10 +20,16 @@ > ?#include "llvm/Target/TargetInstrInfo.h" > ?#include "llvm/Target/TargetRegisterInfo.h" > ?#include "llvm/Target/TargetSubtarget.h" > +#include "llvm/Support/CommandLine.h" > ?#include "llvm/Support/Debug.h" > ?#include "llvm/Support/raw_ostream.h" > ?using namespace llvm; > > +cl::opt > +DisableInstScheduling("disable-inst-scheduling", > + ? ? ? ? ? ? ? ? ? ? ?cl::init(false), > + ? ? ? ? ? ? ? ? ? ? ?cl::desc("Disable instruction scheduling")); > + > ?ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) > ? : ScheduleDAG(mf) { > ?} > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91392&r1=91391&r2=91392&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009 > @@ -48,6 +48,8 @@ > ?#include > ?using namespace llvm; > > +extern cl::opt DisableInstScheduling; > + > ?/// makeVTList - Return an instance of the SDVTList struct initialized with the > ?/// specified members. > ?static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { > @@ -552,6 +554,9 @@ > ? ? } > > ? ? DeallocateNode(N); > + > + ? ?// Remove the ordering of this node. > + ? ?if (Ordering) Ordering->remove(N); > ? 
} > ?} > > @@ -577,6 +582,9 @@ > ? N->DropOperands(); > > ? DeallocateNode(N); > + > + ?// Remove the ordering of this node. > + ?if (Ordering) Ordering->remove(N); > ?} > > ?void SelectionDAG::DeallocateNode(SDNode *N) { > @@ -588,6 +596,9 @@ > ? N->NodeType = ISD::DELETED_NODE; > > ? NodeAllocator.Deallocate(AllNodes.remove(N)); > + > + ?// Remove the ordering of this node. > + ?if (Ordering) Ordering->remove(N); > ?} > > ?/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that > @@ -691,7 +702,9 @@ > ? FoldingSetNodeID ID; > ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); > ? AddNodeIDCustom(ID, N); > - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?if (Ordering) Ordering->remove(Node); > + ?return Node; > ?} > > ?/// FindModifiedNodeSlot - Find a slot for the specified node if its operands > @@ -708,7 +721,9 @@ > ? FoldingSetNodeID ID; > ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); > ? AddNodeIDCustom(ID, N); > - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?if (Ordering) Ordering->remove(Node); > + ?return Node; > ?} > > > @@ -725,7 +740,9 @@ > ? FoldingSetNodeID ID; > ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); > ? AddNodeIDCustom(ID, N); > - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); > + ?if (Ordering) Ordering->remove(Node); > + ?return Node; > ?} > > ?/// VerifyNode - Sanity check the given node. ?Aborts if it is invalid. > @@ -778,8 +795,13 @@ > ?SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) > ? : TLI(tli), FLI(fli), DW(0), > ? ? EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), > - ? ?getVTList(MVT::Other)), Root(getEntryNode()) { > + ? ? ? ? ? ? ?getVTList(MVT::Other)), > + ? ?Root(getEntryNode()), Ordering(0) { > ? 
AllNodes.push_back(&EntryNode); > + ?if (DisableInstScheduling) { > + ? ?Ordering = new NodeOrdering(); > + ? ?Ordering->add(&EntryNode); > + ?} > ?} > > ?void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, > @@ -792,6 +814,7 @@ > > ?SelectionDAG::~SelectionDAG() { > ? allnodes_clear(); > + ?delete Ordering; > ?} > > ?void SelectionDAG::allnodes_clear() { > @@ -817,6 +840,10 @@ > ? EntryNode.UseList = 0; > ? AllNodes.push_back(&EntryNode); > ? Root = getEntryNode(); > + ?if (DisableInstScheduling) { > + ? ?Ordering = new NodeOrdering(); > + ? ?Ordering->add(&EntryNode); > + ?} > ?} > > ?SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { > @@ -877,14 +904,17 @@ > ? ID.AddPointer(&Val); > ? void *IP = 0; > ? SDNode *N = NULL; > - ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) > + ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { > + ? ?if (Ordering) Ordering->add(N); > ? ? if (!VT.isVector()) > ? ? ? return SDValue(N, 0); > + ?} > ? if (!N) { > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) ConstantSDNode(isT, &Val, EltVT); > ? ? CSEMap.InsertNode(N, IP); > ? ? AllNodes.push_back(N); > + ? ?if (Ordering) Ordering->add(N); > ? } > > ? SDValue Result(N, 0); > @@ -921,14 +951,17 @@ > ? ID.AddPointer(&V); > ? void *IP = 0; > ? SDNode *N = NULL; > - ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) > + ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { > + ? ?if (Ordering) Ordering->add(N); > ? ? if (!VT.isVector()) > ? ? ? return SDValue(N, 0); > + ?} > ? if (!N) { > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) ConstantFPSDNode(isTarget, &V, EltVT); > ? ? CSEMap.InsertNode(N, IP); > ? ? AllNodes.push_back(N); > + ? ?if (Ordering) Ordering->add(N); > ? } > > ? SDValue Result(N, 0); > @@ -983,12 +1016,15 @@ > ? ID.AddInteger(Offset); > ? ID.AddInteger(TargetFlags); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? 
?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -998,12 +1034,15 @@ > ? AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); > ? ID.AddInteger(FI); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) FrameIndexSDNode(FI, VT, isTarget); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1017,12 +1056,15 @@ > ? ID.AddInteger(JTI); > ? ID.AddInteger(TargetFlags); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1042,12 +1084,15 @@ > ? ID.AddPointer(C); > ? ID.AddInteger(TargetFlags); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1068,12 +1113,15 @@ > ? C->AddSelectionDAGCSEId(ID); > ? ID.AddInteger(TargetFlags); > ? 
void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1082,12 +1130,15 @@ > ? AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); > ? ID.AddPointer(MBB); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) BasicBlockSDNode(MBB); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1103,6 +1154,7 @@ > ? N = NodeAllocator.Allocate(); > ? new (N) VTSDNode(VT); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1112,6 +1164,7 @@ > ? N = NodeAllocator.Allocate(); > ? new (N) ExternalSymbolSDNode(false, Sym, 0, VT); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1124,6 +1177,7 @@ > ? N = NodeAllocator.Allocate(); > ? new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1136,6 +1190,7 @@ > ? ? new (N) CondCodeSDNode(Cond); > ? ? CondCodeNodes[Cond] = N; > ? ? AllNodes.push_back(N); > + ? ?if (Ordering) Ordering->add(N); > ? } > ? return SDValue(CondCodeNodes[Cond], 0); > ?} > @@ -1228,8 +1283,10 @@ > ? ? ID.AddInteger(MaskVec[i]); > > ? 
void* IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > > ? // Allocate the mask array for the node out of the BumpPtrAllocator, since > ? // SDNode doesn't have access to it. ?This memory will be "leaked" when > @@ -1241,6 +1298,7 @@ > ? new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1258,12 +1316,15 @@ > ? SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; > ? AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); > ? void* IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? CvtRndSatSDNode *N = NodeAllocator.Allocate(); > ? new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1272,12 +1333,15 @@ > ? AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); > ? ID.AddInteger(RegNo); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) RegisterSDNode(RegNo, VT); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1289,12 +1353,15 @@ > ? AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); > ? ID.AddInteger(LabelID); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? 
return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) LabelSDNode(Opcode, dl, Root, LabelID); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1308,12 +1375,15 @@ > ? ID.AddPointer(BA); > ? ID.AddInteger(TargetFlags); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -1326,13 +1396,16 @@ > ? ID.AddPointer(V); > > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) SrcValueSDNode(V); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -2243,13 +2316,16 @@ > ? FoldingSetNodeID ID; > ? AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) SDNode(Opcode, DL, getVTList(VT)); > ? CSEMap.InsertNode(N, IP); > > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -2473,8 +2549,10 @@ > ? ? SDValue Ops[1] = { Operand }; > ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 1); > ? ? void *IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? 
?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > + ? ?} > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) UnarySDNode(Opcode, DL, VTs, Operand); > ? ? CSEMap.InsertNode(N, IP); > @@ -2484,6 +2562,7 @@ > ? } > > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -2891,8 +2970,10 @@ > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 2); > ? ? void *IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > + ? ?} > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); > ? ? CSEMap.InsertNode(N, IP); > @@ -2902,6 +2983,7 @@ > ? } > > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -2968,8 +3050,10 @@ > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 3); > ? ? void *IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > + ? ?} > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); > ? ? CSEMap.InsertNode(N, IP); > @@ -2977,7 +3061,9 @@ > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); > ? } > + > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -3573,12 +3659,14 @@ > ? void* IP = 0; > ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? cast(E)->refineAlignment(MMO); > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > ? } > ? SDNode* N = NodeAllocator.Allocate(); > ? new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); > ? 
CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3636,12 +3724,14 @@ > ? void* IP = 0; > ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? cast(E)->refineAlignment(MMO); > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > ? } > ? SDNode* N = NodeAllocator.Allocate(); > ? new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3714,6 +3804,7 @@ > ? ? void *IP = 0; > ? ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? ? cast(E)->refineAlignment(MMO); > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > ? ? } > > @@ -3725,6 +3816,7 @@ > ? ? new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); > ? } > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3789,12 +3881,14 @@ > ? void *IP = 0; > ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? cast(E)->refineAlignment(MMO); > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > ? } > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3865,12 +3959,14 @@ > ? void *IP = 0; > ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? cast(E)->refineAlignment(MMO); > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > ? } > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3925,12 +4021,14 @@ > ? void *IP = 0; > ? 
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > ? ? cast(E)->refineAlignment(MMO); > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > ? } > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -3947,14 +4045,17 @@ > ? ID.AddInteger(ST->getMemoryVT().getRawBits()); > ? ID.AddInteger(ST->getRawSubclassData()); > ? void *IP = 0; > - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ?if (Ordering) Ordering->add(E); > ? ? return SDValue(E, 0); > + ?} > ? SDNode *N = NodeAllocator.Allocate(); > ? new (N) StoreSDNode(Ops, dl, VTs, AM, > ? ? ? ? ? ? ? ? ? ? ? ST->isTruncatingStore(), ST->getMemoryVT(), > ? ? ? ? ? ? ? ? ? ? ? ST->getMemOperand()); > ? CSEMap.InsertNode(N, IP); > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ? return SDValue(N, 0); > ?} > > @@ -4020,8 +4121,10 @@ > ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); > ? ? void *IP = 0; > > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > + ? ?} > > ? ? N = NodeAllocator.Allocate(); > ? ? new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); > @@ -4032,6 +4135,7 @@ > ? } > > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -4087,8 +4191,10 @@ > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); > ? ? void *IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return SDValue(E, 0); > + ? ?} > ? ? if (NumOps == 1) { > ? ? ? N = NodeAllocator.Allocate(); > ? ? ? 
new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); > @@ -4119,6 +4225,7 @@ > ? ? } > ? } > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -4581,8 +4688,10 @@ > ? if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); > - ? ?if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(ON); > ? ? ? return ON; > + ? ?} > ? } > > ? if (!RemoveNodeFromCSEMaps(N)) > @@ -4646,6 +4755,7 @@ > > ? if (IP) > ? ? CSEMap.InsertNode(N, IP); ? // Memoize the new node. > + ?if (Ordering) Ordering->add(N); > ? return N; > ?} > > @@ -4784,8 +4894,10 @@ > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); > ? ? IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return cast(E); > + ? ?} > ? } > > ? // Allocate a new MachineSDNode. > @@ -4807,6 +4919,7 @@ > ? ? CSEMap.InsertNode(N, IP); > > ? AllNodes.push_back(N); > + ?if (Ordering) Ordering->add(N); > ?#ifndef NDEBUG > ? VerifyNode(N); > ?#endif > @@ -4843,8 +4956,10 @@ > ? ? FoldingSetNodeID ID; > ? ? AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); > ? ? void *IP = 0; > - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) > + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { > + ? ? ?if (Ordering) Ordering->add(E); > ? ? ? return E; > + ? ?} > ? } > ? return NULL; > ?} > @@ -6011,6 +6126,9 @@ > ? errs() << "\n\n"; > ?} > > +void SelectionDAG::NodeOrdering::dump() const { > +} > + > ?void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { > ? print_types(OS, G); > ? print_details(OS, G); > @@ -6151,4 +6269,3 @@ > ? ? ? return false; > ? 
return true; > ?} > - > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91392&r1=91391&r2=91392&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Dec 14 19:54:51 2009 > @@ -583,6 +583,9 @@ > ?} > > ?void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { > + ?// Tell the DAG that we're processing a new instruction. > + ?DAG.NewInst(); > + > ? // Note: this doesn't use InstVisitor, because it has to work with > ? // ConstantExpr's in addition to instructions. > ? switch (Opcode) { > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From daniel at zuster.org Wed Dec 16 04:58:08 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 02:58:08 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> Message-ID: <6a8523d60912160258p2d1b3458qcf76cabb26a1a5d0@mail.gmail.com> Hi Bob, I reverted this, it was one of the patches responsible for the failing x86_64-apple-darwin10 bootstrap in my testing. - Daniel On Tue, Dec 15, 2009 at 2:00 PM, Bob Wilson wrote: > Author: bwilson > Date: Tue Dec 15 16:00:51 2009 > New Revision: 91459 > > URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev > Log: > Reapply 91184 with fixes and an addition to the testcase to cover the problem > found last time. 
Instead of trying to modify the IR while iterating over it, > I've changed it to keep a list of WeakVH references to dead instructions, and > then delete those instructions later. I also added some special case code to > detect and handle the situation when both operands of a memcpy intrinsic are > referencing the same alloca. > > Added: > ? ?llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > Modified: > ? ?llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp > > Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91459&r1=91458&r2=91459&view=diff > > ============================================================================== > --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) > +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Dec 15 16:00:51 2009 > @@ -74,6 +74,10 @@ > ? private: > ? ? TargetData *TD; > > + ? ?/// DeadInsts - Keep track of instructions we have made dead, so that > + ? ?/// we can remove them after we are done working. > + ? ?SmallVector DeadInsts; > + > ? ? /// AllocaInfo - When analyzing uses of an alloca instruction, this captures > ? ? /// information about the uses. ?All these fields are initialized to false > ? ? /// and set to true when something is learned. > @@ -102,25 +106,30 @@ > > ? ? int isSafeAllocaToScalarRepl(AllocaInst *AI); > > - ? ?void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info); > - ? ?void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info); > - ? ?void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned OpNo, AllocaInfo &Info); > - ? ?void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
?AllocaInfo &Info); > + ? ?void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, AllocaInfo &Info); > + ? ?void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, > + ? ? ? ? ? ? ? ? ? uint64_t &ArrayOffset, AllocaInfo &Info); > + ? ?void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, > + ? ? ? ? ? ? ? ? ? ? ? ? uint64_t MemSize, const Type *MemOpType, bool isStore, > + ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info); > + ? ?bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); > + ? ?unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); > > ? ? void DoScalarReplacement(AllocaInst *AI, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?std::vector &WorkList); > + ? ?void DeleteDeadInstructions(); > ? ? void CleanupGEP(GetElementPtrInst *GEP); > - ? ?void CleanupAllocaUsers(AllocaInst *AI); > + ? ?void CleanupAllocaUsers(Value *V); > ? ? AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); > > - ? ?void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); > - > - ? ?void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, > + ? ?void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); > + ? ?void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); > + ? ?void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); > + ? ?void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInst *AI, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SmallVector &NewElts); > ? ? void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, > @@ -360,176 +369,37 @@ > ? ? } > ? 
} > > - ?// Now that we have created the alloca instructions that we want to use, > - ?// expand the getelementptr instructions to use them. > - ?while (!AI->use_empty()) { > - ? ?Instruction *User = cast(AI->use_back()); > - ? ?if (BitCastInst *BCInst = dyn_cast(User)) { > - ? ? ?RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); > - ? ? ?BCInst->eraseFromParent(); > - ? ? ?continue; > - ? ?} > - > - ? ?// Replace: > - ? ?// ? %res = load { i32, i32 }* %alloc > - ? ?// with: > - ? ?// ? %load.0 = load i32* %alloc.0 > - ? ?// ? %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 > - ? ?// ? %load.1 = load i32* %alloc.1 > - ? ?// ? %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 > - ? ?// (Also works for arrays instead of structs) > - ? ?if (LoadInst *LI = dyn_cast(User)) { > - ? ? ?Value *Insert = UndefValue::get(LI->getType()); > - ? ? ?for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { > - ? ? ? ?Value *Load = new LoadInst(ElementAllocas[i], "load", LI); > - ? ? ? ?Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); > - ? ? ?} > - ? ? ?LI->replaceAllUsesWith(Insert); > - ? ? ?LI->eraseFromParent(); > - ? ? ?continue; > - ? ?} > + ?// Now that we have created the new alloca instructions, rewrite all the > + ?// uses of the old alloca. > + ?DeadInsts.push_back(AI); > + ?RewriteForScalarRepl(AI, AI, 0, ElementAllocas); > > - ? ?// Replace: > - ? ?// ? store { i32, i32 } %val, { i32, i32 }* %alloc > - ? ?// with: > - ? ?// ? %val.0 = extractvalue { i32, i32 } %val, 0 > - ? ?// ? store i32 %val.0, i32* %alloc.0 > - ? ?// ? %val.1 = extractvalue { i32, i32 } %val, 1 > - ? ?// ? store i32 %val.1, i32* %alloc.1 > - ? ?// (Also works for arrays instead of structs) > - ? ?if (StoreInst *SI = dyn_cast(User)) { > - ? ? ?Value *Val = SI->getOperand(0); > - ? ? ?for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { > - ? ? ? ?Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); > - ? ? ? 
?new StoreInst(Extract, ElementAllocas[i], SI); > - ? ? ?} > - ? ? ?SI->eraseFromParent(); > - ? ? ?continue; > - ? ?} > - > - ? ?GetElementPtrInst *GEPI = cast(User); > - ? ?// We now know that the GEP is of the form: GEP , 0, > - ? ?unsigned Idx = > - ? ? ? (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); > - > - ? ?assert(Idx < ElementAllocas.size() && "Index out of range?"); > - ? ?AllocaInst *AllocaToUse = ElementAllocas[Idx]; > - > - ? ?Value *RepValue; > - ? ?if (GEPI->getNumOperands() == 3) { > - ? ? ?// Do not insert a new getelementptr instruction with zero indices, only > - ? ? ?// to have it optimized out later. > - ? ? ?RepValue = AllocaToUse; > - ? ?} else { > - ? ? ?// We are indexing deeply into the structure, so we still need a > - ? ? ?// getelement ptr instruction to finish the indexing. ?This may be > - ? ? ?// expanded itself once the worklist is rerun. > - ? ? ?// > - ? ? ?SmallVector NewArgs; > - ? ? ?NewArgs.push_back(Constant::getNullValue( > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Type::getInt32Ty(AI->getContext()))); > - ? ? ?NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); > - ? ? ?RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NewArgs.end(), "", GEPI); > - ? ? ?RepValue->takeName(GEPI); > - ? ?} > - > - ? ?// If this GEP is to the start of the aggregate, check for memcpys. > - ? ?if (Idx == 0 && GEPI->hasAllZeroIndices()) > - ? ? ?RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); > - > - ? ?// Move all of the users over to the new GEP. > - ? ?GEPI->replaceAllUsesWith(RepValue); > - ? ?// Delete the old GEP > - ? ?GEPI->eraseFromParent(); > - ?} > + ?// Now erase any instructions that were made dead while rewriting the alloca. > + ?DeleteDeadInstructions(); > > - ?// Finally, delete the Alloca instruction > - ?AI->eraseFromParent(); > ? 
NumReplaced++; > ?} > > -/// isSafeElementUse - Check to see if this use is an allowed use for a > -/// getelementptr instruction of an array aggregate allocation. ?isFirstElt > -/// indicates whether Ptr is known to the start of the aggregate. > -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info) { > - ?for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); > - ? ? ? I != E; ++I) { > - ? ?Instruction *User = cast(*I); > - ? ?switch (User->getOpcode()) { > - ? ?case Instruction::Load: ?break; > - ? ?case Instruction::Store: > - ? ? ?// Store is ok if storing INTO the pointer, not storing the pointer > - ? ? ?if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); > - ? ? ?break; > - ? ?case Instruction::GetElementPtr: { > - ? ? ?GetElementPtrInst *GEP = cast(User); > - ? ? ?bool AreAllZeroIndices = isFirstElt; > - ? ? ?if (GEP->getNumOperands() > 1 && > - ? ? ? ? ?(!isa(GEP->getOperand(1)) || > - ? ? ? ? ? !cast(GEP->getOperand(1))->isZero())) > - ? ? ? ?// Using pointer arithmetic to navigate the array. > - ? ? ? ?return MarkUnsafe(Info); > - > - ? ? ?// Verify that any array subscripts are in range. > - ? ? ?for (gep_type_iterator GEPIt = gep_type_begin(GEP), > - ? ? ? ? ? E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { > - ? ? ? ?// Ignore struct elements, no extra checking needed for these. > - ? ? ? ?if (isa(*GEPIt)) > - ? ? ? ? ?continue; > - > - ? ? ? ?// This GEP indexes an array. ?Verify that this is an in-range > - ? ? ? ?// constant integer. Specifically, consider A[0][i]. We cannot know that > - ? ? ? ?// the user isn't doing invalid things like allowing i to index an > - ? ? ? ?// out-of-range subscript that accesses A[1]. ?Because of this, we have > - ? ? ? ?// to reject SROA of any accesses into structs where any of the > - ? ? ? ?// components are variables. > - ? ? ? ?ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); > - ? ? ? 
?if (!IdxVal) return MarkUnsafe(Info); > - > - ? ? ? ?// Are all indices still zero? > - ? ? ? ?AreAllZeroIndices &= IdxVal->isZero(); > - > - ? ? ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { > - ? ? ? ? ?if (IdxVal->getZExtValue() >= AT->getNumElements()) > - ? ? ? ? ? ?return MarkUnsafe(Info); > - ? ? ? ?} else if (const VectorType *VT = dyn_cast(*GEPIt)) { > - ? ? ? ? ?if (IdxVal->getZExtValue() >= VT->getNumElements()) > - ? ? ? ? ? ?return MarkUnsafe(Info); > - ? ? ? ?} > +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, > +/// recursively including all their operands that become trivially dead. > +void SROA::DeleteDeadInstructions() { > + ?while (!DeadInsts.empty()) { > + ? ?Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); > + ? ?if (I == 0) > + ? ? ?continue; > + > + ? ?for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) > + ? ? ?if (Instruction *U = dyn_cast(*OI)) { > + ? ? ? ?// Zero out the operand and see if it becomes trivially dead. > + ? ? ? ?*OI = 0; > + ? ? ? ?if (isInstructionTriviallyDead(U)) > + ? ? ? ? ?DeadInsts.push_back(U); > ? ? ? } > - > - ? ? ?isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); > - ? ? ?if (Info.isUnsafe) return; > - ? ? ?break; > - ? ?} > - ? ?case Instruction::BitCast: > - ? ? ?if (isFirstElt) { > - ? ? ? ?isSafeUseOfBitCastedAllocation(cast(User), AI, Info); > - ? ? ? ?if (Info.isUnsafe) return; > - ? ? ? ?break; > - ? ? ?} > - ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); > - ? ? ?return MarkUnsafe(Info); > - ? ?case Instruction::Call: > - ? ? ?if (MemIntrinsic *MI = dyn_cast(User)) { > - ? ? ? ?if (isFirstElt) { > - ? ? ? ? ?isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); > - ? ? ? ? ?if (Info.isUnsafe) return; > - ? ? ? ? ?break; > - ? ? ? ?} > - ? ? ?} > - ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); > - ? ? ?return MarkUnsafe(Info); > - ? ?default: > - ? ? 
?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); > - ? ? ?return MarkUnsafe(Info); > - ? ?} > + > + ? ?I->eraseFromParent(); > ? } > - ?return; ?// All users look ok :) > ?} > - > + > ?/// AllUsersAreLoads - Return true if all users of this value are loads. > ?static bool AllUsersAreLoads(Value *Ptr) { > ? for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); > @@ -539,72 +409,116 @@ > ? return true; > ?} > > -/// isSafeUseOfAllocation - Check if this user is an allowed use for an > -/// aggregate allocation. > -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { > - ?if (BitCastInst *C = dyn_cast(User)) > - ? ?return isSafeUseOfBitCastedAllocation(C, AI, Info); > - > - ?if (LoadInst *LI = dyn_cast(User)) > - ? ?if (!LI->isVolatile()) > - ? ? ?return;// Loads (returning a first class aggregrate) are always rewritable > - > - ?if (StoreInst *SI = dyn_cast(User)) > - ? ?if (!SI->isVolatile() && SI->getOperand(0) != AI) > - ? ? ?return;// Store is ok if storing INTO the pointer, not storing the pointer > - > - ?GetElementPtrInst *GEPI = dyn_cast(User); > - ?if (GEPI == 0) > - ? ?return MarkUnsafe(Info); > - > - ?gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); > +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to > +/// performing scalar replacement of alloca AI. ?The results are flagged in > +/// the Info parameter. ?Offset and ArrayOffset indicate the position within > +/// AI that is referenced by this instruction. > +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, AllocaInfo &Info) { > + ?for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { > + ? ?Instruction *User = cast(*UI); > > - ?// The GEP is not safe to transform if not of the form "GEP , 0, ". > - ?if (I == E || > - ? ? 
?I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { > - ? ?return MarkUnsafe(Info); > + ? ?if (BitCastInst *BC = dyn_cast(User)) { > + ? ? ?isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); > + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(User)) { > + ? ? ?uint64_t GEPArrayOffset = ArrayOffset; > + ? ? ?uint64_t GEPOffset = Offset; > + ? ? ?isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); > + ? ? ?if (!Info.isUnsafe) > + ? ? ? ?isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); > + ? ?} else if (MemIntrinsic *MI = dyn_cast(UI)) { > + ? ? ?ConstantInt *Length = dyn_cast(MI->getLength()); > + ? ? ?if (Length) > + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, > + ? ? ? ? ? ? ? ? ? ? ? ?UI.getOperandNo() == 1, Info); > + ? ? ?else > + ? ? ? ?MarkUnsafe(Info); > + ? ?} else if (LoadInst *LI = dyn_cast(User)) { > + ? ? ?if (!LI->isVolatile()) { > + ? ? ? ?const Type *LIType = LI->getType(); > + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), > + ? ? ? ? ? ? ? ? ? ? ? ?LIType, false, Info); > + ? ? ?} else > + ? ? ? ?MarkUnsafe(Info); > + ? ?} else if (StoreInst *SI = dyn_cast(User)) { > + ? ? ?// Store is ok if storing INTO the pointer, not storing the pointer > + ? ? ?if (!SI->isVolatile() && SI->getOperand(0) != I) { > + ? ? ? ?const Type *SIType = SI->getOperand(0)->getType(); > + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), > + ? ? ? ? ? ? ? ? ? ? ? ?SIType, true, Info); > + ? ? ?} else > + ? ? ? ?MarkUnsafe(Info); > + ? ?} else if (isa(UI)) { > + ? ? ?// If one user is DbgInfoIntrinsic then check if all users are > + ? ? ?// DbgInfoIntrinsics. > + ? ? ?if (OnlyUsedByDbgInfoIntrinsics(I)) { > + ? ? ? ?Info.needsCleanup = true; > + ? ? ? ?return; > + ? ? ?} > + ? ? ?MarkUnsafe(Info); > + ? ?} else { > + ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); > + ? ? ?MarkUnsafe(Info); > + ? ?} > + ? 
?if (Info.isUnsafe) return; > ? } > +} > > - ?++I; > - ?if (I == E) return MarkUnsafe(Info); ?// ran out of GEP indices?? > +/// isSafeGEP - Check if a GEP instruction can be handled for scalar > +/// replacement. ?It is safe when all the indices are constant, in-bounds > +/// references, and when the resulting offset corresponds to an element within > +/// the alloca type. ?The results are flagged in the Info parameter. ?Upon > +/// return, Offset is adjusted as specified by the GEP indices. ?For the > +/// special case of a variable index to a 2-element array, ArrayOffset is set > +/// to the array element size. > +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, > + ? ? ? ? ? ? ? ? ? ? uint64_t &Offset, uint64_t &ArrayOffset, > + ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { > + ?gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); > + ?if (GEPIt == E) > + ? ?return; > + > + ?// The first GEP index must be zero. > + ?if (!isa(GEPIt.getOperand()) || > + ? ? ?!cast(GEPIt.getOperand())->isZero()) > + ? ?return MarkUnsafe(Info); > + ?if (++GEPIt == E) > + ? ?return; > > - ?bool IsAllZeroIndices = true; > - > ? // If the first index is a non-constant index into an array, see if we can > ? // handle it as a special case. > - ?if (const ArrayType *AT = dyn_cast(*I)) { > - ? ?if (!isa(I.getOperand())) { > - ? ? ?IsAllZeroIndices = 0; > - ? ? ?uint64_t NumElements = AT->getNumElements(); > - > - ? ? ?// If this is an array index and the index is not constant, we cannot > - ? ? ?// promote... that is unless the array has exactly one or two elements in > - ? ? ?// it, in which case we CAN promote it, but we have to canonicalize this > - ? ? ?// out if this is the only problem. > - ? ? ?if ((NumElements == 1 || NumElements == 2) && > - ? ? ? ? ?AllUsersAreLoads(GEPI)) { > + ?const Type *ArrayEltTy = 0; > + ?if (ArrayOffset == 0 && Offset == 0) { > + ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { > + ? ? ?if (!isa(GEPIt.getOperand())) { > + ? ? ? 
?uint64_t NumElements = AT->getNumElements(); > + > + ? ? ? ?// If this is an array index and the index is not constant, we cannot > + ? ? ? ?// promote... that is unless the array has exactly one or two elements > + ? ? ? ?// in it, in which case we CAN promote it, but we have to canonicalize > + ? ? ? ?// this out if this is the only problem. > + ? ? ? ?if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) > + ? ? ? ? ?return MarkUnsafe(Info); > ? ? ? ? Info.needsCleanup = true; > - ? ? ? ?return; ?// Canonicalization required! > + ? ? ? ?ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); > + ? ? ? ?ArrayEltTy = AT->getElementType(); > + ? ? ? ?++GEPIt; > ? ? ? } > - ? ? ?return MarkUnsafe(Info); > ? ? } > ? } > - > + > ? // Walk through the GEP type indices, checking the types that this indexes > ? // into. > - ?for (; I != E; ++I) { > + ?for (; GEPIt != E; ++GEPIt) { > ? ? // Ignore struct elements, no extra checking needed for these. > - ? ?if (isa(*I)) > + ? ?if (isa(*GEPIt)) > ? ? ? continue; > - > - ? ?ConstantInt *IdxVal = dyn_cast(I.getOperand()); > - ? ?if (!IdxVal) return MarkUnsafe(Info); > > - ? ?// Are all indices still zero? > - ? ?IsAllZeroIndices &= IdxVal->isZero(); > - > - ? ?if (const ArrayType *AT = dyn_cast(*I)) { > + ? ?ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); > + ? ?if (!IdxVal) > + ? ? ?return MarkUnsafe(Info); > + > + ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { > ? ? ? // This GEP indexes an array. ?Verify that this is an in-range constant > ? ? ? // integer. Specifically, consider A[0][i]. We cannot know that the user > ? ? ? // isn't doing invalid things like allowing i to index an out-of-range > @@ -612,147 +526,255 @@ > ? ? ? // of any accesses into structs where any of the components are variables. > ? ? ? if (IdxVal->getZExtValue() >= AT->getNumElements()) > ? ? ? ? return MarkUnsafe(Info); > - ? ?} else if (const VectorType *VT = dyn_cast(*I)) { > + ? ?} else { > + ? ? 
?const VectorType *VT = dyn_cast(*GEPIt); > + ? ? ?assert(VT && "unexpected type in GEP type iterator"); > ? ? ? if (IdxVal->getZExtValue() >= VT->getNumElements()) > ? ? ? ? return MarkUnsafe(Info); > ? ? } > ? } > - > - ?// If there are any non-simple uses of this getelementptr, make sure to reject > - ?// them. > - ?return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); > + > + ?// All the indices are safe. ?Now compute the offset due to this GEP and > + ?// check if the alloca has a component element at that offset. > + ?if (ArrayOffset == 0) { > + ? ?SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); > + ? ?Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); > + ?} else { > + ? ?// Both array elements have the same type, so it suffices to check one of > + ? ?// them. ?Copy the GEP indices starting from the array index, but replace > + ? ?// that variable index with a constant zero. > + ? ?SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); > + ? ?Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); > + ? ?const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); > + ? ?Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); > + ?} > + ?if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) > + ? ?MarkUnsafe(Info); > +} > + > +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI > +/// alloca or has an offset and size that corresponds to a component element > +/// within it. ?The offset checked here may have been formed from a GEP with a > +/// pointer bitcasted to a different type. > +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, uint64_t MemSize, > + ? ? ? ? ? ? ? ? ? ? ? ? ? const Type *MemOpType, bool isStore, > + ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { > + ?// Check if this is a load/store of the entire alloca. 
> + ?if (Offset == 0 && ArrayOffset == 0 && > + ? ? ?MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { > + ? ?bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); > + ? ?// This is safe for MemIntrinsics (where MemOpType is 0), integer types > + ? ?// (which are essentially the same as the MemIntrinsics, especially with > + ? ?// regard to copying padding between elements), or references using the > + ? ?// aggregate type of the alloca. > + ? ?if (!MemOpType || isa(MemOpType) || UsesAggregateType) { > + ? ? ?if (!UsesAggregateType) { > + ? ? ? ?if (isStore) > + ? ? ? ? ?Info.isMemCpyDst = true; > + ? ? ? ?else > + ? ? ? ? ?Info.isMemCpySrc = true; > + ? ? ?} > + ? ? ?return; > + ? ?} > + ?} > + ?// Check if the offset/size correspond to a component within the alloca type. > + ?const Type *T = AI->getAllocatedType(); > + ?if (TypeHasComponent(T, Offset, MemSize) && > + ? ? ?(ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) > + ? ?return; > + > + ?return MarkUnsafe(Info); > ?} > > -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory > -/// intrinsic can be promoted by SROA. ?At this point, we know that the operand > -/// of the memintrinsic is a pointer to the beginning of the allocation. > -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned OpNo, AllocaInfo &Info) { > - ?// If not constant length, give up. > - ?ConstantInt *Length = dyn_cast(MI->getLength()); > - ?if (!Length) return MarkUnsafe(Info); > - > - ?// If not the whole aggregate, give up. > - ?if (Length->getZExtValue() != > - ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) > - ? ?return MarkUnsafe(Info); > - > - ?// We only know about memcpy/memset/memmove. > - ?if (!isa(MI)) > - ? ?return MarkUnsafe(Info); > - > - ?// Otherwise, we can transform it. ?Determine whether this is a memcpy/set > - ?// into or out of the aggregate. > - ?if (OpNo == 1) > - ? 
?Info.isMemCpyDst = true; > - ?else { > - ? ?assert(OpNo == 2); > - ? ?Info.isMemCpySrc = true; > +/// TypeHasComponent - Return true if T has a component type with the > +/// specified offset and size. ?If Size is zero, do not check the size. > +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { > + ?const Type *EltTy; > + ?uint64_t EltSize; > + ?if (const StructType *ST = dyn_cast(T)) { > + ? ?const StructLayout *Layout = TD->getStructLayout(ST); > + ? ?unsigned EltIdx = Layout->getElementContainingOffset(Offset); > + ? ?EltTy = ST->getContainedType(EltIdx); > + ? ?EltSize = TD->getTypeAllocSize(EltTy); > + ? ?Offset -= Layout->getElementOffset(EltIdx); > + ?} else if (const ArrayType *AT = dyn_cast(T)) { > + ? ?EltTy = AT->getElementType(); > + ? ?EltSize = TD->getTypeAllocSize(EltTy); > + ? ?Offset %= EltSize; > + ?} else { > + ? ?return false; > ? } > + ?if (Offset == 0 && (Size == 0 || EltSize == Size)) > + ? ?return true; > + ?// Check if the component spans multiple elements. > + ?if (Offset + Size > EltSize) > + ? ?return false; > + ?return TypeHasComponent(EltTy, Offset, Size); > ?} > > -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast > -/// from an alloca are safe for SROA of that alloca. > -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info) { > - ?for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); > - ? ? ? UI != E; ++UI) { > - ? ?if (BitCastInst *BCU = dyn_cast(UI)) { > - ? ? ?isSafeUseOfBitCastedAllocation(BCU, AI, Info); > - ? ?} else if (MemIntrinsic *MI = dyn_cast(UI)) { > - ? ? ?isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); > - ? ?} else if (StoreInst *SI = dyn_cast(UI)) { > - ? ? ?if (SI->isVolatile()) > - ? ? ? ?return MarkUnsafe(Info); > - > - ? ? ?// If storing the entire alloca in one chunk through a bitcasted pointer > - ? ? ?// to integer, we can transform it. 
?This happens (for example) when you > - ? ? ?// cast a {i32,i32}* to i64* and store through it. ?This is similar to the > - ? ? ?// memcpy case and occurs in various "byval" cases and emulated memcpys. > - ? ? ?if (isa(SI->getOperand(0)->getType()) && > - ? ? ? ? ?TD->getTypeAllocSize(SI->getOperand(0)->getType()) == > - ? ? ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) { > - ? ? ? ?Info.isMemCpyDst = true; > - ? ? ? ?continue; > - ? ? ?} > - ? ? ?return MarkUnsafe(Info); > - ? ?} else if (LoadInst *LI = dyn_cast(UI)) { > - ? ? ?if (LI->isVolatile()) > - ? ? ? ?return MarkUnsafe(Info); > +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite > +/// the instruction I, which references it, to use the separate elements. > +/// Offset indicates the position within AI that is referenced by this > +/// instruction. > +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { > + ?for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { > + ? ?Instruction *User = cast(*UI); > > - ? ? ?// If loading the entire alloca in one chunk through a bitcasted pointer > - ? ? ?// to integer, we can transform it. ?This happens (for example) when you > - ? ? ?// cast a {i32,i32}* to i64* and load through it. ?This is similar to the > - ? ? ?// memcpy case and occurs in various "byval" cases and emulated memcpys. > - ? ? ?if (isa(LI->getType()) && > - ? ? ? ? ?TD->getTypeAllocSize(LI->getType()) == > - ? ? ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) { > - ? ? ? ?Info.isMemCpySrc = true; > - ? ? ? ?continue; > + ? ?if (BitCastInst *BC = dyn_cast(User)) { > + ? ? ?RewriteBitCast(BC, AI, Offset, NewElts); > + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(User)) { > + ? ? ?RewriteGEP(GEPI, AI, Offset, NewElts); > + ? ?} else if (MemIntrinsic *MI = dyn_cast(User)) { > + ? ? ?ConstantInt *Length = dyn_cast(MI->getLength()); > + ? ? 
?uint64_t MemSize = Length->getZExtValue(); > + ? ? ?if (Offset == 0 && > + ? ? ? ? ?MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) > + ? ? ? ?RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); > + ? ?} else if (LoadInst *LI = dyn_cast(User)) { > + ? ? ?const Type *LIType = LI->getType(); > + ? ? ?if (LIType == AI->getAllocatedType()) { > + ? ? ? ?// Replace: > + ? ? ? ?// ? %res = load { i32, i32 }* %alloc > + ? ? ? ?// with: > + ? ? ? ?// ? %load.0 = load i32* %alloc.0 > + ? ? ? ?// ? %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 > + ? ? ? ?// ? %load.1 = load i32* %alloc.1 > + ? ? ? ?// ? %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 > + ? ? ? ?// (Also works for arrays instead of structs) > + ? ? ? ?Value *Insert = UndefValue::get(LIType); > + ? ? ? ?for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { > + ? ? ? ? ?Value *Load = new LoadInst(NewElts[i], "load", LI); > + ? ? ? ? ?Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); > + ? ? ? ?} > + ? ? ? ?LI->replaceAllUsesWith(Insert); > + ? ? ? ?DeadInsts.push_back(LI); > + ? ? ?} else if (isa(LIType) && > + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(LIType) == > + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(AI->getAllocatedType())) { > + ? ? ? ?// If this is a load of the entire alloca to an integer, rewrite it. > + ? ? ? ?RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); > ? ? ? } > - ? ? ?return MarkUnsafe(Info); > - ? ?} else if (isa(UI)) { > - ? ? ?// If one user is DbgInfoIntrinsic then check if all users are > - ? ? ?// DbgInfoIntrinsics. > - ? ? ?if (OnlyUsedByDbgInfoIntrinsics(BC)) { > - ? ? ? ?Info.needsCleanup = true; > - ? ? ? ?return; > + ? ?} else if (StoreInst *SI = dyn_cast(User)) { > + ? ? ?Value *Val = SI->getOperand(0); > + ? ? ?const Type *SIType = Val->getType(); > + ? ? ?if (SIType == AI->getAllocatedType()) { > + ? ? ? ?// Replace: > + ? ? ? ?// ? store { i32, i32 } %val, { i32, i32 }* %alloc > + ? ? ? ?// with: > + ? ? ? ?// ? 
%val.0 = extractvalue { i32, i32 } %val, 0 > + ? ? ? ?// ? store i32 %val.0, i32* %alloc.0 > + ? ? ? ?// ? %val.1 = extractvalue { i32, i32 } %val, 1 > + ? ? ? ?// ? store i32 %val.1, i32* %alloc.1 > + ? ? ? ?// (Also works for arrays instead of structs) > + ? ? ? ?for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { > + ? ? ? ? ?Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); > + ? ? ? ? ?new StoreInst(Extract, NewElts[i], SI); > + ? ? ? ?} > + ? ? ? ?DeadInsts.push_back(SI); > + ? ? ?} else if (isa(SIType) && > + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(SIType) == > + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(AI->getAllocatedType())) { > + ? ? ? ?// If this is a store of the entire alloca from an integer, rewrite it. > + ? ? ? ?RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); > ? ? ? } > - ? ? ?else > - ? ? ? ?MarkUnsafe(Info); > ? ? } > - ? ?else { > - ? ? ?return MarkUnsafe(Info); > - ? ?} > - ? ?if (Info.isUnsafe) return; > ? } > ?} > > -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes > -/// to its first element. ?Transform users of the cast to use the new values > -/// instead. > -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { > - ?Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); > - ?while (UI != UE) { > - ? ?Instruction *User = cast(*UI++); > - ? ?if (BitCastInst *BCU = dyn_cast(User)) { > - ? ? ?RewriteBitCastUserOfAlloca(BCU, AI, NewElts); > - ? ? ?if (BCU->use_empty()) BCU->eraseFromParent(); > - ? ? ?continue; > - ? ?} > +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced > +/// and recursively continue updating all of its uses. > +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { > + ?RewriteForScalarRepl(BC, AI, Offset, NewElts); > + ?if (BC->getOperand(0) != AI) > + ? ?return; > > - ? 
?if (MemIntrinsic *MI = dyn_cast(User)) { > - ? ? ?// This must be memcpy/memmove/memset of the entire aggregate. > - ? ? ?// Split into one per element. > - ? ? ?RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); > - ? ? ?continue; > - ? ?} > - > - ? ?if (StoreInst *SI = dyn_cast(User)) { > - ? ? ?// If this is a store of the entire alloca from an integer, rewrite it. > - ? ? ?RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); > - ? ? ?continue; > - ? ?} > + ?// The bitcast references the original alloca. ?Replace its uses with > + ?// references to the first new element alloca. > + ?Instruction *Val = NewElts[0]; > + ?if (Val->getType() != BC->getDestTy()) { > + ? ?Val = new BitCastInst(Val, BC->getDestTy(), "", BC); > + ? ?Val->takeName(BC); > + ?} > + ?BC->replaceAllUsesWith(Val); > + ?DeadInsts.push_back(BC); > +} > + > +/// FindElementAndOffset - Return the index of the element containing Offset > +/// within the specified type, which must be either a struct or an array. > +/// Sets T to the type of the element and Offset to the offset within that > +/// element. > +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { > + ?unsigned Idx = 0; > + ?if (const StructType *ST = dyn_cast(T)) { > + ? ?const StructLayout *Layout = TD->getStructLayout(ST); > + ? ?Idx = Layout->getElementContainingOffset(Offset); > + ? ?T = ST->getContainedType(Idx); > + ? ?Offset -= Layout->getElementOffset(Idx); > + ?} else { > + ? ?const ArrayType *AT = dyn_cast(T); > + ? ?assert(AT && "unexpected type for scalar replacement"); > + ? ?T = AT->getElementType(); > + ? ?uint64_t EltSize = TD->getTypeAllocSize(T); > + ? ?Idx = (unsigned)(Offset / EltSize); > + ? ?Offset -= Idx * EltSize; > + ?} > + ?return Idx; > +} > + > +/// RewriteGEP - Check if this GEP instruction moves the pointer across > +/// elements of the alloca that are being split apart, and if so, rewrite > +/// the GEP to be relative to the new element. 
> +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, > + ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { > + ?uint64_t OldOffset = Offset; > + ?SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); > + ?Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); > + > + ?RewriteForScalarRepl(GEPI, AI, Offset, NewElts); > + > + ?const Type *T = AI->getAllocatedType(); > + ?unsigned OldIdx = FindElementAndOffset(T, OldOffset); > + ?if (GEPI->getOperand(0) == AI) > + ? ?OldIdx = ~0U; // Force the GEP to be rewritten. > + > + ?T = AI->getAllocatedType(); > + ?uint64_t EltOffset = Offset; > + ?unsigned Idx = FindElementAndOffset(T, EltOffset); > + > + ?// If this GEP does not move the pointer across elements of the alloca > + ?// being split, then it does not needs to be rewritten. > + ?if (Idx == OldIdx) > + ? ?return; > > - ? ?if (LoadInst *LI = dyn_cast(User)) { > - ? ? ?// If this is a load of the entire alloca to an integer, rewrite it. > - ? ? ?RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); > - ? ? ?continue; > - ? ?} > - > - ? ?// Otherwise it must be some other user of a gep of the first pointer. ?Just > - ? ?// leave these alone. > - ? ?continue; > - ?} > + ?const Type *i32Ty = Type::getInt32Ty(AI->getContext()); > + ?SmallVector NewArgs; > + ?NewArgs.push_back(Constant::getNullValue(i32Ty)); > + ?while (EltOffset != 0) { > + ? ?unsigned EltIdx = FindElementAndOffset(T, EltOffset); > + ? ?NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); > + ?} > + ?Instruction *Val = NewElts[Idx]; > + ?if (NewArgs.size() > 1) { > + ? ?Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NewArgs.end(), "", GEPI); > + ? ?Val->takeName(GEPI); > + ?} > + ?if (Val->getType() != GEPI->getType()) > + ? 
?Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); > + ?GEPI->replaceAllUsesWith(Val); > + ?DeadInsts.push_back(GEPI); > ?} > > ?/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. > ?/// Rewrite it to copy or set the elements of the scalarized memory. > -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, > +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInst *AI, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SmallVector &NewElts) { > - > ? // If this is a memcpy/memmove, construct the other pointer as the > ? // appropriate type. ?The "Other" pointer is the pointer that goes to memory > ? // that doesn't have anything to do with the alloca that we are promoting. For > @@ -761,28 +783,41 @@ > ? LLVMContext &Context = MI->getContext(); > ? unsigned MemAlignment = MI->getAlignment(); > ? if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy > - ? ?if (BCInst == MTI->getRawDest()) > + ? ?if (Inst == MTI->getRawDest()) > ? ? ? OtherPtr = MTI->getRawSource(); > ? ? else { > - ? ? ?assert(BCInst == MTI->getRawSource()); > + ? ? ?assert(Inst == MTI->getRawSource()); > ? ? ? OtherPtr = MTI->getRawDest(); > ? ? } > ? } > > - ?// Keep track of the other intrinsic argument, so it can be removed if it > - ?// is dead when the intrinsic is replaced. > - ?Value *PossiblyDead = OtherPtr; > - > ? // If there is an other pointer, we want to convert it to the same pointer > ? // type as AI has, so we can GEP through it safely. > ? if (OtherPtr) { > - ? ?// It is likely that OtherPtr is a bitcast, if so, remove it. > - ? ?if (BitCastInst *BC = dyn_cast(OtherPtr)) > - ? ? ?OtherPtr = BC->getOperand(0); > - ? ?// All zero GEPs are effectively bitcasts. > - ? ?if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) > - ? ? ?if (GEP->hasAllZeroIndices()) > - ? ? ? ?OtherPtr = GEP->getOperand(0); > + > + ? 
?// Remove bitcasts and all-zero GEPs from OtherPtr. ?This is an > + ? ?// optimization, but it's also required to detect the corner case where > + ? ?// both pointer operands are referencing the same memory, and where > + ? ?// OtherPtr may be a bitcast or GEP that currently being rewritten. ?(This > + ? ?// function is only called for mem intrinsics that access the whole > + ? ?// aggregate, so non-zero GEPs are not an issue here.) > + ? ?while (1) { > + ? ? ?if (BitCastInst *BC = dyn_cast(OtherPtr)) { > + ? ? ? ?OtherPtr = BC->getOperand(0); > + ? ? ? ?continue; > + ? ? ?} > + ? ? ?if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { > + ? ? ? ?// All zero GEPs are effectively bitcasts. > + ? ? ? ?if (GEP->hasAllZeroIndices()) { > + ? ? ? ? ?OtherPtr = GEP->getOperand(0); > + ? ? ? ? ?continue; > + ? ? ? ?} > + ? ? ?} > + ? ? ?break; > + ? ?} > + ? ?// If OtherPtr has already been rewritten, this intrinsic will be dead. > + ? ?if (OtherPtr == NewElts[0]) > + ? ? ?return; > > ? ? if (ConstantExpr *BCE = dyn_cast(OtherPtr)) > ? ? ? if (BCE->getOpcode() == Instruction::BitCast) > @@ -798,7 +833,7 @@ > ? // Process each element of the aggregate. > ? Value *TheFn = MI->getOperand(0); > ? const Type *BytePtrTy = MI->getRawDest()->getType(); > - ?bool SROADest = MI->getRawDest() == BCInst; > + ?bool SROADest = MI->getRawDest() == Inst; > > ? Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); > > @@ -807,12 +842,15 @@ > ? ? Value *OtherElt = 0; > ? ? unsigned OtherEltAlign = MemAlignment; > > - ? ?if (OtherPtr) { > + ? ?if (OtherPtr == AI) { > + ? ? ?OtherElt = NewElts[i]; > + ? ? ?OtherEltAlign = 0; > + ? ?} else if (OtherPtr) { > ? ? ? Value *Idx[2] = { Zero, > ? ? ? ? ? ? ? ? ? ? ? ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; > - ? ? ?OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, > + ? ? ?OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
?OtherPtr->getNameStr()+"."+Twine(i), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? MI); > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? MI); > ? ? ? uint64_t EltOffset; > ? ? ? const PointerType *OtherPtrTy = cast(OtherPtr->getType()); > ? ? ? if (const StructType *ST = > @@ -924,9 +962,7 @@ > ? ? ? CallInst::Create(TheFn, Ops, Ops + 4, "", MI); > ? ? } > ? } > - ?MI->eraseFromParent(); > - ?if (PossiblyDead) > - ? ?RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); > + ?DeadInsts.push_back(MI); > ?} > > ?/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that > @@ -937,15 +973,9 @@ > ? // Extract each element out of the integer according to its structure offset > ? // and store the element value to the individual alloca. > ? Value *SrcVal = SI->getOperand(0); > - ?const Type *AllocaEltTy = AI->getType()->getElementType(); > + ?const Type *AllocaEltTy = AI->getAllocatedType(); > ? uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); > > - ?// If this isn't a store of an integer to the whole alloca, it may be a store > - ?// to the first element. ?Just ignore the store in this case and normal SROA > - ?// will handle it. > - ?if (!isa(SrcVal->getType()) || > - ? ? ?TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) > - ? ?return; > ? // Handle tail padding by extending the operand > ? if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) > ? ? SrcVal = new ZExtInst(SrcVal, > @@ -1050,7 +1080,7 @@ > ? ? } > ? } > > - ?SI->eraseFromParent(); > + ?DeadInsts.push_back(SI); > ?} > > ?/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to > @@ -1059,16 +1089,9 @@ > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SmallVector &NewElts) { > ? // Extract each element out of the NewElts according to its structure offset > ? // and form the result value. > - ?const Type *AllocaEltTy = AI->getType()->getElementType(); > + ?const Type *AllocaEltTy = AI->getAllocatedType(); > ? 
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); > > - ?// If this isn't a load of the whole alloca to an integer, it may be a load > - ?// of the first element. ?Just ignore the load in this case and normal SROA > - ?// will handle it. > - ?if (!isa(LI->getType()) || > - ? ? ?TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) > - ? ?return; > - > ? DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI > ? ? ? ? ? ? ? ?<< '\n'); > > @@ -1139,10 +1162,9 @@ > ? ? ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); > > ? LI->replaceAllUsesWith(ResultVal); > - ?LI->eraseFromParent(); > + ?DeadInsts.push_back(LI); > ?} > > - > ?/// HasPadding - Return true if the specified type has any structure or > ?/// alignment padding, false otherwise. > ?static bool HasPadding(const Type *Ty, const TargetData &TD) { > @@ -1192,14 +1214,10 @@ > ? // the users are safe to transform. > ? AllocaInfo Info; > > - ?for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); > - ? ? ? I != E; ++I) { > - ? ?isSafeUseOfAllocation(cast(*I), AI, Info); > - ? ?if (Info.isUnsafe) { > - ? ? ?DEBUG(errs() << "Cannot transform: " << *AI << "\n ?due to user: " > - ? ? ? ? ? ? ? ? ? << **I << '\n'); > - ? ? ?return 0; > - ? ?} > + ?isSafeForScalarRepl(AI, AI, 0, 0, Info); > + ?if (Info.isUnsafe) { > + ? ?DEBUG(errs() << "Cannot transform: " << *AI << '\n'); > + ? ?return 0; > ? } > > ? // Okay, we know all the users are promotable. ?If the aggregate is a memcpy > @@ -1208,7 +1226,7 @@ > ? // types, but may actually be used. ?In these cases, we refuse to promote the > ? // struct. > ? if (Info.isMemCpySrc && Info.isMemCpyDst && > - ? ? ?HasPadding(AI->getType()->getElementType(), *TD)) > + ? ? ?HasPadding(AI->getAllocatedType(), *TD)) > ? ? return 0; > > ? // If we require cleanup, return 1, otherwise return 3. > @@ -1245,15 +1263,15 @@ > ? // Insert the new GEP instructions, which are properly indexed. > ? 
SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); > ? Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); > - ?Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.begin(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.end(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GEPI->getName()+".0", GEPI); > + ?Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.begin(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.end(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GEPI->getName()+".0",GEPI); > ? Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); > - ?Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.begin(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.end(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GEPI->getName()+".1", GEPI); > + ?Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.begin(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.end(), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GEPI->getName()+".1", GEPI); > ? // Replace all loads of the variable index GEP with loads from both > ? // indexes and a select. > ? while (!GEPI->use_empty()) { > @@ -1264,22 +1282,24 @@ > ? ? LI->replaceAllUsesWith(R); > ? ? LI->eraseFromParent(); > ? } > - ?GEPI->eraseFromParent(); > ?} > > - > ?/// CleanupAllocaUsers - If SROA reported that it can promote the specified > ?/// allocation, but only if cleaned up, perform the cleanups required. > -void SROA::CleanupAllocaUsers(AllocaInst *AI) { > +void SROA::CleanupAllocaUsers(Value *V) { > ? // At this point, we know that the end result will be SROA'd and promoted, so > ? 
// we can insert ugly code if required so long as sroa+mem2reg will clean it > ? // up. > - ?for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); > + ?for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > ? ? ? ?UI != E; ) { > ? ? User *U = *UI++; > - ? ?if (GetElementPtrInst *GEPI = dyn_cast(U)) > + ? ?if (isa(U)) { > + ? ? ?CleanupAllocaUsers(U); > + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(U)) { > ? ? ? CleanupGEP(GEPI); > - ? ?else { > + ? ? ?CleanupAllocaUsers(GEPI); > + ? ? ?if (GEPI->use_empty()) GEPI->eraseFromParent(); > + ? ?} else { > ? ? ? Instruction *I = cast(U); > ? ? ? SmallVector DbgInUses; > ? ? ? if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { > @@ -1395,7 +1415,7 @@ > > ? ? ? // Compute the offset that this GEP adds to the pointer. > ? ? ? SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); > - ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), > + ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); > ? ? ? // See if all uses can be converted. > ? ? ? if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, > @@ -1457,7 +1477,7 @@ > ? ? if (GetElementPtrInst *GEP = dyn_cast(User)) { > ? ? ? // Compute the offset that this GEP adds to the pointer. > ? ? ? SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); > - ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), > + ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); > ? ? ? ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); > ? ? ? 
GEP->eraseFromParent(); > > Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91459&view=auto > > ============================================================================== > --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added) > +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Tue Dec 15 16:00:51 2009 > @@ -0,0 +1,89 @@ > +; RUN: opt < %s -scalarrepl -S | FileCheck %s > +; Radar 7441282 > + > +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" > +target triple = "thumbv7-apple-darwin10" > + > +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } > +%struct.int16x8_t = type { <8 x i16> } > +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } > +%union..0anon = type { %struct.int16x8x2_t } > + > +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { > +; CHECK: @test > +; CHECK-NOT: alloca > +; CHECK: "alloca point" > +entry: > + ?%tmp_addr = alloca %struct.int16x8_t ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=3] > + ?%dst_addr = alloca %struct.int16x8x2_t* ? ? ? ? ; <%struct.int16x8x2_t**> [#uses=2] > + ?%__rv = alloca %union..0anon ? ? ? ? ? ? ? ? ? ?; <%union..0anon*> [#uses=2] > + ?%__bx = alloca %struct.int16x8_t ? ? ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=2] > + ?%__ax = alloca %struct.int16x8_t ? ? ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=2] > + ?%tmp2 = alloca %struct.int16x8x2_t ? ? ? ? ? ? ?; <%struct.int16x8x2_t*> [#uses=2] > + ?%0 = alloca %struct.int16x8x2_t ? ? ? ? ? ? ? ? ; <%struct.int16x8x2_t*> [#uses=2] > + ?%"alloca point" = bitcast i32 0 to i32 ? ? ? ? 
?; [#uses=0] > + ?%1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?store <8 x i16> %tmp.0, <8 x i16>* %1 > + ?store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr > + ?%2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%4 = load <8 x i16>* %3, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=1] > + ?store <8 x i16> %4, <8 x i16>* %2, align 16 > + ?%5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%7 = load <8 x i16>* %6, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=1] > + ?store <8 x i16> %7, <8 x i16>* %5, align 16 > + ?%8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%9 = load <8 x i16>* %8, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=2] > + ?%10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?%11 = load <8 x i16>* %10, align 16 ? ? ? ? ? ? 
; <<8 x i16>> [#uses=2] > + ?%12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] > + ?%13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] > + ?%14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] > + ?%15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] > + ?store <8 x i16> %14, <8 x i16>* %15 > + ?%16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] > + ?%17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] > + ?store <8 x i16> %16, <8 x i16>* %17 > + ?%18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] > + ?%19 = bitcast %struct.int16x8x2_t* %0 to i8* ? ?; [#uses=1] > + ?%20 = bitcast %struct.int16x8x2_t* %18 to i8* ? ; [#uses=1] > + ?call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) > + ?%tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] > + ?%21 = bitcast %struct.int16x8x2_t* %0 to i8* ? ?; [#uses=1] > + ?call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) > + ?%22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] > + ?%23 = bitcast %struct.int16x8x2_t* %22 to i8* ? ; [#uses=1] > + ?%tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] > + ?call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) > + ?br label %return > + > +; CHECK: store <8 x i16> > +; CHECK: store <8 x i16> > + > +return: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ; preds = %entry > + ?ret void > +} > + > +; Radar 7466574 > +%struct._NSRange = type { i64 } > + > +define arm_apcscc void @test_memcpy_self() nounwind { > +; CHECK: @test_memcpy_self > +; CHECK-NOT: alloca > +; CHECK: br i1 > +entry: > + ?%range = alloca %struct._NSRange ? ? ? ? ? ? ? 
?; <%struct._NSRange*> [#uses=2] > + ?br i1 undef, label %cond.true, label %cond.false > + > +cond.true: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?; preds = %entry > + ?%tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] > + ?%tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] > + ?call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) > + ?ret void > + > +cond.false: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ; preds = %entry > + ?ret void > +} > + > +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From edwintorok at gmail.com Wed Dec 16 05:07:43 2009 From: edwintorok at gmail.com (=?ISO-8859-1?Q?T=F6r=F6k_Edwin?=) Date: Wed, 16 Dec 2009 13:07:43 +0200 Subject: [llvm-commits] [llvm] r91489 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <4B28B889.4060107@gmail.com> References: <200912160053.nBG0rDDv029906@zion.cs.uiuc.edu> <4B28B889.4060107@gmail.com> Message-ID: <4B28BF7F.3090306@gmail.com> On 2009-12-16 12:38, Török Edwin wrote: > On 2009-12-16 02:53, Evan Cheng wrote: > >> Author: evancheng >> Date: Tue Dec 15 18:53:11 2009 >> New Revision: 91489 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91489&view=rev >> Log: >> Re-enable 91381 with fixes. 
>> >> Modified: >> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp >> llvm/trunk/lib/Target/X86/X86Instr64bit.td >> llvm/trunk/lib/Target/X86/X86InstrInfo.td >> llvm/trunk/test/CodeGen/X86/setcc.ll >> >> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91489&r1=91488&r2=91489&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) >> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 15 18:53:11 2009 >> @@ -980,6 +980,7 @@ >> setTargetDAGCombine(ISD::SRL); >> setTargetDAGCombine(ISD::STORE); >> setTargetDAGCombine(ISD::MEMBARRIER); >> + setTargetDAGCombine(ISD::ZERO_EXTEND); >> >> > > Hi Evan, > > I noticed an infloop converting and->zero_extend->and, could this patch > be the cause? > http://llvm.org/bugs/show_bug.cgi?id=5802 > git bisect tells me that SVN r91380 introduces the infloops: 5b6187226b44f590ce7f614b128480b9c2d823ef is the first bad commit commit 5b6187226b44f590ce7f614b128480b9c2d823ef Author: Evan Cheng Date: Tue Dec 15 00:52:11 2009 +0000 Fold (zext (and x, cst)) -> (and (zext x), cst). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk at 91380 91177308-0d34-0410-b5e6-96231b3b80d8 Best regards, --Edwin From daniel at zuster.org Wed Dec 16 05:38:03 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 11:38:03 -0000 Subject: [llvm-commits] [llvm] r91536 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912161138.nBGBc3rN009550@zion.cs.uiuc.edu> Author: ddunbar Date: Wed Dec 16 05:38:03 2009 New Revision: 91536 URL: http://llvm.org/viewvc/llvm-project?rev=91536&view=rev Log: Fix one more missing this-> to placate that picky clang++. 
Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91536&r1=91535&r2=91536&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Wed Dec 16 05:38:03 2009 @@ -218,7 +218,7 @@ if (!this->isSmall()) operator delete(this->begin()); - setEnd(NewElts+CurSize); + this->setEnd(NewElts+CurSize); this->BeginX = NewElts; this->CapacityX = this->begin()+NewCapacity; } From evan.cheng at apple.com Wed Dec 16 11:15:58 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 09:15:58 -0800 Subject: [llvm-commits] x86-64 miscompile In-Reply-To: <4635215F-70B3-4414-9B9F-52CE43810B4C@apple.com> References: <4635215F-70B3-4414-9B9F-52CE43810B4C@apple.com> Message-ID: <1194062E-4420-4808-A155-E0CBBFC65831@apple.com> Yes, that's what I saw before Daniel backed out a couple of patches. However, as of r91438, the miscompare is insn-emit.o. The differences were much more significant. Evan On Dec 16, 2009, at 1:35 AM, Chris Lattner wrote: > I suspect a scheduling nondeterminism. The file that miscompares is i386.o and the only difference is: > > 1c1 > < stage2-gcc/i386.o: > --- >> stage3-gcc/i386.o: > 22171,22172c22171,22172 > < 0000000000014a87 movq %rax,%rbx > < 0000000000014a8a movq %rbx,0xc8(%rbp) > --- >> 0000000000014a87 movq %rax,0xc8(%rbp) >> 0000000000014a8b movq %rax,%rbx > > It could also be a copy elimination or folding thing I guess. 
> > -Chris > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Wed Dec 16 11:17:34 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 09:17:34 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <6a8523d60912160258p2d1b3458qcf76cabb26a1a5d0@mail.gmail.com> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> <6a8523d60912160258p2d1b3458qcf76cabb26a1a5d0@mail.gmail.com> Message-ID: <0804613A-3474-48FE-99E4-A06AC16D629F@apple.com> I thought the problem started prior to 91459? Evan On Dec 16, 2009, at 2:58 AM, Daniel Dunbar wrote: > Hi Bob, > > I reverted this, it was one of the patches responsible for the failing > x86_64-apple-darwin10 bootstrap in my testing. > > - Daniel > > On Tue, Dec 15, 2009 at 2:00 PM, Bob Wilson wrote: >> Author: bwilson >> Date: Tue Dec 15 16:00:51 2009 >> New Revision: 91459 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev >> Log: >> Reapply 91184 with fixes and an addition to the testcase to cover the problem >> found last time. Instead of trying to modify the IR while iterating over it, >> I've changed it to keep a list of WeakVH references to dead instructions, and >> then delete those instructions later. I also added some special case code to >> detect and handle the situation when both operands of a memcpy intrinsic are >> referencing the same alloca. 
>> >> Added: >> llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll >> Modified: >> llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp >> >> Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91459&r1=91458&r2=91459&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) >> +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Dec 15 16:00:51 2009 >> @@ -74,6 +74,10 @@ >> private: >> TargetData *TD; >> >> + /// DeadInsts - Keep track of instructions we have made dead, so that >> + /// we can remove them after we are done working. >> + SmallVector DeadInsts; >> + >> /// AllocaInfo - When analyzing uses of an alloca instruction, this captures >> /// information about the uses. All these fields are initialized to false >> /// and set to true when something is learned. 
>> @@ -102,25 +106,30 @@ >> >> int isSafeAllocaToScalarRepl(AllocaInst *AI); >> >> - void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, >> - AllocaInfo &Info); >> - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, >> - AllocaInfo &Info); >> - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, >> - unsigned OpNo, AllocaInfo &Info); >> - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, >> - AllocaInfo &Info); >> + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >> + uint64_t ArrayOffset, AllocaInfo &Info); >> + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, >> + uint64_t &ArrayOffset, AllocaInfo &Info); >> + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, >> + uint64_t MemSize, const Type *MemOpType, bool isStore, >> + AllocaInfo &Info); >> + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); >> + unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); >> >> void DoScalarReplacement(AllocaInst *AI, >> std::vector &WorkList); >> + void DeleteDeadInstructions(); >> void CleanupGEP(GetElementPtrInst *GEP); >> - void CleanupAllocaUsers(AllocaInst *AI); >> + void CleanupAllocaUsers(Value *V); >> AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); >> >> - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, >> - SmallVector &NewElts); >> - >> - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, >> + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts); >> + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts); >> + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts); >> + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, >> AllocaInst *AI, >> SmallVector &NewElts); >> void 
RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, >> @@ -360,176 +369,37 @@ >> } >> } >> >> - // Now that we have created the alloca instructions that we want to use, >> - // expand the getelementptr instructions to use them. >> - while (!AI->use_empty()) { >> - Instruction *User = cast(AI->use_back()); >> - if (BitCastInst *BCInst = dyn_cast(User)) { >> - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); >> - BCInst->eraseFromParent(); >> - continue; >> - } >> - >> - // Replace: >> - // %res = load { i32, i32 }* %alloc >> - // with: >> - // %load.0 = load i32* %alloc.0 >> - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 >> - // %load.1 = load i32* %alloc.1 >> - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 >> - // (Also works for arrays instead of structs) >> - if (LoadInst *LI = dyn_cast(User)) { >> - Value *Insert = UndefValue::get(LI->getType()); >> - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { >> - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); >> - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); >> - } >> - LI->replaceAllUsesWith(Insert); >> - LI->eraseFromParent(); >> - continue; >> - } >> + // Now that we have created the new alloca instructions, rewrite all the >> + // uses of the old alloca. 
>> + DeadInsts.push_back(AI); >> + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); >> >> - // Replace: >> - // store { i32, i32 } %val, { i32, i32 }* %alloc >> - // with: >> - // %val.0 = extractvalue { i32, i32 } %val, 0 >> - // store i32 %val.0, i32* %alloc.0 >> - // %val.1 = extractvalue { i32, i32 } %val, 1 >> - // store i32 %val.1, i32* %alloc.1 >> - // (Also works for arrays instead of structs) >> - if (StoreInst *SI = dyn_cast(User)) { >> - Value *Val = SI->getOperand(0); >> - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { >> - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); >> - new StoreInst(Extract, ElementAllocas[i], SI); >> - } >> - SI->eraseFromParent(); >> - continue; >> - } >> - >> - GetElementPtrInst *GEPI = cast(User); >> - // We now know that the GEP is of the form: GEP , 0, >> - unsigned Idx = >> - (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); >> - >> - assert(Idx < ElementAllocas.size() && "Index out of range?"); >> - AllocaInst *AllocaToUse = ElementAllocas[Idx]; >> - >> - Value *RepValue; >> - if (GEPI->getNumOperands() == 3) { >> - // Do not insert a new getelementptr instruction with zero indices, only >> - // to have it optimized out later. >> - RepValue = AllocaToUse; >> - } else { >> - // We are indexing deeply into the structure, so we still need a >> - // getelement ptr instruction to finish the indexing. This may be >> - // expanded itself once the worklist is rerun. >> - // >> - SmallVector NewArgs; >> - NewArgs.push_back(Constant::getNullValue( >> - Type::getInt32Ty(AI->getContext()))); >> - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); >> - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), >> - NewArgs.end(), "", GEPI); >> - RepValue->takeName(GEPI); >> - } >> - >> - // If this GEP is to the start of the aggregate, check for memcpys. 
>> - if (Idx == 0 && GEPI->hasAllZeroIndices()) >> - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); >> - >> - // Move all of the users over to the new GEP. >> - GEPI->replaceAllUsesWith(RepValue); >> - // Delete the old GEP >> - GEPI->eraseFromParent(); >> - } >> + // Now erase any instructions that were made dead while rewriting the alloca. >> + DeleteDeadInstructions(); >> >> - // Finally, delete the Alloca instruction >> - AI->eraseFromParent(); >> NumReplaced++; >> } >> >> -/// isSafeElementUse - Check to see if this use is an allowed use for a >> -/// getelementptr instruction of an array aggregate allocation. isFirstElt >> -/// indicates whether Ptr is known to the start of the aggregate. >> -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, >> - AllocaInfo &Info) { >> - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); >> - I != E; ++I) { >> - Instruction *User = cast(*I); >> - switch (User->getOpcode()) { >> - case Instruction::Load: break; >> - case Instruction::Store: >> - // Store is ok if storing INTO the pointer, not storing the pointer >> - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); >> - break; >> - case Instruction::GetElementPtr: { >> - GetElementPtrInst *GEP = cast(User); >> - bool AreAllZeroIndices = isFirstElt; >> - if (GEP->getNumOperands() > 1 && >> - (!isa(GEP->getOperand(1)) || >> - !cast(GEP->getOperand(1))->isZero())) >> - // Using pointer arithmetic to navigate the array. >> - return MarkUnsafe(Info); >> - >> - // Verify that any array subscripts are in range. >> - for (gep_type_iterator GEPIt = gep_type_begin(GEP), >> - E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { >> - // Ignore struct elements, no extra checking needed for these. >> - if (isa(*GEPIt)) >> - continue; >> - >> - // This GEP indexes an array. Verify that this is an in-range >> - // constant integer. Specifically, consider A[0][i]. 
We cannot know that >> - // the user isn't doing invalid things like allowing i to index an >> - // out-of-range subscript that accesses A[1]. Because of this, we have >> - // to reject SROA of any accesses into structs where any of the >> - // components are variables. >> - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); >> - if (!IdxVal) return MarkUnsafe(Info); >> - >> - // Are all indices still zero? >> - AreAllZeroIndices &= IdxVal->isZero(); >> - >> - if (const ArrayType *AT = dyn_cast(*GEPIt)) { >> - if (IdxVal->getZExtValue() >= AT->getNumElements()) >> - return MarkUnsafe(Info); >> - } else if (const VectorType *VT = dyn_cast(*GEPIt)) { >> - if (IdxVal->getZExtValue() >= VT->getNumElements()) >> - return MarkUnsafe(Info); >> - } >> +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, >> +/// recursively including all their operands that become trivially dead. >> +void SROA::DeleteDeadInstructions() { >> + while (!DeadInsts.empty()) { >> + Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); >> + if (I == 0) >> + continue; >> + >> + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) >> + if (Instruction *U = dyn_cast(*OI)) { >> + // Zero out the operand and see if it becomes trivially dead. 
>> + *OI = 0; >> + if (isInstructionTriviallyDead(U)) >> + DeadInsts.push_back(U); >> } >> - >> - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); >> - if (Info.isUnsafe) return; >> - break; >> - } >> - case Instruction::BitCast: >> - if (isFirstElt) { >> - isSafeUseOfBitCastedAllocation(cast(User), AI, Info); >> - if (Info.isUnsafe) return; >> - break; >> - } >> - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); >> - return MarkUnsafe(Info); >> - case Instruction::Call: >> - if (MemIntrinsic *MI = dyn_cast(User)) { >> - if (isFirstElt) { >> - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); >> - if (Info.isUnsafe) return; >> - break; >> - } >> - } >> - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); >> - return MarkUnsafe(Info); >> - default: >> - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); >> - return MarkUnsafe(Info); >> - } >> + >> + I->eraseFromParent(); >> } >> - return; // All users look ok :) >> } >> - >> + >> /// AllUsersAreLoads - Return true if all users of this value are loads. >> static bool AllUsersAreLoads(Value *Ptr) { >> for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); >> @@ -539,72 +409,116 @@ >> return true; >> } >> >> -/// isSafeUseOfAllocation - Check if this user is an allowed use for an >> -/// aggregate allocation. 
>> -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, >> - AllocaInfo &Info) { >> - if (BitCastInst *C = dyn_cast(User)) >> - return isSafeUseOfBitCastedAllocation(C, AI, Info); >> - >> - if (LoadInst *LI = dyn_cast(User)) >> - if (!LI->isVolatile()) >> - return;// Loads (returning a first class aggregrate) are always rewritable >> - >> - if (StoreInst *SI = dyn_cast(User)) >> - if (!SI->isVolatile() && SI->getOperand(0) != AI) >> - return;// Store is ok if storing INTO the pointer, not storing the pointer >> - >> - GetElementPtrInst *GEPI = dyn_cast(User); >> - if (GEPI == 0) >> - return MarkUnsafe(Info); >> - >> - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); >> +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to >> +/// performing scalar replacement of alloca AI. The results are flagged in >> +/// the Info parameter. Offset and ArrayOffset indicate the position within >> +/// AI that is referenced by this instruction. >> +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >> + uint64_t ArrayOffset, AllocaInfo &Info) { >> + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { >> + Instruction *User = cast(*UI); >> >> - // The GEP is not safe to transform if not of the form "GEP , 0, ". 
>> - if (I == E || >> - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { >> - return MarkUnsafe(Info); >> + if (BitCastInst *BC = dyn_cast(User)) { >> + isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); >> + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { >> + uint64_t GEPArrayOffset = ArrayOffset; >> + uint64_t GEPOffset = Offset; >> + isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); >> + if (!Info.isUnsafe) >> + isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); >> + } else if (MemIntrinsic *MI = dyn_cast(UI)) { >> + ConstantInt *Length = dyn_cast(MI->getLength()); >> + if (Length) >> + isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, >> + UI.getOperandNo() == 1, Info); >> + else >> + MarkUnsafe(Info); >> + } else if (LoadInst *LI = dyn_cast(User)) { >> + if (!LI->isVolatile()) { >> + const Type *LIType = LI->getType(); >> + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), >> + LIType, false, Info); >> + } else >> + MarkUnsafe(Info); >> + } else if (StoreInst *SI = dyn_cast(User)) { >> + // Store is ok if storing INTO the pointer, not storing the pointer >> + if (!SI->isVolatile() && SI->getOperand(0) != I) { >> + const Type *SIType = SI->getOperand(0)->getType(); >> + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), >> + SIType, true, Info); >> + } else >> + MarkUnsafe(Info); >> + } else if (isa(UI)) { >> + // If one user is DbgInfoIntrinsic then check if all users are >> + // DbgInfoIntrinsics. >> + if (OnlyUsedByDbgInfoIntrinsics(I)) { >> + Info.needsCleanup = true; >> + return; >> + } >> + MarkUnsafe(Info); >> + } else { >> + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); >> + MarkUnsafe(Info); >> + } >> + if (Info.isUnsafe) return; >> } >> +} >> >> - ++I; >> - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? >> +/// isSafeGEP - Check if a GEP instruction can be handled for scalar >> +/// replacement. 
It is safe when all the indices are constant, in-bounds >> +/// references, and when the resulting offset corresponds to an element within >> +/// the alloca type. The results are flagged in the Info parameter. Upon >> +/// return, Offset is adjusted as specified by the GEP indices. For the >> +/// special case of a variable index to a 2-element array, ArrayOffset is set >> +/// to the array element size. >> +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, >> + uint64_t &Offset, uint64_t &ArrayOffset, >> + AllocaInfo &Info) { >> + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); >> + if (GEPIt == E) >> + return; >> + >> + // The first GEP index must be zero. >> + if (!isa(GEPIt.getOperand()) || >> + !cast(GEPIt.getOperand())->isZero()) >> + return MarkUnsafe(Info); >> + if (++GEPIt == E) >> + return; >> >> - bool IsAllZeroIndices = true; >> - >> // If the first index is a non-constant index into an array, see if we can >> // handle it as a special case. >> - if (const ArrayType *AT = dyn_cast(*I)) { >> - if (!isa(I.getOperand())) { >> - IsAllZeroIndices = 0; >> - uint64_t NumElements = AT->getNumElements(); >> - >> - // If this is an array index and the index is not constant, we cannot >> - // promote... that is unless the array has exactly one or two elements in >> - // it, in which case we CAN promote it, but we have to canonicalize this >> - // out if this is the only problem. >> - if ((NumElements == 1 || NumElements == 2) && >> - AllUsersAreLoads(GEPI)) { >> + const Type *ArrayEltTy = 0; >> + if (ArrayOffset == 0 && Offset == 0) { >> + if (const ArrayType *AT = dyn_cast(*GEPIt)) { >> + if (!isa(GEPIt.getOperand())) { >> + uint64_t NumElements = AT->getNumElements(); >> + >> + // If this is an array index and the index is not constant, we cannot >> + // promote... 
that is unless the array has exactly one or two elements >> + // in it, in which case we CAN promote it, but we have to canonicalize >> + // this out if this is the only problem. >> + if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) >> + return MarkUnsafe(Info); >> Info.needsCleanup = true; >> - return; // Canonicalization required! >> + ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); >> + ArrayEltTy = AT->getElementType(); >> + ++GEPIt; >> } >> - return MarkUnsafe(Info); >> } >> } >> - >> + >> // Walk through the GEP type indices, checking the types that this indexes >> // into. >> - for (; I != E; ++I) { >> + for (; GEPIt != E; ++GEPIt) { >> // Ignore struct elements, no extra checking needed for these. >> - if (isa(*I)) >> + if (isa(*GEPIt)) >> continue; >> - >> - ConstantInt *IdxVal = dyn_cast(I.getOperand()); >> - if (!IdxVal) return MarkUnsafe(Info); >> >> - // Are all indices still zero? >> - IsAllZeroIndices &= IdxVal->isZero(); >> - >> - if (const ArrayType *AT = dyn_cast(*I)) { >> + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); >> + if (!IdxVal) >> + return MarkUnsafe(Info); >> + >> + if (const ArrayType *AT = dyn_cast(*GEPIt)) { >> // This GEP indexes an array. Verify that this is an in-range constant >> // integer. Specifically, consider A[0][i]. We cannot know that the user >> // isn't doing invalid things like allowing i to index an out-of-range >> @@ -612,147 +526,255 @@ >> // of any accesses into structs where any of the components are variables. >> if (IdxVal->getZExtValue() >= AT->getNumElements()) >> return MarkUnsafe(Info); >> - } else if (const VectorType *VT = dyn_cast(*I)) { >> + } else { >> + const VectorType *VT = dyn_cast(*GEPIt); >> + assert(VT && "unexpected type in GEP type iterator"); >> if (IdxVal->getZExtValue() >= VT->getNumElements()) >> return MarkUnsafe(Info); >> } >> } >> - >> - // If there are any non-simple uses of this getelementptr, make sure to reject >> - // them. 
>> - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); >> + >> + // All the indices are safe. Now compute the offset due to this GEP and >> + // check if the alloca has a component element at that offset. >> + if (ArrayOffset == 0) { >> + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); >> + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), >> + &Indices[0], Indices.size()); >> + } else { >> + // Both array elements have the same type, so it suffices to check one of >> + // them. Copy the GEP indices starting from the array index, but replace >> + // that variable index with a constant zero. >> + SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); >> + Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); >> + const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); >> + Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); >> + } >> + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) >> + MarkUnsafe(Info); >> +} >> + >> +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI >> +/// alloca or has an offset and size that corresponds to a component element >> +/// within it. The offset checked here may have been formed from a GEP with a >> +/// pointer bitcasted to a different type. >> +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, >> + uint64_t ArrayOffset, uint64_t MemSize, >> + const Type *MemOpType, bool isStore, >> + AllocaInfo &Info) { >> + // Check if this is a load/store of the entire alloca. >> + if (Offset == 0 && ArrayOffset == 0 && >> + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { >> + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); >> + // This is safe for MemIntrinsics (where MemOpType is 0), integer types >> + // (which are essentially the same as the MemIntrinsics, especially with >> + // regard to copying padding between elements), or references using the >> + // aggregate type of the alloca. 
>> + if (!MemOpType || isa(MemOpType) || UsesAggregateType) { >> + if (!UsesAggregateType) { >> + if (isStore) >> + Info.isMemCpyDst = true; >> + else >> + Info.isMemCpySrc = true; >> + } >> + return; >> + } >> + } >> + // Check if the offset/size correspond to a component within the alloca type. >> + const Type *T = AI->getAllocatedType(); >> + if (TypeHasComponent(T, Offset, MemSize) && >> + (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) >> + return; >> + >> + return MarkUnsafe(Info); >> } >> >> -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory >> -/// intrinsic can be promoted by SROA. At this point, we know that the operand >> -/// of the memintrinsic is a pointer to the beginning of the allocation. >> -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, >> - unsigned OpNo, AllocaInfo &Info) { >> - // If not constant length, give up. >> - ConstantInt *Length = dyn_cast(MI->getLength()); >> - if (!Length) return MarkUnsafe(Info); >> - >> - // If not the whole aggregate, give up. >> - if (Length->getZExtValue() != >> - TD->getTypeAllocSize(AI->getType()->getElementType())) >> - return MarkUnsafe(Info); >> - >> - // We only know about memcpy/memset/memmove. >> - if (!isa(MI)) >> - return MarkUnsafe(Info); >> - >> - // Otherwise, we can transform it. Determine whether this is a memcpy/set >> - // into or out of the aggregate. >> - if (OpNo == 1) >> - Info.isMemCpyDst = true; >> - else { >> - assert(OpNo == 2); >> - Info.isMemCpySrc = true; >> +/// TypeHasComponent - Return true if T has a component type with the >> +/// specified offset and size. If Size is zero, do not check the size. 
>> +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { >> + const Type *EltTy; >> + uint64_t EltSize; >> + if (const StructType *ST = dyn_cast(T)) { >> + const StructLayout *Layout = TD->getStructLayout(ST); >> + unsigned EltIdx = Layout->getElementContainingOffset(Offset); >> + EltTy = ST->getContainedType(EltIdx); >> + EltSize = TD->getTypeAllocSize(EltTy); >> + Offset -= Layout->getElementOffset(EltIdx); >> + } else if (const ArrayType *AT = dyn_cast(T)) { >> + EltTy = AT->getElementType(); >> + EltSize = TD->getTypeAllocSize(EltTy); >> + Offset %= EltSize; >> + } else { >> + return false; >> } >> + if (Offset == 0 && (Size == 0 || EltSize == Size)) >> + return true; >> + // Check if the component spans multiple elements. >> + if (Offset + Size > EltSize) >> + return false; >> + return TypeHasComponent(EltTy, Offset, Size); >> } >> >> -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast >> -/// from an alloca are safe for SROA of that alloca. >> -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, >> - AllocaInfo &Info) { >> - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); >> - UI != E; ++UI) { >> - if (BitCastInst *BCU = dyn_cast(UI)) { >> - isSafeUseOfBitCastedAllocation(BCU, AI, Info); >> - } else if (MemIntrinsic *MI = dyn_cast(UI)) { >> - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); >> - } else if (StoreInst *SI = dyn_cast(UI)) { >> - if (SI->isVolatile()) >> - return MarkUnsafe(Info); >> - >> - // If storing the entire alloca in one chunk through a bitcasted pointer >> - // to integer, we can transform it. This happens (for example) when you >> - // cast a {i32,i32}* to i64* and store through it. This is similar to the >> - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
>> - if (isa(SI->getOperand(0)->getType()) && >> - TD->getTypeAllocSize(SI->getOperand(0)->getType()) == >> - TD->getTypeAllocSize(AI->getType()->getElementType())) { >> - Info.isMemCpyDst = true; >> - continue; >> - } >> - return MarkUnsafe(Info); >> - } else if (LoadInst *LI = dyn_cast(UI)) { >> - if (LI->isVolatile()) >> - return MarkUnsafe(Info); >> +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite >> +/// the instruction I, which references it, to use the separate elements. >> +/// Offset indicates the position within AI that is referenced by this >> +/// instruction. >> +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts) { >> + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { >> + Instruction *User = cast(*UI); >> >> - // If loading the entire alloca in one chunk through a bitcasted pointer >> - // to integer, we can transform it. This happens (for example) when you >> - // cast a {i32,i32}* to i64* and load through it. This is similar to the >> - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
>> - if (isa(LI->getType()) && >> - TD->getTypeAllocSize(LI->getType()) == >> - TD->getTypeAllocSize(AI->getType()->getElementType())) { >> - Info.isMemCpySrc = true; >> - continue; >> + if (BitCastInst *BC = dyn_cast(User)) { >> + RewriteBitCast(BC, AI, Offset, NewElts); >> + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { >> + RewriteGEP(GEPI, AI, Offset, NewElts); >> + } else if (MemIntrinsic *MI = dyn_cast(User)) { >> + ConstantInt *Length = dyn_cast(MI->getLength()); >> + uint64_t MemSize = Length->getZExtValue(); >> + if (Offset == 0 && >> + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) >> + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); >> + } else if (LoadInst *LI = dyn_cast(User)) { >> + const Type *LIType = LI->getType(); >> + if (LIType == AI->getAllocatedType()) { >> + // Replace: >> + // %res = load { i32, i32 }* %alloc >> + // with: >> + // %load.0 = load i32* %alloc.0 >> + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 >> + // %load.1 = load i32* %alloc.1 >> + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 >> + // (Also works for arrays instead of structs) >> + Value *Insert = UndefValue::get(LIType); >> + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { >> + Value *Load = new LoadInst(NewElts[i], "load", LI); >> + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); >> + } >> + LI->replaceAllUsesWith(Insert); >> + DeadInsts.push_back(LI); >> + } else if (isa(LIType) && >> + TD->getTypeAllocSize(LIType) == >> + TD->getTypeAllocSize(AI->getAllocatedType())) { >> + // If this is a load of the entire alloca to an integer, rewrite it. >> + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); >> } >> - return MarkUnsafe(Info); >> - } else if (isa(UI)) { >> - // If one user is DbgInfoIntrinsic then check if all users are >> - // DbgInfoIntrinsics. 
>> - if (OnlyUsedByDbgInfoIntrinsics(BC)) { >> - Info.needsCleanup = true; >> - return; >> + } else if (StoreInst *SI = dyn_cast(User)) { >> + Value *Val = SI->getOperand(0); >> + const Type *SIType = Val->getType(); >> + if (SIType == AI->getAllocatedType()) { >> + // Replace: >> + // store { i32, i32 } %val, { i32, i32 }* %alloc >> + // with: >> + // %val.0 = extractvalue { i32, i32 } %val, 0 >> + // store i32 %val.0, i32* %alloc.0 >> + // %val.1 = extractvalue { i32, i32 } %val, 1 >> + // store i32 %val.1, i32* %alloc.1 >> + // (Also works for arrays instead of structs) >> + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { >> + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); >> + new StoreInst(Extract, NewElts[i], SI); >> + } >> + DeadInsts.push_back(SI); >> + } else if (isa(SIType) && >> + TD->getTypeAllocSize(SIType) == >> + TD->getTypeAllocSize(AI->getAllocatedType())) { >> + // If this is a store of the entire alloca from an integer, rewrite it. >> + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); >> } >> - else >> - MarkUnsafe(Info); >> } >> - else { >> - return MarkUnsafe(Info); >> - } >> - if (Info.isUnsafe) return; >> } >> } >> >> -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes >> -/// to its first element. Transform users of the cast to use the new values >> -/// instead. >> -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, >> - SmallVector &NewElts) { >> - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); >> - while (UI != UE) { >> - Instruction *User = cast(*UI++); >> - if (BitCastInst *BCU = dyn_cast(User)) { >> - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); >> - if (BCU->use_empty()) BCU->eraseFromParent(); >> - continue; >> - } >> +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced >> +/// and recursively continue updating all of its uses. 
>> +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts) { >> + RewriteForScalarRepl(BC, AI, Offset, NewElts); >> + if (BC->getOperand(0) != AI) >> + return; >> >> - if (MemIntrinsic *MI = dyn_cast(User)) { >> - // This must be memcpy/memmove/memset of the entire aggregate. >> - // Split into one per element. >> - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); >> - continue; >> - } >> - >> - if (StoreInst *SI = dyn_cast(User)) { >> - // If this is a store of the entire alloca from an integer, rewrite it. >> - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); >> - continue; >> - } >> + // The bitcast references the original alloca. Replace its uses with >> + // references to the first new element alloca. >> + Instruction *Val = NewElts[0]; >> + if (Val->getType() != BC->getDestTy()) { >> + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); >> + Val->takeName(BC); >> + } >> + BC->replaceAllUsesWith(Val); >> + DeadInsts.push_back(BC); >> +} >> + >> +/// FindElementAndOffset - Return the index of the element containing Offset >> +/// within the specified type, which must be either a struct or an array. >> +/// Sets T to the type of the element and Offset to the offset within that >> +/// element. 
>> +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { >> + unsigned Idx = 0; >> + if (const StructType *ST = dyn_cast(T)) { >> + const StructLayout *Layout = TD->getStructLayout(ST); >> + Idx = Layout->getElementContainingOffset(Offset); >> + T = ST->getContainedType(Idx); >> + Offset -= Layout->getElementOffset(Idx); >> + } else { >> + const ArrayType *AT = dyn_cast(T); >> + assert(AT && "unexpected type for scalar replacement"); >> + T = AT->getElementType(); >> + uint64_t EltSize = TD->getTypeAllocSize(T); >> + Idx = (unsigned)(Offset / EltSize); >> + Offset -= Idx * EltSize; >> + } >> + return Idx; >> +} >> + >> +/// RewriteGEP - Check if this GEP instruction moves the pointer across >> +/// elements of the alloca that are being split apart, and if so, rewrite >> +/// the GEP to be relative to the new element. >> +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, >> + SmallVector &NewElts) { >> + uint64_t OldOffset = Offset; >> + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); >> + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), >> + &Indices[0], Indices.size()); >> + >> + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); >> + >> + const Type *T = AI->getAllocatedType(); >> + unsigned OldIdx = FindElementAndOffset(T, OldOffset); >> + if (GEPI->getOperand(0) == AI) >> + OldIdx = ~0U; // Force the GEP to be rewritten. >> + >> + T = AI->getAllocatedType(); >> + uint64_t EltOffset = Offset; >> + unsigned Idx = FindElementAndOffset(T, EltOffset); >> + >> + // If this GEP does not move the pointer across elements of the alloca >> + // being split, then it does not needs to be rewritten. >> + if (Idx == OldIdx) >> + return; >> >> - if (LoadInst *LI = dyn_cast(User)) { >> - // If this is a load of the entire alloca to an integer, rewrite it. >> - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); >> - continue; >> - } >> - >> - // Otherwise it must be some other user of a gep of the first pointer. 
Just >> - // leave these alone. >> - continue; >> - } >> + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); >> + SmallVector NewArgs; >> + NewArgs.push_back(Constant::getNullValue(i32Ty)); >> + while (EltOffset != 0) { >> + unsigned EltIdx = FindElementAndOffset(T, EltOffset); >> + NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); >> + } >> + Instruction *Val = NewElts[Idx]; >> + if (NewArgs.size() > 1) { >> + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), >> + NewArgs.end(), "", GEPI); >> + Val->takeName(GEPI); >> + } >> + if (Val->getType() != GEPI->getType()) >> + Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); >> + GEPI->replaceAllUsesWith(Val); >> + DeadInsts.push_back(GEPI); >> } >> >> /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. >> /// Rewrite it to copy or set the elements of the scalarized memory. >> -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, >> +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, >> AllocaInst *AI, >> SmallVector &NewElts) { >> - >> // If this is a memcpy/memmove, construct the other pointer as the >> // appropriate type. The "Other" pointer is the pointer that goes to memory >> // that doesn't have anything to do with the alloca that we are promoting. For >> @@ -761,28 +783,41 @@ >> LLVMContext &Context = MI->getContext(); >> unsigned MemAlignment = MI->getAlignment(); >> if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy >> - if (BCInst == MTI->getRawDest()) >> + if (Inst == MTI->getRawDest()) >> OtherPtr = MTI->getRawSource(); >> else { >> - assert(BCInst == MTI->getRawSource()); >> + assert(Inst == MTI->getRawSource()); >> OtherPtr = MTI->getRawDest(); >> } >> } >> >> - // Keep track of the other intrinsic argument, so it can be removed if it >> - // is dead when the intrinsic is replaced. 
>> - Value *PossiblyDead = OtherPtr; >> - >> // If there is an other pointer, we want to convert it to the same pointer >> // type as AI has, so we can GEP through it safely. >> if (OtherPtr) { >> - // It is likely that OtherPtr is a bitcast, if so, remove it. >> - if (BitCastInst *BC = dyn_cast(OtherPtr)) >> - OtherPtr = BC->getOperand(0); >> - // All zero GEPs are effectively bitcasts. >> - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) >> - if (GEP->hasAllZeroIndices()) >> - OtherPtr = GEP->getOperand(0); >> + >> + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an >> + // optimization, but it's also required to detect the corner case where >> + // both pointer operands are referencing the same memory, and where >> + // OtherPtr may be a bitcast or GEP that currently being rewritten. (This >> + // function is only called for mem intrinsics that access the whole >> + // aggregate, so non-zero GEPs are not an issue here.) >> + while (1) { >> + if (BitCastInst *BC = dyn_cast(OtherPtr)) { >> + OtherPtr = BC->getOperand(0); >> + continue; >> + } >> + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { >> + // All zero GEPs are effectively bitcasts. >> + if (GEP->hasAllZeroIndices()) { >> + OtherPtr = GEP->getOperand(0); >> + continue; >> + } >> + } >> + break; >> + } >> + // If OtherPtr has already been rewritten, this intrinsic will be dead. >> + if (OtherPtr == NewElts[0]) >> + return; >> >> if (ConstantExpr *BCE = dyn_cast(OtherPtr)) >> if (BCE->getOpcode() == Instruction::BitCast) >> @@ -798,7 +833,7 @@ >> // Process each element of the aggregate. 
>> Value *TheFn = MI->getOperand(0); >> const Type *BytePtrTy = MI->getRawDest()->getType(); >> - bool SROADest = MI->getRawDest() == BCInst; >> + bool SROADest = MI->getRawDest() == Inst; >> >> Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); >> >> @@ -807,12 +842,15 @@ >> Value *OtherElt = 0; >> unsigned OtherEltAlign = MemAlignment; >> >> - if (OtherPtr) { >> + if (OtherPtr == AI) { >> + OtherElt = NewElts[i]; >> + OtherEltAlign = 0; >> + } else if (OtherPtr) { >> Value *Idx[2] = { Zero, >> ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; >> - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, >> + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, >> OtherPtr->getNameStr()+"."+Twine(i), >> - MI); >> + MI); >> uint64_t EltOffset; >> const PointerType *OtherPtrTy = cast(OtherPtr->getType()); >> if (const StructType *ST = >> @@ -924,9 +962,7 @@ >> CallInst::Create(TheFn, Ops, Ops + 4, "", MI); >> } >> } >> - MI->eraseFromParent(); >> - if (PossiblyDead) >> - RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); >> + DeadInsts.push_back(MI); >> } >> >> /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that >> @@ -937,15 +973,9 @@ >> // Extract each element out of the integer according to its structure offset >> // and store the element value to the individual alloca. >> Value *SrcVal = SI->getOperand(0); >> - const Type *AllocaEltTy = AI->getType()->getElementType(); >> + const Type *AllocaEltTy = AI->getAllocatedType(); >> uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); >> >> - // If this isn't a store of an integer to the whole alloca, it may be a store >> - // to the first element. Just ignore the store in this case and normal SROA >> - // will handle it. 
>> - if (!isa(SrcVal->getType()) || >> - TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) >> - return; >> // Handle tail padding by extending the operand >> if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) >> SrcVal = new ZExtInst(SrcVal, >> @@ -1050,7 +1080,7 @@ >> } >> } >> >> - SI->eraseFromParent(); >> + DeadInsts.push_back(SI); >> } >> >> /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to >> @@ -1059,16 +1089,9 @@ >> SmallVector &NewElts) { >> // Extract each element out of the NewElts according to its structure offset >> // and form the result value. >> - const Type *AllocaEltTy = AI->getType()->getElementType(); >> + const Type *AllocaEltTy = AI->getAllocatedType(); >> uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); >> >> - // If this isn't a load of the whole alloca to an integer, it may be a load >> - // of the first element. Just ignore the load in this case and normal SROA >> - // will handle it. >> - if (!isa(LI->getType()) || >> - TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) >> - return; >> - >> DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI >> << '\n'); >> >> @@ -1139,10 +1162,9 @@ >> ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); >> >> LI->replaceAllUsesWith(ResultVal); >> - LI->eraseFromParent(); >> + DeadInsts.push_back(LI); >> } >> >> - >> /// HasPadding - Return true if the specified type has any structure or >> /// alignment padding, false otherwise. >> static bool HasPadding(const Type *Ty, const TargetData &TD) { >> @@ -1192,14 +1214,10 @@ >> // the users are safe to transform. 
>> AllocaInfo Info; >> >> - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); >> - I != E; ++I) { >> - isSafeUseOfAllocation(cast(*I), AI, Info); >> - if (Info.isUnsafe) { >> - DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " >> - << **I << '\n'); >> - return 0; >> - } >> + isSafeForScalarRepl(AI, AI, 0, 0, Info); >> + if (Info.isUnsafe) { >> + DEBUG(errs() << "Cannot transform: " << *AI << '\n'); >> + return 0; >> } >> >> // Okay, we know all the users are promotable. If the aggregate is a memcpy >> @@ -1208,7 +1226,7 @@ >> // types, but may actually be used. In these cases, we refuse to promote the >> // struct. >> if (Info.isMemCpySrc && Info.isMemCpyDst && >> - HasPadding(AI->getType()->getElementType(), *TD)) >> + HasPadding(AI->getAllocatedType(), *TD)) >> return 0; >> >> // If we require cleanup, return 1, otherwise return 3. >> @@ -1245,15 +1263,15 @@ >> // Insert the new GEP instructions, which are properly indexed. >> SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); >> Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); >> - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), >> - Indices.begin(), >> - Indices.end(), >> - GEPI->getName()+".0", GEPI); >> + Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), >> + Indices.begin(), >> + Indices.end(), >> + GEPI->getName()+".0",GEPI); >> Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); >> - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), >> - Indices.begin(), >> - Indices.end(), >> - GEPI->getName()+".1", GEPI); >> + Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), >> + Indices.begin(), >> + Indices.end(), >> + GEPI->getName()+".1", GEPI); >> // Replace all loads of the variable index GEP with loads from both >> // indexes and a select. 
>> while (!GEPI->use_empty()) { >> @@ -1264,22 +1282,24 @@ >> LI->replaceAllUsesWith(R); >> LI->eraseFromParent(); >> } >> - GEPI->eraseFromParent(); >> } >> >> - >> /// CleanupAllocaUsers - If SROA reported that it can promote the specified >> /// allocation, but only if cleaned up, perform the cleanups required. >> -void SROA::CleanupAllocaUsers(AllocaInst *AI) { >> +void SROA::CleanupAllocaUsers(Value *V) { >> // At this point, we know that the end result will be SROA'd and promoted, so >> // we can insert ugly code if required so long as sroa+mem2reg will clean it >> // up. >> - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); >> + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); >> UI != E; ) { >> User *U = *UI++; >> - if (GetElementPtrInst *GEPI = dyn_cast(U)) >> + if (isa(U)) { >> + CleanupAllocaUsers(U); >> + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { >> CleanupGEP(GEPI); >> - else { >> + CleanupAllocaUsers(GEPI); >> + if (GEPI->use_empty()) GEPI->eraseFromParent(); >> + } else { >> Instruction *I = cast(U); >> SmallVector DbgInUses; >> if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { >> @@ -1395,7 +1415,7 @@ >> >> // Compute the offset that this GEP adds to the pointer. >> SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); >> - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), >> + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), >> &Indices[0], Indices.size()); >> // See if all uses can be converted. >> if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, >> @@ -1457,7 +1477,7 @@ >> if (GetElementPtrInst *GEP = dyn_cast(User)) { >> // Compute the offset that this GEP adds to the pointer. 
>> SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); >> - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), >> + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), >> &Indices[0], Indices.size()); >> ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); >> GEP->eraseFromParent(); >> >> Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91459&view=auto >> >> ============================================================================== >> --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added) >> +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Tue Dec 15 16:00:51 2009 >> @@ -0,0 +1,89 @@ >> +; RUN: opt < %s -scalarrepl -S | FileCheck %s >> +; Radar 7441282 >> + >> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" >> +target triple = "thumbv7-apple-darwin10" >> + >> +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } >> +%struct.int16x8_t = type { <8 x i16> } >> +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } >> +%union..0anon = type { %struct.int16x8x2_t } >> + >> +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { >> +; CHECK: @test >> +; CHECK-NOT: alloca >> +; CHECK: "alloca point" >> +entry: >> + %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] >> + %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] >> + %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] >> + %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] >> + %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] >> + %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] >> + %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] >> + %"alloca point" = bitcast 
i32 0 to i32 ; [#uses=0] >> + %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + store <8 x i16> %tmp.0, <8 x i16>* %1 >> + store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr >> + %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] >> + store <8 x i16> %4, <8 x i16>* %2, align 16 >> + %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] >> + store <8 x i16> %7, <8 x i16>* %5, align 16 >> + %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] >> + %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] >> + %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] >> + %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] >> + %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] >> + %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >> + store <8 x i16> %14, <8 x i16>* %15 >> + %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] >> + %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] >> + store <8 x i16> %16, <8 x i16>* %17 >> + %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] >> + %19 = bitcast 
%struct.int16x8x2_t* %0 to i8* ; [#uses=1] >> + %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] >> + call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) >> + %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] >> + %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] >> + call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) >> + %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] >> + %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] >> + %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] >> + call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) >> + br label %return >> + >> +; CHECK: store <8 x i16> >> +; CHECK: store <8 x i16> >> + >> +return: ; preds = %entry >> + ret void >> +} >> + >> +; Radar 7466574 >> +%struct._NSRange = type { i64 } >> + >> +define arm_apcscc void @test_memcpy_self() nounwind { >> +; CHECK: @test_memcpy_self >> +; CHECK-NOT: alloca >> +; CHECK: br i1 >> +entry: >> + %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2] >> + br i1 undef, label %cond.true, label %cond.false >> + >> +cond.true: ; preds = %entry >> + %tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] >> + %tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] >> + call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) >> + ret void >> + >> +cond.false: ; preds = %entry >> + ret void >> +} >> + >> +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Wed Dec 16 11:20:02 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 
09:20:02 -0800 Subject: [llvm-commits] [llvm] r91489 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/setcc.ll In-Reply-To: <4B28BF7F.3090306@gmail.com> References: <200912160053.nBG0rDDv029906@zion.cs.uiuc.edu> <4B28B889.4060107@gmail.com> <4B28BF7F.3090306@gmail.com> Message-ID: <2F2EAA2F-9DF5-45BD-BE36-2348F5F13479@apple.com> I'll take a look at this today. Evan On Dec 16, 2009, at 3:07 AM, T?r?k Edwin wrote: >> >> Hi Evan, >> >> I noticed an infloop convertin and->zero_extend->and, could this patch >> be the cause? >> http://llvm.org/bugs/show_bug.cgi?id=5802 >> > > git bisect tells me that SVN r91380 introduces the infloops: > > 5b6187226b44f590ce7f614b128480b9c2d823ef is the first bad commit > commit 5b6187226b44f590ce7f614b128480b9c2d823ef > Author: Evan Cheng > Date: Tue Dec 15 00:52:11 2009 +0000 > > Fold (zext (and x, cst)) -> (and (zext x), cst). > > > git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk at 91380 > 91177308-0d34-0410-b5e6-96231b3b80d8 > > Best regards, > --Edwin -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/96b33d56/attachment.html From Ken.Dyck at onsemi.com Wed Dec 16 09:33:07 2009 From: Ken.Dyck at onsemi.com (Ken Dyck) Date: Wed, 16 Dec 2009 08:33:07 -0700 Subject: [llvm-commits] [Review] Preparing for non-power-of-2 machine value types in X86ISelLowering and LegalizeDAG Message-ID: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> The attached patches prepare for the introduction of non-power-of-2 machine value types (as recently discussed [1]). They contain no functional changes. They merely eliminate assumptions that incrementing/decrementing a SimpleValueType doubles/halves its size and that all non-power-of-2 types are extended. Comments appreciated. 
-Ken [1] http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-December/027598.html -------------- next part -------------- A non-text attachment was scrubbed... Name: non-po2.X86ISelLowering.diff Type: application/octet-stream Size: 873 bytes Desc: non-po2.X86ISelLowering.diff Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/594e7472/attachment.obj -------------- next part -------------- A non-text attachment was scrubbed... Name: non-po2.LegalizeDAG.diff Type: application/octet-stream Size: 1958 bytes Desc: non-po2.LegalizeDAG.diff Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/594e7472/attachment-0001.obj From grosbach at apple.com Wed Dec 16 11:52:50 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 16 Dec 2009 09:52:50 -0800 Subject: [llvm-commits] [llvm] r91496 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrThumb.td ARMInstrThumb2.td In-Reply-To: <200912160232.nBG2WsJC001077@zion.cs.uiuc.edu> References: <200912160232.nBG2WsJC001077@zion.cs.uiuc.edu> Message-ID: <3D1DF928-948F-4CA0-B107-B64E0F538CD4@apple.com> This is really fantastic to have the Thumb encoding information. Thanks for doing this, Johnny. Very cool. -Jim On Dec 15, 2009, at 6:32 PM, Johnny Chen wrote: > Author: johnny > Date: Tue Dec 15 20:32:54 2009 > New Revision: 91496 > > URL: http://llvm.org/viewvc/llvm-project?rev=91496&view=rev > Log: > Add encoding bits for some Thumb instructions. Plus explicitly set > the top two > bytes of Inst to 0x0000 for the benefit of the Thumb decoder. 
> > Modified: > llvm/trunk/lib/Target/ARM/ARMInstrFormats.td > llvm/trunk/lib/Target/ARM/ARMInstrThumb.td > llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=91496&r1=91495&r2=91496&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Dec 15 20:32:54 > 2009 > @@ -920,8 +920,7 @@ > : Thumb1I pattern>; > class T1JTI string asm, list pattern> > - : Thumb1I pattern>, > - Encoding; > + : Thumb1I pattern>; > > // Two-address instructions > class T1It @@ -986,31 +985,35 @@ > InstrItinClass itin, string opc, string asm, list > pattern> > : Thumb1pI "", pattern>; > > +class Encoding16 : Encoding { > + let Inst{31-16} = 0x0000; > +} > + > // A6.2 16-bit Thumb instruction encoding > -class T1Encoding opcode> : Encoding { > +class T1Encoding opcode> : Encoding16 { > let Inst{15-10} = opcode; > } > > // A6.2.1 Shift (immediate), add, subtract, move, and compare > encoding. > -class T1General opcode> : Encoding { > +class T1General opcode> : Encoding16 { > let Inst{15-14} = 0b00; > let Inst{13-9} = opcode; > } > > // A6.2.2 Data-processing encoding. > -class T1DataProcessing opcode> : Encoding { > +class T1DataProcessing opcode> : Encoding16 { > let Inst{15-10} = 0b010000; > let Inst{9-6} = opcode; > } > > // A6.2.3 Special data instructions and branch and exchange encoding. > -class T1Special opcode> : Encoding { > +class T1Special opcode> : Encoding16 { > let Inst{15-10} = 0b010001; > let Inst{9-6} = opcode; > } > > // A6.2.4 Load/store single data item encoding. 
> -class T1LoadStore opA, bits<3> opB> : Encoding { > +class T1LoadStore opA, bits<3> opB> : Encoding16 { > let Inst{15-12} = opA; > let Inst{11-9} = opB; > } > @@ -1021,7 +1024,7 @@ > class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP > relative > > // A6.2.5 Miscellaneous 16-bit instructions encoding. > -class T1Misc opcode> : Encoding { > +class T1Misc opcode> : Encoding16 { > let Inst{15-12} = 0b1011; > let Inst{11-5} = opcode; > } > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=91496&r1=91495&r2=91496&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue Dec 15 20:32:54 > 2009 > @@ -306,7 +306,11 @@ > def tBR_JTr : T1JTI<(outs), > (ins tGPR:$target, jtblock_operand:$jt, i32imm: > $id), > IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", > - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm: > $id)]>; > + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm: > $id)]>, > + Encoding16 { > + let Inst{15-7} = 0b010001101; > + let Inst{2-0} = 0b111; > + } > } > } > > @@ -596,7 +600,7 @@ > T1Special<0b1000>; > let Defs = [CPSR] in > def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, > - "movs\t$dst, $src", []>, Encoding { > + "movs\t$dst, $src", []>, Encoding16 { > let Inst{15-6} = 0b0000000000; > } > > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91496&r1=91495&r2=91496&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Dec 15 20:32:54 > 2009 > @@ -1911,6 +1911,7 @@ > AddrModeNone, Size2Bytes, IIC_iALUx, > "it$mask\t$cc", 
"", []> { > // 16-bit instruction. > + let Inst{31-16} = 0x0000; > let Inst{15-8} = 0b10111111; > } > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From grosbach at apple.com Wed Dec 16 11:54:34 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 16 Dec 2009 09:54:34 -0800 Subject: [llvm-commits] [llvm] r91521 - /llvm/trunk/test/CodeGen/Thumb2/large-stack.ll In-Reply-To: <200912160735.nBG7ZPtx011326@zion.cs.uiuc.edu> References: <200912160735.nBG7ZPtx011326@zion.cs.uiuc.edu> Message-ID: <14908104-D83F-4751-87CE-033D597F035E@apple.com> For reference, this fixes PR5721 (http://llvm.org/bugs/show_bug.cgi?id=5721 ). On Dec 15, 2009, at 11:35 PM, Nick Lewycky wrote: > Author: nicholas > Date: Wed Dec 16 01:35:25 2009 > New Revision: 91521 > > URL: http://llvm.org/viewvc/llvm-project?rev=91521&view=rev > Log: > Make this test pass on Linux. > > Modified: > llvm/trunk/test/CodeGen/Thumb2/large-stack.ll > > Modified: llvm/trunk/test/CodeGen/Thumb2/large-stack.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/large-stack.ll?rev=91521&r1=91520&r2=91521&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/test/CodeGen/Thumb2/large-stack.ll (original) > +++ llvm/trunk/test/CodeGen/Thumb2/large-stack.ll Wed Dec 16 > 01:35:25 2009 > @@ -1,24 +1,35 @@ > -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s > +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-apple- > darwin | FileCheck %s -check-prefix=DARWIN > +; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-linux- > gnueabi | FileCheck %s -check-prefix=LINUX > > define void @test1() { > -; CHECK: test1: > -; CHECK: sub sp, #256 > +; DARWIN: test1: > +; DARWIN: sub sp, #256 > +; LINUX: test1: > +; LINUX: sub sp, #256 > %tmp = alloca [ 64 x i32 ] , align 4 > ret void > } > > 
define void @test2() { > -; CHECK: test2: > -; CHECK: sub.w sp, sp, #4160 > -; CHECK: sub sp, #8 > +; DARWIN: test2: > +; DARWIN: sub.w sp, sp, #4160 > +; DARWIN: sub sp, #8 > +; LINUX: test2: > +; LINUX: sub.w sp, sp, #4160 > +; LINUX: sub sp, #8 > %tmp = alloca [ 4168 x i8 ] , align 4 > ret void > } > > define i32 @test3() { > -; CHECK: test3: > -; CHECK: sub.w sp, sp, #805306368 > -; CHECK: sub sp, #20 > +; DARWIN: test3: > +; DARWIN: push {r4, r7, lr} > +; DARWIN: sub.w sp, sp, #805306368 > +; DARWIN: sub sp, #20 > +; LINUX: test3: > +; LINUX: stmfd sp!, {r4, r7, r11, lr} > +; LINUX: sub.w sp, sp, #805306368 > +; LINUX: sub sp, #16 > %retval = alloca i32, align 4 > %tmp = alloca i32, align 4 > %a = alloca [805306369 x i8], align 16 > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From mrs at apple.com Wed Dec 16 11:56:01 2009 From: mrs at apple.com (Mike Stump) Date: Wed, 16 Dec 2009 09:56:01 -0800 Subject: [llvm-commits] [llvm] r91536 - /llvm/trunk/include/llvm/ADT/SmallVector.h In-Reply-To: <200912161138.nBGBc3rN009550@zion.cs.uiuc.edu> References: <200912161138.nBGBc3rN009550@zion.cs.uiuc.edu> Message-ID: <057C0E0F-91B2-470B-80A8-FF33B715D01E@apple.com> On Dec 16, 2009, at 3:38 AM, Daniel Dunbar wrote: > Fix one more missing this-> to placate that picky clang++. Did it at least have a fixit hint for being so picky? From gohman at apple.com Wed Dec 16 12:05:46 2009 From: gohman at apple.com (Dan Gohman) Date: Wed, 16 Dec 2009 10:05:46 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> Message-ID: <9528408C-04F4-48A1-B49F-1FB5B7169DD1@apple.com> On Dec 16, 2009, at 12:13 AM, Victor Hernandez wrote: > Nick, > > Thanks for the review. 
> > On Dec 15, 2009, at 10:47 PM, Nick Lewycky wrote: > >> Victor Hernandez wrote: >>> Author: hernande >>> Date: Tue Dec 15 20:52:09 2009 >>> New Revision: 91497 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev >>> Log: >>> MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to >> >> So what's the semantic here? What if the MDNode refers to an Instruction that gets spliced from one Function to another? What happens if the MDNode is attached to an Instruction that's RAUW'd with a Constant? >> >> Is this new field copied by MetadataContextImpl::copyMD? > > The semantic is that if an MDNode is created function-local, then it will continue to be function-local even if its operands are modified to no longer refer to any function-specific IR. I need to add that documentation to Metadata.h. What is this concept of function localness going to be used for? Dan From clattner at apple.com Wed Dec 16 12:14:28 2009 From: clattner at apple.com (Chris Lattner) Date: Wed, 16 Dec 2009 10:14:28 -0800 Subject: [llvm-commits] [llvm] r91536 - /llvm/trunk/include/llvm/ADT/SmallVector.h In-Reply-To: <057C0E0F-91B2-470B-80A8-FF33B715D01E@apple.com> References: <200912161138.nBGBc3rN009550@zion.cs.uiuc.edu> <057C0E0F-91B2-470B-80A8-FF33B715D01E@apple.com> Message-ID: <6DCF154E-4A8B-4439-A06E-4916996959DB@apple.com> On Dec 16, 2009, at 9:56 AM, Mike Stump wrote: > On Dec 16, 2009, at 3:38 AM, Daniel Dunbar wrote: >> Fix one more missing this-> to placate that picky clang++. > > Did it at least have a fixit hint for being so picky? No, but John is working on improving the diagnostics. 
-Chris From johnny.chen at apple.com Wed Dec 16 12:30:06 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Wed, 16 Dec 2009 10:30:06 -0800 Subject: [llvm-commits] [llvm] r91496 - in /llvm/trunk/lib/Target/ARM: ARMInstrFormats.td ARMInstrThumb.td ARMInstrThumb2.td In-Reply-To: <3D1DF928-948F-4CA0-B107-B64E0F538CD4@apple.com> References: <200912160232.nBG2WsJC001077@zion.cs.uiuc.edu> <3D1DF928-948F-4CA0-B107-B64E0F538CD4@apple.com> Message-ID: <0BEF9B4C-41DE-453C-BA26-FEE778D74D57@apple.com> You're welcome, Jim! On Dec 16, 2009, at 9:52 AM, Jim Grosbach wrote: > This is really fantastic to have the Thumb encoding information. Thanks for doing this, Johnny. Very cool. > > -Jim > > On Dec 15, 2009, at 6:32 PM, Johnny Chen wrote: > >> Author: johnny >> Date: Tue Dec 15 20:32:54 2009 >> New Revision: 91496 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91496&view=rev >> Log: >> Add encoding bits for some Thumb instructions. Plus explicitly set the top two >> bytes of Inst to 0x0000 for the benefit of the Thumb decoder. 
>> >> Modified: >> llvm/trunk/lib/Target/ARM/ARMInstrFormats.td >> llvm/trunk/lib/Target/ARM/ARMInstrThumb.td >> llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td >> >> Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=91496&r1=91495&r2=91496&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original) >> +++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Tue Dec 15 20:32:54 2009 >> @@ -920,8 +920,7 @@ >> : Thumb1I; >> class T1JTI> string asm, list pattern> >> - : Thumb1I, >> - Encoding; >> + : Thumb1I; >> >> // Two-address instructions >> class T1It> @@ -986,31 +985,35 @@ >> InstrItinClass itin, string opc, string asm, list pattern> >> : Thumb1pI; >> >> +class Encoding16 : Encoding { >> + let Inst{31-16} = 0x0000; >> +} >> + >> // A6.2 16-bit Thumb instruction encoding >> -class T1Encoding opcode> : Encoding { >> +class T1Encoding opcode> : Encoding16 { >> let Inst{15-10} = opcode; >> } >> >> // A6.2.1 Shift (immediate), add, subtract, move, and compare encoding. >> -class T1General opcode> : Encoding { >> +class T1General opcode> : Encoding16 { >> let Inst{15-14} = 0b00; >> let Inst{13-9} = opcode; >> } >> >> // A6.2.2 Data-processing encoding. >> -class T1DataProcessing opcode> : Encoding { >> +class T1DataProcessing opcode> : Encoding16 { >> let Inst{15-10} = 0b010000; >> let Inst{9-6} = opcode; >> } >> >> // A6.2.3 Special data instructions and branch and exchange encoding. >> -class T1Special opcode> : Encoding { >> +class T1Special opcode> : Encoding16 { >> let Inst{15-10} = 0b010001; >> let Inst{9-6} = opcode; >> } >> >> // A6.2.4 Load/store single data item encoding. 
>> -class T1LoadStore opA, bits<3> opB> : Encoding { >> +class T1LoadStore opA, bits<3> opB> : Encoding16 { >> let Inst{15-12} = opA; >> let Inst{11-9} = opB; >> } >> @@ -1021,7 +1024,7 @@ >> class T1LdStSP opB> : T1LoadStore<0b1001, opB>; // SP relative >> >> // A6.2.5 Miscellaneous 16-bit instructions encoding. >> -class T1Misc opcode> : Encoding { >> +class T1Misc opcode> : Encoding16 { >> let Inst{15-12} = 0b1011; >> let Inst{11-5} = opcode; >> } >> >> Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=91496&r1=91495&r2=91496&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) >> +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue Dec 15 20:32:54 2009 >> @@ -306,7 +306,11 @@ >> def tBR_JTr : T1JTI<(outs), >> (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), >> IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", >> - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; >> + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, >> + Encoding16 { >> + let Inst{15-7} = 0b010001101; >> + let Inst{2-0} = 0b111; >> + } >> } >> } >> >> @@ -596,7 +600,7 @@ >> T1Special<0b1000>; >> let Defs = [CPSR] in >> def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, >> - "movs\t$dst, $src", []>, Encoding { >> + "movs\t$dst, $src", []>, Encoding16 { >> let Inst{15-6} = 0b0000000000; >> } >> >> >> Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91496&r1=91495&r2=91496&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) >> +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Dec 15 20:32:54 2009 >> @@ -1911,6 +1911,7 @@ >> AddrModeNone, Size2Bytes, IIC_iALUx, >> "it$mask\t$cc", "", []> { 
>> // 16-bit instruction. >> + let Inst{31-16} = 0x0000; >> let Inst{15-8} = 0b10111111; >> } >> >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From stoklund at 2pi.dk Wed Dec 16 12:55:54 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 16 Dec 2009 18:55:54 -0000 Subject: [llvm-commits] [llvm] r91549 - in /llvm/trunk/lib/CodeGen: LiveIntervalAnalysis.cpp PHIElimination.cpp PHIElimination.h Message-ID: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> Author: stoklund Date: Wed Dec 16 12:55:53 2009 New Revision: 91549 URL: http://llvm.org/viewvc/llvm-project?rev=91549&view=rev Log: Reuse lowered phi nodes. Tail duplication produces lots of identical phi nodes in different basic blocks. Teach PHIElimination to reuse the join registers when lowering a phi node that is identical to an already lowered node. This saves virtual registers, and more importantly it avoids creating copies the the coalescer doesn't know how to eliminate. Teach LiveIntervalAnalysis about the phi joins with multiple uses. This patch significantly reduces code size produced by -pre-regalloc-taildup. Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp llvm/trunk/lib/CodeGen/PHIElimination.cpp llvm/trunk/lib/CodeGen/PHIElimination.h Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=91549&r1=91548&r2=91549&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Dec 16 12:55:53 2009 @@ -415,19 +415,32 @@ // first redefinition of the vreg that we have seen, go back and change // the live range in the PHI block to be a different value number. 
if (interval.containsOneValue()) { - // Remove the old range that we now know has an incorrect number. + VNInfo *VNI = interval.getValNumInfo(0); - MachineInstr *Killer = vi.Kills[0]; - SlotIndex Start = getMBBStartIdx(Killer->getParent()); - SlotIndex End = getInstructionIndex(Killer).getDefIndex(); - DEBUG({ - errs() << " Removing [" << Start << "," << End << "] from: "; - interval.print(errs(), tri_); - errs() << "\n"; - }); - interval.removeRange(Start, End); - assert(interval.ranges.size() == 1 && - "Newly discovered PHI interval has >1 ranges."); + // Phi elimination may have reused the register for multiple identical + // phi nodes. There will be a kill per phi. Remove the old ranges that + // we now know have an incorrect number. + for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) { + MachineInstr *Killer = vi.Kills[ki]; + SlotIndex Start = getMBBStartIdx(Killer->getParent()); + SlotIndex End = getInstructionIndex(Killer).getDefIndex(); + DEBUG({ + errs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; + interval.print(errs(), tri_); + }); + interval.removeRange(Start, End); + + // Replace the interval with one of a NEW value number. Note that + // this value number isn't actually defined by an instruction, weird + // huh? :) + LiveRange LR(Start, End, + interval.getNextValue(SlotIndex(Start, true), + 0, false, VNInfoAllocator)); + LR.valno->setIsPHIDef(true); + interval.addRange(LR); + LR.valno->addKill(End); + } + MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); VNI->addKill(indexes_->getTerminatorGap(killMBB)); VNI->setHasPHIKill(true); @@ -435,20 +448,6 @@ errs() << " RESULT: "; interval.print(errs(), tri_); }); - - // Replace the interval with one of a NEW value number. Note that this - // value number isn't actually defined by an instruction, weird huh? 
:) - LiveRange LR(Start, End, - interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true), - 0, false, VNInfoAllocator)); - LR.valno->setIsPHIDef(true); - DEBUG(errs() << " replace range with " << LR); - interval.addRange(LR); - LR.valno->addKill(End); - DEBUG({ - errs() << " RESULT: "; - interval.print(errs(), tri_); - }); } // In the case of PHI elimination, each variable definition is only Modified: llvm/trunk/lib/CodeGen/PHIElimination.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.cpp?rev=91549&r1=91548&r2=91549&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PHIElimination.cpp (original) +++ llvm/trunk/lib/CodeGen/PHIElimination.cpp Wed Dec 16 12:55:53 2009 @@ -35,6 +35,7 @@ STATISTIC(NumAtomic, "Number of atomic phis lowered"); STATISTIC(NumSplits, "Number of critical edges split on demand"); +STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; static RegisterPass @@ -78,6 +79,12 @@ DefMI->eraseFromParent(); } + // Clean up the lowered PHI instructions. + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); + I != E; ++I) + Fn.DeleteMachineInstr(I->first); + LoweredPHIs.clear(); + ImpDefs.clear(); VRegPHIUseCount.clear(); return Changed; @@ -168,6 +175,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { + ++NumAtomic; // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -179,6 +187,7 @@ MachineFunction &MF = *MBB.getParent(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); unsigned IncomingReg = 0; + bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? 
// Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register @@ -190,7 +199,18 @@ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg); else { - IncomingReg = MF.getRegInfo().createVirtualRegister(RC); + // Can we reuse an earlier PHI node? This only happens for critical edges, + // typically those created by tail duplication. + unsigned &entry = LoweredPHIs[MPhi]; + if (entry) { + // An identical PHI node was already lowered. Reuse the incoming register. + IncomingReg = entry; + reusedIncoming = true; + ++NumReused; + DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); + } else { + entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); + } TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); } @@ -204,8 +224,20 @@ MachineInstr *PHICopy = prior(AfterPHIsIt); if (IncomingReg) { + LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); + // Increment use count of the newly created virtual register. - LV->getVarInfo(IncomingReg).NumUses++; + VI.NumUses++; + + // When we are reusing the incoming register, it may already have been + // killed in this block. The old kill will also have been inserted at + // AfterPHIsIt, so it appears before the current PHICopy. + if (reusedIncoming) + if (MachineInstr *OldKill = VI.findKill(&MBB)) { + DEBUG(errs() << "Remove old kill from " << *OldKill); + LV->removeVirtualRegisterKilled(IncomingReg, OldKill); + DEBUG(MBB.dump()); + } // Add information to LiveVariables to know that the incoming value is // killed. Note that because the value is defined in several places (once @@ -228,7 +260,7 @@ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. 
for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) - --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(), + --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), MPhi->getOperand(i).getReg())]; // Now loop over all of the incoming arguments, changing them to copy into the @@ -266,7 +298,8 @@ FindCopyInsertPoint(opBlock, MBB, SrcReg); // Insert the copy. - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); + if (!reusedIncoming && IncomingReg) + TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -283,7 +316,7 @@ // point later. // Is it used by any PHI instructions in this block? - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; + bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! @@ -293,11 +326,10 @@ // terminator instruction at the end of the block may also use the value. // In this case, we should mark *it* as being the killing block, not the // copy. - MachineBasicBlock::iterator KillInst = prior(InsertPos); + MachineBasicBlock::iterator KillInst; MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); - if (Term != opBlock.end()) { - if (Term->readsRegister(SrcReg)) - KillInst = Term; + if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { + KillInst = Term; // Check that no other terminators use values. #ifndef NDEBUG @@ -308,7 +340,17 @@ "they are the first terminator in a block!"); } #endif + } else if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't insert a copy this time. + KillInst = Term; + while (KillInst != opBlock.begin()) + if ((--KillInst)->readsRegister(SrcReg)) + break; + } else { + // We just inserted this copy. 
+ KillInst = prior(InsertPos); } + assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); // Finally, mark it killed. LV->addVirtualRegisterKilled(SrcReg, KillInst); @@ -319,9 +361,9 @@ } } - // Really delete the PHI instruction now! - MF.DeleteMachineInstr(MPhi); - ++NumAtomic; + // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. + if (reusedIncoming || !IncomingReg) + MF.DeleteMachineInstr(MPhi); } /// analyzePHINodes - Gather information about the PHI nodes in here. In @@ -335,7 +377,7 @@ for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), + ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), BBI->getOperand(i).getReg())]; } @@ -408,3 +450,34 @@ return NMBB; } + +unsigned +PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { + if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) + return DenseMapInfo::getHashValue(MI); + unsigned hash = 0; + for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) + hash = hash*37 + DenseMapInfo:: + getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(), + MI->getOperand(ni).getReg())); + return hash; +} + +bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, + const MachineInstr *RHS) { + const MachineInstr *EmptyKey = getEmptyKey(); + const MachineInstr *TombstoneKey = getTombstoneKey(); + if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || + LHS==TombstoneKey || RHS==TombstoneKey) + return LHS==RHS; + + unsigned ne = LHS->getNumOperands(); + if (ne != RHS->getNumOperands()) + return false; + // Ignore operand 0, the defined register. 
+ for (unsigned ni = 1; ni != ne; ni += 2) + if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() || + LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB()) + return false; + return true; +} Modified: llvm/trunk/lib/CodeGen/PHIElimination.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.h?rev=91549&r1=91548&r2=91549&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PHIElimination.h (original) +++ llvm/trunk/lib/CodeGen/PHIElimination.h Wed Dec 16 12:55:53 2009 @@ -16,8 +16,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetInstrInfo.h" -#include - namespace llvm { /// Lower PHI instructions to copies. @@ -120,8 +118,8 @@ return I; } - typedef std::pair BBVRegPair; - typedef std::map VRegPHIUse; + typedef std::pair BBVRegPair; + typedef DenseMap VRegPHIUse; VRegPHIUse VRegPHIUseCount; PHIDefMap PHIDefs; @@ -129,6 +127,17 @@ // Defs of PHI sources which are implicit_def. SmallPtrSet ImpDefs; + + // Lowered PHI nodes may be reused. We provide special DenseMap traits to + // match PHI nodes with identical arguments. + struct PHINodeTraits : public DenseMapInfo { + static unsigned getHashValue(const MachineInstr *PtrVal); + static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS); + }; + + // Map reusable lowered PHI node -> incoming join register. + typedef DenseMap LoweredPHIMap; + LoweredPHIMap LoweredPHIs; }; } From dpatel at apple.com Wed Dec 16 12:56:16 2009 From: dpatel at apple.com (Devang Patel) Date: Wed, 16 Dec 2009 10:56:16 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> Message-ID: <30E02A8D-2774-41E9-B029-660D702718AE@apple.com> On Dec 16, 2009, at 12:13 AM, Victor Hernandez wrote: > Nick, > > Thanks for the review. 
> > On Dec 15, 2009, at 10:47 PM, Nick Lewycky wrote: > >> Victor Hernandez wrote: >>> Author: hernande >>> Date: Tue Dec 15 20:52:09 2009 >>> New Revision: 91497 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev >>> Log: >>> MDNodes that refer to an instruction are local to a function; in >>> that case, explicitly keep track of the function they are local to >> >> So what's the semantic here? What if the MDNode refers to an >> Instruction that gets spliced from one Function to another? What >> happens if the MDNode is attached to an Instruction that's RAUW'd >> with a Constant? >> >> Is this new field copied by MetadataContextImpl::copyMD? > > The semantic is that if an MDNode is created function-local, then it > will continue to be function-local even if its operands are modified > to no longer refer to any function-specific IR. That'll break MDNode uniqueness. Adding Function * in MDNode increases size of nodes and I anticipate that the vast majority of nodes are not function local. A better approach is to let utility function isFunctionLocal() iterate over the elements and return the respective Function *, if one or more elements are instructions. The verifier can use this utility to verify the entire module, instead of verifying metadata per function. - Devang > I need to add that documentation to Metadata.h. > If an instruction is copied from one Function to another, any > function-local metadata that refers to it will have to point to the > new instruction (and update its LocalFunction), or that operand can > be replaced with null. The changes to the copying logic and the > necessary asserts and verification are still in progress. > >> You also need to assert that any Instructions passed in also belong >> to LocalFunction. > > Good call. I will add that assert. I will also be adding > Verification of function-local MDNodes that verifies the same. 
> >> >>> Modified: >>> llvm/trunk/include/llvm/Metadata.h >>> llvm/trunk/lib/VMCore/Metadata.cpp >>> >>> Modified: llvm/trunk/include/llvm/Metadata.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=91497&r1=91496&r2=91497&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/include/llvm/Metadata.h (original) >>> +++ llvm/trunk/include/llvm/Metadata.h Tue Dec 15 20:52:09 2009 >>> @@ -111,13 +111,16 @@ >>> >>> ElementVH *Node; >>> unsigned NodeSize; >>> + Function *LocalFunction; >>> >>> protected: >>> - explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned >>> NumVals); >>> + explicit MDNode(LLVMContext&C, Value *const *Vals, unsigned >>> NumVals, >>> + Function *LocalFunction = NULL); >>> public: >>> // Constructors and destructors. >>> static MDNode *get(LLVMContext&Context, >>> - Value *const *Vals, unsigned NumVals); >>> + Value *const *Vals, unsigned NumVals, >>> + Function *LocalFunction = NULL); >>> >>> /// ~MDNode - Destroy MDNode. >>> ~MDNode(); >>> @@ -130,6 +133,9 @@ >>> >>> /// getNumElements - Return number of MDNode elements. >>> unsigned getNumElements() const { return NodeSize; } >>> + >>> + /// isFunctionLocal - Return whether MDNode is local to a >>> function. 
>>> + bool isFunctionLocal() const { return LocalFunction; } >>> >>> /// Profile - calculate a unique identifier for this MDNode to >>> collapse >>> /// duplicates >>> >>> Modified: llvm/trunk/lib/VMCore/Metadata.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=91497&r1=91496&r2=91497&view=diff >>> >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> = >>> ==================================================================== >>> --- llvm/trunk/lib/VMCore/Metadata.cpp (original) >>> +++ llvm/trunk/lib/VMCore/Metadata.cpp Tue Dec 15 20:52:09 2009 >>> @@ -49,13 +49,15 @@ >>> // >>> = >>> = >>> = >>> ----------------------------------------------------------------------= >>> ==// >>> // MDNode implementation. >>> // >>> -MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals) >>> +MDNode::MDNode(LLVMContext&C, Value *const *Vals, unsigned NumVals, >>> + Function *LocalFunction) >>> : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { >>> NodeSize = NumVals; >>> Node = new ElementVH[NodeSize]; >>> ElementVH *Ptr = Node; >>> for (unsigned i = 0; i != NumVals; ++i) >>> *Ptr++ = ElementVH(Vals[i], this); >>> + LocalFunction = LocalFunction; >> >> Perhaps you meant this->LocalFunction = LocalFunction? > > Good catch. I fixed this in r91524. 
> >> >> Nick >> >>> } >>> >>> void MDNode::Profile(FoldingSetNodeID&ID) const { >>> @@ -63,17 +65,20 @@ >>> ID.AddPointer(getElement(i)); >>> } >>> >>> -MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, >>> unsigned NumVals) { >>> +MDNode *MDNode::get(LLVMContext&Context, Value*const* Vals, >>> unsigned NumVals, >>> + Function *LocalFunction) { >>> LLVMContextImpl *pImpl = Context.pImpl; >>> FoldingSetNodeID ID; >>> for (unsigned i = 0; i != NumVals; ++i) >>> ID.AddPointer(Vals[i]); >>> + if (LocalFunction) >>> + ID.AddPointer(LocalFunction); >>> >>> void *InsertPoint; >>> MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, >>> InsertPoint); >>> if (!N) { >>> // InsertPoint will have been set by the FindNodeOrInsertPos >>> call. >>> - N = new MDNode(Context, Vals, NumVals); >>> + N = new MDNode(Context, Vals, NumVals, LocalFunction); >>> pImpl->MDNodeSet.InsertNode(N, InsertPoint); >>> } >>> return N; >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/f7983fd0/attachment.html From isanbard at gmail.com Wed Dec 16 13:00:44 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 11:00:44 -0800 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp In-Reply-To: <6a8523d60912160257ia276cc5ya1e6a1437867ce79@mail.gmail.com> References: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> <6a8523d60912160257ia276cc5ya1e6a1437867ce79@mail.gmail.com> Message-ID: <82724A97-9AFB-41E8-B2CF-683CF38F1468@gmail.com> Hi Daniel, Okay. It really shouldn't have had an effect on code gen at all (the flag isn't used by anything at the moment). I'll wait until the tree is healthy and check it in again. -bw On Dec 16, 2009, at 2:57 AM, Daniel Dunbar wrote: > Hi Bill, > > I reverted this, it was one of the patches responsible for the failing > x86_64-apple-darwin10 bootstrap in my testing. > > - Daniel > > On Mon, Dec 14, 2009 at 5:54 PM, Bill Wendling wrote: >> Author: void >> Date: Mon Dec 14 19:54:51 2009 >> New Revision: 91392 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev >> Log: >> Initial work on disabling the scheduler. This is a work in progress, and this >> stuff isn't used just yet. >> >> We want to model the GCC `-fno-schedule-insns' and `-fno-schedule-insns2' >> flags. The hypothesis is that the people who use these flags know what they are >> doing, and have hand-optimized the C code to reduce latencies and other >> conflicts. >> >> The idea behind our scheme to turn off scheduling is to create a map "on the >> side" during DAG generation. It will order the nodes by how they appeared in the >> code. This map is then used during scheduling to get the ordering. 
>> >> Modified: >> llvm/trunk/include/llvm/CodeGen/SelectionDAG.h >> llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp >> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp >> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp >> >> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91392&r1=91391&r2=91392&view=diff >> >> ============================================================================== >> --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) >> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 >> @@ -110,6 +110,46 @@ >> /// SelectionDAG. >> BumpPtrAllocator Allocator; >> >> + /// NodeOrdering - Assigns a "line number" value to each SDNode that >> + /// corresponds to the "line number" of the original LLVM instruction. This >> + /// used for turning off scheduling, because we'll forgo the normal scheduling >> + /// algorithm and output the instructions according to this ordering. >> + class NodeOrdering { >> + /// LineNo - The line of the instruction the node corresponds to. A value of >> + /// `0' means it's not assigned. >> + unsigned LineNo; >> + std::map Order; >> + >> + void operator=(const NodeOrdering&); // Do not implement. >> + NodeOrdering(const NodeOrdering&); // Do not implement. 
>> + public: >> + NodeOrdering() : LineNo(0) {} >> + >> + void add(const SDNode *Node) { >> + assert(LineNo && "Invalid line number!"); >> + Order[Node] = LineNo; >> + } >> + void remove(const SDNode *Node) { >> + std::map::iterator Itr = Order.find(Node); >> + if (Itr != Order.end()) >> + Order.erase(Itr); >> + } >> + void clear() { >> + Order.clear(); >> + LineNo = 1; >> + } >> + unsigned getLineNo(const SDNode *Node) { >> + unsigned LN = Order[Node]; >> + assert(LN && "Node isn't in ordering map!"); >> + return LN; >> + } >> + void newInst() { >> + ++LineNo; >> + } >> + >> + void dump() const; >> + } *Ordering; >> + >> /// VerifyNode - Sanity check the given node. Aborts if it is invalid. >> void VerifyNode(SDNode *N); >> >> @@ -120,6 +160,9 @@ >> DenseSet &visited, >> int level, bool &printed); >> >> + void operator=(const SelectionDAG&); // Do not implement. >> + SelectionDAG(const SelectionDAG&); // Do not implement. >> + >> public: >> SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); >> ~SelectionDAG(); >> @@ -199,6 +242,13 @@ >> return Root = N; >> } >> >> + /// NewInst - Tell the ordering object that we're processing a new >> + /// instruction. >> + void NewInst() { >> + if (Ordering) >> + Ordering->newInst(); >> + } >> + >> /// Combine - This iterates over the nodes in the SelectionDAG, folding >> /// certain types of nodes together, or eliminating superfluous nodes. 
The >> /// Level argument controls whether Combine is allowed to produce nodes and >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91392&r1=91391&r2=91392&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 >> @@ -20,10 +20,16 @@ >> #include "llvm/Target/TargetInstrInfo.h" >> #include "llvm/Target/TargetRegisterInfo.h" >> #include "llvm/Target/TargetSubtarget.h" >> +#include "llvm/Support/CommandLine.h" >> #include "llvm/Support/Debug.h" >> #include "llvm/Support/raw_ostream.h" >> using namespace llvm; >> >> +cl::opt >> +DisableInstScheduling("disable-inst-scheduling", >> + cl::init(false), >> + cl::desc("Disable instruction scheduling")); >> + >> ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) >> : ScheduleDAG(mf) { >> } >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91392&r1=91391&r2=91392&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009 >> @@ -48,6 +48,8 @@ >> #include >> using namespace llvm; >> >> +extern cl::opt DisableInstScheduling; >> + >> /// makeVTList - Return an instance of the SDVTList struct initialized with the >> /// specified members. >> static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { >> @@ -552,6 +554,9 @@ >> } >> >> DeallocateNode(N); >> + >> + // Remove the ordering of this node. 
>> + if (Ordering) Ordering->remove(N); >> } >> } >> >> @@ -577,6 +582,9 @@ >> N->DropOperands(); >> >> DeallocateNode(N); >> + >> + // Remove the ordering of this node. >> + if (Ordering) Ordering->remove(N); >> } >> >> void SelectionDAG::DeallocateNode(SDNode *N) { >> @@ -588,6 +596,9 @@ >> N->NodeType = ISD::DELETED_NODE; >> >> NodeAllocator.Deallocate(AllNodes.remove(N)); >> + >> + // Remove the ordering of this node. >> + if (Ordering) Ordering->remove(N); >> } >> >> /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that >> @@ -691,7 +702,9 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); >> AddNodeIDCustom(ID, N); >> - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + if (Ordering) Ordering->remove(Node); >> + return Node; >> } >> >> /// FindModifiedNodeSlot - Find a slot for the specified node if its operands >> @@ -708,7 +721,9 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); >> AddNodeIDCustom(ID, N); >> - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + if (Ordering) Ordering->remove(Node); >> + return Node; >> } >> >> >> @@ -725,7 +740,9 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); >> AddNodeIDCustom(ID, N); >> - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >> + if (Ordering) Ordering->remove(Node); >> + return Node; >> } >> >> /// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
>> @@ -778,8 +795,13 @@ >> SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) >> : TLI(tli), FLI(fli), DW(0), >> EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), >> - getVTList(MVT::Other)), Root(getEntryNode()) { >> + getVTList(MVT::Other)), >> + Root(getEntryNode()), Ordering(0) { >> AllNodes.push_back(&EntryNode); >> + if (DisableInstScheduling) { >> + Ordering = new NodeOrdering(); >> + Ordering->add(&EntryNode); >> + } >> } >> >> void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, >> @@ -792,6 +814,7 @@ >> >> SelectionDAG::~SelectionDAG() { >> allnodes_clear(); >> + delete Ordering; >> } >> >> void SelectionDAG::allnodes_clear() { >> @@ -817,6 +840,10 @@ >> EntryNode.UseList = 0; >> AllNodes.push_back(&EntryNode); >> Root = getEntryNode(); >> + if (DisableInstScheduling) { >> + Ordering = new NodeOrdering(); >> + Ordering->add(&EntryNode); >> + } >> } >> >> SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { >> @@ -877,14 +904,17 @@ >> ID.AddPointer(&Val); >> void *IP = 0; >> SDNode *N = NULL; >> - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) >> + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { >> + if (Ordering) Ordering->add(N); >> if (!VT.isVector()) >> return SDValue(N, 0); >> + } >> if (!N) { >> N = NodeAllocator.Allocate(); >> new (N) ConstantSDNode(isT, &Val, EltVT); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> } >> >> SDValue Result(N, 0); >> @@ -921,14 +951,17 @@ >> ID.AddPointer(&V); >> void *IP = 0; >> SDNode *N = NULL; >> - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) >> + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { >> + if (Ordering) Ordering->add(N); >> if (!VT.isVector()) >> return SDValue(N, 0); >> + } >> if (!N) { >> N = NodeAllocator.Allocate(); >> new (N) ConstantFPSDNode(isTarget, &V, EltVT); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> } >> >> SDValue Result(N, 0); >> @@ 
-983,12 +1016,15 @@ >> ID.AddInteger(Offset); >> ID.AddInteger(TargetFlags); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -998,12 +1034,15 @@ >> AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); >> ID.AddInteger(FI); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) FrameIndexSDNode(FI, VT, isTarget); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1017,12 +1056,15 @@ >> ID.AddInteger(JTI); >> ID.AddInteger(TargetFlags); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1042,12 +1084,15 @@ >> ID.AddPointer(C); >> ID.AddInteger(TargetFlags); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if 
(Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1068,12 +1113,15 @@ >> C->AddSelectionDAGCSEId(ID); >> ID.AddInteger(TargetFlags); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1082,12 +1130,15 @@ >> AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); >> ID.AddPointer(MBB); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) BasicBlockSDNode(MBB); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1103,6 +1154,7 @@ >> N = NodeAllocator.Allocate(); >> new (N) VTSDNode(VT); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1112,6 +1164,7 @@ >> N = NodeAllocator.Allocate(); >> new (N) ExternalSymbolSDNode(false, Sym, 0, VT); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1124,6 +1177,7 @@ >> N = NodeAllocator.Allocate(); >> new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1136,6 +1190,7 @@ >> new (N) CondCodeSDNode(Cond); >> CondCodeNodes[Cond] = N; >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> } >> return SDValue(CondCodeNodes[Cond], 0); >> } >> @@ -1228,8 +1283,10 @@ >> ID.AddInteger(MaskVec[i]); >> >> void* IP = 0; 
>> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> >> // Allocate the mask array for the node out of the BumpPtrAllocator, since >> // SDNode doesn't have access to it. This memory will be "leaked" when >> @@ -1241,6 +1298,7 @@ >> new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1258,12 +1316,15 @@ >> SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; >> AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); >> void* IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> CvtRndSatSDNode *N = NodeAllocator.Allocate(); >> new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1272,12 +1333,15 @@ >> AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); >> ID.AddInteger(RegNo); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) RegisterSDNode(RegNo, VT); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1289,12 +1353,15 @@ >> AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); >> ID.AddInteger(LabelID); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); 
>> new (N) LabelSDNode(Opcode, dl, Root, LabelID); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1308,12 +1375,15 @@ >> ID.AddPointer(BA); >> ID.AddInteger(TargetFlags); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -1326,13 +1396,16 @@ >> ID.AddPointer(V); >> >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> >> SDNode *N = NodeAllocator.Allocate(); >> new (N) SrcValueSDNode(V); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -2243,13 +2316,16 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) SDNode(Opcode, DL, getVTList(VT)); >> CSEMap.InsertNode(N, IP); >> >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -2473,8 +2549,10 @@ >> SDValue Ops[1] = { Operand }; >> AddNodeIDNode(ID, Opcode, VTs, Ops, 1); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> N = 
NodeAllocator.Allocate(); >> new (N) UnarySDNode(Opcode, DL, VTs, Operand); >> CSEMap.InsertNode(N, IP); >> @@ -2484,6 +2562,7 @@ >> } >> >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -2891,8 +2970,10 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opcode, VTs, Ops, 2); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> N = NodeAllocator.Allocate(); >> new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); >> CSEMap.InsertNode(N, IP); >> @@ -2902,6 +2983,7 @@ >> } >> >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -2968,8 +3050,10 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opcode, VTs, Ops, 3); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> N = NodeAllocator.Allocate(); >> new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); >> CSEMap.InsertNode(N, IP); >> @@ -2977,7 +3061,9 @@ >> N = NodeAllocator.Allocate(); >> new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); >> } >> + >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -3573,12 +3659,14 @@ >> void* IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> SDNode* N = NodeAllocator.Allocate(); >> new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3636,12 +3724,14 @@ >> void* IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> 
cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> SDNode* N = NodeAllocator.Allocate(); >> new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3714,6 +3804,7 @@ >> void *IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> >> @@ -3725,6 +3816,7 @@ >> new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); >> } >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3789,12 +3881,14 @@ >> void *IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3865,12 +3959,14 @@ >> void *IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3925,12 +4021,14 @@ >> void *IP = 0; >> if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> cast(E)->refineAlignment(MMO); >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) 
Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -3947,14 +4045,17 @@ >> ID.AddInteger(ST->getMemoryVT().getRawBits()); >> ID.AddInteger(ST->getRawSubclassData()); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> SDNode *N = NodeAllocator.Allocate(); >> new (N) StoreSDNode(Ops, dl, VTs, AM, >> ST->isTruncatingStore(), ST->getMemoryVT(), >> ST->getMemOperand()); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> return SDValue(N, 0); >> } >> >> @@ -4020,8 +4121,10 @@ >> AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); >> void *IP = 0; >> >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> >> N = NodeAllocator.Allocate(); >> new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); >> @@ -4032,6 +4135,7 @@ >> } >> >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -4087,8 +4191,10 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return SDValue(E, 0); >> + } >> if (NumOps == 1) { >> N = NodeAllocator.Allocate(); >> new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); >> @@ -4119,6 +4225,7 @@ >> } >> } >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -4581,8 +4688,10 @@ >> if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); >> - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) 
Ordering->add(ON); >> return ON; >> + } >> } >> >> if (!RemoveNodeFromCSEMaps(N)) >> @@ -4646,6 +4755,7 @@ >> >> if (IP) >> CSEMap.InsertNode(N, IP); // Memoize the new node. >> + if (Ordering) Ordering->add(N); >> return N; >> } >> >> @@ -4784,8 +4894,10 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); >> IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return cast(E); >> + } >> } >> >> // Allocate a new MachineSDNode. >> @@ -4807,6 +4919,7 @@ >> CSEMap.InsertNode(N, IP); >> >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); >> #ifndef NDEBUG >> VerifyNode(N); >> #endif >> @@ -4843,8 +4956,10 @@ >> FoldingSetNodeID ID; >> AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); >> void *IP = 0; >> - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >> + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >> + if (Ordering) Ordering->add(E); >> return E; >> + } >> } >> return NULL; >> } >> @@ -6011,6 +6126,9 @@ >> errs() << "\n\n"; >> } >> >> +void SelectionDAG::NodeOrdering::dump() const { >> +} >> + >> void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { >> print_types(OS, G); >> print_details(OS, G); >> @@ -6151,4 +6269,3 @@ >> return false; >> return true; >> } >> - >> >> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91392&r1=91391&r2=91392&view=diff >> >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Dec 14 19:54:51 2009 >> @@ -583,6 +583,9 @@ >> } >> >> void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { >> + // Tell the DAG that we're processing a new instruction. 
>> + DAG.NewInst(); >> + >> // Note: this doesn't use InstVisitor, because it has to work with >> // ConstantExpr's in addition to instructions. >> switch (Opcode) { >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From vhernandez at apple.com Wed Dec 16 13:12:14 2009 From: vhernandez at apple.com (Victor Hernandez) Date: Wed, 16 Dec 2009 11:12:14 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: <9528408C-04F4-48A1-B49F-1FB5B7169DD1@apple.com> References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> <9528408C-04F4-48A1-B49F-1FB5B7169DD1@apple.com> Message-ID: <661BF611-0C94-42FA-AAF0-77F336AFA5B1@apple.com> Dan, Initially, function-localness is going to be used to verify that metadata that refers to function-local IR is only used in that function. I want to be able to detect inlining bugs where the metadata was incorrectly cloned. In the long term, I believe that function-localness will allow us to avoid iterating over the complete list of global metadata while determining if a cloned or modified instruction is used by any metadata. Victor On Dec 16, 2009, at 10:05 AM, Dan Gohman wrote: > > On Dec 16, 2009, at 12:13 AM, Victor Hernandez wrote: > >> Nick, >> >> Thanks for the review. 
>> >> On Dec 15, 2009, at 10:47 PM, Nick Lewycky wrote: >> >>> Victor Hernandez wrote: >>>> Author: hernande >>>> Date: Tue Dec 15 20:52:09 2009 >>>> New Revision: 91497 >>>> >>>> URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev >>>> Log: >>>> MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to >>> >>> So what's the semantic here? What if the MDNode refers to an Instruction that gets spliced from one Function to another? What happens if the MDNode is attached to an Instruction that's RAUW'd with a Constant? >>> >>> Is this new field copied by MetadataContextImpl::copyMD? >> >> The semantic is that if an MDNode is created function-local, then it will continue to be function-local even if its operands are modified to no longer refer to any function-specific IR. I need to add that documentation to Metadata.h. > > What is this concept of function localness going to be used for? > > Dan > From isanbard at gmail.com Wed Dec 16 13:36:42 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 19:36:42 -0000 Subject: [llvm-commits] [llvm] r91552 - /llvm/trunk/utils/unittest/googletest/gtest.cc Message-ID: <200912161936.nBGJaguJ028112@zion.cs.uiuc.edu> Author: void Date: Wed Dec 16 13:36:42 2009 New Revision: 91552 URL: http://llvm.org/viewvc/llvm-project?rev=91552&view=rev Log: Remove superfluous 'extern' variable that was causing a warning with clang. 
Modified: llvm/trunk/utils/unittest/googletest/gtest.cc Modified: llvm/trunk/utils/unittest/googletest/gtest.cc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/unittest/googletest/gtest.cc?rev=91552&r1=91551&r2=91552&view=diff ============================================================================== --- llvm/trunk/utils/unittest/googletest/gtest.cc (original) +++ llvm/trunk/utils/unittest/googletest/gtest.cc Wed Dec 16 13:36:42 2009 @@ -532,7 +532,7 @@ // The value of GetTestTypeId() as seen from within the Google Test // library. This is solely for testing GetTestTypeId(). -extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); +const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); // This predicate-formatter checks that 'results' contains a test part // failure of the given type and that the failure message contains the From evan.cheng at apple.com Wed Dec 16 13:41:36 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 11:41:36 -0800 Subject: [llvm-commits] [llvm] r91549 - in /llvm/trunk/lib/CodeGen: LiveIntervalAnalysis.cpp PHIElimination.cpp PHIElimination.h In-Reply-To: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> References: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> Message-ID: <7A7A7CD6-6D4F-44E7-81A7-FED2037E1D1A@apple.com> Yay for less dumb phi elimination! Evan On Dec 16, 2009, at 10:55 AM, Jakob Stoklund Olesen wrote: > Author: stoklund > Date: Wed Dec 16 12:55:53 2009 > New Revision: 91549 > > URL: http://llvm.org/viewvc/llvm-project?rev=91549&view=rev > Log: > Reuse lowered phi nodes. > > Tail duplication produces lots of identical phi nodes in different basic > blocks. Teach PHIElimination to reuse the join registers when lowering a phi > node that is identical to an already lowered node. This saves virtual > registers, and more importantly it avoids creating copies that the coalescer > doesn't know how to eliminate. > > Teach LiveIntervalAnalysis about the phi joins with multiple uses. 
> > This patch significantly reduces code size produced by -pre-regalloc-taildup. > > Modified: > llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > llvm/trunk/lib/CodeGen/PHIElimination.cpp > llvm/trunk/lib/CodeGen/PHIElimination.h > > Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=91549&r1=91548&r2=91549&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) > +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Dec 16 12:55:53 2009 > @@ -415,19 +415,32 @@ > // first redefinition of the vreg that we have seen, go back and change > // the live range in the PHI block to be a different value number. > if (interval.containsOneValue()) { > - // Remove the old range that we now know has an incorrect number. > + > VNInfo *VNI = interval.getValNumInfo(0); > - MachineInstr *Killer = vi.Kills[0]; > - SlotIndex Start = getMBBStartIdx(Killer->getParent()); > - SlotIndex End = getInstructionIndex(Killer).getDefIndex(); > - DEBUG({ > - errs() << " Removing [" << Start << "," << End << "] from: "; > - interval.print(errs(), tri_); > - errs() << "\n"; > - }); > - interval.removeRange(Start, End); > - assert(interval.ranges.size() == 1 && > - "Newly discovered PHI interval has >1 ranges."); > + // Phi elimination may have reused the register for multiple identical > + // phi nodes. There will be a kill per phi. Remove the old ranges that > + // we now know have an incorrect number. 
> + for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) { > + MachineInstr *Killer = vi.Kills[ki]; > + SlotIndex Start = getMBBStartIdx(Killer->getParent()); > + SlotIndex End = getInstructionIndex(Killer).getDefIndex(); > + DEBUG({ > + errs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; > + interval.print(errs(), tri_); > + }); > + interval.removeRange(Start, End); > + > + // Replace the interval with one of a NEW value number. Note that > + // this value number isn't actually defined by an instruction, weird > + // huh? :) > + LiveRange LR(Start, End, > + interval.getNextValue(SlotIndex(Start, true), > + 0, false, VNInfoAllocator)); > + LR.valno->setIsPHIDef(true); > + interval.addRange(LR); > + LR.valno->addKill(End); > + } > + > MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); > VNI->addKill(indexes_->getTerminatorGap(killMBB)); > VNI->setHasPHIKill(true); > @@ -435,20 +448,6 @@ > errs() << " RESULT: "; > interval.print(errs(), tri_); > }); > - > - // Replace the interval with one of a NEW value number. Note that this > - // value number isn't actually defined by an instruction, weird huh? 
:) > - LiveRange LR(Start, End, > - interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true), > - 0, false, VNInfoAllocator)); > - LR.valno->setIsPHIDef(true); > - DEBUG(errs() << " replace range with " << LR); > - interval.addRange(LR); > - LR.valno->addKill(End); > - DEBUG({ > - errs() << " RESULT: "; > - interval.print(errs(), tri_); > - }); > } > > // In the case of PHI elimination, each variable definition is only > > Modified: llvm/trunk/lib/CodeGen/PHIElimination.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.cpp?rev=91549&r1=91548&r2=91549&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/PHIElimination.cpp (original) > +++ llvm/trunk/lib/CodeGen/PHIElimination.cpp Wed Dec 16 12:55:53 2009 > @@ -35,6 +35,7 @@ > > STATISTIC(NumAtomic, "Number of atomic phis lowered"); > STATISTIC(NumSplits, "Number of critical edges split on demand"); > +STATISTIC(NumReused, "Number of reused lowered phis"); > > char PHIElimination::ID = 0; > static RegisterPass > @@ -78,6 +79,12 @@ > DefMI->eraseFromParent(); > } > > + // Clean up the lowered PHI instructions. > + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); > + I != E; ++I) > + Fn.DeleteMachineInstr(I->first); > + LoweredPHIs.clear(); > + > ImpDefs.clear(); > VRegPHIUseCount.clear(); > return Changed; > @@ -168,6 +175,7 @@ > void llvm::PHIElimination::LowerAtomicPHINode( > MachineBasicBlock &MBB, > MachineBasicBlock::iterator AfterPHIsIt) { > + ++NumAtomic; > // Unlink the PHI node from the basic block, but don't delete the PHI yet. > MachineInstr *MPhi = MBB.remove(MBB.begin()); > > @@ -179,6 +187,7 @@ > MachineFunction &MF = *MBB.getParent(); > const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); > unsigned IncomingReg = 0; > + bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? 
> > // Insert a register to register copy at the top of the current block (but > // after any remaining phi nodes) which copies the new incoming register > @@ -190,7 +199,18 @@ > BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), > TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg); > else { > - IncomingReg = MF.getRegInfo().createVirtualRegister(RC); > + // Can we reuse an earlier PHI node? This only happens for critical edges, > + // typically those created by tail duplication. > + unsigned &entry = LoweredPHIs[MPhi]; > + if (entry) { > + // An identical PHI node was already lowered. Reuse the incoming register. > + IncomingReg = entry; > + reusedIncoming = true; > + ++NumReused; > + DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); > + } else { > + entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); > + } > TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); > } > > @@ -204,8 +224,20 @@ > MachineInstr *PHICopy = prior(AfterPHIsIt); > > if (IncomingReg) { > + LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); > + > // Increment use count of the newly created virtual register. > - LV->getVarInfo(IncomingReg).NumUses++; > + VI.NumUses++; > + > + // When we are reusing the incoming register, it may already have been > + // killed in this block. The old kill will also have been inserted at > + // AfterPHIsIt, so it appears before the current PHICopy. > + if (reusedIncoming) > + if (MachineInstr *OldKill = VI.findKill(&MBB)) { > + DEBUG(errs() << "Remove old kill from " << *OldKill); > + LV->removeVirtualRegisterKilled(IncomingReg, OldKill); > + DEBUG(MBB.dump()); > + } > > // Add information to LiveVariables to know that the incoming value is > // killed. Note that because the value is defined in several places (once > @@ -228,7 +260,7 @@ > > // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. 
> for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) > - --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(), > + --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), > MPhi->getOperand(i).getReg())]; > > // Now loop over all of the incoming arguments, changing them to copy into the > @@ -266,7 +298,8 @@ > FindCopyInsertPoint(opBlock, MBB, SrcReg); > > // Insert the copy. > - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); > + if (!reusedIncoming && IncomingReg) > + TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); > > // Now update live variable information if we have it. Otherwise we're done > if (!LV) continue; > @@ -283,7 +316,7 @@ > // point later. > > // Is it used by any PHI instructions in this block? > - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; > + bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; > > // Okay, if we now know that the value is not live out of the block, we can > // add a kill marker in this block saying that it kills the incoming value! > @@ -293,11 +326,10 @@ > // terminator instruction at the end of the block may also use the value. > // In this case, we should mark *it* as being the killing block, not the > // copy. > - MachineBasicBlock::iterator KillInst = prior(InsertPos); > + MachineBasicBlock::iterator KillInst; > MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); > - if (Term != opBlock.end()) { > - if (Term->readsRegister(SrcReg)) > - KillInst = Term; > + if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { > + KillInst = Term; > > // Check that no other terminators use values. > #ifndef NDEBUG > @@ -308,7 +340,17 @@ > "they are the first terminator in a block!"); > } > #endif > + } else if (reusedIncoming || !IncomingReg) { > + // We may have to rewind a bit if we didn't insert a copy this time. 
> + KillInst = Term; > + while (KillInst != opBlock.begin()) > + if ((--KillInst)->readsRegister(SrcReg)) > + break; > + } else { > + // We just inserted this copy. > + KillInst = prior(InsertPos); > } > + assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); > > // Finally, mark it killed. > LV->addVirtualRegisterKilled(SrcReg, KillInst); > @@ -319,9 +361,9 @@ > } > } > > - // Really delete the PHI instruction now! > - MF.DeleteMachineInstr(MPhi); > - ++NumAtomic; > + // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. > + if (reusedIncoming || !IncomingReg) > + MF.DeleteMachineInstr(MPhi); > } > > /// analyzePHINodes - Gather information about the PHI nodes in here. In > @@ -335,7 +377,7 @@ > for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); > BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) > for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) > - ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), > + ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), > BBI->getOperand(i).getReg())]; > } > > @@ -408,3 +450,34 @@ > > return NMBB; > } > + > +unsigned > +PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { > + if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) > + return DenseMapInfo::getHashValue(MI); > + unsigned hash = 0; > + for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) > + hash = hash*37 + DenseMapInfo:: > + getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(), > + MI->getOperand(ni).getReg())); > + return hash; > +} > + > +bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, > + const MachineInstr *RHS) { > + const MachineInstr *EmptyKey = getEmptyKey(); > + const MachineInstr *TombstoneKey = getTombstoneKey(); > + if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || > + LHS==TombstoneKey || RHS==TombstoneKey) > + return LHS==RHS; > + > + unsigned ne = 
LHS->getNumOperands(); > + if (ne != RHS->getNumOperands()) > + return false; > + // Ignore operand 0, the defined register. > + for (unsigned ni = 1; ni != ne; ni += 2) > + if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() || > + LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB()) > + return false; > + return true; > +} > > Modified: llvm/trunk/lib/CodeGen/PHIElimination.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.h?rev=91549&r1=91548&r2=91549&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/PHIElimination.h (original) > +++ llvm/trunk/lib/CodeGen/PHIElimination.h Wed Dec 16 12:55:53 2009 > @@ -16,8 +16,6 @@ > #include "llvm/CodeGen/MachineFunctionPass.h" > #include "llvm/Target/TargetInstrInfo.h" > > -#include > - > namespace llvm { > > /// Lower PHI instructions to copies. > @@ -120,8 +118,8 @@ > return I; > } > > - typedef std::pair BBVRegPair; > - typedef std::map VRegPHIUse; > + typedef std::pair BBVRegPair; > + typedef DenseMap VRegPHIUse; > > VRegPHIUse VRegPHIUseCount; > PHIDefMap PHIDefs; > @@ -129,6 +127,17 @@ > > // Defs of PHI sources which are implicit_def. > SmallPtrSet ImpDefs; > + > + // Lowered PHI nodes may be reused. We provide special DenseMap traits to > + // match PHI nodes with identical arguments. > + struct PHINodeTraits : public DenseMapInfo { > + static unsigned getHashValue(const MachineInstr *PtrVal); > + static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS); > + }; > + > + // Map reusable lowered PHI node -> incoming join register. 
> + typedef DenseMap LoweredPHIMap; > + LoweredPHIMap LoweredPHIs; > }; > > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From grosbach at apple.com Wed Dec 16 13:43:02 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 16 Dec 2009 19:43:02 -0000 Subject: [llvm-commits] [llvm] r91554 - in /llvm/trunk: include/llvm/Target/TargetInstrDesc.h lib/CodeGen/MachineInstr.cpp utils/TableGen/CodeGenInstruction.cpp Message-ID: <200912161943.nBGJh3UX028383@zion.cs.uiuc.edu> Author: grosbach Date: Wed Dec 16 13:43:02 2009 New Revision: 91554 URL: http://llvm.org/viewvc/llvm-project?rev=91554&view=rev Log: Add @earlyclobber TableGen constraint Modified: llvm/trunk/include/llvm/Target/TargetInstrDesc.h llvm/trunk/lib/CodeGen/MachineInstr.cpp llvm/trunk/utils/TableGen/CodeGenInstruction.cpp Modified: llvm/trunk/include/llvm/Target/TargetInstrDesc.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrDesc.h?rev=91554&r1=91553&r2=91554&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetInstrDesc.h (original) +++ llvm/trunk/include/llvm/Target/TargetInstrDesc.h Wed Dec 16 13:43:02 2009 @@ -25,9 +25,10 @@ //===----------------------------------------------------------------------===// namespace TOI { - // Operand constraints: only "tied_to" for now. + // Operand constraints enum OperandConstraint { - TIED_TO = 0 // Must be allocated the same register as. + TIED_TO = 0, // Must be allocated the same register as. 
+ EARLY_CLOBBER // Operand is an early clobber register operand }; /// OperandFlags - These are flags set on operands, but should be considered Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=91554&r1=91553&r2=91554&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Wed Dec 16 13:43:02 2009 @@ -555,8 +555,13 @@ Operands.back().ParentMI = this; // If the operand is a register, update the operand's use list. - if (Op.isReg()) + if (Op.isReg()) { Operands.back().AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + unsigned OpNo = Operands.size() - 1; + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } return; } } @@ -573,8 +578,12 @@ // Do explicitly set the reginfo for this operand though, to ensure the // next/prev fields are properly nulled out. - if (Operands[OpNo].isReg()) + if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(0); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } } else if (Operands.size()+1 <= Operands.capacity()) { // Otherwise, we have to remove register operands from their register use @@ -594,8 +603,12 @@ Operands.insert(Operands.begin()+OpNo, Op); Operands[OpNo].ParentMI = this; - if (Operands[OpNo].isReg()) + if (Operands[OpNo].isReg()) { Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } // Re-add all the implicit ops. 
for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { @@ -613,6 +626,11 @@ // Re-add all the operands. AddRegOperandsToUseLists(*RegInfo); + + // If the register operand is flagged as early, mark the operand as such + if (Operands[OpNo].isReg() + && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); } } Modified: llvm/trunk/utils/TableGen/CodeGenInstruction.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenInstruction.cpp?rev=91554&r1=91553&r2=91554&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenInstruction.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenInstruction.cpp Wed Dec 16 13:43:02 2009 @@ -18,14 +18,35 @@ using namespace llvm; static void ParseConstraint(const std::string &CStr, CodeGenInstruction *I) { - // FIXME: Only supports TIED_TO for now. + // EARLY_CLOBBER: @early $reg + std::string::size_type wpos = CStr.find_first_of(" \t"); + std::string::size_type start = CStr.find_first_not_of(" \t"); + std::string Tok = CStr.substr(start, wpos - start); + if (Tok == "@earlyclobber") { + std::string Name = CStr.substr(wpos+1); + wpos = Name.find_first_not_of(" \t"); + if (wpos == std::string::npos) + throw "Illegal format for @earlyclobber constraint: '" + CStr + "'"; + Name = Name.substr(wpos); + std::pair Op = + I->ParseOperandName(Name, false); + + // Build the string for the operand + std::string OpConstraint = "(1 << TOI::EARLY_CLOBBER)"; + if (!I->OperandList[Op.first].Constraints[Op.second].empty()) + throw "Operand '" + Name + "' cannot have multiple constraints!"; + I->OperandList[Op.first].Constraints[Op.second] = OpConstraint; + return; + } + + // Only other constraint is "TIED_TO" for now. 
std::string::size_type pos = CStr.find_first_of('='); assert(pos != std::string::npos && "Unrecognized constraint"); - std::string::size_type start = CStr.find_first_not_of(" \t"); + start = CStr.find_first_not_of(" \t"); std::string Name = CStr.substr(start, pos - start); // TIED_TO: $src1 = $dst - std::string::size_type wpos = Name.find_first_of(" \t"); + wpos = Name.find_first_of(" \t"); if (wpos == std::string::npos) throw "Illegal format for tied-to constraint: '" + CStr + "'"; std::string DestOpName = Name.substr(0, wpos); @@ -47,7 +68,6 @@ std::string OpConstraint = "((" + utostr(FlatOpNo) + " << 16) | (1 << TOI::TIED_TO))"; - if (!I->OperandList[DestOp.first].Constraints[DestOp.second].empty()) throw "Operand '" + DestOpName + "' cannot have multiple constraints!"; I->OperandList[DestOp.first].Constraints[DestOp.second] = OpConstraint; From grosbach at apple.com Wed Dec 16 13:44:06 2009 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 16 Dec 2009 19:44:06 -0000 Subject: [llvm-commits] [llvm] r91555 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb2.td Message-ID: <200912161944.nBGJi6kc028431@zion.cs.uiuc.edu> Author: grosbach Date: Wed Dec 16 13:44:06 2009 New Revision: 91555 URL: http://llvm.org/viewvc/llvm-project?rev=91555&view=rev Log: Mark STREX* as earlyclobber for the success result register. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=91555&r1=91554&r2=91555&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Wed Dec 16 13:44:06 2009 @@ -1740,7 +1740,7 @@ []>; } -let mayStore = 1 in { +let mayStore = 1, Constraints = "@earlyclobber $success" in { def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), NoItinerary, "strexb", "\t$success, $src, [$ptr]", Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=91555&r1=91554&r2=91555&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Wed Dec 16 13:44:06 2009 @@ -1752,7 +1752,7 @@ [], {?, ?, ?, ?}>; } -let mayStore = 1 in { +let mayStore = 1, Constraints = "@earlyclobber $success" in { def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexb", "\t$success, $src, [$ptr]", "", []>; From ken at linux.vnet.ibm.com Wed Dec 16 13:45:15 2009 From: ken at linux.vnet.ibm.com (Ken Werner) Date: Wed, 16 Dec 2009 20:45:15 +0100 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> References: <200912151255.29701.ken@linux.vnet.ibm.com> <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> Message-ID: <200912162045.15956.ken@linux.vnet.ibm.com> On Tuesday 15 December 2009 07:43:49 pm you wrote: > On Dec 15, 2009, at 3:55 AMPST, Ken Werner wrote: > > Hi, > > The attached patch adds support for indirect calls (through function > 
> pointer) > > according to the ABI (http://refspecs.linuxfoundation.org/ELF/ppc64/PPC- > > elf64abi-1.9.html#FUNC-CALLS). The patch was made against revision > > 91275. > I can't speak for SVR4 ppc64, but I'm confident these 3 patches won't > break any other target. > > It's preferable to get isPPC64 from PPCSubTarget, as elsewhere > (inconsistently). Here that seems to mean an extra parameter. > Why did you add a Chain to PPCISD::NOP? > Do you have write access? The updated version of the patch obtains isPPC64 from PPCSubTarget and leaves the PPCISD::NOP without SDNPHasChain. I don't have commit privileges. Thanks for reviewing the patch. -ken -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-fptr.patch Type: text/x-patch Size: 8123 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/5f3edece/attachment.bin From anton at korobeynikov.info Wed Dec 16 13:51:07 2009 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 16 Dec 2009 22:51:07 +0300 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: <200912162045.15956.ken@linux.vnet.ibm.com> References: <200912151255.29701.ken@linux.vnet.ibm.com> <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> <200912162045.15956.ken@linux.vnet.ibm.com> Message-ID: Hello, Ken > The updated version of the patch obtains isPPC64 from PPCSubTarget and leaves > the PPCISD::NOP without SDNPHasChain. I don't have commit privileges. > Thanks for reviewing the patch. 
I asked Tilmann (who did ppc/linux support stuff during his GSoC) to review these patches -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From daniel at zuster.org Wed Dec 16 14:00:07 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 12:00:07 -0800 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp In-Reply-To: <82724A97-9AFB-41E8-B2CF-683CF38F1468@gmail.com> References: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> <6a8523d60912160257ia276cc5ya1e6a1437867ce79@mail.gmail.com> <82724A97-9AFB-41E8-B2CF-683CF38F1468@gmail.com> Message-ID: <6a8523d60912161200n36180a70x1139c8317b1df1a3@mail.gmail.com> On Wed, Dec 16, 2009 at 11:00 AM, Bill Wendling wrote: > Hi Daniel, > > Okay. It really shouldn't have had an effect on code gen at all (the flag isn't used by anything at the moment). I'll wait until the tree is healthy and check it in again. Right, I think it was just exposing a bug -- I was just trying to get to green. It didn't work though, so I've reapplied, we need to just diagnose the failure. - Daniel > > -bw > > On Dec 16, 2009, at 2:57 AM, Daniel Dunbar wrote: > >> Hi Bill, >> >> I reverted this, it was one of the patches responsible for the failing >> x86_64-apple-darwin10 bootstrap in my testing. >> >> - Daniel >> >> On Mon, Dec 14, 2009 at 5:54 PM, Bill Wendling wrote: >>> Author: void >>> Date: Mon Dec 14 19:54:51 2009 >>> New Revision: 91392 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev >>> Log: >>> Initial work on disabling the scheduler. This is a work in progress, and this >>> stuff isn't used just yet. >>> >>> We want to model the GCC `-fno-schedule-insns' and `-fno-schedule-insns2' >>> flags. 
The hypothesis is that the people who use these flags know what they are >>> doing, and have hand-optimized the C code to reduce latencies and other >>> conflicts. >>> >>> The idea behind our scheme to turn off scheduling is to create a map "on the >>> side" during DAG generation. It will order the nodes by how they appeared in the >>> code. This map is then used during scheduling to get the ordering. >>> >>> Modified: >>> ? ?llvm/trunk/include/llvm/CodeGen/SelectionDAG.h >>> ? ?llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp >>> ? ?llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp >>> ? ?llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp >>> >>> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91392&r1=91391&r2=91392&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) >>> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 >>> @@ -110,6 +110,46 @@ >>> ? /// SelectionDAG. >>> ? BumpPtrAllocator Allocator; >>> >>> + ?/// NodeOrdering - Assigns a "line number" value to each SDNode that >>> + ?/// corresponds to the "line number" of the original LLVM instruction. This >>> + ?/// used for turning off scheduling, because we'll forgo the normal scheduling >>> + ?/// algorithm and output the instructions according to this ordering. >>> + ?class NodeOrdering { >>> + ? ?/// LineNo - The line of the instruction the node corresponds to. A value of >>> + ? ?/// `0' means it's not assigned. >>> + ? ?unsigned LineNo; >>> + ? ?std::map Order; >>> + >>> + ? ?void operator=(const NodeOrdering&); // Do not implement. >>> + ? ?NodeOrdering(const NodeOrdering&); ? // Do not implement. >>> + ?public: >>> + ? ?NodeOrdering() : LineNo(0) {} >>> + >>> + ? ?void add(const SDNode *Node) { >>> + ? ? 
?assert(LineNo && "Invalid line number!"); >>> + ? ? ?Order[Node] = LineNo; >>> + ? ?} >>> + ? ?void remove(const SDNode *Node) { >>> + ? ? ?std::map::iterator Itr = Order.find(Node); >>> + ? ? ?if (Itr != Order.end()) >>> + ? ? ? ?Order.erase(Itr); >>> + ? ?} >>> + ? ?void clear() { >>> + ? ? ?Order.clear(); >>> + ? ? ?LineNo = 1; >>> + ? ?} >>> + ? ?unsigned getLineNo(const SDNode *Node) { >>> + ? ? ?unsigned LN = Order[Node]; >>> + ? ? ?assert(LN && "Node isn't in ordering map!"); >>> + ? ? ?return LN; >>> + ? ?} >>> + ? ?void newInst() { >>> + ? ? ?++LineNo; >>> + ? ?} >>> + >>> + ? ?void dump() const; >>> + ?} *Ordering; >>> + >>> ? /// VerifyNode - Sanity check the given node. ?Aborts if it is invalid. >>> ? void VerifyNode(SDNode *N); >>> >>> @@ -120,6 +160,9 @@ >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? DenseSet &visited, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int level, bool &printed); >>> >>> + ?void operator=(const SelectionDAG&); // Do not implement. >>> + ?SelectionDAG(const SelectionDAG&); ? // Do not implement. >>> + >>> ?public: >>> ? SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); >>> ? ~SelectionDAG(); >>> @@ -199,6 +242,13 @@ >>> ? ? return Root = N; >>> ? } >>> >>> + ?/// NewInst - Tell the ordering object that we're processing a new >>> + ?/// instruction. >>> + ?void NewInst() { >>> + ? ?if (Ordering) >>> + ? ? ?Ordering->newInst(); >>> + ?} >>> + >>> ? /// Combine - This iterates over the nodes in the SelectionDAG, folding >>> ? /// certain types of nodes together, or eliminating superfluous nodes. ?The >>> ? 
/// Level argument controls whether Combine is allowed to produce nodes and >>> >>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91392&r1=91391&r2=91392&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 >>> @@ -20,10 +20,16 @@ >>> ?#include "llvm/Target/TargetInstrInfo.h" >>> ?#include "llvm/Target/TargetRegisterInfo.h" >>> ?#include "llvm/Target/TargetSubtarget.h" >>> +#include "llvm/Support/CommandLine.h" >>> ?#include "llvm/Support/Debug.h" >>> ?#include "llvm/Support/raw_ostream.h" >>> ?using namespace llvm; >>> >>> +cl::opt >>> +DisableInstScheduling("disable-inst-scheduling", >>> + ? ? ? ? ? ? ? ? ? ? ?cl::init(false), >>> + ? ? ? ? ? ? ? ? ? ? ?cl::desc("Disable instruction scheduling")); >>> + >>> ?ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) >>> ? : ScheduleDAG(mf) { >>> ?} >>> >>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91392&r1=91391&r2=91392&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009 >>> @@ -48,6 +48,8 @@ >>> ?#include >>> ?using namespace llvm; >>> >>> +extern cl::opt DisableInstScheduling; >>> + >>> ?/// makeVTList - Return an instance of the SDVTList struct initialized with the >>> ?/// specified members. >>> ?static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { >>> @@ -552,6 +554,9 @@ >>> ? ? } >>> >>> ? ? DeallocateNode(N); >>> + >>> + ? 
?// Remove the ordering of this node. >>> + ? ?if (Ordering) Ordering->remove(N); >>> ? } >>> ?} >>> >>> @@ -577,6 +582,9 @@ >>> ? N->DropOperands(); >>> >>> ? DeallocateNode(N); >>> + >>> + ?// Remove the ordering of this node. >>> + ?if (Ordering) Ordering->remove(N); >>> ?} >>> >>> ?void SelectionDAG::DeallocateNode(SDNode *N) { >>> @@ -588,6 +596,9 @@ >>> ? N->NodeType = ISD::DELETED_NODE; >>> >>> ? NodeAllocator.Deallocate(AllNodes.remove(N)); >>> + >>> + ?// Remove the ordering of this node. >>> + ?if (Ordering) Ordering->remove(N); >>> ?} >>> >>> ?/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that >>> @@ -691,7 +702,9 @@ >>> ? FoldingSetNodeID ID; >>> ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); >>> ? AddNodeIDCustom(ID, N); >>> - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?if (Ordering) Ordering->remove(Node); >>> + ?return Node; >>> ?} >>> >>> ?/// FindModifiedNodeSlot - Find a slot for the specified node if its operands >>> @@ -708,7 +721,9 @@ >>> ? FoldingSetNodeID ID; >>> ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); >>> ? AddNodeIDCustom(ID, N); >>> - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?if (Ordering) Ordering->remove(Node); >>> + ?return Node; >>> ?} >>> >>> >>> @@ -725,7 +740,9 @@ >>> ? FoldingSetNodeID ID; >>> ? AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); >>> ? AddNodeIDCustom(ID, N); >>> - ?return CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); >>> + ?if (Ordering) Ordering->remove(Node); >>> + ?return Node; >>> ?} >>> >>> ?/// VerifyNode - Sanity check the given node. ?Aborts if it is invalid. >>> @@ -778,8 +795,13 @@ >>> ?SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) >>> ? : TLI(tli), FLI(fli), DW(0), >>> ? ? 
EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), >>> - ? ?getVTList(MVT::Other)), Root(getEntryNode()) { >>> + ? ? ? ? ? ? ?getVTList(MVT::Other)), >>> + ? ?Root(getEntryNode()), Ordering(0) { >>> ? AllNodes.push_back(&EntryNode); >>> + ?if (DisableInstScheduling) { >>> + ? ?Ordering = new NodeOrdering(); >>> + ? ?Ordering->add(&EntryNode); >>> + ?} >>> ?} >>> >>> ?void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, >>> @@ -792,6 +814,7 @@ >>> >>> ?SelectionDAG::~SelectionDAG() { >>> ? allnodes_clear(); >>> + ?delete Ordering; >>> ?} >>> >>> ?void SelectionDAG::allnodes_clear() { >>> @@ -817,6 +840,10 @@ >>> ? EntryNode.UseList = 0; >>> ? AllNodes.push_back(&EntryNode); >>> ? Root = getEntryNode(); >>> + ?if (DisableInstScheduling) { >>> + ? ?Ordering = new NodeOrdering(); >>> + ? ?Ordering->add(&EntryNode); >>> + ?} >>> ?} >>> >>> ?SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { >>> @@ -877,14 +904,17 @@ >>> ? ID.AddPointer(&Val); >>> ? void *IP = 0; >>> ? SDNode *N = NULL; >>> - ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) >>> + ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { >>> + ? ?if (Ordering) Ordering->add(N); >>> ? ? if (!VT.isVector()) >>> ? ? ? return SDValue(N, 0); >>> + ?} >>> ? if (!N) { >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) ConstantSDNode(isT, &Val, EltVT); >>> ? ? CSEMap.InsertNode(N, IP); >>> ? ? AllNodes.push_back(N); >>> + ? ?if (Ordering) Ordering->add(N); >>> ? } >>> >>> ? SDValue Result(N, 0); >>> @@ -921,14 +951,17 @@ >>> ? ID.AddPointer(&V); >>> ? void *IP = 0; >>> ? SDNode *N = NULL; >>> - ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) >>> + ?if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { >>> + ? ?if (Ordering) Ordering->add(N); >>> ? ? if (!VT.isVector()) >>> ? ? ? return SDValue(N, 0); >>> + ?} >>> ? if (!N) { >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) ConstantFPSDNode(isTarget, &V, EltVT); >>> ? ? CSEMap.InsertNode(N, IP); >>> ? ? AllNodes.push_back(N); >>> + ? 
?if (Ordering) Ordering->add(N); >>> ? } >>> >>> ? SDValue Result(N, 0); >>> @@ -983,12 +1016,15 @@ >>> ? ID.AddInteger(Offset); >>> ? ID.AddInteger(TargetFlags); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -998,12 +1034,15 @@ >>> ? AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); >>> ? ID.AddInteger(FI); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) FrameIndexSDNode(FI, VT, isTarget); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1017,12 +1056,15 @@ >>> ? ID.AddInteger(JTI); >>> ? ID.AddInteger(TargetFlags); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1042,12 +1084,15 @@ >>> ? ID.AddPointer(C); >>> ? ID.AddInteger(TargetFlags); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? 
?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1068,12 +1113,15 @@ >>> ? C->AddSelectionDAGCSEId(ID); >>> ? ID.AddInteger(TargetFlags); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1082,12 +1130,15 @@ >>> ? AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); >>> ? ID.AddPointer(MBB); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) BasicBlockSDNode(MBB); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1103,6 +1154,7 @@ >>> ? N = NodeAllocator.Allocate(); >>> ? new (N) VTSDNode(VT); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1112,6 +1164,7 @@ >>> ? N = NodeAllocator.Allocate(); >>> ? new (N) ExternalSymbolSDNode(false, Sym, 0, VT); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1124,6 +1177,7 @@ >>> ? N = NodeAllocator.Allocate(); >>> ? 
new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1136,6 +1190,7 @@ >>> ? ? new (N) CondCodeSDNode(Cond); >>> ? ? CondCodeNodes[Cond] = N; >>> ? ? AllNodes.push_back(N); >>> + ? ?if (Ordering) Ordering->add(N); >>> ? } >>> ? return SDValue(CondCodeNodes[Cond], 0); >>> ?} >>> @@ -1228,8 +1283,10 @@ >>> ? ? ID.AddInteger(MaskVec[i]); >>> >>> ? void* IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> >>> ? // Allocate the mask array for the node out of the BumpPtrAllocator, since >>> ? // SDNode doesn't have access to it. ?This memory will be "leaked" when >>> @@ -1241,6 +1298,7 @@ >>> ? new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1258,12 +1316,15 @@ >>> ? SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; >>> ? AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); >>> ? void* IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? CvtRndSatSDNode *N = NodeAllocator.Allocate(); >>> ? new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1272,12 +1333,15 @@ >>> ? AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); >>> ? ID.AddInteger(RegNo); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? 
return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) RegisterSDNode(RegNo, VT); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1289,12 +1353,15 @@ >>> ? AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); >>> ? ID.AddInteger(LabelID); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) LabelSDNode(Opcode, dl, Root, LabelID); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1308,12 +1375,15 @@ >>> ? ID.AddPointer(BA); >>> ? ID.AddInteger(TargetFlags); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -1326,13 +1396,16 @@ >>> ? ID.AddPointer(V); >>> >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) SrcValueSDNode(V); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -2243,13 +2316,16 @@ >>> ? FoldingSetNodeID ID; >>> ? AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); >>> ? 
void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) SDNode(Opcode, DL, getVTList(VT)); >>> ? CSEMap.InsertNode(N, IP); >>> >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -2473,8 +2549,10 @@ >>> ? ? SDValue Ops[1] = { Operand }; >>> ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 1); >>> ? ? void *IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> + ? ?} >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) UnarySDNode(Opcode, DL, VTs, Operand); >>> ? ? CSEMap.InsertNode(N, IP); >>> @@ -2484,6 +2562,7 @@ >>> ? } >>> >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -2891,8 +2970,10 @@ >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 2); >>> ? ? void *IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> + ? ?} >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); >>> ? ? CSEMap.InsertNode(N, IP); >>> @@ -2902,6 +2983,7 @@ >>> ? } >>> >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -2968,8 +3050,10 @@ >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, 3); >>> ? ? void *IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? 
?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> + ? ?} >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); >>> ? ? CSEMap.InsertNode(N, IP); >>> @@ -2977,7 +3061,9 @@ >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); >>> ? } >>> + >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -3573,12 +3659,14 @@ >>> ? void* IP = 0; >>> ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? cast(E)->refineAlignment(MMO); >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> ? } >>> ? SDNode* N = NodeAllocator.Allocate(); >>> ? new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3636,12 +3724,14 @@ >>> ? void* IP = 0; >>> ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? cast(E)->refineAlignment(MMO); >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> ? } >>> ? SDNode* N = NodeAllocator.Allocate(); >>> ? new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3714,6 +3804,7 @@ >>> ? ? void *IP = 0; >>> ? ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? ? cast(E)->refineAlignment(MMO); >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> ? ? } >>> >>> @@ -3725,6 +3816,7 @@ >>> ? ? new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); >>> ? } >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3789,12 +3881,14 @@ >>> ? void *IP = 0; >>> ? 
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? cast(E)->refineAlignment(MMO); >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> ? } >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3865,12 +3959,14 @@ >>> ? void *IP = 0; >>> ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? cast(E)->refineAlignment(MMO); >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> ? } >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3925,12 +4021,14 @@ >>> ? void *IP = 0; >>> ? if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> ? ? cast(E)->refineAlignment(MMO); >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> ? } >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); >>> ? CSEMap.InsertNode(N, IP); >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -3947,14 +4045,17 @@ >>> ? ID.AddInteger(ST->getMemoryVT().getRawBits()); >>> ? ID.AddInteger(ST->getRawSubclassData()); >>> ? void *IP = 0; >>> - ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ?if (Ordering) Ordering->add(E); >>> ? ? return SDValue(E, 0); >>> + ?} >>> ? SDNode *N = NodeAllocator.Allocate(); >>> ? new (N) StoreSDNode(Ops, dl, VTs, AM, >>> ? ? ? ? ? ? ? ? ? ? ? ST->isTruncatingStore(), ST->getMemoryVT(), >>> ? ? ? ? ? ? ? ? ? ? ? ST->getMemOperand()); >>> ? CSEMap.InsertNode(N, IP); >>> ? 
AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ? return SDValue(N, 0); >>> ?} >>> >>> @@ -4020,8 +4121,10 @@ >>> ? ? AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); >>> ? ? void *IP = 0; >>> >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> + ? ?} >>> >>> ? ? N = NodeAllocator.Allocate(); >>> ? ? new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); >>> @@ -4032,6 +4135,7 @@ >>> ? } >>> >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -4087,8 +4191,10 @@ >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); >>> ? ? void *IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return SDValue(E, 0); >>> + ? ?} >>> ? ? if (NumOps == 1) { >>> ? ? ? N = NodeAllocator.Allocate(); >>> ? ? ? new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); >>> @@ -4119,6 +4225,7 @@ >>> ? ? } >>> ? } >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -4581,8 +4688,10 @@ >>> ? if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); >>> - ? ?if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(ON); >>> ? ? ? return ON; >>> + ? ?} >>> ? } >>> >>> ? if (!RemoveNodeFromCSEMaps(N)) >>> @@ -4646,6 +4755,7 @@ >>> >>> ? if (IP) >>> ? ? CSEMap.InsertNode(N, IP); ? // Memoize the new node. >>> + ?if (Ordering) Ordering->add(N); >>> ? return N; >>> ?} >>> >>> @@ -4784,8 +4894,10 @@ >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); >>> ? 
? IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return cast(E); >>> + ? ?} >>> ? } >>> >>> ? // Allocate a new MachineSDNode. >>> @@ -4807,6 +4919,7 @@ >>> ? ? CSEMap.InsertNode(N, IP); >>> >>> ? AllNodes.push_back(N); >>> + ?if (Ordering) Ordering->add(N); >>> ?#ifndef NDEBUG >>> ? VerifyNode(N); >>> ?#endif >>> @@ -4843,8 +4956,10 @@ >>> ? ? FoldingSetNodeID ID; >>> ? ? AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); >>> ? ? void *IP = 0; >>> - ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) >>> + ? ?if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { >>> + ? ? ?if (Ordering) Ordering->add(E); >>> ? ? ? return E; >>> + ? ?} >>> ? } >>> ? return NULL; >>> ?} >>> @@ -6011,6 +6126,9 @@ >>> ? errs() << "\n\n"; >>> ?} >>> >>> +void SelectionDAG::NodeOrdering::dump() const { >>> +} >>> + >>> ?void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { >>> ? print_types(OS, G); >>> ? print_details(OS, G); >>> @@ -6151,4 +6269,3 @@ >>> ? ? ? return false; >>> ? return true; >>> ?} >>> - >>> >>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91392&r1=91391&r2=91392&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) >>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Mon Dec 14 19:54:51 2009 >>> @@ -583,6 +583,9 @@ >>> ?} >>> >>> ?void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { >>> + ?// Tell the DAG that we're processing a new instruction. >>> + ?DAG.NewInst(); >>> + >>> ? // Note: this doesn't use InstVisitor, because it has to work with >>> ? // ConstantExpr's in addition to instructions. >>> ? 
switch (Opcode) { >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From daniel at zuster.org Wed Dec 16 14:03:52 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 12:03:52 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <0804613A-3474-48FE-99E4-A06AC16D629F@apple.com> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> <6a8523d60912160258p2d1b3458qcf76cabb26a1a5d0@mail.gmail.com> <0804613A-3474-48FE-99E4-A06AC16D629F@apple.com> Message-ID: <6a8523d60912161203r499a267atab9b42cf13ffba6c@mail.gmail.com> On Wed, Dec 16, 2009 at 9:17 AM, Evan Cheng wrote: > I thought the problem started prior to 91459? I presume the patch was just exposing the bug. At the time I reverted taking out this and Bill's patch restored bootstrap, but then subsequently more patches went in to TOT and it didn't help. I will reapply this and Bill's patch. I want a close-the-tree button. - Daniel > Evan > > On Dec 16, 2009, at 2:58 AM, Daniel Dunbar wrote: > >> Hi Bob, >> >> I reverted this, it was one of the patches responsible for the failing >> x86_64-apple-darwin10 bootstrap in my testing. >> >> - Daniel >> >> On Tue, Dec 15, 2009 at 2:00 PM, Bob Wilson wrote: >>> Author: bwilson >>> Date: Tue Dec 15 16:00:51 2009 >>> New Revision: 91459 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev >>> Log: >>> Reapply 91184 with fixes and an addition to the testcase to cover the problem >>> found last time. 
?Instead of trying to modify the IR while iterating over it, >>> I've change it to keep a list of WeakVH references to dead instructions, and >>> then delete those instructions later. ?I also added some special case code to >>> detect and handle the situation when both operands of a memcpy intrinsic are >>> referencing the same alloca. >>> >>> Added: >>> ? ?llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll >>> Modified: >>> ? ?llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp >>> >>> Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91459&r1=91458&r2=91459&view=diff >>> >>> ============================================================================== >>> --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) >>> +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Dec 15 16:00:51 2009 >>> @@ -74,6 +74,10 @@ >>> ? private: >>> ? ? TargetData *TD; >>> >>> + ? ?/// DeadInsts - Keep track of instructions we have made dead, so that >>> + ? ?/// we can remove them after we are done working. >>> + ? ?SmallVector DeadInsts; >>> + >>> ? ? /// AllocaInfo - When analyzing uses of an alloca instruction, this captures >>> ? ? /// information about the uses. ?All these fields are initialized to false >>> ? ? /// and set to true when something is learned. >>> @@ -102,25 +106,30 @@ >>> >>> ? ? int isSafeAllocaToScalarRepl(AllocaInst *AI); >>> >>> - ? ?void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info); >>> - ? ?void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info); >>> - ? ?void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned OpNo, AllocaInfo &Info); >>> - ? 
?void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info); >>> + ? ?void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, AllocaInfo &Info); >>> + ? ?void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, >>> + ? ? ? ? ? ? ? ? ? uint64_t &ArrayOffset, AllocaInfo &Info); >>> + ? ?void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? uint64_t MemSize, const Type *MemOpType, bool isStore, >>> + ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info); >>> + ? ?bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); >>> + ? ?unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); >>> >>> ? ? void DoScalarReplacement(AllocaInst *AI, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?std::vector &WorkList); >>> + ? ?void DeleteDeadInstructions(); >>> ? ? void CleanupGEP(GetElementPtrInst *GEP); >>> - ? ?void CleanupAllocaUsers(AllocaInst *AI); >>> + ? ?void CleanupAllocaUsers(Value *V); >>> ? ? AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); >>> >>> - ? ?void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); >>> - >>> - ? ?void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, >>> + ? ?void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); >>> + ? ?void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); >>> + ? ?void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts); >>> + ? ?void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInst *AI, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
? ? ? ? SmallVector &NewElts); >>> ? ? void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, >>> @@ -360,176 +369,37 @@ >>> ? ? } >>> ? } >>> >>> - ?// Now that we have created the alloca instructions that we want to use, >>> - ?// expand the getelementptr instructions to use them. >>> - ?while (!AI->use_empty()) { >>> - ? ?Instruction *User = cast(AI->use_back()); >>> - ? ?if (BitCastInst *BCInst = dyn_cast(User)) { >>> - ? ? ?RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); >>> - ? ? ?BCInst->eraseFromParent(); >>> - ? ? ?continue; >>> - ? ?} >>> - >>> - ? ?// Replace: >>> - ? ?// ? %res = load { i32, i32 }* %alloc >>> - ? ?// with: >>> - ? ?// ? %load.0 = load i32* %alloc.0 >>> - ? ?// ? %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 >>> - ? ?// ? %load.1 = load i32* %alloc.1 >>> - ? ?// ? %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 >>> - ? ?// (Also works for arrays instead of structs) >>> - ? ?if (LoadInst *LI = dyn_cast(User)) { >>> - ? ? ?Value *Insert = UndefValue::get(LI->getType()); >>> - ? ? ?for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { >>> - ? ? ? ?Value *Load = new LoadInst(ElementAllocas[i], "load", LI); >>> - ? ? ? ?Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); >>> - ? ? ?} >>> - ? ? ?LI->replaceAllUsesWith(Insert); >>> - ? ? ?LI->eraseFromParent(); >>> - ? ? ?continue; >>> - ? ?} >>> + ?// Now that we have created the new alloca instructions, rewrite all the >>> + ?// uses of the old alloca. >>> + ?DeadInsts.push_back(AI); >>> + ?RewriteForScalarRepl(AI, AI, 0, ElementAllocas); >>> >>> - ? ?// Replace: >>> - ? ?// ? store { i32, i32 } %val, { i32, i32 }* %alloc >>> - ? ?// with: >>> - ? ?// ? %val.0 = extractvalue { i32, i32 } %val, 0 >>> - ? ?// ? store i32 %val.0, i32* %alloc.0 >>> - ? ?// ? %val.1 = extractvalue { i32, i32 } %val, 1 >>> - ? ?// ? store i32 %val.1, i32* %alloc.1 >>> - ? ?// (Also works for arrays instead of structs) >>> - ? 
?if (StoreInst *SI = dyn_cast(User)) { >>> - ? ? ?Value *Val = SI->getOperand(0); >>> - ? ? ?for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { >>> - ? ? ? ?Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); >>> - ? ? ? ?new StoreInst(Extract, ElementAllocas[i], SI); >>> - ? ? ?} >>> - ? ? ?SI->eraseFromParent(); >>> - ? ? ?continue; >>> - ? ?} >>> - >>> - ? ?GetElementPtrInst *GEPI = cast(User); >>> - ? ?// We now know that the GEP is of the form: GEP , 0, >>> - ? ?unsigned Idx = >>> - ? ? ? (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); >>> - >>> - ? ?assert(Idx < ElementAllocas.size() && "Index out of range?"); >>> - ? ?AllocaInst *AllocaToUse = ElementAllocas[Idx]; >>> - >>> - ? ?Value *RepValue; >>> - ? ?if (GEPI->getNumOperands() == 3) { >>> - ? ? ?// Do not insert a new getelementptr instruction with zero indices, only >>> - ? ? ?// to have it optimized out later. >>> - ? ? ?RepValue = AllocaToUse; >>> - ? ?} else { >>> - ? ? ?// We are indexing deeply into the structure, so we still need a >>> - ? ? ?// getelement ptr instruction to finish the indexing. ?This may be >>> - ? ? ?// expanded itself once the worklist is rerun. >>> - ? ? ?// >>> - ? ? ?SmallVector NewArgs; >>> - ? ? ?NewArgs.push_back(Constant::getNullValue( >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Type::getInt32Ty(AI->getContext()))); >>> - ? ? ?NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); >>> - ? ? ?RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NewArgs.end(), "", GEPI); >>> - ? ? ?RepValue->takeName(GEPI); >>> - ? ?} >>> - >>> - ? ?// If this GEP is to the start of the aggregate, check for memcpys. >>> - ? ?if (Idx == 0 && GEPI->hasAllZeroIndices()) >>> - ? ? ?RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); >>> - >>> - ? ?// Move all of the users over to the new GEP. >>> - ? ?GEPI->replaceAllUsesWith(RepValue); >>> - ? ?// Delete the old GEP >>> - ? 
?GEPI->eraseFromParent(); >>> - ?} >>> + ?// Now erase any instructions that were made dead while rewriting the alloca. >>> + ?DeleteDeadInstructions(); >>> >>> - ?// Finally, delete the Alloca instruction >>> - ?AI->eraseFromParent(); >>> ? NumReplaced++; >>> ?} >>> >>> -/// isSafeElementUse - Check to see if this use is an allowed use for a >>> -/// getelementptr instruction of an array aggregate allocation. ?isFirstElt >>> -/// indicates whether Ptr is known to the start of the aggregate. >>> -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info) { >>> - ?for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); >>> - ? ? ? I != E; ++I) { >>> - ? ?Instruction *User = cast(*I); >>> - ? ?switch (User->getOpcode()) { >>> - ? ?case Instruction::Load: ?break; >>> - ? ?case Instruction::Store: >>> - ? ? ?// Store is ok if storing INTO the pointer, not storing the pointer >>> - ? ? ?if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); >>> - ? ? ?break; >>> - ? ?case Instruction::GetElementPtr: { >>> - ? ? ?GetElementPtrInst *GEP = cast(User); >>> - ? ? ?bool AreAllZeroIndices = isFirstElt; >>> - ? ? ?if (GEP->getNumOperands() > 1 && >>> - ? ? ? ? ?(!isa(GEP->getOperand(1)) || >>> - ? ? ? ? ? !cast(GEP->getOperand(1))->isZero())) >>> - ? ? ? ?// Using pointer arithmetic to navigate the array. >>> - ? ? ? ?return MarkUnsafe(Info); >>> - >>> - ? ? ?// Verify that any array subscripts are in range. >>> - ? ? ?for (gep_type_iterator GEPIt = gep_type_begin(GEP), >>> - ? ? ? ? ? E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { >>> - ? ? ? ?// Ignore struct elements, no extra checking needed for these. >>> - ? ? ? ?if (isa(*GEPIt)) >>> - ? ? ? ? ?continue; >>> - >>> - ? ? ? ?// This GEP indexes an array. ?Verify that this is an in-range >>> - ? ? ? ?// constant integer. Specifically, consider A[0][i]. We cannot know that >>> - ? ? ? 
?// the user isn't doing invalid things like allowing i to index an >>> - ? ? ? ?// out-of-range subscript that accesses A[1]. ?Because of this, we have >>> - ? ? ? ?// to reject SROA of any accesses into structs where any of the >>> - ? ? ? ?// components are variables. >>> - ? ? ? ?ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); >>> - ? ? ? ?if (!IdxVal) return MarkUnsafe(Info); >>> - >>> - ? ? ? ?// Are all indices still zero? >>> - ? ? ? ?AreAllZeroIndices &= IdxVal->isZero(); >>> - >>> - ? ? ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { >>> - ? ? ? ? ?if (IdxVal->getZExtValue() >= AT->getNumElements()) >>> - ? ? ? ? ? ?return MarkUnsafe(Info); >>> - ? ? ? ?} else if (const VectorType *VT = dyn_cast(*GEPIt)) { >>> - ? ? ? ? ?if (IdxVal->getZExtValue() >= VT->getNumElements()) >>> - ? ? ? ? ? ?return MarkUnsafe(Info); >>> - ? ? ? ?} >>> +/// DeleteDeadInstructions - Erase instructions on the DeadInsts list, >>> +/// recursively including all their operands that become trivially dead. >>> +void SROA::DeleteDeadInstructions() { >>> + ?while (!DeadInsts.empty()) { >>> + ? ?Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); >>> + ? ?if (I == 0) >>> + ? ? ?continue; >>> + >>> + ? ?for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) >>> + ? ? ?if (Instruction *U = dyn_cast(*OI)) { >>> + ? ? ? ?// Zero out the operand and see if it becomes trivially dead. >>> + ? ? ? ?*OI = 0; >>> + ? ? ? ?if (isInstructionTriviallyDead(U)) >>> + ? ? ? ? ?DeadInsts.push_back(U); >>> ? ? ? } >>> - >>> - ? ? ?isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); >>> - ? ? ?if (Info.isUnsafe) return; >>> - ? ? ?break; >>> - ? ?} >>> - ? ?case Instruction::BitCast: >>> - ? ? ?if (isFirstElt) { >>> - ? ? ? ?isSafeUseOfBitCastedAllocation(cast(User), AI, Info); >>> - ? ? ? ?if (Info.isUnsafe) return; >>> - ? ? ? ?break; >>> - ? ? ?} >>> - ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); >>> - ? ? 
?return MarkUnsafe(Info); >>> - ? ?case Instruction::Call: >>> - ? ? ?if (MemIntrinsic *MI = dyn_cast(User)) { >>> - ? ? ? ?if (isFirstElt) { >>> - ? ? ? ? ?isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); >>> - ? ? ? ? ?if (Info.isUnsafe) return; >>> - ? ? ? ? ?break; >>> - ? ? ? ?} >>> - ? ? ?} >>> - ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); >>> - ? ? ?return MarkUnsafe(Info); >>> - ? ?default: >>> - ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); >>> - ? ? ?return MarkUnsafe(Info); >>> - ? ?} >>> + >>> + ? ?I->eraseFromParent(); >>> ? } >>> - ?return; ?// All users look ok :) >>> ?} >>> - >>> + >>> ?/// AllUsersAreLoads - Return true if all users of this value are loads. >>> ?static bool AllUsersAreLoads(Value *Ptr) { >>> ? for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); >>> @@ -539,72 +409,116 @@ >>> ? return true; >>> ?} >>> >>> -/// isSafeUseOfAllocation - Check if this user is an allowed use for an >>> -/// aggregate allocation. >>> -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { >>> - ?if (BitCastInst *C = dyn_cast(User)) >>> - ? ?return isSafeUseOfBitCastedAllocation(C, AI, Info); >>> - >>> - ?if (LoadInst *LI = dyn_cast(User)) >>> - ? ?if (!LI->isVolatile()) >>> - ? ? ?return;// Loads (returning a first class aggregrate) are always rewritable >>> - >>> - ?if (StoreInst *SI = dyn_cast(User)) >>> - ? ?if (!SI->isVolatile() && SI->getOperand(0) != AI) >>> - ? ? ?return;// Store is ok if storing INTO the pointer, not storing the pointer >>> - >>> - ?GetElementPtrInst *GEPI = dyn_cast(User); >>> - ?if (GEPI == 0) >>> - ? ?return MarkUnsafe(Info); >>> - >>> - ?gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); >>> +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to >>> +/// performing scalar replacement of alloca AI. 
?The results are flagged in >>> +/// the Info parameter. ?Offset and ArrayOffset indicate the position within >>> +/// AI that is referenced by this instruction. >>> +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, AllocaInfo &Info) { >>> + ?for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { >>> + ? ?Instruction *User = cast(*UI); >>> >>> - ?// The GEP is not safe to transform if not of the form "GEP , 0, ". >>> - ?if (I == E || >>> - ? ? ?I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { >>> - ? ?return MarkUnsafe(Info); >>> + ? ?if (BitCastInst *BC = dyn_cast(User)) { >>> + ? ? ?isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); >>> + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(User)) { >>> + ? ? ?uint64_t GEPArrayOffset = ArrayOffset; >>> + ? ? ?uint64_t GEPOffset = Offset; >>> + ? ? ?isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); >>> + ? ? ?if (!Info.isUnsafe) >>> + ? ? ? ?isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); >>> + ? ?} else if (MemIntrinsic *MI = dyn_cast(UI)) { >>> + ? ? ?ConstantInt *Length = dyn_cast(MI->getLength()); >>> + ? ? ?if (Length) >>> + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, >>> + ? ? ? ? ? ? ? ? ? ? ? ?UI.getOperandNo() == 1, Info); >>> + ? ? ?else >>> + ? ? ? ?MarkUnsafe(Info); >>> + ? ?} else if (LoadInst *LI = dyn_cast(User)) { >>> + ? ? ?if (!LI->isVolatile()) { >>> + ? ? ? ?const Type *LIType = LI->getType(); >>> + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), >>> + ? ? ? ? ? ? ? ? ? ? ? ?LIType, false, Info); >>> + ? ? ?} else >>> + ? ? ? ?MarkUnsafe(Info); >>> + ? ?} else if (StoreInst *SI = dyn_cast(User)) { >>> + ? ? ?// Store is ok if storing INTO the pointer, not storing the pointer >>> + ? ? ?if (!SI->isVolatile() && SI->getOperand(0) != I) { >>> + ? ? ? 
?const Type *SIType = SI->getOperand(0)->getType(); >>> + ? ? ? ?isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), >>> + ? ? ? ? ? ? ? ? ? ? ? ?SIType, true, Info); >>> + ? ? ?} else >>> + ? ? ? ?MarkUnsafe(Info); >>> + ? ?} else if (isa(UI)) { >>> + ? ? ?// If one user is DbgInfoIntrinsic then check if all users are >>> + ? ? ?// DbgInfoIntrinsics. >>> + ? ? ?if (OnlyUsedByDbgInfoIntrinsics(I)) { >>> + ? ? ? ?Info.needsCleanup = true; >>> + ? ? ? ?return; >>> + ? ? ?} >>> + ? ? ?MarkUnsafe(Info); >>> + ? ?} else { >>> + ? ? ?DEBUG(errs() << " ?Transformation preventing inst: " << *User << '\n'); >>> + ? ? ?MarkUnsafe(Info); >>> + ? ?} >>> + ? ?if (Info.isUnsafe) return; >>> ? } >>> +} >>> >>> - ?++I; >>> - ?if (I == E) return MarkUnsafe(Info); ?// ran out of GEP indices?? >>> +/// isSafeGEP - Check if a GEP instruction can be handled for scalar >>> +/// replacement. ?It is safe when all the indices are constant, in-bounds >>> +/// references, and when the resulting offset corresponds to an element within >>> +/// the alloca type. ?The results are flagged in the Info parameter. ?Upon >>> +/// return, Offset is adjusted as specified by the GEP indices. ?For the >>> +/// special case of a variable index to a 2-element array, ArrayOffset is set >>> +/// to the array element size. >>> +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, >>> + ? ? ? ? ? ? ? ? ? ? uint64_t &Offset, uint64_t &ArrayOffset, >>> + ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { >>> + ?gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); >>> + ?if (GEPIt == E) >>> + ? ?return; >>> + >>> + ?// The first GEP index must be zero. >>> + ?if (!isa(GEPIt.getOperand()) || >>> + ? ? ?!cast(GEPIt.getOperand())->isZero()) >>> + ? ?return MarkUnsafe(Info); >>> + ?if (++GEPIt == E) >>> + ? ?return; >>> >>> - ?bool IsAllZeroIndices = true; >>> - >>> ? // If the first index is a non-constant index into an array, see if we can >>> ? // handle it as a special case. 
>>> - ?if (const ArrayType *AT = dyn_cast(*I)) { >>> - ? ?if (!isa(I.getOperand())) { >>> - ? ? ?IsAllZeroIndices = 0; >>> - ? ? ?uint64_t NumElements = AT->getNumElements(); >>> - >>> - ? ? ?// If this is an array index and the index is not constant, we cannot >>> - ? ? ?// promote... that is unless the array has exactly one or two elements in >>> - ? ? ?// it, in which case we CAN promote it, but we have to canonicalize this >>> - ? ? ?// out if this is the only problem. >>> - ? ? ?if ((NumElements == 1 || NumElements == 2) && >>> - ? ? ? ? ?AllUsersAreLoads(GEPI)) { >>> + ?const Type *ArrayEltTy = 0; >>> + ?if (ArrayOffset == 0 && Offset == 0) { >>> + ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { >>> + ? ? ?if (!isa(GEPIt.getOperand())) { >>> + ? ? ? ?uint64_t NumElements = AT->getNumElements(); >>> + >>> + ? ? ? ?// If this is an array index and the index is not constant, we cannot >>> + ? ? ? ?// promote... that is unless the array has exactly one or two elements >>> + ? ? ? ?// in it, in which case we CAN promote it, but we have to canonicalize >>> + ? ? ? ?// this out if this is the only problem. >>> + ? ? ? ?if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) >>> + ? ? ? ? ?return MarkUnsafe(Info); >>> ? ? ? ? Info.needsCleanup = true; >>> - ? ? ? ?return; ?// Canonicalization required! >>> + ? ? ? ?ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); >>> + ? ? ? ?ArrayEltTy = AT->getElementType(); >>> + ? ? ? ?++GEPIt; >>> ? ? ? } >>> - ? ? ?return MarkUnsafe(Info); >>> ? ? } >>> ? } >>> - >>> + >>> ? // Walk through the GEP type indices, checking the types that this indexes >>> ? // into. >>> - ?for (; I != E; ++I) { >>> + ?for (; GEPIt != E; ++GEPIt) { >>> ? ? // Ignore struct elements, no extra checking needed for these. >>> - ? ?if (isa(*I)) >>> + ? ?if (isa(*GEPIt)) >>> ? ? ? continue; >>> - >>> - ? ?ConstantInt *IdxVal = dyn_cast(I.getOperand()); >>> - ? ?if (!IdxVal) return MarkUnsafe(Info); >>> >>> - ? 
?// Are all indices still zero? >>> - ? ?IsAllZeroIndices &= IdxVal->isZero(); >>> - >>> - ? ?if (const ArrayType *AT = dyn_cast(*I)) { >>> + ? ?ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); >>> + ? ?if (!IdxVal) >>> + ? ? ?return MarkUnsafe(Info); >>> + >>> + ? ?if (const ArrayType *AT = dyn_cast(*GEPIt)) { >>> ? ? ? // This GEP indexes an array. ?Verify that this is an in-range constant >>> ? ? ? // integer. Specifically, consider A[0][i]. We cannot know that the user >>> ? ? ? // isn't doing invalid things like allowing i to index an out-of-range >>> @@ -612,147 +526,255 @@ >>> ? ? ? // of any accesses into structs where any of the components are variables. >>> ? ? ? if (IdxVal->getZExtValue() >= AT->getNumElements()) >>> ? ? ? ? return MarkUnsafe(Info); >>> - ? ?} else if (const VectorType *VT = dyn_cast(*I)) { >>> + ? ?} else { >>> + ? ? ?const VectorType *VT = dyn_cast(*GEPIt); >>> + ? ? ?assert(VT && "unexpected type in GEP type iterator"); >>> ? ? ? if (IdxVal->getZExtValue() >= VT->getNumElements()) >>> ? ? ? ? return MarkUnsafe(Info); >>> ? ? } >>> ? } >>> - >>> - ?// If there are any non-simple uses of this getelementptr, make sure to reject >>> - ?// them. >>> - ?return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); >>> + >>> + ?// All the indices are safe. ?Now compute the offset due to this GEP and >>> + ?// check if the alloca has a component element at that offset. >>> + ?if (ArrayOffset == 0) { >>> + ? ?SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); >>> + ? ?Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); >>> + ?} else { >>> + ? ?// Both array elements have the same type, so it suffices to check one of >>> + ? ?// them. ?Copy the GEP indices starting from the array index, but replace >>> + ? ?// that variable index with a constant zero. >>> + ? ?SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); >>> + ? 
?Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); >>> + ? ?const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); >>> + ? ?Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); >>> + ?} >>> + ?if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) >>> + ? ?MarkUnsafe(Info); >>> +} >>> + >>> +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI >>> +/// alloca or has an offset and size that corresponds to a component element >>> +/// within it. ?The offset checked here may have been formed from a GEP with a >>> +/// pointer bitcasted to a different type. >>> +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? uint64_t ArrayOffset, uint64_t MemSize, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? const Type *MemOpType, bool isStore, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInfo &Info) { >>> + ?// Check if this is a load/store of the entire alloca. >>> + ?if (Offset == 0 && ArrayOffset == 0 && >>> + ? ? ?MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { >>> + ? ?bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); >>> + ? ?// This is safe for MemIntrinsics (where MemOpType is 0), integer types >>> + ? ?// (which are essentially the same as the MemIntrinsics, especially with >>> + ? ?// regard to copying padding between elements), or references using the >>> + ? ?// aggregate type of the alloca. >>> + ? ?if (!MemOpType || isa(MemOpType) || UsesAggregateType) { >>> + ? ? ?if (!UsesAggregateType) { >>> + ? ? ? ?if (isStore) >>> + ? ? ? ? ?Info.isMemCpyDst = true; >>> + ? ? ? ?else >>> + ? ? ? ? ?Info.isMemCpySrc = true; >>> + ? ? ?} >>> + ? ? ?return; >>> + ? ?} >>> + ?} >>> + ?// Check if the offset/size correspond to a component within the alloca type. >>> + ?const Type *T = AI->getAllocatedType(); >>> + ?if (TypeHasComponent(T, Offset, MemSize) && >>> + ? ? ?(ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) >>> + ? 
?return; >>> + >>> + ?return MarkUnsafe(Info); >>> ?} >>> >>> -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory >>> -/// intrinsic can be promoted by SROA. ?At this point, we know that the operand >>> -/// of the memintrinsic is a pointer to the beginning of the allocation. >>> -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?unsigned OpNo, AllocaInfo &Info) { >>> - ?// If not constant length, give up. >>> - ?ConstantInt *Length = dyn_cast(MI->getLength()); >>> - ?if (!Length) return MarkUnsafe(Info); >>> - >>> - ?// If not the whole aggregate, give up. >>> - ?if (Length->getZExtValue() != >>> - ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) >>> - ? ?return MarkUnsafe(Info); >>> - >>> - ?// We only know about memcpy/memset/memmove. >>> - ?if (!isa(MI)) >>> - ? ?return MarkUnsafe(Info); >>> - >>> - ?// Otherwise, we can transform it. ?Determine whether this is a memcpy/set >>> - ?// into or out of the aggregate. >>> - ?if (OpNo == 1) >>> - ? ?Info.isMemCpyDst = true; >>> - ?else { >>> - ? ?assert(OpNo == 2); >>> - ? ?Info.isMemCpySrc = true; >>> +/// TypeHasComponent - Return true if T has a component type with the >>> +/// specified offset and size. ?If Size is zero, do not check the size. >>> +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { >>> + ?const Type *EltTy; >>> + ?uint64_t EltSize; >>> + ?if (const StructType *ST = dyn_cast(T)) { >>> + ? ?const StructLayout *Layout = TD->getStructLayout(ST); >>> + ? ?unsigned EltIdx = Layout->getElementContainingOffset(Offset); >>> + ? ?EltTy = ST->getContainedType(EltIdx); >>> + ? ?EltSize = TD->getTypeAllocSize(EltTy); >>> + ? ?Offset -= Layout->getElementOffset(EltIdx); >>> + ?} else if (const ArrayType *AT = dyn_cast(T)) { >>> + ? ?EltTy = AT->getElementType(); >>> + ? ?EltSize = TD->getTypeAllocSize(EltTy); >>> + ? ?Offset %= EltSize; >>> + ?} else { >>> + ? ?return false; >>> ? 
} >>> + ?if (Offset == 0 && (Size == 0 || EltSize == Size)) >>> + ? ?return true; >>> + ?// Check if the component spans multiple elements. >>> + ?if (Offset + Size > EltSize) >>> + ? ?return false; >>> + ?return TypeHasComponent(EltTy, Offset, Size); >>> ?} >>> >>> -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast >>> -/// from an alloca are safe for SROA of that alloca. >>> -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?AllocaInfo &Info) { >>> - ?for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); >>> - ? ? ? UI != E; ++UI) { >>> - ? ?if (BitCastInst *BCU = dyn_cast(UI)) { >>> - ? ? ?isSafeUseOfBitCastedAllocation(BCU, AI, Info); >>> - ? ?} else if (MemIntrinsic *MI = dyn_cast(UI)) { >>> - ? ? ?isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); >>> - ? ?} else if (StoreInst *SI = dyn_cast(UI)) { >>> - ? ? ?if (SI->isVolatile()) >>> - ? ? ? ?return MarkUnsafe(Info); >>> - >>> - ? ? ?// If storing the entire alloca in one chunk through a bitcasted pointer >>> - ? ? ?// to integer, we can transform it. ?This happens (for example) when you >>> - ? ? ?// cast a {i32,i32}* to i64* and store through it. ?This is similar to the >>> - ? ? ?// memcpy case and occurs in various "byval" cases and emulated memcpys. >>> - ? ? ?if (isa(SI->getOperand(0)->getType()) && >>> - ? ? ? ? ?TD->getTypeAllocSize(SI->getOperand(0)->getType()) == >>> - ? ? ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) { >>> - ? ? ? ?Info.isMemCpyDst = true; >>> - ? ? ? ?continue; >>> - ? ? ?} >>> - ? ? ?return MarkUnsafe(Info); >>> - ? ?} else if (LoadInst *LI = dyn_cast(UI)) { >>> - ? ? ?if (LI->isVolatile()) >>> - ? ? ? ?return MarkUnsafe(Info); >>> +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite >>> +/// the instruction I, which references it, to use the separate elements. 
>>> +/// Offset indicates the position within AI that is referenced by this >>> +/// instruction. >>> +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { >>> + ?for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { >>> + ? ?Instruction *User = cast(*UI); >>> >>> - ? ? ?// If loading the entire alloca in one chunk through a bitcasted pointer >>> - ? ? ?// to integer, we can transform it. ?This happens (for example) when you >>> - ? ? ?// cast a {i32,i32}* to i64* and load through it. ?This is similar to the >>> - ? ? ?// memcpy case and occurs in various "byval" cases and emulated memcpys. >>> - ? ? ?if (isa(LI->getType()) && >>> - ? ? ? ? ?TD->getTypeAllocSize(LI->getType()) == >>> - ? ? ? ? ?TD->getTypeAllocSize(AI->getType()->getElementType())) { >>> - ? ? ? ?Info.isMemCpySrc = true; >>> - ? ? ? ?continue; >>> + ? ?if (BitCastInst *BC = dyn_cast(User)) { >>> + ? ? ?RewriteBitCast(BC, AI, Offset, NewElts); >>> + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(User)) { >>> + ? ? ?RewriteGEP(GEPI, AI, Offset, NewElts); >>> + ? ?} else if (MemIntrinsic *MI = dyn_cast(User)) { >>> + ? ? ?ConstantInt *Length = dyn_cast(MI->getLength()); >>> + ? ? ?uint64_t MemSize = Length->getZExtValue(); >>> + ? ? ?if (Offset == 0 && >>> + ? ? ? ? ?MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) >>> + ? ? ? ?RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); >>> + ? ?} else if (LoadInst *LI = dyn_cast(User)) { >>> + ? ? ?const Type *LIType = LI->getType(); >>> + ? ? ?if (LIType == AI->getAllocatedType()) { >>> + ? ? ? ?// Replace: >>> + ? ? ? ?// ? %res = load { i32, i32 }* %alloc >>> + ? ? ? ?// with: >>> + ? ? ? ?// ? %load.0 = load i32* %alloc.0 >>> + ? ? ? ?// ? %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 >>> + ? ? ? ?// ? %load.1 = load i32* %alloc.1 >>> + ? ? ? ?// ? 
%insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 >>> + ? ? ? ?// (Also works for arrays instead of structs) >>> + ? ? ? ?Value *Insert = UndefValue::get(LIType); >>> + ? ? ? ?for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { >>> + ? ? ? ? ?Value *Load = new LoadInst(NewElts[i], "load", LI); >>> + ? ? ? ? ?Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); >>> + ? ? ? ?} >>> + ? ? ? ?LI->replaceAllUsesWith(Insert); >>> + ? ? ? ?DeadInsts.push_back(LI); >>> + ? ? ?} else if (isa(LIType) && >>> + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(LIType) == >>> + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(AI->getAllocatedType())) { >>> + ? ? ? ?// If this is a load of the entire alloca to an integer, rewrite it. >>> + ? ? ? ?RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); >>> ? ? ? } >>> - ? ? ?return MarkUnsafe(Info); >>> - ? ?} else if (isa(UI)) { >>> - ? ? ?// If one user is DbgInfoIntrinsic then check if all users are >>> - ? ? ?// DbgInfoIntrinsics. >>> - ? ? ?if (OnlyUsedByDbgInfoIntrinsics(BC)) { >>> - ? ? ? ?Info.needsCleanup = true; >>> - ? ? ? ?return; >>> + ? ?} else if (StoreInst *SI = dyn_cast(User)) { >>> + ? ? ?Value *Val = SI->getOperand(0); >>> + ? ? ?const Type *SIType = Val->getType(); >>> + ? ? ?if (SIType == AI->getAllocatedType()) { >>> + ? ? ? ?// Replace: >>> + ? ? ? ?// ? store { i32, i32 } %val, { i32, i32 }* %alloc >>> + ? ? ? ?// with: >>> + ? ? ? ?// ? %val.0 = extractvalue { i32, i32 } %val, 0 >>> + ? ? ? ?// ? store i32 %val.0, i32* %alloc.0 >>> + ? ? ? ?// ? %val.1 = extractvalue { i32, i32 } %val, 1 >>> + ? ? ? ?// ? store i32 %val.1, i32* %alloc.1 >>> + ? ? ? ?// (Also works for arrays instead of structs) >>> + ? ? ? ?for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { >>> + ? ? ? ? ?Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); >>> + ? ? ? ? ?new StoreInst(Extract, NewElts[i], SI); >>> + ? ? ? ?} >>> + ? ? ? ?DeadInsts.push_back(SI); >>> + ? ? ?} else if (isa(SIType) && >>> + ? ? ? ? ? ? ? ? 
TD->getTypeAllocSize(SIType) == >>> + ? ? ? ? ? ? ? ? TD->getTypeAllocSize(AI->getAllocatedType())) { >>> + ? ? ? ?// If this is a store of the entire alloca from an integer, rewrite it. >>> + ? ? ? ?RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); >>> ? ? ? } >>> - ? ? ?else >>> - ? ? ? ?MarkUnsafe(Info); >>> ? ? } >>> - ? ?else { >>> - ? ? ?return MarkUnsafe(Info); >>> - ? ?} >>> - ? ?if (Info.isUnsafe) return; >>> ? } >>> ?} >>> >>> -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes >>> -/// to its first element. ?Transform users of the cast to use the new values >>> -/// instead. >>> -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { >>> - ?Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); >>> - ?while (UI != UE) { >>> - ? ?Instruction *User = cast(*UI++); >>> - ? ?if (BitCastInst *BCU = dyn_cast(User)) { >>> - ? ? ?RewriteBitCastUserOfAlloca(BCU, AI, NewElts); >>> - ? ? ?if (BCU->use_empty()) BCU->eraseFromParent(); >>> - ? ? ?continue; >>> - ? ?} >>> +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced >>> +/// and recursively continue updating all of its uses. >>> +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { >>> + ?RewriteForScalarRepl(BC, AI, Offset, NewElts); >>> + ?if (BC->getOperand(0) != AI) >>> + ? ?return; >>> >>> - ? ?if (MemIntrinsic *MI = dyn_cast(User)) { >>> - ? ? ?// This must be memcpy/memmove/memset of the entire aggregate. >>> - ? ? ?// Split into one per element. >>> - ? ? ?RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); >>> - ? ? ?continue; >>> - ? ?} >>> - >>> - ? ?if (StoreInst *SI = dyn_cast(User)) { >>> - ? ? ?// If this is a store of the entire alloca from an integer, rewrite it. >>> - ? ? ?RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); >>> - ? ? ?continue; >>> - ? 
?} >>> + ?// The bitcast references the original alloca. ?Replace its uses with >>> + ?// references to the first new element alloca. >>> + ?Instruction *Val = NewElts[0]; >>> + ?if (Val->getType() != BC->getDestTy()) { >>> + ? ?Val = new BitCastInst(Val, BC->getDestTy(), "", BC); >>> + ? ?Val->takeName(BC); >>> + ?} >>> + ?BC->replaceAllUsesWith(Val); >>> + ?DeadInsts.push_back(BC); >>> +} >>> + >>> +/// FindElementAndOffset - Return the index of the element containing Offset >>> +/// within the specified type, which must be either a struct or an array. >>> +/// Sets T to the type of the element and Offset to the offset within that >>> +/// element. >>> +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { >>> + ?unsigned Idx = 0; >>> + ?if (const StructType *ST = dyn_cast(T)) { >>> + ? ?const StructLayout *Layout = TD->getStructLayout(ST); >>> + ? ?Idx = Layout->getElementContainingOffset(Offset); >>> + ? ?T = ST->getContainedType(Idx); >>> + ? ?Offset -= Layout->getElementOffset(Idx); >>> + ?} else { >>> + ? ?const ArrayType *AT = dyn_cast(T); >>> + ? ?assert(AT && "unexpected type for scalar replacement"); >>> + ? ?T = AT->getElementType(); >>> + ? ?uint64_t EltSize = TD->getTypeAllocSize(T); >>> + ? ?Idx = (unsigned)(Offset / EltSize); >>> + ? ?Offset -= Idx * EltSize; >>> + ?} >>> + ?return Idx; >>> +} >>> + >>> +/// RewriteGEP - Check if this GEP instruction moves the pointer across >>> +/// elements of the alloca that are being split apart, and if so, rewrite >>> +/// the GEP to be relative to the new element. >>> +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, >>> + ? ? ? ? ? ? ? ? ? ? ?SmallVector &NewElts) { >>> + ?uint64_t OldOffset = Offset; >>> + ?SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); >>> + ?Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
&Indices[0], Indices.size()); >>> + >>> + ?RewriteForScalarRepl(GEPI, AI, Offset, NewElts); >>> + >>> + ?const Type *T = AI->getAllocatedType(); >>> + ?unsigned OldIdx = FindElementAndOffset(T, OldOffset); >>> + ?if (GEPI->getOperand(0) == AI) >>> + ? ?OldIdx = ~0U; // Force the GEP to be rewritten. >>> + >>> + ?T = AI->getAllocatedType(); >>> + ?uint64_t EltOffset = Offset; >>> + ?unsigned Idx = FindElementAndOffset(T, EltOffset); >>> + >>> + ?// If this GEP does not move the pointer across elements of the alloca >>> + ?// being split, then it does not need to be rewritten. >>> + ?if (Idx == OldIdx) >>> + ? ?return; >>> >>> - ? ?if (LoadInst *LI = dyn_cast(User)) { >>> - ? ? ?// If this is a load of the entire alloca to an integer, rewrite it. >>> - ? ? ?RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); >>> - ? ? ?continue; >>> - ? ?} >>> - >>> - ? ?// Otherwise it must be some other user of a gep of the first pointer. ?Just >>> - ? ?// leave these alone. >>> - ? ?continue; >>> - ?} >>> + ?const Type *i32Ty = Type::getInt32Ty(AI->getContext()); >>> + ?SmallVector NewArgs; >>> + ?NewArgs.push_back(Constant::getNullValue(i32Ty)); >>> + ?while (EltOffset != 0) { >>> + ? ?unsigned EltIdx = FindElementAndOffset(T, EltOffset); >>> + ? ?NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); >>> + ?} >>> + ?Instruction *Val = NewElts[Idx]; >>> + ?if (NewArgs.size() > 1) { >>> + ? ?Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NewArgs.end(), "", GEPI); >>> + ? ?Val->takeName(GEPI); >>> + ?} >>> + ?if (Val->getType() != GEPI->getType()) >>> + ? ?Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); >>> + ?GEPI->replaceAllUsesWith(Val); >>> + ?DeadInsts.push_back(GEPI); >>> ?} >>> >>> ?/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. >>> ?/// Rewrite it to copy or set the elements of the scalarized memory.
>>> -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, >>> +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? AllocaInst *AI, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SmallVector &NewElts) { >>> - >>> ? // If this is a memcpy/memmove, construct the other pointer as the >>> ? // appropriate type. ?The "Other" pointer is the pointer that goes to memory >>> ? // that doesn't have anything to do with the alloca that we are promoting. For >>> @@ -761,28 +783,41 @@ >>> ? LLVMContext &Context = MI->getContext(); >>> ? unsigned MemAlignment = MI->getAlignment(); >>> ? if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy >>> - ? ?if (BCInst == MTI->getRawDest()) >>> + ? ?if (Inst == MTI->getRawDest()) >>> ? ? ? OtherPtr = MTI->getRawSource(); >>> ? ? else { >>> - ? ? ?assert(BCInst == MTI->getRawSource()); >>> + ? ? ?assert(Inst == MTI->getRawSource()); >>> ? ? ? OtherPtr = MTI->getRawDest(); >>> ? ? } >>> ? } >>> >>> - ?// Keep track of the other intrinsic argument, so it can be removed if it >>> - ?// is dead when the intrinsic is replaced. >>> - ?Value *PossiblyDead = OtherPtr; >>> - >>> ? // If there is an other pointer, we want to convert it to the same pointer >>> ? // type as AI has, so we can GEP through it safely. >>> ? if (OtherPtr) { >>> - ? ?// It is likely that OtherPtr is a bitcast, if so, remove it. >>> - ? ?if (BitCastInst *BC = dyn_cast(OtherPtr)) >>> - ? ? ?OtherPtr = BC->getOperand(0); >>> - ? ?// All zero GEPs are effectively bitcasts. >>> - ? ?if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) >>> - ? ? ?if (GEP->hasAllZeroIndices()) >>> - ? ? ? ?OtherPtr = GEP->getOperand(0); >>> + >>> + ? ?// Remove bitcasts and all-zero GEPs from OtherPtr. ?This is an >>> + ? ?// optimization, but it's also required to detect the corner case where >>> + ? ?// both pointer operands are referencing the same memory, and where >>> + ? 
?// OtherPtr may be a bitcast or GEP that is currently being rewritten. ?(This >>> + ? ?// function is only called for mem intrinsics that access the whole >>> + ? ?// aggregate, so non-zero GEPs are not an issue here.) >>> + ? ?while (1) { >>> + ? ? ?if (BitCastInst *BC = dyn_cast(OtherPtr)) { >>> + ? ? ? ?OtherPtr = BC->getOperand(0); >>> + ? ? ? ?continue; >>> + ? ? ?} >>> + ? ? ?if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { >>> + ? ? ? ?// All zero GEPs are effectively bitcasts. >>> + ? ? ? ?if (GEP->hasAllZeroIndices()) { >>> + ? ? ? ? ?OtherPtr = GEP->getOperand(0); >>> + ? ? ? ? ?continue; >>> + ? ? ? ?} >>> + ? ? ?} >>> + ? ? ?break; >>> + ? ?} >>> + ? ?// If OtherPtr has already been rewritten, this intrinsic will be dead. >>> + ? ?if (OtherPtr == NewElts[0]) >>> + ? ? ?return; >>> >>> ? ? if (ConstantExpr *BCE = dyn_cast(OtherPtr)) >>> ? ? ? if (BCE->getOpcode() == Instruction::BitCast) >>> @@ -798,7 +833,7 @@ >>> ? // Process each element of the aggregate. >>> ? Value *TheFn = MI->getOperand(0); >>> ? const Type *BytePtrTy = MI->getRawDest()->getType(); >>> - ?bool SROADest = MI->getRawDest() == BCInst; >>> + ?bool SROADest = MI->getRawDest() == Inst; >>> >>> ? Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); >>> >>> @@ -807,12 +842,15 @@ >>> ? ? Value *OtherElt = 0; >>> ? ? unsigned OtherEltAlign = MemAlignment; >>> >>> - ? ?if (OtherPtr) { >>> + ? ?if (OtherPtr == AI) { >>> + ? ? ?OtherElt = NewElts[i]; >>> + ? ? ?OtherEltAlign = 0; >>> + ? ?} else if (OtherPtr) { >>> ? ? ? Value *Idx[2] = { Zero, >>> ? ? ? ? ? ? ? ? ? ? ? ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; >>> - ? ? ?OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, >>> + ? ? ?OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?OtherPtr->getNameStr()+"."+Twine(i), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? MI); >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
MI); >>> ? ? ? uint64_t EltOffset; >>> ? ? ? const PointerType *OtherPtrTy = cast(OtherPtr->getType()); >>> ? ? ? if (const StructType *ST = >>> @@ -924,9 +962,7 @@ >>> ? ? ? CallInst::Create(TheFn, Ops, Ops + 4, "", MI); >>> ? ? } >>> ? } >>> - ?MI->eraseFromParent(); >>> - ?if (PossiblyDead) >>> - ? ?RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); >>> + ?DeadInsts.push_back(MI); >>> ?} >>> >>> ?/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that >>> @@ -937,15 +973,9 @@ >>> ? // Extract each element out of the integer according to its structure offset >>> ? // and store the element value to the individual alloca. >>> ? Value *SrcVal = SI->getOperand(0); >>> - ?const Type *AllocaEltTy = AI->getType()->getElementType(); >>> + ?const Type *AllocaEltTy = AI->getAllocatedType(); >>> ? uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); >>> >>> - ?// If this isn't a store of an integer to the whole alloca, it may be a store >>> - ?// to the first element. ?Just ignore the store in this case and normal SROA >>> - ?// will handle it. >>> - ?if (!isa(SrcVal->getType()) || >>> - ? ? ?TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) >>> - ? ?return; >>> ? // Handle tail padding by extending the operand >>> ? if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) >>> ? ? SrcVal = new ZExtInst(SrcVal, >>> @@ -1050,7 +1080,7 @@ >>> ? ? } >>> ? } >>> >>> - ?SI->eraseFromParent(); >>> + ?DeadInsts.push_back(SI); >>> ?} >>> >>> ?/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to >>> @@ -1059,16 +1089,9 @@ >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? SmallVector &NewElts) { >>> ? // Extract each element out of the NewElts according to its structure offset >>> ? // and form the result value. >>> - ?const Type *AllocaEltTy = AI->getType()->getElementType(); >>> + ?const Type *AllocaEltTy = AI->getAllocatedType(); >>> ? 
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); >>> >>> - ?// If this isn't a load of the whole alloca to an integer, it may be a load >>> - ?// of the first element. ?Just ignore the load in this case and normal SROA >>> - ?// will handle it. >>> - ?if (!isa(LI->getType()) || >>> - ? ? ?TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) >>> - ? ?return; >>> - >>> ? DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI >>> ? ? ? ? ? ? ? ?<< '\n'); >>> >>> @@ -1139,10 +1162,9 @@ >>> ? ? ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); >>> >>> ? LI->replaceAllUsesWith(ResultVal); >>> - ?LI->eraseFromParent(); >>> + ?DeadInsts.push_back(LI); >>> ?} >>> >>> - >>> ?/// HasPadding - Return true if the specified type has any structure or >>> ?/// alignment padding, false otherwise. >>> ?static bool HasPadding(const Type *Ty, const TargetData &TD) { >>> @@ -1192,14 +1214,10 @@ >>> ? // the users are safe to transform. >>> ? AllocaInfo Info; >>> >>> - ?for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); >>> - ? ? ? I != E; ++I) { >>> - ? ?isSafeUseOfAllocation(cast(*I), AI, Info); >>> - ? ?if (Info.isUnsafe) { >>> - ? ? ?DEBUG(errs() << "Cannot transform: " << *AI << "\n ?due to user: " >>> - ? ? ? ? ? ? ? ? ? << **I << '\n'); >>> - ? ? ?return 0; >>> - ? ?} >>> + ?isSafeForScalarRepl(AI, AI, 0, 0, Info); >>> + ?if (Info.isUnsafe) { >>> + ? ?DEBUG(errs() << "Cannot transform: " << *AI << '\n'); >>> + ? ?return 0; >>> ? } >>> >>> ? // Okay, we know all the users are promotable. ?If the aggregate is a memcpy >>> @@ -1208,7 +1226,7 @@ >>> ? // types, but may actually be used. ?In these cases, we refuse to promote the >>> ? // struct. >>> ? if (Info.isMemCpySrc && Info.isMemCpyDst && >>> - ? ? ?HasPadding(AI->getType()->getElementType(), *TD)) >>> + ? ? ?HasPadding(AI->getAllocatedType(), *TD)) >>> ? ? return 0; >>> >>> ? // If we require cleanup, return 1, otherwise return 3. 
>>> @@ -1245,15 +1263,15 @@ >>> ? // Insert the new GEP instructions, which are properly indexed. >>> ? SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); >>> ? Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); >>> - ?Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.begin(), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.end(), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GEPI->getName()+".0", GEPI); >>> + ?Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.begin(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? Indices.end(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GEPI->getName()+".0",GEPI); >>> ? Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); >>> - ?Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.begin(), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.end(), >>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GEPI->getName()+".1", GEPI); >>> + ?Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.begin(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?Indices.end(), >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?GEPI->getName()+".1", GEPI); >>> ? // Replace all loads of the variable index GEP with loads from both >>> ? // indexes and a select. >>> ? while (!GEPI->use_empty()) { >>> @@ -1264,22 +1282,24 @@ >>> ? ? LI->replaceAllUsesWith(R); >>> ? ? LI->eraseFromParent(); >>> ? } >>> - ?GEPI->eraseFromParent(); >>> ?} >>> >>> - >>> ?/// CleanupAllocaUsers - If SROA reported that it can promote the specified >>> ?/// allocation, but only if cleaned up, perform the cleanups required. 
>>> -void SROA::CleanupAllocaUsers(AllocaInst *AI) { >>> +void SROA::CleanupAllocaUsers(Value *V) { >>> ? // At this point, we know that the end result will be SROA'd and promoted, so >>> ? // we can insert ugly code if required so long as sroa+mem2reg will clean it >>> ? // up. >>> - ?for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); >>> + ?for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); >>> ? ? ? ?UI != E; ) { >>> ? ? User *U = *UI++; >>> - ? ?if (GetElementPtrInst *GEPI = dyn_cast(U)) >>> + ? ?if (isa(U)) { >>> + ? ? ?CleanupAllocaUsers(U); >>> + ? ?} else if (GetElementPtrInst *GEPI = dyn_cast(U)) { >>> ? ? ? CleanupGEP(GEPI); >>> - ? ?else { >>> + ? ? ?CleanupAllocaUsers(GEPI); >>> + ? ? ?if (GEPI->use_empty()) GEPI->eraseFromParent(); >>> + ? ?} else { >>> ? ? ? Instruction *I = cast(U); >>> ? ? ? SmallVector DbgInUses; >>> ? ? ? if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { >>> @@ -1395,7 +1415,7 @@ >>> >>> ? ? ? // Compute the offset that this GEP adds to the pointer. >>> ? ? ? SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); >>> - ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), >>> + ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); >>> ? ? ? // See if all uses can be converted. >>> ? ? ? if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, >>> @@ -1457,7 +1477,7 @@ >>> ? ? if (GetElementPtrInst *GEP = dyn_cast(User)) { >>> ? ? ? // Compute the offset that this GEP adds to the pointer. >>> ? ? ? SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); >>> - ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), >>> + ? ? ?uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), >>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? &Indices[0], Indices.size()); >>> ? ? ? 
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); >>> ? ? ? GEP->eraseFromParent(); >>> >>> Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91459&view=auto >>> >>> ============================================================================== >>> --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added) >>> +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Tue Dec 15 16:00:51 2009 >>> @@ -0,0 +1,89 @@ >>> +; RUN: opt < %s -scalarrepl -S | FileCheck %s >>> +; Radar 7441282 >>> + >>> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" >>> +target triple = "thumbv7-apple-darwin10" >>> + >>> +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } >>> +%struct.int16x8_t = type { <8 x i16> } >>> +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } >>> +%union..0anon = type { %struct.int16x8x2_t } >>> + >>> +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { >>> +; CHECK: @test >>> +; CHECK-NOT: alloca >>> +; CHECK: "alloca point" >>> +entry: >>> + ?%tmp_addr = alloca %struct.int16x8_t ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=3] >>> + ?%dst_addr = alloca %struct.int16x8x2_t* ? ? ? ? ; <%struct.int16x8x2_t**> [#uses=2] >>> + ?%__rv = alloca %union..0anon ? ? ? ? ? ? ? ? ? ?; <%union..0anon*> [#uses=2] >>> + ?%__bx = alloca %struct.int16x8_t ? ? ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=2] >>> + ?%__ax = alloca %struct.int16x8_t ? ? ? ? ? ? ? ?; <%struct.int16x8_t*> [#uses=2] >>> + ?%tmp2 = alloca %struct.int16x8x2_t ? ? ? ? ? ? ?; <%struct.int16x8x2_t*> [#uses=2] >>> + ?%0 = alloca %struct.int16x8x2_t ? ? ? ? ? ? ? ? ; <%struct.int16x8x2_t*> [#uses=2] >>> + ?%"alloca point" = bitcast i32 0 to i32 ? ? ? ? 
?; [#uses=0] >>> + ?%1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?store <8 x i16> %tmp.0, <8 x i16>* %1 >>> + ?store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr >>> + ?%2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%4 = load <8 x i16>* %3, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=1] >>> + ?store <8 x i16> %4, <8 x i16>* %2, align 16 >>> + ?%5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%7 = load <8 x i16>* %6, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=1] >>> + ?store <8 x i16> %7, <8 x i16>* %5, align 16 >>> + ?%8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%9 = load <8 x i16>* %8, align 16 ? ? ? ? ? ? ? ; <<8 x i16>> [#uses=2] >>> + ?%10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?%11 = load <8 x i16>* %10, align 16 ? ? ? ? ? ? 
; <<8 x i16>> [#uses=2] >>> + ?%12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] >>> + ?%13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] >>> + ?%14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] >>> + ?%15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] >>> + ?store <8 x i16> %14, <8 x i16>* %15 >>> + ?%16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] >>> + ?%17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] >>> + ?store <8 x i16> %16, <8 x i16>* %17 >>> + ?%18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] >>> + ?%19 = bitcast %struct.int16x8x2_t* %0 to i8* ? ?; [#uses=1] >>> + ?%20 = bitcast %struct.int16x8x2_t* %18 to i8* ? ; [#uses=1] >>> + ?call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) >>> + ?%tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] >>> + ?%21 = bitcast %struct.int16x8x2_t* %0 to i8* ? ?; [#uses=1] >>> + ?call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) >>> + ?%22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] >>> + ?%23 = bitcast %struct.int16x8x2_t* %22 to i8* ? ; [#uses=1] >>> + ?%tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] >>> + ?call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) >>> + ?br label %return >>> + >>> +; CHECK: store <8 x i16> >>> +; CHECK: store <8 x i16> >>> + >>> +return: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ; preds = %entry >>> + ?ret void >>> +} >>> + >>> +; Radar 7466574 >>> +%struct._NSRange = type { i64 } >>> + >>> +define arm_apcscc void @test_memcpy_self() nounwind { >>> +; CHECK: @test_memcpy_self >>> +; CHECK-NOT: alloca >>> +; CHECK: br i1 >>> +entry: >>> + ?%range = alloca %struct._NSRange ? ? 
? ? ? ? ? ?; <%struct._NSRange*> [#uses=2] >>> + ?br i1 undef, label %cond.true, label %cond.false >>> + >>> +cond.true: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?; preds = %entry >>> + ?%tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] >>> + ?%tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] >>> + ?call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) >>> + ?ret void >>> + >>> +cond.false: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ; preds = %entry >>> + ?ret void >>> +} >>> + >>> +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From daniel at zuster.org Wed Dec 16 14:09:54 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 20:09:54 -0000 Subject: [llvm-commits] [llvm] r91559 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Message-ID: <200912162009.nBGK9sd8029466@zion.cs.uiuc.edu> Author: ddunbar Date: Wed Dec 16 14:09:53 2009 New Revision: 91559 URL: http://llvm.org/viewvc/llvm-project?rev=91559&view=rev Log: Reapply r91459, it was only unmasking the bug, and since TOT is still broken having it reverted does no good. 
Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91559&r1=91558&r2=91559&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Wed Dec 16 14:09:53 2009 @@ -74,6 +74,10 @@ private: TargetData *TD; + /// DeadInsts - Keep track of instructions we have made dead, so that + /// we can remove them after we are done working. + SmallVector DeadInsts; + /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. @@ -102,25 +106,30 @@ int isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info); - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info); - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, - AllocaInfo &Info); + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, + uint64_t &ArrayOffset, AllocaInfo &Info); + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, + uint64_t MemSize, const Type *MemOpType, bool isStore, + AllocaInfo &Info); + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); + unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); void DoScalarReplacement(AllocaInst *AI, 
std::vector &WorkList); + void DeleteDeadInstructions(); void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(AllocaInst *AI); + void CleanupAllocaUsers(Value *V); AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector &NewElts); - - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -360,176 +369,37 @@ } } - // Now that we have created the alloca instructions that we want to use, - // expand the getelementptr instructions to use them. 
- while (!AI->use_empty()) { - Instruction *User = cast(AI->use_back()); - if (BitCastInst *BCInst = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); - BCInst->eraseFromParent(); - continue; - } - - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - if (LoadInst *LI = dyn_cast(User)) { - Value *Insert = UndefValue::get(LI->getType()); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - continue; - } + // Now that we have created the new alloca instructions, rewrite all the + // uses of the old alloca. 
+ DeadInsts.push_back(AI); + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, ElementAllocas[i], SI); - } - SI->eraseFromParent(); - continue; - } - - GetElementPtrInst *GEPI = cast(User); - // We now know that the GEP is of the form: GEP , 0, - unsigned Idx = - (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); - - assert(Idx < ElementAllocas.size() && "Index out of range?"); - AllocaInst *AllocaToUse = ElementAllocas[Idx]; - - Value *RepValue; - if (GEPI->getNumOperands() == 3) { - // Do not insert a new getelementptr instruction with zero indices, only - // to have it optimized out later. - RepValue = AllocaToUse; - } else { - // We are indexing deeply into the structure, so we still need a - // getelement ptr instruction to finish the indexing. This may be - // expanded itself once the worklist is rerun. - // - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue( - Type::getInt32Ty(AI->getContext()))); - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), - NewArgs.end(), "", GEPI); - RepValue->takeName(GEPI); - } - - // If this GEP is to the start of the aggregate, check for memcpys. - if (Idx == 0 && GEPI->hasAllZeroIndices()) - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); - - // Move all of the users over to the new GEP. 
- GEPI->replaceAllUsesWith(RepValue); - // Delete the old GEP - GEPI->eraseFromParent(); - } + // Now erase any instructions that were made dead while rewriting the alloca. + DeleteDeadInstructions(); - // Finally, delete the Alloca instruction - AI->eraseFromParent(); NumReplaced++; } -/// isSafeElementUse - Check to see if this use is an allowed use for a -/// getelementptr instruction of an array aggregate allocation. isFirstElt -/// indicates whether Ptr is known to the start of the aggregate. -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) { - Instruction *User = cast(*I); - switch (User->getOpcode()) { - case Instruction::Load: break; - case Instruction::Store: - // Store is ok if storing INTO the pointer, not storing the pointer - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); - break; - case Instruction::GetElementPtr: { - GetElementPtrInst *GEP = cast(User); - bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1 && - (!isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero())) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - // Verify that any array subscripts are in range. - for (gep_type_iterator GEPIt = gep_type_begin(GEP), - E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { - // Ignore struct elements, no extra checking needed for these. - if (isa(*GEPIt)) - continue; - - // This GEP indexes an array. Verify that this is an in-range - // constant integer. Specifically, consider A[0][i]. We cannot know that - // the user isn't doing invalid things like allowing i to index an - // out-of-range subscript that accesses A[1]. Because of this, we have - // to reject SROA of any accesses into structs where any of the - // components are variables. 
- ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - - // Are all indices still zero? - AreAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*GEPIt)) { - if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*GEPIt)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); - } +/// DeleteDeadInstructions - Erase instructions on the DeadInsts list, +/// recursively including all their operands that become trivially dead. +void SROA::DeleteDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); + if (I == 0) + continue; + + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast(*OI)) { + // Zero out the operand and see if it becomes trivially dead. + *OI = 0; + if (isInstructionTriviallyDead(U)) + DeadInsts.push_back(U); } - - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); - if (Info.isUnsafe) return; - break; - } - case Instruction::BitCast: - if (isFirstElt) { - isSafeUseOfBitCastedAllocation(cast(User), AI, Info); - if (Info.isUnsafe) return; - break; - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - case Instruction::Call: - if (MemIntrinsic *MI = dyn_cast(User)) { - if (isFirstElt) { - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); - if (Info.isUnsafe) return; - break; - } - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - default: - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - } + + I->eraseFromParent(); } - return; // All users look ok :) } - + /// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -539,72 +409,116 @@ return true; } -/// isSafeUseOfAllocation - Check if this user is an allowed use for an -/// aggregate allocation. -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info) { - if (BitCastInst *C = dyn_cast(User)) - return isSafeUseOfBitCastedAllocation(C, AI, Info); - - if (LoadInst *LI = dyn_cast(User)) - if (!LI->isVolatile()) - return;// Loads (returning a first class aggregrate) are always rewritable - - if (StoreInst *SI = dyn_cast(User)) - if (!SI->isVolatile() && SI->getOperand(0) != AI) - return;// Store is ok if storing INTO the pointer, not storing the pointer - - GetElementPtrInst *GEPI = dyn_cast(User); - if (GEPI == 0) - return MarkUnsafe(Info); - - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to +/// performing scalar replacement of alloca AI. The results are flagged in +/// the Info parameter. Offset and ArrayOffset indicate the position within +/// AI that is referenced by this instruction. +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, AllocaInfo &Info) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); - // The GEP is not safe to transform if not of the form "GEP , 0, ". 
- if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { - return MarkUnsafe(Info); + if (BitCastInst *BC = dyn_cast(User)) { + isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + uint64_t GEPArrayOffset = ArrayOffset; + uint64_t GEPOffset = Offset; + isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); + if (!Info.isUnsafe) + isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + if (Length) + isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, + UI.getOperandNo() == 1, Info); + else + MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(User)) { + if (!LI->isVolatile()) { + const Type *LIType = LI->getType(); + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), + LIType, false, Info); + } else + MarkUnsafe(Info); + } else if (StoreInst *SI = dyn_cast(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isVolatile() && SI->getOperand(0) != I) { + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), + SIType, true, Info); + } else + MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(I)) { + Info.needsCleanup = true; + return; + } + MarkUnsafe(Info); + } else { + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } +} - ++I; - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? +/// isSafeGEP - Check if a GEP instruction can be handled for scalar +/// replacement. It is safe when all the indices are constant, in-bounds +/// references, and when the resulting offset corresponds to an element within +/// the alloca type. 
The results are flagged in the Info parameter. Upon +/// return, Offset is adjusted as specified by the GEP indices. For the +/// special case of a variable index to a 2-element array, ArrayOffset is set +/// to the array element size. +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, + uint64_t &Offset, uint64_t &ArrayOffset, + AllocaInfo &Info) { + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); + if (GEPIt == E) + return; + + // The first GEP index must be zero. + if (!isa(GEPIt.getOperand()) || + !cast(GEPIt.getOperand())->isZero()) + return MarkUnsafe(Info); + if (++GEPIt == E) + return; - bool IsAllZeroIndices = true; - // If the first index is a non-constant index into an array, see if we can // handle it as a special case. - if (const ArrayType *AT = dyn_cast(*I)) { - if (!isa(I.getOperand())) { - IsAllZeroIndices = 0; - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements in - // it, in which case we CAN promote it, but we have to canonicalize this - // out if this is the only problem. - if ((NumElements == 1 || NumElements == 2) && - AllUsersAreLoads(GEPI)) { + const Type *ArrayEltTy = 0; + if (ArrayOffset == 0 && Offset == 0) { + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (!isa(GEPIt.getOperand())) { + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements + // in it, in which case we CAN promote it, but we have to canonicalize + // this out if this is the only problem. + if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) + return MarkUnsafe(Info); Info.needsCleanup = true; - return; // Canonicalization required! 
+ ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); + ArrayEltTy = AT->getElementType(); + ++GEPIt; } - return MarkUnsafe(Info); } } - + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; I != E; ++I) { + for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa(*I)) + if (isa(*GEPIt)) continue; - - ConstantInt *IdxVal = dyn_cast(I.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - // Are all indices still zero? - IsAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*I)) { + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) + return MarkUnsafe(Info); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { // This GEP indexes an array. Verify that this is an in-range constant // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range @@ -612,147 +526,255 @@ // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*I)) { + } else { + const VectorType *VT = dyn_cast(*GEPIt); + assert(VT && "unexpected type in GEP type iterator"); if (IdxVal->getZExtValue() >= VT->getNumElements()) return MarkUnsafe(Info); } } - - // If there are any non-simple uses of this getelementptr, make sure to reject - // them. - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); + + // All the indices are safe. Now compute the offset due to this GEP and + // check if the alloca has a component element at that offset. + if (ArrayOffset == 0) { + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + } else { + // Both array elements have the same type, so it suffices to check one of + // them. 
Copy the GEP indices starting from the array index, but replace + // that variable index with a constant zero. + SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); + Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); + const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); + Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); + } + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info); +} + +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI +/// alloca or has an offset and size that corresponds to a component element +/// within it. The offset checked here may have been formed from a GEP with a +/// pointer bitcasted to a different type. +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, + uint64_t ArrayOffset, uint64_t MemSize, + const Type *MemOpType, bool isStore, + AllocaInfo &Info) { + // Check if this is a load/store of the entire alloca. + if (Offset == 0 && ArrayOffset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); + // This is safe for MemIntrinsics (where MemOpType is 0), integer types + // (which are essentially the same as the MemIntrinsics, especially with + // regard to copying padding between elements), or references using the + // aggregate type of the alloca. + if (!MemOpType || isa(MemOpType) || UsesAggregateType) { + if (!UsesAggregateType) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + } + return; + } + } + // Check if the offset/size correspond to a component within the alloca type. + const Type *T = AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize) && + (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) + return; + + return MarkUnsafe(Info); } -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory -/// intrinsic can be promoted by SROA. 
At this point, we know that the operand -/// of the memintrinsic is a pointer to the beginning of the allocation. -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info) { - // If not constant length, give up. - ConstantInt *Length = dyn_cast(MI->getLength()); - if (!Length) return MarkUnsafe(Info); - - // If not the whole aggregate, give up. - if (Length->getZExtValue() != - TD->getTypeAllocSize(AI->getType()->getElementType())) - return MarkUnsafe(Info); - - // We only know about memcpy/memset/memmove. - if (!isa(MI)) - return MarkUnsafe(Info); - - // Otherwise, we can transform it. Determine whether this is a memcpy/set - // into or out of the aggregate. - if (OpNo == 1) - Info.isMemCpyDst = true; - else { - assert(OpNo == 2); - Info.isMemCpySrc = true; +/// TypeHasComponent - Return true if T has a component type with the +/// specified offset and size. If Size is zero, do not check the size. +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { + const Type *EltTy; + uint64_t EltSize; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + unsigned EltIdx = Layout->getElementContainingOffset(Offset); + EltTy = ST->getContainedType(EltIdx); + EltSize = TD->getTypeAllocSize(EltTy); + Offset -= Layout->getElementOffset(EltIdx); + } else if (const ArrayType *AT = dyn_cast(T)) { + EltTy = AT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + Offset %= EltSize; + } else { + return false; } + if (Offset == 0 && (Size == 0 || EltSize == Size)) + return true; + // Check if the component spans multiple elements. + if (Offset + Size > EltSize) + return false; + return TypeHasComponent(EltTy, Offset, Size); } -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast -/// from an alloca are safe for SROA of that alloca. 
-void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCU = dyn_cast(UI)) { - isSafeUseOfBitCastedAllocation(BCU, AI, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); - } else if (StoreInst *SI = dyn_cast(UI)) { - if (SI->isVolatile()) - return MarkUnsafe(Info); - - // If storing the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and store through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa(SI->getOperand(0)->getType()) && - TD->getTypeAllocSize(SI->getOperand(0)->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpyDst = true; - continue; - } - return MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(UI)) { - if (LI->isVolatile()) - return MarkUnsafe(Info); +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite +/// the instruction I, which references it, to use the separate elements. +/// Offset indicates the position within AI that is referenced by this +/// instruction. +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); - // If loading the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and load through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
- if (isa(LI->getType()) && - TD->getTypeAllocSize(LI->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpySrc = true; - continue; + if (BitCastInst *BC = dyn_cast(User)) { + RewriteBitCast(BC, AI, Offset, NewElts); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + RewriteGEP(GEPI, AI, Offset, NewElts); + } else if (MemIntrinsic *MI = dyn_cast(User)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + uint64_t MemSize = Length->getZExtValue(); + if (Offset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); + } else if (LoadInst *LI = dyn_cast(User)) { + const Type *LIType = LI->getType(); + if (LIType == AI->getAllocatedType()) { + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + Value *Insert = UndefValue::get(LIType); + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Load = new LoadInst(NewElts[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + DeadInsts.push_back(LI); + } else if (isa(LIType) && + TD->getTypeAllocSize(LIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - return MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. 
- if (OnlyUsedByDbgInfoIntrinsics(BC)) { - Info.needsCleanup = true; - return; + } else if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + const Type *SIType = Val->getType(); + if (SIType == AI->getAllocatedType()) { + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, NewElts[i], SI); + } + DeadInsts.push_back(SI); + } else if (isa(SIType) && + TD->getTypeAllocSize(SIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } - else - MarkUnsafe(Info); } - else { - return MarkUnsafe(Info); - } - if (Info.isUnsafe) return; } } -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes -/// to its first element. Transform users of the cast to use the new values -/// instead. -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector &NewElts) { - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); - while (UI != UE) { - Instruction *User = cast(*UI++); - if (BitCastInst *BCU = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - if (BCU->use_empty()) BCU->eraseFromParent(); - continue; - } +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced +/// and recursively continue updating all of its uses. 
+void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + RewriteForScalarRepl(BC, AI, Offset, NewElts); + if (BC->getOperand(0) != AI) + return; - if (MemIntrinsic *MI = dyn_cast(User)) { - // This must be memcpy/memmove/memset of the entire aggregate. - // Split into one per element. - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - continue; - } + // The bitcast references the original alloca. Replace its uses with + // references to the first new element alloca. + Instruction *Val = NewElts[0]; + if (Val->getType() != BC->getDestTy()) { + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); + Val->takeName(BC); + } + BC->replaceAllUsesWith(Val); + DeadInsts.push_back(BC); +} + +/// FindElementAndOffset - Return the index of the element containing Offset +/// within the specified type, which must be either a struct or an array. +/// Sets T to the type of the element and Offset to the offset within that +/// element. +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { + unsigned Idx = 0; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + Idx = Layout->getElementContainingOffset(Offset); + T = ST->getContainedType(Idx); + Offset -= Layout->getElementOffset(Idx); + } else { + const ArrayType *AT = dyn_cast(T); + assert(AT && "unexpected type for scalar replacement"); + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = (unsigned)(Offset / EltSize); + Offset -= Idx * EltSize; + } + return Idx; +} + +/// RewriteGEP - Check if this GEP instruction moves the pointer across +/// elements of the alloca that are being split apart, and if so, rewrite +/// the GEP to be relative to the new element. 
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + uint64_t OldOffset = Offset; + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); + + const Type *T = AI->getAllocatedType(); + unsigned OldIdx = FindElementAndOffset(T, OldOffset); + if (GEPI->getOperand(0) == AI) + OldIdx = ~0U; // Force the GEP to be rewritten. + + T = AI->getAllocatedType(); + uint64_t EltOffset = Offset; + unsigned Idx = FindElementAndOffset(T, EltOffset); + + // If this GEP does not move the pointer across elements of the alloca + // being split, then it does not need to be rewritten. + if (Idx == OldIdx) + return; - if (LoadInst *LI = dyn_cast(User)) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - continue; - } - - // Otherwise it must be some other user of a gep of the first pointer. Just - // leave these alone. - continue; - } + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue(i32Ty)); + while (EltOffset != 0) { + unsigned EltIdx = FindElementAndOffset(T, EltOffset); + NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); + } + Instruction *Val = NewElts[Idx]; + if (NewArgs.size() > 1) { + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), + NewArgs.end(), "", GEPI); + Val->takeName(GEPI); + } + if (Val->getType() != GEPI->getType()) + Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); + GEPI->replaceAllUsesWith(Val); + DeadInsts.push_back(GEPI); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory.
-void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For @@ -761,28 +783,41 @@ LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (BCInst == MTI->getRawDest()) + if (Inst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(BCInst == MTI->getRawSource()); + assert(Inst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } - // Keep track of the other intrinsic argument, so it can be removed if it - // is dead when the intrinsic is replaced. - Value *PossiblyDead = OtherPtr; - // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - // It is likely that OtherPtr is a bitcast, if so, remove it. - if (BitCastInst *BC = dyn_cast(OtherPtr)) - OtherPtr = BC->getOperand(0); - // All zero GEPs are effectively bitcasts. - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) - if (GEP->hasAllZeroIndices()) - OtherPtr = GEP->getOperand(0); + + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an + // optimization, but it's also required to detect the corner case where + // both pointer operands are referencing the same memory, and where + // OtherPtr may be a bitcast or GEP that is currently being rewritten. (This + // function is only called for mem intrinsics that access the whole + // aggregate, so non-zero GEPs are not an issue here.)
+ while (1) { + if (BitCastInst *BC = dyn_cast(OtherPtr)) { + OtherPtr = BC->getOperand(0); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { + // All zero GEPs are effectively bitcasts. + if (GEP->hasAllZeroIndices()) { + OtherPtr = GEP->getOperand(0); + continue; + } + } + break; + } + // If OtherPtr has already been rewritten, this intrinsic will be dead. + if (OtherPtr == NewElts[0]) + return; if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -798,7 +833,7 @@ // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == BCInst; + bool SROADest = MI->getRawDest() == Inst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -807,12 +842,15 @@ Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - if (OtherPtr) { + if (OtherPtr == AI) { + OtherElt = NewElts[i]; + OtherEltAlign = 0; + } else if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -924,9 +962,7 @@ CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - MI->eraseFromParent(); - if (PossiblyDead) - RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); + DeadInsts.push_back(MI); } /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that @@ -937,15 +973,9 @@ // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. 
Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a store of an integer to the whole alloca, it may be a store - // to the first element. Just ignore the store in this case and normal SROA - // will handle it. - if (!isa(SrcVal->getType()) || - TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) - return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1050,7 +1080,7 @@ } } - SI->eraseFromParent(); + DeadInsts.push_back(SI); } /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to @@ -1059,16 +1089,9 @@ SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a load of the whole alloca to an integer, it may be a load - // of the first element. Just ignore the load in this case and normal SROA - // will handle it. - if (!isa(LI->getType()) || - TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) - return; - DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1139,10 +1162,9 @@ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); - LI->eraseFromParent(); + DeadInsts.push_back(LI); } - /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1192,14 +1214,10 @@ // the users are safe to transform. 
AllocaInfo Info; - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); - I != E; ++I) { - isSafeUseOfAllocation(cast(*I), AI, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " - << **I << '\n'); - return 0; - } + isSafeForScalarRepl(AI, AI, 0, 0, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << '\n'); + return 0; } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1208,7 +1226,7 @@ // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getType()->getElementType(), *TD)) + HasPadding(AI->getAllocatedType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. @@ -1245,15 +1263,15 @@ // Insert the new GEP instructions, which are properly indexed. SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0", GEPI); + Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".0",GEPI); Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); + Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".1", GEPI); // Replace all loads of the variable index GEP with loads from both // indexes and a select. 
while (!GEPI->use_empty()) { @@ -1264,22 +1282,24 @@ LI->replaceAllUsesWith(R); LI->eraseFromParent(); } - GEPI->eraseFromParent(); } - /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(AllocaInst *AI) { +void SROA::CleanupAllocaUsers(Value *V) { // At this point, we know that the end result will be SROA'd and promoted, so // we can insert ugly code if required so long as sroa+mem2reg will clean it // up. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { User *U = *UI++; - if (GetElementPtrInst *GEPI = dyn_cast(U)) + if (isa(U)) { + CleanupAllocaUsers(U); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { CleanupGEP(GEPI); - else { + CleanupAllocaUsers(GEPI); + if (GEPI->use_empty()) GEPI->eraseFromParent(); + } else { Instruction *I = cast(U); SmallVector DbgInUses; if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { @@ -1395,7 +1415,7 @@ // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1457,7 +1477,7 @@ if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); Added: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91559&view=auto ============================================================================== --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (added) +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Wed Dec 16 14:09:53 2009 @@ -0,0 +1,89 @@ +; RUN: opt < %s -scalarrepl -S | FileCheck %s +; Radar 7441282 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.int16x8_t = type { <8 x i16> } +%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } +%union..0anon = type { %struct.int16x8x2_t } + +define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { +; CHECK: @test +; CHECK-NOT: alloca +; CHECK: "alloca point" +entry: + %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] + %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] + %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] + %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] + %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] + %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] + %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> 
[#uses=1] + store <8 x i16> %tmp.0, <8 x i16>* %1 + store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr + %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] + store <8 x i16> %4, <8 x i16>* %2, align 16 + %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] + store <8 x i16> %7, <8 x i16>* %5, align 16 + %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] + %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] + %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] + %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] + %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] + %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] + store <8 x i16> %14, <8 x i16>* %15 + %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] + %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] + store <8 x i16> %16, <8 x i16>* %17 + %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] + %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] + %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) + %tmp21 = bitcast 
%struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] + %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) + %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] + %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] + %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) + br label %return + +; CHECK: store <8 x i16> +; CHECK: store <8 x i16> + +return: ; preds = %entry + ret void +} + +; Radar 7466574 +%struct._NSRange = type { i64 } + +define arm_apcscc void @test_memcpy_self() nounwind { +; CHECK: @test_memcpy_self +; CHECK-NOT: alloca +; CHECK: br i1 +entry: + %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2] + br i1 undef, label %cond.true, label %cond.false + +cond.true: ; preds = %entry + %tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] + %tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] + call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) + ret void + +cond.false: ; preds = %entry + ret void +} + +declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind From daniel at zuster.org Wed Dec 16 14:10:06 2009 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 16 Dec 2009 20:10:06 -0000 Subject: [llvm-commits] [llvm] r91560 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <200912162010.nBGKA6YP029490@zion.cs.uiuc.edu> Author: ddunbar Date: Wed Dec 16 14:10:05 2009 New Revision: 91560 URL: http://llvm.org/viewvc/llvm-project?rev=91560&view=rev Log: Reapply r91392, it was only unmasking the bug, and since TOT is still broken having it reverted does no good. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=91560&r1=91559&r2=91560&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Dec 16 14:10:05 2009 @@ -110,6 +110,46 @@ /// SelectionDAG. BumpPtrAllocator Allocator; + /// NodeOrdering - Assigns a "line number" value to each SDNode that + /// corresponds to the "line number" of the original LLVM instruction. This + /// used for turning off scheduling, because we'll forgo the normal scheduling + /// algorithm and output the instructions according to this ordering. + class NodeOrdering { + /// LineNo - The line of the instruction the node corresponds to. A value of + /// `0' means it's not assigned. + unsigned LineNo; + std::map Order; + + void operator=(const NodeOrdering&); // Do not implement. + NodeOrdering(const NodeOrdering&); // Do not implement. + public: + NodeOrdering() : LineNo(0) {} + + void add(const SDNode *Node) { + assert(LineNo && "Invalid line number!"); + Order[Node] = LineNo; + } + void remove(const SDNode *Node) { + std::map::iterator Itr = Order.find(Node); + if (Itr != Order.end()) + Order.erase(Itr); + } + void clear() { + Order.clear(); + LineNo = 1; + } + unsigned getLineNo(const SDNode *Node) { + unsigned LN = Order[Node]; + assert(LN && "Node isn't in ordering map!"); + return LN; + } + void newInst() { + ++LineNo; + } + + void dump() const; + } *Ordering; + /// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
void VerifyNode(SDNode *N); @@ -120,6 +160,9 @@ DenseSet &visited, int level, bool &printed); + void operator=(const SelectionDAG&); // Do not implement. + SelectionDAG(const SelectionDAG&); // Do not implement. + public: SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli); ~SelectionDAG(); @@ -199,6 +242,13 @@ return Root = N; } + /// NewInst - Tell the ordering object that we're processing a new + /// instruction. + void NewInst() { + if (Ordering) + Ordering->newInst(); + } + /// Combine - This iterates over the nodes in the SelectionDAG, folding /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=91560&r1=91559&r2=91560&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Wed Dec 16 14:10:05 2009 @@ -20,10 +20,16 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +cl::opt +DisableInstScheduling("disable-inst-scheduling", + cl::init(false), + cl::desc("Disable instruction scheduling")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf) { } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=91560&r1=91559&r2=91560&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ 
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Wed Dec 16 14:10:05 2009 @@ -48,6 +48,8 @@ #include using namespace llvm; +extern cl::opt DisableInstScheduling; + /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { @@ -552,6 +554,9 @@ } DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } } @@ -577,6 +582,9 @@ N->DropOperands(); DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } void SelectionDAG::DeallocateNode(SDNode *N) { @@ -588,6 +596,9 @@ N->NodeType = ISD::DELETED_NODE; NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -691,7 +702,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -708,7 +721,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } @@ -725,7 +740,9 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// VerifyNode - Sanity check the given node. Aborts if it is invalid. 
@@ -778,8 +795,13 @@ SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) : TLI(tli), FLI(fli), DW(0), EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), - getVTList(MVT::Other)), Root(getEntryNode()) { + getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, @@ -792,6 +814,7 @@ SelectionDAG::~SelectionDAG() { allnodes_clear(); + delete Ordering; } void SelectionDAG::allnodes_clear() { @@ -817,6 +840,10 @@ EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -877,14 +904,17 @@ ID.AddPointer(&Val); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -921,14 +951,17 @@ ID.AddPointer(&V); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate(); new (N) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -983,12 +1016,15 @@ ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) 
Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -998,12 +1034,15 @@ AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(FI); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1017,12 +1056,15 @@ ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1042,12 +1084,15 @@ ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1068,12 +1113,15 @@ C->AddSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) ConstantPoolSDNode(isTarget, C, VT, 
Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1082,12 +1130,15 @@ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); ID.AddPointer(MBB); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1103,6 +1154,7 @@ N = NodeAllocator.Allocate(); new (N) VTSDNode(VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1112,6 +1164,7 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1124,6 +1177,7 @@ N = NodeAllocator.Allocate(); new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1136,6 +1190,7 @@ new (N) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } return SDValue(CondCodeNodes[Cond], 0); } @@ -1228,8 +1283,10 @@ ID.AddInteger(MaskVec[i]); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. 
This memory will be "leaked" when @@ -1241,6 +1298,7 @@ new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1258,12 +1316,15 @@ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } CvtRndSatSDNode *N = NodeAllocator.Allocate(); new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1272,12 +1333,15 @@ AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1289,12 +1353,15 @@ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); ID.AddInteger(LabelID); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) LabelSDNode(Opcode, dl, Root, LabelID); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1308,12 +1375,15 @@ ID.AddPointer(BA); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) 
BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1326,13 +1396,16 @@ ID.AddPointer(V); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -2243,13 +2316,16 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2473,8 +2549,10 @@ SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); @@ -2484,6 +2562,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2891,8 +2970,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); @@ -2902,6 +2983,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG 
VerifyNode(N); #endif @@ -2968,8 +3050,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); @@ -2977,7 +3061,9 @@ N = NodeAllocator.Allocate(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } + AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -3573,12 +3659,14 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3636,12 +3724,14 @@ void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3714,6 +3804,7 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } @@ -3725,6 +3816,7 @@ new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3789,12 +3881,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) LoadSDNode(Ops, dl, 
VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3865,12 +3959,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3925,12 +4021,14 @@ void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3947,14 +4045,17 @@ ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -4020,8 +4121,10 @@ AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate(); new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); @@ -4032,6 +4135,7 @@ } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4087,8 +4191,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, 
NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } if (NumOps == 1) { N = NodeAllocator.Allocate(); new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); @@ -4119,6 +4225,7 @@ } } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4581,8 +4688,10 @@ if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(ON); return ON; + } } if (!RemoveNodeFromCSEMaps(N)) @@ -4646,6 +4755,7 @@ if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. + if (Ordering) Ordering->add(N); return N; } @@ -4784,8 +4894,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return cast(E); + } } // Allocate a new MachineSDNode. 
@@ -4807,6 +4919,7 @@ CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4843,8 +4956,10 @@ FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return E; + } } return NULL; } @@ -6011,6 +6126,9 @@ errs() << "\n\n"; } +void SelectionDAG::NodeOrdering::dump() const { +} + void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); print_details(OS, G); @@ -6151,4 +6269,3 @@ return false; return true; } - Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91560&r1=91559&r2=91560&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Dec 16 14:10:05 2009 @@ -583,6 +583,9 @@ } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { + // Tell the DAG that we're processing a new instruction. + DAG.NewInst(); + // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { From rjmccall at apple.com Wed Dec 16 14:31:50 2009 From: rjmccall at apple.com (John McCall) Date: Wed, 16 Dec 2009 20:31:50 -0000 Subject: [llvm-commits] [llvm] r91564 - /llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp Message-ID: <200912162031.nBGKVoxD030308@zion.cs.uiuc.edu> Author: rjmccall Date: Wed Dec 16 14:31:50 2009 New Revision: 91564 URL: http://llvm.org/viewvc/llvm-project?rev=91564&view=rev Log: Silence a clang warning about the deprecated (but perfectly reasonable in context) increment-of-bool idiom. 
Modified: llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp Modified: llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp?rev=91564&r1=91563&r2=91564&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/Thumb1InstrInfo.cpp Wed Dec 16 14:31:50 2009 @@ -180,7 +180,7 @@ AddDefaultPred(MIB); MIB.addReg(0); // No write back. - bool NumRegs = 0; + bool NumRegs = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { @@ -192,7 +192,7 @@ MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); - ++NumRegs; + NumRegs = true; } // It's illegal to emit pop instruction without operands. From mrs at apple.com Wed Dec 16 17:00:47 2009 From: mrs at apple.com (Mike Stump) Date: Wed, 16 Dec 2009 15:00:47 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <6a8523d60912161203r499a267atab9b42cf13ffba6c@mail.gmail.com> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> <6a8523d60912160258p2d1b3458qcf76cabb26a1a5d0@mail.gmail.com> <0804613A-3474-48FE-99E4-A06AC16D629F@apple.com> <6a8523d60912161203r499a267atab9b42cf13ffba6c@mail.gmail.com> Message-ID: On Dec 16, 2009, at 12:03 PM, Daniel Dunbar wrote: > I want a close-the-tree button. 
:-) From eli.friedman at gmail.com Wed Dec 16 17:27:31 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Wed, 16 Dec 2009 15:27:31 -0800 Subject: [llvm-commits] [llvm] r91552 - /llvm/trunk/utils/unittest/googletest/gtest.cc In-Reply-To: <200912161936.nBGJaguJ028112@zion.cs.uiuc.edu> References: <200912161936.nBGJaguJ028112@zion.cs.uiuc.edu> Message-ID: On Wed, Dec 16, 2009 at 11:36 AM, Bill Wendling wrote: > Author: void > Date: Wed Dec 16 13:36:42 2009 > New Revision: 91552 > > URL: http://llvm.org/viewvc/llvm-project?rev=91552&view=rev > Log: > Remove superfluous 'extern' variable that was causing a warning with clang. > > Modified: > llvm/trunk/utils/unittest/googletest/gtest.cc Do we care whether fixes are sent upstream for googletest? -Eli From dalej at apple.com Wed Dec 16 17:32:48 2009 From: dalej at apple.com (Dale Johannesen) Date: Wed, 16 Dec 2009 15:32:48 -0800 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: References: <200912151255.29701.ken@linux.vnet.ibm.com> <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> <200912162045.15956.ken@linux.vnet.ibm.com> Message-ID: On Dec 16, 2009, at 11:51 AM PST, Anton Korobeynikov wrote: > Hello, Ken > >> The updated version of the patch obtains isPPC64 from PPCSubTarget >> and leaves >> the PPCISD::NOP without SDNPHasChain. I don't have commit privileges. >> Thanks for reviewing the patch. > I asked Tilmann (who did ppc/linux support stuff during his GSoC) to > review these patches I saw, I won't commit in front of him.
From johnny.chen at apple.com Wed Dec 16 17:36:52 2009 From: johnny.chen at apple.com (Johnny Chen) Date: Wed, 16 Dec 2009 23:36:52 -0000 Subject: [llvm-commits] [llvm] r91571 - /llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Message-ID: <200912162336.nBGNaqrn004017@zion.cs.uiuc.edu> Author: johnny Date: Wed Dec 16 17:36:52 2009 New Revision: 91571 URL: http://llvm.org/viewvc/llvm-project?rev=91571&view=rev Log: Renamed "tCMNZ" to "tCMNz" to be consistent with other similar namings. Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=91571&r1=91570&r2=91571&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Wed Dec 16 17:36:52 2009 @@ -515,7 +515,7 @@ "cmn", "\t$lhs, $rhs", [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>, T1DataProcessing<0b1011>; -def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, +def tCMNz : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, "cmn", "\t$lhs, $rhs", [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>, T1DataProcessing<0b1011>; From collinwinter at google.com Wed Dec 16 17:48:03 2009 From: collinwinter at google.com (collinwinter at google.com) Date: Wed, 16 Dec 2009 23:48:03 +0000 Subject: [llvm-commits] [PATCH] Fix a memory leak in OpaqueType (issue180073) Message-ID: <00163630ead9316d10047ae12316@google.com> Reviewers: , Message: Please take a look. Description: This fixes a memory leak found by Google's internal heapchecker. LLVM/Clang tests pass. 
Please review this at http://codereview.appspot.com/180073 Affected files: M include/llvm/DerivedTypes.h M lib/VMCore/LLVMContextImpl.h M lib/VMCore/Type.cpp A unittests/VMCore/DerivedTypesTest.cpp From isanbard at gmail.com Wed Dec 16 17:59:59 2009 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 16 Dec 2009 15:59:59 -0800 Subject: [llvm-commits] [llvm] r91552 - /llvm/trunk/utils/unittest/googletest/gtest.cc In-Reply-To: References: <200912161936.nBGJaguJ028112@zion.cs.uiuc.edu> Message-ID: <1D581B9F-9A4A-42F5-874B-8E9C7ABBDAA0@gmail.com> On Dec 16, 2009, at 3:27 PM, Eli Friedman wrote: > On Wed, Dec 16, 2009 at 11:36 AM, Bill Wendling wrote: >> Author: void >> Date: Wed Dec 16 13:36:42 2009 >> New Revision: 91552 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91552&view=rev >> Log: >> Remove superfluous 'extern' variable that was causing a warning with clang. >> >> Modified: >> llvm/trunk/utils/unittest/googletest/gtest.cc > > Do we care whether fixes are sent upstream for googletest? > If they don't mind taking the patch, then sure. -bw From clattner at apple.com Wed Dec 16 18:16:14 2009 From: clattner at apple.com (Chris Lattner) Date: Wed, 16 Dec 2009 16:16:14 -0800 Subject: [llvm-commits] [llvm] r91549 - in /llvm/trunk/lib/CodeGen: LiveIntervalAnalysis.cpp PHIElimination.cpp PHIElimination.h In-Reply-To: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> References: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> Message-ID: On Dec 16, 2009, at 10:55 AM, Jakob Stoklund Olesen wrote: > Author: stoklund > Date: Wed Dec 16 12:55:53 2009 > New Revision: 91549 > > URL: http://llvm.org/viewvc/llvm-project?rev=91549&view=rev > Log: > Reuse lowered phi nodes. > > Tail duplication produces lots of identical phi nodes in different > basic > blocks. Teach PHIElimination to reuse the join registers when > lowering a phi > node that is identical to an already lowered node. 
This saves virtual > registers, and more importantly it avoids creating copies that the > coalescer > doesn't know how to eliminate. Hi Jakob, I don't have any objection to this patch, but would it alternatively (or also) make sense for the SSAUpdate engine in taildupe to reuse PHI nodes when they already exist, instead of always inserting new ones? Would that help with this issue? -Chris > > Teach LiveIntervalAnalysis about the phi joins with multiple uses. > > This patch significantly reduces code size produced by -pre-regalloc- > taildup. > > Modified: > llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > llvm/trunk/lib/CodeGen/PHIElimination.cpp > llvm/trunk/lib/CodeGen/PHIElimination.h > > Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=91549&r1=91548&r2=91549&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) > +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Wed Dec 16 > 12:55:53 2009 > @@ -415,19 +415,32 @@ > // first redefinition of the vreg that we have seen, go back > and change > // the live range in the PHI block to be a different value > number. > if (interval.containsOneValue()) { > - // Remove the old range that we now know has an incorrect > number. > + > VNInfo *VNI = interval.getValNumInfo(0); > - MachineInstr *Killer = vi.Kills[0]; > - SlotIndex Start = getMBBStartIdx(Killer->getParent()); > - SlotIndex End = getInstructionIndex(Killer).getDefIndex(); > - DEBUG({ > - errs() << " Removing [" << Start << "," << End << "] > from: "; > - interval.print(errs(), tri_); > - errs() << "\n"; > - }); > - interval.removeRange(Start, End); > - assert(interval.ranges.size() == 1 && > - "Newly discovered PHI interval has >1 ranges."); > + // Phi elimination may have reused the register for > multiple identical > + // phi nodes.
There will be a kill per phi. Remove the old > ranges that > + // we now know have an incorrect number. > + for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) { > + MachineInstr *Killer = vi.Kills[ki]; > + SlotIndex Start = getMBBStartIdx(Killer->getParent()); > + SlotIndex End = getInstructionIndex(Killer).getDefIndex(); > + DEBUG({ > + errs() << "\n\t\trenaming [" << Start << "," << End > << "] in: "; > + interval.print(errs(), tri_); > + }); > + interval.removeRange(Start, End); > + > + // Replace the interval with one of a NEW value number. > Note that > + // this value number isn't actually defined by an > instruction, weird > + // huh? :) > + LiveRange LR(Start, End, > + interval.getNextValue(SlotIndex(Start, true), > + 0, false, > VNInfoAllocator)); > + LR.valno->setIsPHIDef(true); > + interval.addRange(LR); > + LR.valno->addKill(End); > + } > + > MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); > VNI->addKill(indexes_->getTerminatorGap(killMBB)); > VNI->setHasPHIKill(true); > @@ -435,20 +448,6 @@ > errs() << " RESULT: "; > interval.print(errs(), tri_); > }); > - > - // Replace the interval with one of a NEW value number. > Note that this > - // value number isn't actually defined by an instruction, > weird huh? 
:) > - LiveRange LR(Start, End, > - > interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), > true), > - 0, false, VNInfoAllocator)); > - LR.valno->setIsPHIDef(true); > - DEBUG(errs() << " replace range with " << LR); > - interval.addRange(LR); > - LR.valno->addKill(End); > - DEBUG({ > - errs() << " RESULT: "; > - interval.print(errs(), tri_); > - }); > } > > // In the case of PHI elimination, each variable definition is > only > > Modified: llvm/trunk/lib/CodeGen/PHIElimination.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.cpp?rev=91549&r1=91548&r2=91549&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PHIElimination.cpp (original) > +++ llvm/trunk/lib/CodeGen/PHIElimination.cpp Wed Dec 16 12:55:53 2009 > @@ -35,6 +35,7 @@ > > STATISTIC(NumAtomic, "Number of atomic phis lowered"); > STATISTIC(NumSplits, "Number of critical edges split on demand"); > +STATISTIC(NumReused, "Number of reused lowered phis"); > > char PHIElimination::ID = 0; > static RegisterPass > @@ -78,6 +79,12 @@ > DefMI->eraseFromParent(); > } > > + // Clean up the lowered PHI instructions. > + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = > LoweredPHIs.end(); > + I != E; ++I) > + Fn.DeleteMachineInstr(I->first); > + LoweredPHIs.clear(); > + > ImpDefs.clear(); > VRegPHIUseCount.clear(); > return Changed; > @@ -168,6 +175,7 @@ > void llvm::PHIElimination::LowerAtomicPHINode( > MachineBasicBlock &MBB, > MachineBasicBlock::iterator > AfterPHIsIt) { > + ++NumAtomic; > // Unlink the PHI node from the basic block, but don't delete the > PHI yet. > MachineInstr *MPhi = MBB.remove(MBB.begin()); > > @@ -179,6 +187,7 @@ > MachineFunction &MF = *MBB.getParent(); > const TargetRegisterClass *RC = > MF.getRegInfo().getRegClass(DestReg); > unsigned IncomingReg = 0; > + bool reusedIncoming = false; // Is IncomingReg reused from an > earlier PHI? 
> > // Insert a register to register copy at the top of the current > block (but > // after any remaining phi nodes) which copies the new incoming > register > @@ -190,7 +199,18 @@ > BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), > TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg); > else { > - IncomingReg = MF.getRegInfo().createVirtualRegister(RC); > + // Can we reuse an earlier PHI node? This only happens for > critical edges, > + // typically those created by tail duplication. > + unsigned &entry = LoweredPHIs[MPhi]; > + if (entry) { > + // An identical PHI node was already lowered. Reuse the > incoming register. > + IncomingReg = entry; > + reusedIncoming = true; > + ++NumReused; > + DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << > *MPhi); > + } else { > + entry = IncomingReg = > MF.getRegInfo().createVirtualRegister(RC); > + } > TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); > } > > @@ -204,8 +224,20 @@ > MachineInstr *PHICopy = prior(AfterPHIsIt); > > if (IncomingReg) { > + LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); > + > // Increment use count of the newly created virtual register. > - LV->getVarInfo(IncomingReg).NumUses++; > + VI.NumUses++; > + > + // When we are reusing the incoming register, it may already > have been > + // killed in this block. The old kill will also have been > inserted at > + // AfterPHIsIt, so it appears before the current PHICopy. > + if (reusedIncoming) > + if (MachineInstr *OldKill = VI.findKill(&MBB)) { > + DEBUG(errs() << "Remove old kill from " << *OldKill); > + LV->removeVirtualRegisterKilled(IncomingReg, OldKill); > + DEBUG(MBB.dump()); > + } > > // Add information to LiveVariables to know that the incoming > value is > // killed. Note that because the value is defined in several > places (once > @@ -228,7 +260,7 @@ > > // Adjust the VRegPHIUseCount map to account for the removal of > this PHI node. 
> for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) > - --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i + 1).getMBB(), > + --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()- > >getNumber(), > MPhi->getOperand(i).getReg())]; > > // Now loop over all of the incoming arguments, changing them to > copy into the > @@ -266,7 +298,8 @@ > FindCopyInsertPoint(opBlock, MBB, SrcReg); > > // Insert the copy. > - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, > RC); > + if (!reusedIncoming && IncomingReg) > + TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, > RC, RC); > > // Now update live variable information if we have it. > Otherwise we're done > if (!LV) continue; > @@ -283,7 +316,7 @@ > // point later. > > // Is it used by any PHI instructions in this block? > - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, > SrcReg)] != 0; > + bool ValueIsUsed = > VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; > > // Okay, if we now know that the value is not live out of the > block, we can > // add a kill marker in this block saying that it kills the > incoming value! > @@ -293,11 +326,10 @@ > // terminator instruction at the end of the block may also use > the value. > // In this case, we should mark *it* as being the killing > block, not the > // copy. > - MachineBasicBlock::iterator KillInst = prior(InsertPos); > + MachineBasicBlock::iterator KillInst; > MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); > - if (Term != opBlock.end()) { > - if (Term->readsRegister(SrcReg)) > - KillInst = Term; > + if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { > + KillInst = Term; > > // Check that no other terminators use values. > #ifndef NDEBUG > @@ -308,7 +340,17 @@ > "they are the first terminator in a block!"); > } > #endif > + } else if (reusedIncoming || !IncomingReg) { > + // We may have to rewind a bit if we didn't insert a copy > this time. 
> + KillInst = Term; > + while (KillInst != opBlock.begin()) > + if ((--KillInst)->readsRegister(SrcReg)) > + break; > + } else { > + // We just inserted this copy. > + KillInst = prior(InsertPos); > } > + assert(KillInst->readsRegister(SrcReg) && "Cannot find kill > instruction"); > > // Finally, mark it killed. > LV->addVirtualRegisterKilled(SrcReg, KillInst); > @@ -319,9 +361,9 @@ > } > } > > - // Really delete the PHI instruction now! > - MF.DeleteMachineInstr(MPhi); > - ++NumAtomic; > + // Really delete the PHI instruction now, if it is not in the > LoweredPHIs map. > + if (reusedIncoming || !IncomingReg) > + MF.DeleteMachineInstr(MPhi); > } > > /// analyzePHINodes - Gather information about the PHI nodes in > here. In > @@ -335,7 +377,7 @@ > for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I- > >end(); > BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; + > +BBI) > for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) > - ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), > + ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()- > >getNumber(), > BBI->getOperand(i).getReg())]; > } > > @@ -408,3 +450,34 @@ > > return NMBB; > } > + > +unsigned > +PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { > + if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) > + return DenseMapInfo::getHashValue(MI); > + unsigned hash = 0; > + for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) > + hash = hash*37 + DenseMapInfo:: > + getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()- > >getNumber(), > + MI->getOperand(ni).getReg())); > + return hash; > +} > + > +bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, > + const MachineInstr > *RHS) { > + const MachineInstr *EmptyKey = getEmptyKey(); > + const MachineInstr *TombstoneKey = getTombstoneKey(); > + if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || > + LHS==TombstoneKey || RHS==TombstoneKey) > + return LHS==RHS; > + > + 
unsigned ne = LHS->getNumOperands(); > + if (ne != RHS->getNumOperands()) > + return false; > + // Ignore operand 0, the defined register. > + for (unsigned ni = 1; ni != ne; ni += 2) > + if (LHS->getOperand(ni).getReg() != RHS- > >getOperand(ni).getReg() || > + LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni > +1).getMBB()) > + return false; > + return true; > +} > > Modified: llvm/trunk/lib/CodeGen/PHIElimination.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.h?rev=91549&r1=91548&r2=91549&view=diff > > = > = > = > = > = > = > = > = > ====================================================================== > --- llvm/trunk/lib/CodeGen/PHIElimination.h (original) > +++ llvm/trunk/lib/CodeGen/PHIElimination.h Wed Dec 16 12:55:53 2009 > @@ -16,8 +16,6 @@ > #include "llvm/CodeGen/MachineFunctionPass.h" > #include "llvm/Target/TargetInstrInfo.h" > > -#include > - > namespace llvm { > > /// Lower PHI instructions to copies. > @@ -120,8 +118,8 @@ > return I; > } > > - typedef std::pair BBVRegPair; > - typedef std::map VRegPHIUse; > + typedef std::pair BBVRegPair; > + typedef DenseMap VRegPHIUse; > > VRegPHIUse VRegPHIUseCount; > PHIDefMap PHIDefs; > @@ -129,6 +127,17 @@ > > // Defs of PHI sources which are implicit_def. > SmallPtrSet ImpDefs; > + > + // Lowered PHI nodes may be reused. We provide special DenseMap > traits to > + // match PHI nodes with identical arguments. > + struct PHINodeTraits : public DenseMapInfo { > + static unsigned getHashValue(const MachineInstr *PtrVal); > + static bool isEqual(const MachineInstr *LHS, const > MachineInstr *RHS); > + }; > + > + // Map reusable lowered PHI node -> incoming join register. 
> + typedef DenseMap > LoweredPHIMap; > + LoweredPHIMap LoweredPHIs; > }; > > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From evan.cheng at apple.com Wed Dec 16 18:40:05 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 17 Dec 2009 00:40:05 -0000 Subject: [llvm-commits] [llvm] r91574 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/CellSPU/and_ops.ll Message-ID: <200912170040.nBH0e56o006674@zion.cs.uiuc.edu> Author: evancheng Date: Wed Dec 16 18:40:05 2009 New Revision: 91574 URL: http://llvm.org/viewvc/llvm-project?rev=91574&view=rev Log: Revert this dag combine change: Fold (zext (and x, cst)) -> (and (zext x), cst) DAG combiner likes to optimize expression in the other way so this would end up cause an infinite looping. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=91574&r1=91573&r2=91574&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Dec 16 18:40:05 2009 @@ -3202,19 +3202,6 @@ X, DAG.getConstant(Mask, VT)); } - // Fold (zext (and x, cst)) -> (and (zext x), cst) - if (N0.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N0.getOperand(0).getOpcode() != ISD::TRUNCATE && - N0.getOperand(0).hasOneUse()) { - APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, - N0.getOperand(0)), - DAG.getConstant(Mask, VT)); - } - // fold (zext (load x)) -> (zext (truncate 
(zextload x))) if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || Modified: llvm/trunk/test/CodeGen/CellSPU/and_ops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/and_ops.ll?rev=91574&r1=91573&r2=91574&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/CellSPU/and_ops.ll (original) +++ llvm/trunk/test/CodeGen/CellSPU/and_ops.ll Wed Dec 16 18:40:05 2009 @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep and %t1.s | count 230 +; RUN: grep and %t1.s | count 234 ; RUN: grep andc %t1.s | count 85 -; RUN: grep andi %t1.s | count 39 -; RUN: grep andhi %t1.s | count 28 -; RUN: grep andbi %t1.s | count 2 +; RUN: grep andi %t1.s | count 37 +; RUN: grep andhi %t1.s | count 30 +; RUN: grep andbi %t1.s | count 4 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" From stoklund at 2pi.dk Wed Dec 16 18:45:54 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 16 Dec 2009 16:45:54 -0800 Subject: [llvm-commits] [llvm] r91549 - in /llvm/trunk/lib/CodeGen: LiveIntervalAnalysis.cpp PHIElimination.cpp PHIElimination.h In-Reply-To: References: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> Message-ID: <4303AA18-FB58-4073-8ABF-E999349F9F27@2pi.dk> On Dec 16, 2009, at 4:16 PM, Chris Lattner wrote: > > On Dec 16, 2009, at 10:55 AM, Jakob Stoklund Olesen wrote: > >> Author: stoklund >> Date: Wed Dec 16 12:55:53 2009 >> New Revision: 91549 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91549&view=rev >> Log: >> Reuse lowered phi nodes. >> >> Tail duplication produces lots of identical phi nodes in different basic >> blocks. Teach PHIElimination to reuse the join registers when lowering a phi >> node that is identical to an already lowered node. 
This saves virtual >> registers, and more importantly it avoids creating copies that the coalescer >> doesn't know how to eliminate. > > Hi Jakob, > > I don't have any objection to this patch, but would it alternatively (or also) make sense for the SSAUpdate engine in taildupe to reuse PHI nodes when they already exist, instead of always inserting new ones? Would that help with this issue? I haven't seen that happen outside weird test cases. With this patch they will share a join register after lowering, so x = phi ... y = phi ... becomes x = mov j y = mov j This pattern can still cause a bit of coalescer confusion - it sees false interference between x and y, and cannot join them without joining one with j first. I think in most cases it will be fine. The patch handles a similar issue in the predecessor blocks. If there are multiple phi successors, each would have its own join register: j1 = mov r j2 = mov r brind ... The coalescer could not join j1 and j2 because of false interference. It had to join one with r first, and that was not always possible. With the patch, the successor phis share a join register: j = mov r brind ... If SSAUpdate produces lots of identical phis in the same block, it would probably be healthy to merge them, but if it is a freak occurrence, I don't think it matters much. /jakob -------------- next part -------------- A non-text attachment was scrubbed... 
Name: smime.p7s Type: application/pkcs7-signature Size: 1929 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/2bb30931/attachment.bin From devang.patel at gmail.com Wed Dec 16 18:47:08 2009 From: devang.patel at gmail.com (Devang Patel) Date: Wed, 16 Dec 2009 16:47:08 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: <661BF611-0C94-42FA-AAF0-77F336AFA5B1@apple.com> References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> <9528408C-04F4-48A1-B49F-1FB5B7169DD1@apple.com> <661BF611-0C94-42FA-AAF0-77F336AFA5B1@apple.com> Message-ID: <352a1fb20912161647h1f2f8d75l53af27dcda653ec1@mail.gmail.com> Victor, On Wed, Dec 16, 2009 at 11:12 AM, Victor Hernandez wrote: > Dan, > > Initially, function-localness is going to be used to verify that metadata that refers to function-local IR is only used in that function. ?I want to be able to detect inlining bugs where the metadata was incorrectly cloned. > > In the long term, I believe that function-localness will allow us to avoid iterating over the complete list of global metadata while determining if a cloned or modified instruction is used by any metadata. There is not any global list of all metadatas. There is no need to have one. It is same as constants, There is not any global list of all constants. - Devang > > Victor > > On Dec 16, 2009, at 10:05 AM, Dan Gohman wrote: > >> >> On Dec 16, 2009, at 12:13 AM, Victor Hernandez wrote: >> >>> Nick, >>> >>> Thanks for the review. 
>>> >>> On Dec 15, 2009, at 10:47 PM, Nick Lewycky wrote: >>> >>>> Victor Hernandez wrote: >>>>> Author: hernande >>>>> Date: Tue Dec 15 20:52:09 2009 >>>>> New Revision: 91497 >>>>> >>>>> URL: http://llvm.org/viewvc/llvm-project?rev=91497&view=rev >>>>> Log: >>>>> MDNodes that refer to an instruction are local to a function; in that case, explicitly keep track of the function they are local to >>>> >>>> So what's the semantic here? What if the MDNode refers to an Instruction that gets spliced from one Function to another? What happens if the MDNode is attached to an Instruction that's RAUW'd with a Constant? >>>> >>>> Is this new field copied by MetadataContextImpl::copyMD? >>> >>> The semantic is that if an MDNode is created function-local, then it will continue to be function-local even if its operands are modified to no longer refer to any function-specific IR. ?I need to add that documentation to Metadata.h. >> >> What is this concept of function localness going to be used for? >> >> Dan >> > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -- - Devang From dalej at apple.com Wed Dec 16 19:22:22 2009 From: dalej at apple.com (Dale Johannesen) Date: Wed, 16 Dec 2009 17:22:22 -0800 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: <200912162045.15956.ken@linux.vnet.ibm.com> References: <200912151255.29701.ken@linux.vnet.ibm.com> <310DA270-7D55-4D87-943B-2E1ED4EFE258@apple.com> <200912162045.15956.ken@linux.vnet.ibm.com> Message-ID: On Dec 16, 2009, at 11:45 AMPST, Ken Werner wrote: > On Tuesday 15 December 2009 07:43:49 pm you wrote: >> On Dec 15, 2009, at 3:55 AMPST, Ken Werner wrote: >>> Hi, >>> The attached patch adds support for indirect calls (through function >>> pointer) >>> according to the ABI (http://refspecs.linuxfoundation.org/ELF/ppc64/PPC- >>> elf64abi-1.9.html#FUNC-CALLS). 
The patch was made against revision >>> 91275. >> I can't speak for SVR4 ppc64, but I'm confident these 3 patches won't >> break any other target. >> >> It's preferable to get isPPC64 from PPCSubTarget, as elsewhere >> (inconsistently). Here that seems to mean an extra parameter. >> Why did you add a Chain to PPCISD::NOP? >> Do you have write access? > The upated version of the patch obtains isPPC64 from PPCSubTarget Um, no, you changed one of the existing places where it was obtained otherwise (which is fine), but not the place you added it. > and leaves > the PPCISD::NOP without SDNPHasChain. I don't have commit privileges. > Thanks for reviewing the patch. > -ken -------------- next part -------------- A non-text attachment was scrubbed... Name: ppc64-fptr.patch Type: text/x-patch Size: 8123 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091216/0e28d3af/attachment.bin -------------- next part -------------- From evan.cheng at apple.com Wed Dec 16 19:25:12 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 16 Dec 2009 17:25:12 -0800 Subject: [llvm-commits] [llvm] r91549 - in /llvm/trunk/lib/CodeGen: LiveIntervalAnalysis.cpp PHIElimination.cpp PHIElimination.h In-Reply-To: <4303AA18-FB58-4073-8ABF-E999349F9F27@2pi.dk> References: <200912161855.nBGItsR5026189@zion.cs.uiuc.edu> <4303AA18-FB58-4073-8ABF-E999349F9F27@2pi.dk> Message-ID: <2D306390-BFAB-4012-B064-EBDCA3011AAD@apple.com> Jakob, could you commit a test case? Evan On Dec 16, 2009, at 4:45 PM, Jakob Stoklund Olesen wrote: > > On Dec 16, 2009, at 4:16 PM, Chris Lattner wrote: > >> >> On Dec 16, 2009, at 10:55 AM, Jakob Stoklund Olesen wrote: >> >>> Author: stoklund >>> Date: Wed Dec 16 12:55:53 2009 >>> New Revision: 91549 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91549&view=rev >>> Log: >>> Reuse lowered phi nodes. >>> >>> Tail duplication produces lots of identical phi nodes in different basic >>> blocks. 
Teach PHIElimination to reuse the join registers when lowering a phi >>> node that is identical to an already lowered node. This saves virtual >>> registers, and more importantly it avoids creating copies that the coalescer >>> doesn't know how to eliminate. >> >> Hi Jakob, >> >> I don't have any objection to this patch, but would it alternatively (or also) make sense for the SSAUpdate engine in taildupe to reuse PHI nodes when they already exist, instead of always inserting new ones? Would that help with this issue? > > I haven't seen that happen outside weird test cases. With this patch they will share a join register after lowering, so > > x = phi ... > y = phi ... > > becomes > > x = mov j > y = mov j > > This pattern can still cause a bit of coalescer confusion - it sees false interference between x and y, and cannot join them without joining one with j first. I think in most cases it will be fine. > > The patch handles a similar issue in the predecessor blocks. If there are multiple phi successors, each would have its own join register: > > j1 = mov r > j2 = mov r > brind ... > > The coalescer could not join j1 and j2 because of false interference. It had to join one with r first, and that was not always possible. With the patch, the successor phis share a join register: > > j = mov r > brind ... > > If SSAUpdate produces lots of identical phis in the same block, it would probably be healthy to merge them, but if it is a freak occurrence, I don't think it matters much. 
> > /jakob > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From scallanan at apple.com Wed Dec 16 19:50:00 2009 From: scallanan at apple.com (Sean Callanan) Date: Thu, 17 Dec 2009 01:50:00 -0000 Subject: [llvm-commits] [llvm] r91579 - in /llvm/trunk/tools/llvm-mc: HexDisassembler.cpp HexDisassembler.h llvm-mc.cpp Message-ID: <200912170150.nBH1o09Q009342@zion.cs.uiuc.edu> Author: spyffe Date: Wed Dec 16 19:49:59 2009 New Revision: 91579 URL: http://llvm.org/viewvc/llvm-project?rev=91579&view=rev Log: Test harness for the LLVM disassembler. When invoked with -disassemble, llvm-mc now accepts lines of the form 0x00 0x00 and passes the resulting bytes to the disassembler for the chosen (or default) target, printing the result. Added: llvm/trunk/tools/llvm-mc/HexDisassembler.cpp llvm/trunk/tools/llvm-mc/HexDisassembler.h Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp Added: llvm/trunk/tools/llvm-mc/HexDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/HexDisassembler.cpp?rev=91579&view=auto ============================================================================== --- llvm/trunk/tools/llvm-mc/HexDisassembler.cpp (added) +++ llvm/trunk/tools/llvm-mc/HexDisassembler.cpp Wed Dec 16 19:49:59 2009 @@ -0,0 +1,158 @@ +//===- HexDisassembler.cpp - Disassembler for hex strings -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. 
+// +//===----------------------------------------------------------------------===// + +#include "HexDisassembler.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/raw_ostream.h" + +#include + +using namespace llvm; + +class VectorMemoryObject : public MemoryObject { +private: + const std::vector &Bytes; +public: + VectorMemoryObject(const std::vector &bytes) : + Bytes(bytes) { + } + + uint64_t getBase() const { + return 0; + } + + uint64_t getExtent() const { + return Bytes.size(); + } + + int readByte(uint64_t addr, uint8_t *byte) const { + if (addr > getExtent()) + return -1; + else + *byte = Bytes[addr]; + + return 0; + } +}; + +void printInst(const llvm::MCDisassembler &disassembler, + llvm::MCInstPrinter &instPrinter, + const std::vector &bytes) { + // Wrap the vector in a MemoryObject. + + VectorMemoryObject memoryObject(bytes); + + // Disassemble it. 
+ + MCInst inst; + uint64_t size; + + std::string verboseOStr; + llvm::raw_string_ostream verboseOS(verboseOStr); + + if (disassembler.getInstruction(inst, + size, + memoryObject, + 0, + verboseOS)) { + instPrinter.printInst(&inst); + outs() << "\n"; + } + else { + errs() << "error: invalid instruction" << "\n"; + errs() << "Diagnostic log:" << "\n"; + errs() << verboseOStr.c_str() << "\n"; + } +} + +int HexDisassembler::disassemble(const Target &target, + const std::string &tripleString, + MemoryBuffer &buffer) { + // Set up disassembler + + llvm::OwningPtr asmInfo + (target.createAsmInfo(tripleString)); + + if (!asmInfo) { + errs() << "error: no assembly info for target " << tripleString << "\n"; + return -1; + } + + llvm::OwningPtr disassembler + (target.createMCDisassembler()); + + if (!disassembler) { + errs() << "error: no disassembler for target " << tripleString << "\n"; + return -1; + } + + llvm::MCInstPrinter *instPrinter = target.createMCInstPrinter(0, + *asmInfo, + outs()); + + if (!instPrinter) { + errs() << "error: no instruction printer for target " << tripleString + << "\n"; + return -1; + } + + // Convert the input to a vector for disassembly. + + std::vector bytes; + + StringRef str = buffer.getBuffer(); + + while (!str.empty()) { + if(str.find_first_of("\n") < str.find_first_not_of(" \t\n\r")) { + printInst(*disassembler, *instPrinter, bytes); + + bytes.clear(); + } + + // Skip leading space. + str = str.substr(str.find_first_not_of(" \t\n\r")); + + // Get the current token. + size_t next = str.find_first_of(" \t\n\r"); + + if(next == (size_t)StringRef::npos) + break; + + StringRef value = str.slice(0, next); + + // Convert to a byte and add to the byte vector. 
+ unsigned byte; + if (value.getAsInteger(0, byte) || byte > 255) { + errs() << "warning: invalid input token '" << value << "' of length " + << next << "\n"; + } + else { + bytes.push_back((unsigned char)byte); + } + str = str.substr(next); + } + + if (!bytes.empty()) + printInst(*disassembler, *instPrinter, bytes); + + return 0; +} Added: llvm/trunk/tools/llvm-mc/HexDisassembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/HexDisassembler.h?rev=91579&view=auto ============================================================================== --- llvm/trunk/tools/llvm-mc/HexDisassembler.h (added) +++ llvm/trunk/tools/llvm-mc/HexDisassembler.h Wed Dec 16 19:49:59 2009 @@ -0,0 +1,34 @@ +//===- HexDisassembler.h - Disassembler for hex strings -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the disassembler of strings of bytes written in +// hexadecimal, from standard input or from a file. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXDISASSEMBLER_H +#define HEXDISASSEMBLER_H + +#include + +namespace llvm { + +class Target; +class MemoryBuffer; + +class HexDisassembler { +public: + static int disassemble(const Target &target, + const std::string &tripleString, + MemoryBuffer &buffer); +}; + +} // namespace llvm + +#endif Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=91579&r1=91578&r2=91579&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original) +++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Wed Dec 16 19:49:59 2009 @@ -32,6 +32,7 @@ #include "llvm/Target/TargetMachine.h" // FIXME. 
#include "llvm/Target/TargetSelect.h" #include "AsmParser.h" +#include "HexDisassembler.h" using namespace llvm; static cl::opt @@ -76,7 +77,8 @@ enum ActionType { AC_AsLex, - AC_Assemble + AC_Assemble, + AC_Disassemble }; static cl::opt @@ -86,6 +88,8 @@ "Lex tokens from a .s file"), clEnumValN(AC_Assemble, "assemble", "Assemble a .s file (default)"), + clEnumValN(AC_Disassemble, "disassemble", + "Disassemble strings of hex bytes"), clEnumValEnd)); static const Target *GetTarget(const char *ProgName) { @@ -281,7 +285,33 @@ delete Out; return Res; -} +} + +static int DisassembleInput(const char *ProgName) { + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (TheTarget == 0) { + errs() << ProgName << ": error: unable to get target for '" << TripleName + << "', see --version and --triple.\n"; + return 0; + } + + std::string ErrorMessage; + + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, + &ErrorMessage); + + if (Buffer == 0) { + errs() << ProgName << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "input file didn't read correctly.\n"; + return 1; + } + + return HexDisassembler::disassemble(*TheTarget, TripleName, *Buffer); +} int main(int argc, char **argv) { @@ -296,6 +326,7 @@ llvm::InitializeAllTargets(); llvm::InitializeAllAsmPrinters(); llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); @@ -305,6 +336,8 @@ return AsLexInput(argv[0]); case AC_Assemble: return AssembleInput(argv[0]); + case AC_Disassemble: + return DisassembleInput(argv[0]); } return 0; From mrs at apple.com Wed Dec 16 21:22:49 2009 From: mrs at apple.com (Mike Stump) Date: Wed, 16 Dec 2009 19:22:49 -0800 Subject: [llvm-commits] [llvm] r91337 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp In-Reply-To: <200912142149.nBELnijF018208@zion.cs.uiuc.edu> References: 
<200912142149.nBELnijF018208@zion.cs.uiuc.edu> Message-ID: <06BEB5FC-FA16-4619-86F4-A15CCC1EDCE9@apple.com> On Dec 14, 2009, at 1:49 PM, Bill Wendling wrote: > Author: void > Date: Mon Dec 14 15:49:44 2009 > New Revision: 91337 > > URL: http://llvm.org/viewvc/llvm-project?rev=91337&view=rev > Log: > The CIE says that the LSDA point in the FDE section is an "sdata4". That's fine, > but we need it to actually be 4-bytes in the FDE. This causes: Tests that now fail, but worked before: g++.old-deja/g++.mike/eh10.C execution test g++.old-deja/g++.mike/eh2.C execution test g++.old-deja/g++.mike/eh23.C execution test g++.old-deja/g++.mike/eh25.C execution test g++.old-deja/g++.mike/eh3.C execution test g++.old-deja/g++.mike/eh33.C execution test g++.old-deja/g++.mike/eh39.C execution test g++.old-deja/g++.mike/eh40.C execution test g++.old-deja/g++.mike/eh41.C execution test g++.old-deja/g++.mike/eh42.C execution test g++.old-deja/g++.mike/eh44.C execution test g++.old-deja/g++.mike/eh49.C execution test g++.old-deja/g++.mike/eh5.C execution test g++.old-deja/g++.mike/eh50.C execution test g++.old-deja/g++.mike/eh51.C execution test g++.old-deja/g++.mike/eh6.C execution test g++.old-deja/g++.robertl/eh990323-3.C execution test from the g++ EH testsuite under clang. Can you revert this until this problem is tracked down and fixed? From bob.wilson at apple.com Wed Dec 16 23:05:37 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 17 Dec 2009 05:05:37 -0000 Subject: [llvm-commits] [llvm] r91582 - /llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Message-ID: <200912170505.nBH55b2R018265@zion.cs.uiuc.edu> Author: bwilson Date: Wed Dec 16 23:05:36 2009 New Revision: 91582 URL: http://llvm.org/viewvc/llvm-project?rev=91582&view=rev Log: BIT_CONVERT nodes are used for vector types, too. 
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=91582&r1=91581&r2=91582&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Wed Dec 16 23:05:36 2009 @@ -414,12 +414,13 @@ /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. FP_EXTEND, - // BIT_CONVERT - Theis operator converts between integer and FP values, as - // if one was stored to memory as integer and the other was loaded from the - // same address (or equivalently for vector format conversions, etc). The - // source and result are required to have the same bit size (e.g. - // f32 <-> i32). This can also be used for int-to-int or fp-to-fp - // conversions, but that is a noop, deleted by getNode(). + // BIT_CONVERT - This operator converts between integer, vector and FP + // values, as if the value was stored to memory with one type and loaded + // from the same address with the other type (or equivalently for vector + // format conversions, etc). The source and result are required to have + // the same bit size (e.g. f32 <-> i32). This can also be used for + // int-to-int or fp-to-fp conversions, but that is a noop, deleted by + // getNode(). 
BIT_CONVERT, // CONVERT_RNDSAT - This operator is used to support various conversions From bob.wilson at apple.com Wed Dec 16 23:07:36 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 17 Dec 2009 05:07:36 -0000 Subject: [llvm-commits] [llvm] r91584 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <200912170507.nBH57aFW018424@zion.cs.uiuc.edu> Author: bwilson Date: Wed Dec 16 23:07:36 2009 New Revision: 91584 URL: http://llvm.org/viewvc/llvm-project?rev=91584&view=rev Log: Fix a comment grammaro. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91584&r1=91583&r2=91584&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Dec 16 23:07:36 2009 @@ -4663,8 +4663,8 @@ /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the -/// register allocator handle the assignment process. However, if the asm uses -/// features that we can't model on machineinstrs, we have SDISel do the +/// register allocator to handle the assignment process. However, if the asm +/// uses features that we can't model on machineinstrs, we have SDISel do the /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. 
From eli.friedman at gmail.com Thu Dec 17 00:07:05 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Thu, 17 Dec 2009 06:07:05 -0000 Subject: [llvm-commits] [llvm] r91587 - in /llvm/trunk: lib/VMCore/ConstantFold.cpp test/Transforms/InstCombine/constant-fold-compare.ll Message-ID: <200912170607.nBH6759V022644@zion.cs.uiuc.edu> Author: efriedma Date: Thu Dec 17 00:07:04 2009 New Revision: 91587 URL: http://llvm.org/viewvc/llvm-project?rev=91587&view=rev Log: Aggressively flip compare constant expressions where appropriate; constant folding in particular expects null to be on the RHS. Added: llvm/trunk/test/Transforms/InstCombine/constant-fold-compare.ll Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=91587&r1=91586&r2=91587&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/ConstantFold.cpp (original) +++ llvm/trunk/lib/VMCore/ConstantFold.cpp Thu Dec 17 00:07:04 2009 @@ -1839,14 +1839,16 @@ } } - if (!isa(C1) && isa(C2)) { + if ((!isa(C1) && isa(C2)) || + (C1->isNullValue() && !C2->isNullValue())) { // If C2 is a constant expr and C1 isn't, flip them around and fold the // other way if possible. + // Also, if C1 is null and C2 isn't, flip them around. switch (pred) { case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: // No change of predicate required. - return ConstantFoldCompareInstruction(Context, pred, C2, C1); + return ConstantExpr::getICmp(pred, C2, C1); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: @@ -1858,7 +1860,7 @@ case ICmpInst::ICMP_SGE: // Change the predicate as necessary to swap the operands. pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); - return ConstantFoldCompareInstruction(Context, pred, C2, C1); + return ConstantExpr::getICmp(pred, C2, C1); default: // These predicates cannot be flopped around. 
break; Added: llvm/trunk/test/Transforms/InstCombine/constant-fold-compare.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/constant-fold-compare.ll?rev=91587&view=auto ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/constant-fold-compare.ll (added) +++ llvm/trunk/test/Transforms/InstCombine/constant-fold-compare.ll Thu Dec 17 00:07:04 2009 @@ -0,0 +1,8 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" + +define i32 @a() nounwind readnone { +entry: + ret i32 zext (i1 icmp eq (i32 0, i32 ptrtoint (i32 ()* @a to i32)) to i32) +} +; CHECK: ret i32 0 From chandlerc at gmail.com Thu Dec 17 00:35:17 2009 From: chandlerc at gmail.com (Chandler Carruth) Date: Thu, 17 Dec 2009 06:35:17 -0000 Subject: [llvm-commits] [llvm] r91589 - /llvm/trunk/tools/llvm-mc/CMakeLists.txt Message-ID: <200912170635.nBH6ZHCV023610@zion.cs.uiuc.edu> Author: chandlerc Date: Thu Dec 17 00:35:17 2009 New Revision: 91589 URL: http://llvm.org/viewvc/llvm-project?rev=91589&view=rev Log: Update CMake build to include HexDisassembler.cpp. 
Modified: llvm/trunk/tools/llvm-mc/CMakeLists.txt Modified: llvm/trunk/tools/llvm-mc/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/CMakeLists.txt?rev=91589&r1=91588&r2=91589&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/CMakeLists.txt (original) +++ llvm/trunk/tools/llvm-mc/CMakeLists.txt Thu Dec 17 00:35:17 2009 @@ -4,4 +4,5 @@ llvm-mc.cpp AsmLexer.cpp AsmParser.cpp + HexDisassembler.cpp ) From foldr at codedgers.com Thu Dec 17 01:48:34 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 17 Dec 2009 07:48:34 -0000 Subject: [llvm-commits] [llvm] r91592 - /llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912170748.nBH7mZSi025802@zion.cs.uiuc.edu> Author: foldr Date: Thu Dec 17 01:48:34 2009 New Revision: 91592 URL: http://llvm.org/viewvc/llvm-project?rev=91592&view=rev Log: s/TokenizeCmdline/TokenizeCmdLine/ Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91592&r1=91591&r2=91592&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Thu Dec 17 01:48:34 2009 @@ -26,6 +26,7 @@ using namespace llvm; +namespace { //===----------------------------------------------------------------------===// /// Typedefs @@ -37,18 +38,16 @@ /// Constants // Indentation. 
-static const unsigned TabWidth = 4; -static const unsigned Indent1 = TabWidth*1; -static const unsigned Indent2 = TabWidth*2; -static const unsigned Indent3 = TabWidth*3; +const unsigned TabWidth = 4; +const unsigned Indent1 = TabWidth*1; +const unsigned Indent2 = TabWidth*2; +const unsigned Indent3 = TabWidth*3; // Default help string. -static const char * const DefaultHelpString = "NO HELP MESSAGE PROVIDED"; +const char * const DefaultHelpString = "NO HELP MESSAGE PROVIDED"; // Name for the "sink" option. -static const char * const SinkOptionName = "AutoGeneratedSinkOption"; - -namespace { +const char * const SinkOptionName = "AutoGeneratedSinkOption"; //===----------------------------------------------------------------------===// /// Helper functions @@ -1452,10 +1451,10 @@ EmitCaseStatementCallback(Callback, O), IndentLevel); } -/// TokenizeCmdline - converts from +/// TokenizeCmdLine - converts from /// "$CALL(HookName, 'Arg1', 'Arg2')/path -arg1 -arg2" to /// ["$CALL(", "HookName", "Arg1", "Arg2", ")/path", "-arg1", "-arg2"]. 
-void TokenizeCmdline(const std::string& CmdLine, StrVector& Out) { +void TokenizeCmdLine(const std::string& CmdLine, StrVector& Out) { const char* Delimiters = " \t\n\v\f\r"; enum TokenizerState { Normal, SpecialCommand, InsideSpecialCommand, InsideQuotationMarks } @@ -1642,7 +1641,7 @@ bool IsJoin, unsigned IndentLevel, raw_ostream& O) { StrVector StrVec; - TokenizeCmdline(InitPtrToString(CmdLine), StrVec); + TokenizeCmdLine(InitPtrToString(CmdLine), StrVec); if (StrVec.empty()) throw "Tool '" + ToolName + "' has empty command line!"; @@ -1828,7 +1827,7 @@ unsigned IndentLevel, raw_ostream& O) const { StrVector Out; - TokenizeCmdline(Str, Out); + TokenizeCmdLine(Str, Out); for (StrVector::const_iterator B = Out.begin(), E = Out.end(); B != E; ++B) { @@ -1962,7 +1961,7 @@ bool IsOutFileIndexCheckRequiredStr (const Init* CmdLine) { StrVector StrVec; - TokenizeCmdline(InitPtrToString(CmdLine), StrVec); + TokenizeCmdLine(InitPtrToString(CmdLine), StrVec); for (StrVector::const_iterator I = StrVec.begin(), E = StrVec.end(); I != E; ++I) { @@ -2556,7 +2555,7 @@ void onCmdLine(const std::string& Cmd) { StrVector cmds; - TokenizeCmdline(Cmd, cmds); + TokenizeCmdLine(Cmd, cmds); for (StrVector::const_iterator B = cmds.begin(), E = cmds.end(); B != E; ++B) { From foldr at codedgers.com Thu Dec 17 01:48:49 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 17 Dec 2009 07:48:49 -0000 Subject: [llvm-commits] [llvm] r91593 - /llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912170748.nBH7mnYq025820@zion.cs.uiuc.edu> Author: foldr Date: Thu Dec 17 01:48:49 2009 New Revision: 91593 URL: http://llvm.org/viewvc/llvm-project?rev=91593&view=rev Log: Refactoring, no functionality change. 
Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91593&r1=91592&r2=91593&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Thu Dec 17 01:48:49 2009 @@ -85,26 +85,19 @@ return val; } -const std::string GetOperatorName(const DagInit* D) { - return D->getOperator()->getAsString(); -} - const std::string GetOperatorName(const DagInit& D) { - return GetOperatorName(&D); + return D.getOperator()->getAsString(); } // checkNumberOfArguments - Ensure that the number of args in d is // greater than or equal to min_arguments, otherwise throw an exception. -void checkNumberOfArguments (const DagInit* d, unsigned minArgs) { - if (!d || d->getNumArgs() < minArgs) - throw GetOperatorName(d) + ": too few arguments!"; -} void checkNumberOfArguments (const DagInit& d, unsigned minArgs) { - checkNumberOfArguments(&d, minArgs); + if (d.getNumArgs() < minArgs) + throw GetOperatorName(d) + ": too few arguments!"; } // isDagEmpty - is this DAG marked with an empty marker? 
-bool isDagEmpty (const DagInit* d) { +bool isDagEmpty (const DagInit& d) { return GetOperatorName(d) == "empty_dag_marker"; } @@ -498,17 +491,36 @@ }; +template +Handler GetHandler(FunctionObject* Obj, const DagInit& Dag) { + const std::string& HandlerName = GetOperatorName(Dag); + return Obj->GetHandler(HandlerName); +} + template -void InvokeDagInitHandler(FunctionObject* Obj, Init* i) { - typedef void (FunctionObject::*Handler) (const DagInit*); +void InvokeDagInitHandler(FunctionObject* Obj, Init* I) { + typedef void (FunctionObject::*Handler) (const DagInit&); - const DagInit& property = InitPtrToDag(i); - const std::string& property_name = GetOperatorName(property); - Handler h = Obj->GetHandler(property_name); + const DagInit& Dag = InitPtrToDag(I); + Handler h = GetHandler(Obj, Dag); - ((Obj)->*(h))(&property); + ((Obj)->*(h))(Dag); } +template +void InvokeDagInitHandler(const FunctionObject* const Obj, + const Init* I, unsigned IndentLevel, raw_ostream& O) +{ + typedef void (FunctionObject::*Handler) + (const DagInit&, unsigned IndentLevel, raw_ostream& O) const; + + const DagInit& Dag = InitPtrToDag(I); + Handler h = GetHandler(Obj, Dag); + + ((Obj)->*(h))(Dag, IndentLevel, O); +} + + template typename HandlerTable::HandlerMap HandlerTable::Handlers_; @@ -520,7 +532,7 @@ /// option property list. class CollectOptionProperties; typedef void (CollectOptionProperties::* CollectOptionPropertiesHandler) -(const DagInit*); +(const DagInit&); class CollectOptionProperties : public HandlerTable @@ -554,8 +566,8 @@ /// operator() - Just forwards to the corresponding property /// handler. - void operator() (Init* i) { - InvokeDagInitHandler(this, i); + void operator() (Init* I) { + InvokeDagInitHandler(this, I); } private: @@ -563,34 +575,34 @@ /// Option property handlers -- /// Methods that handle option properties such as (help) or (hidden). 
- void onExtern (const DagInit* d) { + void onExtern (const DagInit& d) { checkNumberOfArguments(d, 0); optDesc_.setExtern(); } - void onHelp (const DagInit* d) { + void onHelp (const DagInit& d) { checkNumberOfArguments(d, 1); - optDesc_.Help = InitPtrToString(d->getArg(0)); + optDesc_.Help = InitPtrToString(d.getArg(0)); } - void onHidden (const DagInit* d) { + void onHidden (const DagInit& d) { checkNumberOfArguments(d, 0); optDesc_.setHidden(); } - void onReallyHidden (const DagInit* d) { + void onReallyHidden (const DagInit& d) { checkNumberOfArguments(d, 0); optDesc_.setReallyHidden(); } - void onCommaSeparated (const DagInit* d) { + void onCommaSeparated (const DagInit& d) { checkNumberOfArguments(d, 0); if (!optDesc_.isList()) throw "'comma_separated' is valid only on list options!"; optDesc_.setCommaSeparated(); } - void onRequired (const DagInit* d) { + void onRequired (const DagInit& d) { checkNumberOfArguments(d, 0); if (optDesc_.isOneOrMore() || optDesc_.isOptional()) throw "Only one of (required), (optional) or " @@ -598,9 +610,9 @@ optDesc_.setRequired(); } - void onInit (const DagInit* d) { + void onInit (const DagInit& d) { checkNumberOfArguments(d, 1); - Init* i = d->getArg(0); + Init* i = d.getArg(0); const std::string& str = i->getAsString(); bool correct = optDesc_.isParameter() && dynamic_cast(i); @@ -612,7 +624,7 @@ optDesc_.InitVal = i; } - void onOneOrMore (const DagInit* d) { + void onOneOrMore (const DagInit& d) { checkNumberOfArguments(d, 0); if (optDesc_.isRequired() || optDesc_.isOptional()) throw "Only one of (required), (optional) or " @@ -623,7 +635,7 @@ optDesc_.setOneOrMore(); } - void onOptional (const DagInit* d) { + void onOptional (const DagInit& d) { checkNumberOfArguments(d, 0); if (optDesc_.isRequired() || optDesc_.isOneOrMore()) throw "Only one of (required), (optional) or " @@ -634,9 +646,9 @@ optDesc_.setOptional(); } - void onMultiVal (const DagInit* d) { + void onMultiVal (const DagInit& d) { checkNumberOfArguments(d, 
1); - int val = InitPtrToInt(d->getArg(0)); + int val = InitPtrToInt(d.getArg(0)); if (val < 2) throw "Error in the 'multi_val' property: " "the value must be greater than 1!"; @@ -659,7 +671,7 @@ void operator()(const Init* i) { const DagInit& d = InitPtrToDag(i); - checkNumberOfArguments(&d, 1); + checkNumberOfArguments(d, 1); const OptionType::OptionType Type = stringToOptionType(GetOperatorName(d)); @@ -668,14 +680,14 @@ OptionDescription OD(Type, Name); if (!OD.isExtern()) - checkNumberOfArguments(&d, 2); + checkNumberOfArguments(d, 2); if (OD.isAlias()) { // Aliases store the aliased option name in the 'Help' field. OD.Help = InitPtrToString(d.getArg(1)); } else if (!OD.isExtern()) { - processOptionProperties(&d, OD); + processOptionProperties(d, OD); } OptDescs_.InsertDescription(OD); } @@ -683,12 +695,12 @@ private: /// processOptionProperties - Go through the list of option /// properties and call a corresponding handler for each. - static void processOptionProperties (const DagInit* d, OptionDescription& o) { + static void processOptionProperties (const DagInit& d, OptionDescription& o) { checkNumberOfArguments(d, 2); - DagInit::const_arg_iterator B = d->arg_begin(); + DagInit::const_arg_iterator B = d.arg_begin(); // Skip the first argument: it's always the option name. ++B; - std::for_each(B, d->arg_end(), CollectOptionProperties(o)); + std::for_each(B, d.arg_end(), CollectOptionProperties(o)); } }; @@ -749,7 +761,7 @@ class CollectToolProperties; typedef void (CollectToolProperties::* CollectToolPropertiesHandler) -(const DagInit*); +(const DagInit&); class CollectToolProperties : public HandlerTable { @@ -778,8 +790,8 @@ } } - void operator() (Init* i) { - InvokeDagInitHandler(this, i); + void operator() (Init* I) { + InvokeDagInitHandler(this, I); } private: @@ -788,23 +800,23 @@ /// Functions that extract information about tool properties from /// DAG representation. 
- void onActions (const DagInit* d) { + void onActions (const DagInit& d) { checkNumberOfArguments(d, 1); - Init* Case = d->getArg(0); + Init* Case = d.getArg(0); if (typeid(*Case) != typeid(DagInit) || - GetOperatorName(static_cast(Case)) != "case") + GetOperatorName(static_cast(*Case)) != "case") throw "The argument to (actions) should be a 'case' construct!"; toolDesc_.Actions = Case; } - void onCmdLine (const DagInit* d) { + void onCmdLine (const DagInit& d) { checkNumberOfArguments(d, 1); - toolDesc_.CmdLine = d->getArg(0); + toolDesc_.CmdLine = d.getArg(0); } - void onInLanguage (const DagInit* d) { + void onInLanguage (const DagInit& d) { checkNumberOfArguments(d, 1); - Init* arg = d->getArg(0); + Init* arg = d.getArg(0); // Find out the argument's type. if (typeid(*arg) == typeid(StringInit)) { @@ -829,22 +841,22 @@ } } - void onJoin (const DagInit* d) { + void onJoin (const DagInit& d) { checkNumberOfArguments(d, 0); toolDesc_.setJoin(); } - void onOutLanguage (const DagInit* d) { + void onOutLanguage (const DagInit& d) { checkNumberOfArguments(d, 1); - toolDesc_.OutLanguage = InitPtrToString(d->getArg(0)); + toolDesc_.OutLanguage = InitPtrToString(d.getArg(0)); } - void onOutputSuffix (const DagInit* d) { + void onOutputSuffix (const DagInit& d) { checkNumberOfArguments(d, 1); - toolDesc_.OutputSuffix = InitPtrToString(d->getArg(0)); + toolDesc_.OutputSuffix = InitPtrToString(d.getArg(0)); } - void onSink (const DagInit* d) { + void onSink (const DagInit& d) { checkNumberOfArguments(d, 0); toolDesc_.setSink(); } @@ -1032,12 +1044,12 @@ throw "Case construct handler: no corresponding action " "found for the test " + Test.getAsString() + '!'; - TestCallback(&Test, IndentLevel, (i == 1)); + TestCallback(Test, IndentLevel, (i == 1)); } else { if (dynamic_cast(arg) - && GetOperatorName(static_cast(arg)) == "case") { + && GetOperatorName(static_cast(*arg)) == "case") { // Nested 'case'. 
WalkCase(arg, TestCallback, StatementCallback, IndentLevel + Indent1); } @@ -1065,7 +1077,7 @@ ActionName == "switch_on" || ActionName == "parameter_equals" || ActionName == "element_in_list" || ActionName == "not_empty" || ActionName == "empty") { - checkNumberOfArguments(&Stmt, 1); + checkNumberOfArguments(Stmt, 1); const std::string& Name = InitPtrToString(Stmt.getArg(0)); OptionNames_.insert(Name); } @@ -1092,8 +1104,8 @@ } } - void operator()(const DagInit* Test, unsigned, bool) { - this->operator()(Test); + void operator()(const DagInit& Test, unsigned, bool) { + this->operator()(&Test); } void operator()(const Init* Statement, unsigned) { this->operator()(Statement); @@ -1124,10 +1136,10 @@ for (RecordVector::const_iterator B = Edges.begin(), E = Edges.end(); B != E; ++B) { const Record* Edge = *B; - DagInit* Weight = Edge->getValueAsDag("weight"); + DagInit& Weight = *Edge->getValueAsDag("weight"); if (!isDagEmpty(Weight)) - WalkCase(Weight, ExtractOptionNames(nonSuperfluousOptions), Id()); + WalkCase(&Weight, ExtractOptionNames(nonSuperfluousOptions), Id()); } // Check that all options in OptDescs belong to the set of @@ -1283,7 +1295,7 @@ const DagInit& d, const OptionDescriptions& OptDescs, raw_ostream& O) { - checkNumberOfArguments(&d, 1); + checkNumberOfArguments(d, 1); if (typeid(*d.getArg(0)) == typeid(ListInit)) return EmitCaseTest1ArgList(TestName, d, OptDescs, O); else @@ -1296,7 +1308,7 @@ unsigned IndentLevel, const OptionDescriptions& OptDescs, raw_ostream& O) { - checkNumberOfArguments(&d, 2); + checkNumberOfArguments(d, 2); const std::string& OptName = InitPtrToString(d.getArg(0)); const std::string& OptArg = InitPtrToString(d.getArg(1)); @@ -1347,7 +1359,7 @@ void EmitLogicalNot(const DagInit& d, unsigned IndentLevel, const OptionDescriptions& OptDescs, raw_ostream& O) { - checkNumberOfArguments(&d, 1); + checkNumberOfArguments(d, 1); const DagInit& InnerTest = InitPtrToDag(d.getArg(0)); O << "! 
("; EmitCaseTest(InnerTest, IndentLevel, OptDescs, O); @@ -1389,7 +1401,7 @@ : EmitElseIf_(EmitElseIf), OptDescs_(OptDescs), O_(O) {} - void operator()(const DagInit* Test, unsigned IndentLevel, bool FirstTest) + void operator()(const DagInit& Test, unsigned IndentLevel, bool FirstTest) { if (GetOperatorName(Test) == "default") { O_.indent(IndentLevel) << "else {\n"; @@ -1397,7 +1409,7 @@ else { O_.indent(IndentLevel) << ((!FirstTest && EmitElseIf_) ? "else if (" : "if ("); - EmitCaseTest(*Test, IndentLevel, OptDescs_, O_); + EmitCaseTest(Test, IndentLevel, OptDescs_, O_); O_ << ") {\n"; } } @@ -1418,7 +1430,7 @@ // Ignore nested 'case' DAG. if (!(dynamic_cast(Statement) && - GetOperatorName(static_cast(Statement)) == "case")) { + GetOperatorName(static_cast(*Statement)) == "case")) { if (typeid(*Statement) == typeid(ListInit)) { const ListInit& DagList = *static_cast(Statement); for (ListInit::const_iterator B = DagList.begin(), E = DagList.end(); @@ -1785,7 +1797,8 @@ /// ActionHandlingCallbackBase - Base class of EmitActionHandlersCallback and /// EmitPreprocessOptionsCallback. -struct ActionHandlingCallbackBase { +struct ActionHandlingCallbackBase +{ void onErrorDag(const DagInit& d, unsigned IndentLevel, raw_ostream& O) const @@ -1800,7 +1813,7 @@ void onWarningDag(const DagInit& d, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&d, 1); + checkNumberOfArguments(d, 1); O.indent(IndentLevel) << "llvm::errs() << \"" << InitPtrToString(d.getArg(0)) << "\";\n"; } @@ -1809,17 +1822,20 @@ /// EmitActionHandlersCallback - Emit code that handles actions. Used by /// EmitGenerateActionMethod() as an argument to EmitCaseConstructHandler(). 
+ class EmitActionHandlersCallback; + typedef void (EmitActionHandlersCallback::* EmitActionHandlersCallbackHandler) (const DagInit&, unsigned, raw_ostream&) const; -class EmitActionHandlersCallback -: public ActionHandlingCallbackBase, +class EmitActionHandlersCallback : + public ActionHandlingCallbackBase, public HandlerTable { - const OptionDescriptions& OptDescs; typedef EmitActionHandlersCallbackHandler Handler; + const OptionDescriptions& OptDescs; + /// EmitHookInvocation - Common code for hook invocation from actions. Used by /// onAppendCmd and onOutputSuffix. void EmitHookInvocation(const std::string& Str, @@ -1847,7 +1863,7 @@ void onAppendCmd (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 1); + checkNumberOfArguments(Dag, 1); this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)), "vec.push_back(", ");\n", IndentLevel, O); } @@ -1855,7 +1871,7 @@ void onForward (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 1); + checkNumberOfArguments(Dag, 1); const std::string& Name = InitPtrToString(Dag.getArg(0)); EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name), IndentLevel, "", O); @@ -1864,7 +1880,7 @@ void onForwardAs (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 2); + checkNumberOfArguments(Dag, 2); const std::string& Name = InitPtrToString(Dag.getArg(0)); const std::string& NewName = InitPtrToString(Dag.getArg(1)); EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name), @@ -1874,7 +1890,7 @@ void onForwardValue (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 1); + checkNumberOfArguments(Dag, 1); const std::string& Name = InitPtrToString(Dag.getArg(0)); const OptionDescription& D = OptDescs.FindListOrParameter(Name); @@ -1892,7 +1908,7 @@ void onForwardTransformedValue (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - 
checkNumberOfArguments(&Dag, 2); + checkNumberOfArguments(Dag, 2); const std::string& Name = InitPtrToString(Dag.getArg(0)); const std::string& Hook = InitPtrToString(Dag.getArg(1)); const OptionDescription& D = OptDescs.FindListOrParameter(Name); @@ -1905,7 +1921,7 @@ void onOutputSuffix (const DagInit& Dag, unsigned IndentLevel, raw_ostream& O) const { - checkNumberOfArguments(&Dag, 1); + checkNumberOfArguments(Dag, 1); this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)), "output_suffix = ", ";\n", IndentLevel, O); } @@ -1948,14 +1964,10 @@ } } - void operator()(const Init* Statement, + void operator()(const Init* I, unsigned IndentLevel, raw_ostream& O) const { - const DagInit& Dag = InitPtrToDag(Statement); - const std::string& ActionName = GetOperatorName(Dag); - Handler h = GetHandler(ActionName); - - ((this)->*(h))(Dag, IndentLevel, O); + InvokeDagInitHandler(this, I, IndentLevel, O); } }; @@ -2279,11 +2291,25 @@ /// EmitPreprocessOptionsCallback - Helper function passed to /// EmitCaseConstructHandler() by EmitPreprocessOptions(). 
-class EmitPreprocessOptionsCallback : ActionHandlingCallbackBase { + +class EmitPreprocessOptionsCallback; + +typedef void +(EmitPreprocessOptionsCallback::* EmitPreprocessOptionsCallbackHandler) +(const DagInit&, unsigned, raw_ostream&) const; + +class EmitPreprocessOptionsCallback : + public ActionHandlingCallbackBase, + public HandlerTable +{ + typedef EmitPreprocessOptionsCallbackHandler Handler; + const OptionDescriptions& OptDescs_; - void onUnsetOption(Init* i, unsigned IndentLevel, raw_ostream& O) { - const std::string& OptName = InitPtrToString(i); + void onUnsetOptionStr(const Init* I, + unsigned IndentLevel, raw_ostream& O) const + { + const std::string& OptName = InitPtrToString(I); const OptionDescription& OptDesc = OptDescs_.FindOption(OptName); if (OptDesc.isSwitch()) { @@ -2300,45 +2326,48 @@ } } - void processDag(const Init* I, unsigned IndentLevel, raw_ostream& O) + void onUnsetOptionList(const ListInit& L, + unsigned IndentLevel, raw_ostream& O) const { - const DagInit& d = InitPtrToDag(I); - const std::string& OpName = GetOperatorName(d); + for (ListInit::const_iterator B = L.begin(), E = L.end(); B != E; ++B) + this->onUnsetOptionStr(*B, IndentLevel, O); + } - if (OpName == "warning") { - this->onWarningDag(d, IndentLevel, O); - } - else if (OpName == "error") { - this->onWarningDag(d, IndentLevel, O); - } - else if (OpName == "unset_option") { - checkNumberOfArguments(&d, 1); - Init* I = d.getArg(0); - if (typeid(*I) == typeid(ListInit)) { - const ListInit& DagList = *static_cast(I); - for (ListInit::const_iterator B = DagList.begin(), E = DagList.end(); - B != E; ++B) - this->onUnsetOption(*B, IndentLevel, O); - } - else { - this->onUnsetOption(I, IndentLevel, O); - } + void onUnsetOption(const DagInit& d, + unsigned IndentLevel, raw_ostream& O) const + { + checkNumberOfArguments(d, 1); + Init* I = d.getArg(0); + + if (typeid(*I) == typeid(ListInit)) { + const ListInit& L = *static_cast(I); + this->onUnsetOptionList(L, IndentLevel, O); } 
else { - throw "Unknown operator in the option preprocessor: '" + OpName + "'!" - "\nOnly 'warning', 'error' and 'unset_option' are allowed."; + this->onUnsetOptionStr(I, IndentLevel, O); } } public: - void operator()(const Init* I, unsigned IndentLevel, raw_ostream& O) { - this->processDag(I, IndentLevel, O); - } - EmitPreprocessOptionsCallback(const OptionDescriptions& OptDescs) : OptDescs_(OptDescs) - {} + { + if (!staticMembersInitialized_) { + AddHandler("error", &EmitPreprocessOptionsCallback::onErrorDag); + AddHandler("warning", &EmitPreprocessOptionsCallback::onWarningDag); + AddHandler("unset_option", &EmitPreprocessOptionsCallback::onUnsetOption); + + staticMembersInitialized_ = true; + } + } + + void operator()(const Init* I, + unsigned IndentLevel, raw_ostream& O) const + { + InvokeDagInitHandler(this, I, IndentLevel, O); + } + }; /// EmitPreprocessOptions - Emit the PreprocessOptionsLocal() function. @@ -2406,7 +2435,7 @@ O.indent(IndentLevel) << "ret -= "; } else if (OpName == "error") { - checkNumberOfArguments(&d, 1); + checkNumberOfArguments(d, 1); O.indent(IndentLevel) << "throw std::runtime_error(\"" << InitPtrToString(d.getArg(0)) << "\");\n"; @@ -2456,10 +2485,10 @@ E = EdgeVector.end(); B != E; ++B) { const Record* Edge = *B; const std::string& NodeB = Edge->getValueAsString("b"); - DagInit* Weight = Edge->getValueAsDag("weight"); + DagInit& Weight = *Edge->getValueAsDag("weight"); if (!isDagEmpty(Weight)) - EmitEdgeClass(i, NodeB, Weight, OptDescs, O); + EmitEdgeClass(i, NodeB, &Weight, OptDescs, O); ++i; } } @@ -2486,7 +2515,7 @@ const Record* Edge = *B; const std::string& NodeA = Edge->getValueAsString("a"); const std::string& NodeB = Edge->getValueAsString("b"); - DagInit* Weight = Edge->getValueAsDag("weight"); + DagInit& Weight = *Edge->getValueAsDag("weight"); O.indent(Indent1) << "G.insertEdge(\"" << NodeA << "\", "; From foldr at codedgers.com Thu Dec 17 01:49:16 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 17 Dec 
2009 07:49:16 -0000 Subject: [llvm-commits] [llvm] r91594 - in /llvm/trunk: include/llvm/CompilerDriver/Common.td test/LLVMC/OptionPreprocessor.td tools/llvmc/doc/LLVMC-Reference.rst tools/llvmc/plugins/Base/Base.td.in utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: <200912170749.nBH7nGrR025852@zion.cs.uiuc.edu> Author: foldr Date: Thu Dec 17 01:49:16 2009 New Revision: 91594 URL: http://llvm.org/viewvc/llvm-project?rev=91594&view=rev Log: Add a 'set_option' action for use in OptionPreprocessor. Modified: llvm/trunk/include/llvm/CompilerDriver/Common.td llvm/trunk/test/LLVMC/OptionPreprocessor.td llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst llvm/trunk/tools/llvmc/plugins/Base/Base.td.in llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/include/llvm/CompilerDriver/Common.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CompilerDriver/Common.td?rev=91594&r1=91593&r2=91594&view=diff ============================================================================== --- llvm/trunk/include/llvm/CompilerDriver/Common.td (original) +++ llvm/trunk/include/llvm/CompilerDriver/Common.td Thu Dec 17 01:49:16 2009 @@ -84,6 +84,7 @@ def unpack_values; def warning; def error; +def set_option; def unset_option; // Increase/decrease the edge weight. 
Modified: llvm/trunk/test/LLVMC/OptionPreprocessor.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OptionPreprocessor.td?rev=91594&r1=91593&r2=91594&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OptionPreprocessor.td (original) +++ llvm/trunk/test/LLVMC/OptionPreprocessor.td Thu Dec 17 01:49:16 2009 @@ -11,20 +11,30 @@ (switch_option "baz", (help "dummy")), (parameter_option "foo_p", (help "dummy")), (parameter_option "bar_p", (help "dummy")), -(parameter_option "baz_p", (help "dummy")) +(parameter_option "baz_p", (help "dummy")), +(parameter_list_option "foo_l", (help "dummy")) ]>; def Preprocess : OptionPreprocessor< (case // CHECK: W1 + // CHECK: foo = false; + // CHECK: foo_p = ""; + // CHECK: foo_l.clear(); (and (switch_on "foo"), (any_switch_on ["bar", "baz"])), - (warning "W1"), + [(warning "W1"), (unset_option "foo"), + (unset_option "foo_p"), (unset_option "foo_l")], // CHECK: W2 + // CHECK: foo = true; + // CHECK: foo_p = "asdf"; (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])), - (warning "W2"), + [(warning "W2"), (set_option "foo"), (set_option "foo_p", "asdf")], // CHECK: W3 + // CHECK: foo = true; + // CHECK: bar = true; + // CHECK: baz = true; (and (empty ["foo_p", "bar_p"]), (any_not_empty ["baz_p"])), - (warning "W3")) + [(warning "W3"), (set_option ["foo", "bar", "baz"])]) >; // Shut up warnings... 
@@ -38,7 +48,8 @@ (switch_on "baz"), (error), (not_empty "foo_p"), (error), (not_empty "bar_p"), (error), - (not_empty "baz_p"), (error))) + (not_empty "baz_p"), (error), + (not_empty "foo_l"), (error))) ]>; def Graph : CompilationGraph<[Edge<"root", "dummy">]>; Modified: llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst?rev=91594&r1=91593&r2=91594&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst (original) +++ llvm/trunk/tools/llvmc/doc/LLVMC-Reference.rst Thu Dec 17 01:49:16 2009 @@ -656,10 +656,10 @@ $ llvmc hello.cpp llvmc: Unknown suffix: cpp -The language map entries should be added only for tools that are -linked with the root node. Since tools are not allowed to have -multiple output languages, for nodes "inside" the graph the input and -output languages should match. This is enforced at compile-time. +The language map entries are needed only for the tools that are linked from the +root node. Since a tool can't have multiple output languages, for inner nodes of +the graph the input and output languages should match. This is enforced at +compile-time. Option preprocessor =================== @@ -672,24 +672,31 @@ The ``OptionPreprocessor`` feature is reserved specially for these occasions. Example (adapted from the built-in Base plugin):: - def Preprocess : OptionPreprocessor< - (case (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])), - [(unset_option ["O0", "O1", "O2"]), - (warning "Multiple -O options specified, defaulted to -O3.")], - (and (switch_on "O2"), (any_switch_on ["O0", "O1"])), - (unset_option ["O0", "O1"]), - (and (switch_on "O1"), (switch_on "O0")), - (unset_option "O0")) - >; -Here, ``OptionPreprocessor`` is used to unset all spurious optimization options -(so that they are not forwarded to the compiler). 
+ def Preprocess : OptionPreprocessor< + (case (not (any_switch_on ["O0", "O1", "O2", "O3"])), + (set_option "O2"), + (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])), + (unset_option ["O0", "O1", "O2"]), + (and (switch_on "O2"), (any_switch_on ["O0", "O1"])), + (unset_option ["O0", "O1"]), + (and (switch_on "O1"), (switch_on "O0")), + (unset_option "O0")) + >; + +Here, ``OptionPreprocessor`` is used to unset all spurious ``-O`` options so +that they are not forwarded to the compiler. If no optimization options are +specified, ``-O2`` is enabled. ``OptionPreprocessor`` is basically a single big ``case`` expression, which is evaluated only once right after the plugin is loaded. The only allowed actions -in ``OptionPreprocessor`` are ``error``, ``warning`` and a special action -``unset_option``, which, as the name suggests, unsets a given option. For -convenience, ``unset_option`` also works on lists. +in ``OptionPreprocessor`` are ``error``, ``warning`` and two special actions: +``unset_option`` and ``set_option``. As their names suggest, they can be used to +set or unset a given option. To set a parameter option with ``set_option``, use +the two-argument form: ``(set_option "parameter", "value")``. For convenience, +``set_option`` and ``unset_option`` also work on lists (that is, instead of +``[(unset_option "A"), (unset_option "B")]`` you can use ``(unset_option ["A", +"B"])``). More advanced topics Modified: llvm/trunk/tools/llvmc/plugins/Base/Base.td.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvmc/plugins/Base/Base.td.in?rev=91594&r1=91593&r2=91594&view=diff ============================================================================== --- llvm/trunk/tools/llvmc/plugins/Base/Base.td.in (original) +++ llvm/trunk/tools/llvmc/plugins/Base/Base.td.in Thu Dec 17 01:49:16 2009 @@ -91,7 +91,9 @@ // Option preprocessor. 
def Preprocess : OptionPreprocessor< -(case (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])), +(case (not (any_switch_on ["O0", "O1", "O2", "O3"])), + (set_option "O2"), + (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])), (unset_option ["O0", "O1", "O2"]), (and (switch_on "O2"), (any_switch_on ["O0", "O1"])), (unset_option ["O0", "O1"]), Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91594&r1=91593&r2=91594&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Thu Dec 17 01:49:16 2009 @@ -2303,11 +2303,32 @@ public HandlerTable { typedef EmitPreprocessOptionsCallbackHandler Handler; + typedef void + (EmitPreprocessOptionsCallback::* HandlerImpl) + (const Init*, unsigned, raw_ostream&) const; const OptionDescriptions& OptDescs_; - void onUnsetOptionStr(const Init* I, - unsigned IndentLevel, raw_ostream& O) const + void onListOrDag(HandlerImpl h, + const DagInit& d, unsigned IndentLevel, raw_ostream& O) const + { + checkNumberOfArguments(d, 1); + const Init* I = d.getArg(0); + + // If I is a list, apply h to each element. + if (typeid(*I) == typeid(ListInit)) { + const ListInit& L = *static_cast(I); + for (ListInit::const_iterator B = L.begin(), E = L.end(); B != E; ++B) + ((this)->*(h))(*B, IndentLevel, O); + } + // Otherwise, apply h to I. 
+ else { + ((this)->*(h))(I, IndentLevel, O); + } + } + + void onUnsetOptionImpl(const Init* I, + unsigned IndentLevel, raw_ostream& O) const { const std::string& OptName = InitPtrToString(I); const OptionDescription& OptDesc = OptDescs_.FindOption(OptName); @@ -2326,26 +2347,52 @@ } } - void onUnsetOptionList(const ListInit& L, - unsigned IndentLevel, raw_ostream& O) const + void onUnsetOption(const DagInit& d, + unsigned IndentLevel, raw_ostream& O) const { - for (ListInit::const_iterator B = L.begin(), E = L.end(); B != E; ++B) - this->onUnsetOptionStr(*B, IndentLevel, O); + this->onListOrDag(&EmitPreprocessOptionsCallback::onUnsetOptionImpl, + d, IndentLevel, O); } - void onUnsetOption(const DagInit& d, - unsigned IndentLevel, raw_ostream& O) const + void onSetParameter(const DagInit& d, + unsigned IndentLevel, raw_ostream& O) const { + checkNumberOfArguments(d, 2); + const std::string& OptName = InitPtrToString(d.getArg(0)); + const std::string& Value = InitPtrToString(d.getArg(1)); + const OptionDescription& OptDesc = OptDescs_.FindOption(OptName); + + if (OptDesc.isParameter()) + O.indent(IndentLevel) << OptDesc.GenVariableName() + << " = \"" << Value << "\";\n"; + else + throw "Two-argument 'set_option' " + "can be only applied to parameter options!"; + } + + void onSetSwitch(const Init* I, + unsigned IndentLevel, raw_ostream& O) const { + const std::string& OptName = InitPtrToString(I); + const OptionDescription& OptDesc = OptDescs_.FindOption(OptName); + + if (OptDesc.isSwitch()) + O.indent(IndentLevel) << OptDesc.GenVariableName() << " = true;\n"; + else + throw "One-argument 'set_option' can be only applied to switch options!"; + } + + void onSetOption(const DagInit& d, + unsigned IndentLevel, raw_ostream& O) const { checkNumberOfArguments(d, 1); - Init* I = d.getArg(0); - if (typeid(*I) == typeid(ListInit)) { - const ListInit& L = *static_cast(I); - this->onUnsetOptionList(L, IndentLevel, O); - } - else { - this->onUnsetOptionStr(I, IndentLevel, O); - 
} + // Two arguments: (set_option "parameter", "value") + if (d.getNumArgs() > 1) + this->onSetParameter(d, IndentLevel, O); + // One argument: (set_option "switch") + // or (set_option ["switch1", "switch2", ...]) + else + this->onListOrDag(&EmitPreprocessOptionsCallback::onSetSwitch, + d, IndentLevel, O); } public: @@ -2357,6 +2404,7 @@ AddHandler("error", &EmitPreprocessOptionsCallback::onErrorDag); AddHandler("warning", &EmitPreprocessOptionsCallback::onWarningDag); AddHandler("unset_option", &EmitPreprocessOptionsCallback::onUnsetOption); + AddHandler("set_option", &EmitPreprocessOptionsCallback::onSetOption); staticMembersInitialized_ = true; } From foldr at codedgers.com Thu Dec 17 01:49:26 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Thu, 17 Dec 2009 07:49:26 -0000 Subject: [llvm-commits] [llvm] r91595 - /llvm/trunk/docs/CompilerDriver.html Message-ID: <200912170749.nBH7nQoj025867@zion.cs.uiuc.edu> Author: foldr Date: Thu Dec 17 01:49:26 2009 New Revision: 91595 URL: http://llvm.org/viewvc/llvm-project?rev=91595&view=rev Log: Regenerate. Modified: llvm/trunk/docs/CompilerDriver.html Modified: llvm/trunk/docs/CompilerDriver.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CompilerDriver.html?rev=91595&r1=91594&r2=91595&view=diff ============================================================================== --- llvm/trunk/docs/CompilerDriver.html (original) +++ llvm/trunk/docs/CompilerDriver.html Thu Dec 17 01:49:26 2009 @@ -334,8 +334,8 @@ only for list options in conjunction with multi_val; for ordinary lists it is synonymous with required. Incompatible with required and zero_or_one. -
  • zero_or_one - the option can be specified zero or one times. Useful -only for list options in conjunction with multi_val. Incompatible with +
  • optional - the option can be specified zero or one times. Useful only +for list options in conjunction with multi_val. Incompatible with required and one_or_more.
  • hidden - the description of this option will not appear in the --help output (but will appear in the --help-hidden @@ -350,13 +350,14 @@
  • multi_val n - this option takes n arguments (can be useful in some special cases). Usage example: (parameter_list_option "foo", (multi_val 3)); the command-line syntax is '-foo a b c'. Only list options can have -this attribute; you can, however, use the one_or_more, zero_or_one +this attribute; you can, however, use the one_or_more, optional and required properties.
  • init - this option has a default value, either a string (if it is a -parameter), or a boolean (if it is a switch; boolean constants are called -true and false). List options can't have this attribute. Usage -examples: (switch_option "foo", (init true)); (prefix_option "bar", -(init "baz")).
  • +parameter), or a boolean (if it is a switch; as in C++, boolean constants +are called true and false). List options can't have init +attribute. +Usage examples: (switch_option "foo", (init true)); (prefix_option +"bar", (init "baz")).
  • extern - this option is defined in some other plugin, see below.
  • @@ -604,10 +605,10 @@ $ llvmc hello.cpp llvmc: Unknown suffix: cpp -

    The language map entries should be added only for tools that are -linked with the root node. Since tools are not allowed to have -multiple output languages, for nodes "inside" the graph the input and -output languages should match. This is enforced at compile-time.

    +

    The language map entries are needed only for the tools that are linked from the +root node. Since a tool can't have multiple output languages, for inner nodes of +the graph the input and output languages should match. This is enforced at +compile-time.

    Option preprocessor

    @@ -619,22 +620,28 @@ occasions. Example (adapted from the built-in Base plugin):

     def Preprocess : OptionPreprocessor<
    -(case (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
    -           [(unset_option ["O0", "O1", "O2"]),
    -            (warning "Multiple -O options specified, defaulted to -O3.")],
    +(case (not (any_switch_on ["O0", "O1", "O2", "O3"])),
    +           (set_option "O2"),
    +      (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
    +           (unset_option ["O0", "O1", "O2"]),
           (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
                (unset_option ["O0", "O1"]),
           (and (switch_on "O1"), (switch_on "O0")),
                (unset_option "O0"))
     >;
     
    -

    Here, OptionPreprocessor is used to unset all spurious optimization options -(so that they are not forwarded to the compiler).

    +

    Here, OptionPreprocessor is used to unset all spurious -O options so +that they are not forwarded to the compiler. If no optimization options are +specified, -O2 is enabled.

    OptionPreprocessor is basically a single big case expression, which is evaluated only once right after the plugin is loaded. The only allowed actions -in OptionPreprocessor are error, warning and a special action -unset_option, which, as the name suggests, unsets a given option. For -convenience, unset_option also works on lists.

    +in OptionPreprocessor are error, warning and two special actions: +unset_option and set_option. As their names suggest, they can be used to +set or unset a given option. To set a parameter option with set_option, use +the two-argument form: (set_option "parameter", "value"). For convenience, +set_option and unset_option also work on lists (that is, instead of +[(unset_option "A"), (unset_option "B")] you can use (unset_option ["A", +"B"])).

    More advanced topics

    From clattner at apple.com Thu Dec 17 02:04:02 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 00:04:02 -0800 Subject: [llvm-commits] [Review] Preparing for non-power-of-2 machine value types in X86ISelLowering and LegalizeDAG In-Reply-To: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> References: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> Message-ID: <8FECBCA3-DE6F-42E4-B067-7E234A31FBFF@apple.com> On Dec 16, 2009, at 7:33 AM, Ken Dyck wrote: > The attached patches prepare for the introduction of non-power-of-2 > machine value types (as recently discussed [1]). They contain no > functional changes. They merely eliminate assumptions that > incrementing/decrementing a SimpleValueType doubles/halves its size and > that all non-power-of-2 types are extended. The change in LowerEXTRACT_VECTOR_ELT can be simplified: just force i32 there, that is the only valid valuetype for PEXTRW. The legalize change looks reasonable to me, but please change the for loop to be a while loop (I think it will be clearer) and make it be a method on EVT instead of inline in the legalizer. -Chris From clattner at apple.com Thu Dec 17 02:06:03 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 00:06:03 -0800 Subject: [llvm-commits] [llvm] r91497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/Metadata.cpp In-Reply-To: <30E02A8D-2774-41E9-B029-660D702718AE@apple.com> References: <200912160252.nBG2q9FI001712@zion.cs.uiuc.edu> <4B288291.3090704@mxc.ca> <30E02A8D-2774-41E9-B029-660D702718AE@apple.com> Message-ID: <17FF84E5-8CBA-4136-AD38-2D050F774DAC@apple.com> On Dec 16, 2009, at 10:56 AM, Devang Patel wrote: >>> >>> Is this new field copied by MetadataContextImpl::copyMD? >> >> The semantic is that if an MDNode is created function-local, then it will continue to be function-local even if its operands are modified to no longer refer to any function-specific IR. > > That'll break MDNode unique-ness. 
We don't guarantee MDNode uniqueness, we just endeavor to preserve it when it isn't "too hard". > Adding Function * in MDNode increases size of nodes and I anticipate that vast majority of nodes are not function local. A better approach is to let utility function isFunctionLocal() iterate element and return respective Function *, if one or more elements are instructions. I agree. MDNode should just have a *bit* that indicates whether it is function local (stored in SubclassData) and have a (slow) method that gets the Function* it is in by (recursively) walking the operands. This method won't be called by much more than the verifier. -Chris -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091217/b8804445/attachment.html From evan.cheng at apple.com Thu Dec 17 03:39:49 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 17 Dec 2009 09:39:49 -0000 Subject: [llvm-commits] [llvm] r91598 - in /llvm/trunk: include/llvm/Analysis/IVUsers.h include/llvm/Analysis/LoopInfo.h lib/Analysis/IVUsers.cpp lib/CodeGen/LLVMTargetMachine.cpp lib/Transforms/Scalar/CodeGenPrepare.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912170939.nBH9dnUc011047@zion.cs.uiuc.edu> Author: evancheng Date: Thu Dec 17 03:39:49 2009 New Revision: 91598 URL: http://llvm.org/viewvc/llvm-project?rev=91598&view=rev Log: Revert 91280-91283, 91286-91289, 91291, 91293, 91295-91296. It apparently introduced a non-deterministic behavior in the optimizer somewhere. 
Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h llvm/trunk/include/llvm/Analysis/LoopInfo.h llvm/trunk/lib/Analysis/IVUsers.cpp llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/IVUsers.h?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/IVUsers.h (original) +++ llvm/trunk/include/llvm/Analysis/IVUsers.h Thu Dec 17 03:39:49 2009 @@ -175,11 +175,11 @@ ScalarEvolution *SE; SmallPtrSet Processed; +public: /// IVUses - A list of all tracked IV uses of induction variable expressions /// we are interested in. ilist IVUses; -public: /// IVUsesByStride - A mapping from the strides in StrideOrder to the /// uses in IVUses. std::map IVUsesByStride; Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopInfo.h?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/LoopInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/LoopInfo.h Thu Dec 17 03:39:49 2009 @@ -976,6 +976,13 @@ void removeBlock(BasicBlock *BB) { LI.removeBlock(BB); } + + static bool isNotAlreadyContainedIn(const Loop *SubLoop, + const Loop *ParentLoop) { + return + LoopInfoBase::isNotAlreadyContainedIn(SubLoop, + ParentLoop); + } }; Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Thu Dec 17 03:39:49 2009 @@ -53,7 +53,7 @@ if 
(newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L->getHeader())) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -307,7 +307,6 @@ for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) AddUsersIfInteresting(I); - Processed.clear(); return false; } @@ -370,7 +369,7 @@ void IVUsers::releaseMemory() { IVUsesByStride.clear(); StrideOrder.clear(); - IVUses.clear(); + Processed.clear(); } void IVStrideUse::deleted() { Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original) +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Thu Dec 17 03:39:49 2009 @@ -31,6 +31,10 @@ bool EnableFastISel; } +static cl::opt X1("x1"); +static cl::opt X2("x2"); +static cl::opt X3("x3"); +static cl::opt X4("x4"); static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -239,6 +243,11 @@ PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); } + if (X1) + PM.add(createPrintFunctionPass("\n\n" + "*** Before LSR ***\n", + &errs())); + // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -246,6 +255,11 @@ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs())); } + if (X2) + PM.add(createPrintFunctionPass("\n\n" + "*** After LSR ***\n", + &errs())); + // Turn exception handling constructs into something the code generators can // handle. switch (getMCAsmInfo()->getExceptionHandlingType()) @@ -268,9 +282,19 @@ // Make sure that no unreachable blocks are instruction selected. 
PM.add(createUnreachableBlockEliminationPass()); + if (X3) + PM.add(createPrintFunctionPass("\n\n" + "*** Before CGP ***\n", + &errs())); + if (OptLevel != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); + if (X4) + PM.add(createPrintFunctionPass("\n\n" + "*** After CGP ***\n", + &errs())); + PM.add(createStackProtectorPass(getTargetLowering())); if (PrintISelInput) Modified: llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp Thu Dec 17 03:39:49 2009 @@ -48,7 +48,7 @@ /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; - ProfileInfo *PI; + ProfileInfo *PFI; /// BackEdges - Keep a set of all the loop back edges. /// @@ -99,7 +99,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; - PI = getAnalysisIfAvailable(); + PFI = getAnalysisIfAvailable(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -288,9 +288,9 @@ // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (PI) { - PI->replaceAllUses(BB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); + if (PFI) { + PFI->replaceAllUses(BB, DestBB); + PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); @@ -368,9 +368,9 @@ // If we found a workable predecessor, change TI to branch to Succ. 
if (FoundMatch) { - ProfileInfo *PI = P->getAnalysisIfAvailable(); - if (PI) - PI->splitEdge(TIBB, Dest, Pred); + ProfileInfo *PFI = P->getAnalysisIfAvailable(); + if (PFI) + PFI->splitEdge(TIBB, Dest, Pred); Dest->removePredecessor(TIBB); TI->setSuccessor(SuccNum, Pred); return; Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91598&r1=91597&r2=91598&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Dec 17 03:39:49 2009 @@ -24,14 +24,18 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Type.h" #include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -81,6 +85,8 @@ class LoopStrengthReduce : public LoopPass { IVUsers *IU; + LoopInfo *LI; + DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -88,6 +94,10 @@ /// particular stride. std::map IVsByStride; + /// StrideNoReuse - Keep track of all the strides whose ivs cannot be + /// reused (nor should they be rewritten to reuse other strides). + SmallSet StrideNoReuse; + /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. 
SmallVector DeadInsts; @@ -99,7 +109,8 @@ public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} + LoopPass(&ID), TLI(tli) { + } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -107,11 +118,13 @@ // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -215,17 +228,19 @@ if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); + Instruction *I = dyn_cast_or_null(DeadInsts.back()); + DeadInsts.pop_back(); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (Instruction *U = dyn_cast(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } + } I->eraseFromParent(); Changed = true; @@ -285,6 +300,9 @@ /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { + /// SE - The current ScalarEvolution object. + ScalarEvolution *SE; + /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). 
BasedUser values are sorted by this @@ -316,9 +334,9 @@ bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), + Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -327,14 +345,14 @@ void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts, - ScalarEvolution *SE); + LoopInfo &LI, + SmallVectorImpl &DeadInsts); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); + Instruction *IP, Loop *L, + LoopInfo &LI); void dump() const; }; } @@ -348,12 +366,27 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + Instruction *IP, Loop *L, + LoopInfo &LI) { + // Figure out where we *really* want to insert this code. In particular, if + // the user is inside of a loop that is nested inside of L, we really don't + // want to insert this expression before the user, we'd rather pull it out as + // many loops as possible. + Instruction *BaseInsertPt = IP; + + // Figure out the most-nested loop that IP is in. + Loop *InsertLoop = LI.getLoopFor(IP->getParent()); + + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out + // the preheader of the outer-most loop where NewBase is not loop invariant. 
+ if (L->contains(IP->getParent())) + while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { + BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); + InsertLoop = InsertLoop->getParentLoop(); + } + + Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -372,8 +405,8 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl &DeadInsts, - ScalarEvolution *SE) { + LoopInfo &LI, + SmallVectorImpl &DeadInsts) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -402,7 +435,7 @@ } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, SE); + Rewriter, InsertPt, L, LI); // Replace the use of the operand Value with the new Phi we just created. 
Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -464,7 +497,7 @@ PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); + Rewriter, InsertPt, L, LI); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -940,13 +973,17 @@ const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { + if (StrideNoReuse.count(Stride)) + return SE->getIntegerSCEV(0, Stride->getType()); + if (const SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa(SI->first)) + if (SI == IVsByStride.end() || !isa(SI->first) || + StrideNoReuse.count(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map::iterator UI = @@ -1705,8 +1742,8 @@ RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); + Rewriter, L, this, *LI, + DeadInsts); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. @@ -2670,6 +2707,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis(); + LI = &getAnalysis(); + DT = &getAnalysis(); SE = &getAnalysis(); Changed = false; @@ -2715,15 +2754,16 @@ // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - - // We're done analyzing this loop; release all the state we built up for it. 
- IVsByStride.clear(); - - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); } + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); + StrideNoReuse.clear(); + + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. DeleteDeadPHIs(L->getHeader()); From anton at korobeynikov.info Thu Dec 17 04:58:44 2009 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Thu, 17 Dec 2009 13:58:44 +0300 Subject: [llvm-commits] [llvm] r91598 - in /llvm/trunk: include/llvm/Analysis/IVUsers.h include/llvm/Analysis/LoopInfo.h lib/Analysis/IVUsers.cpp lib/CodeGen/LLVMTargetMachine.cpp lib/Transforms/Scalar/CodeGenPrepare.cpp lib/Transforms/Scalar/LoopStrength Message-ID: Hello, Evan > +static cl::opt X1("x1"); > +static cl::opt X2("x2"); > +static cl::opt X3("x3"); > +static cl::opt X4("x4"); Late night hacking fallout? :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From ken at linux.vnet.ibm.com Thu Dec 17 06:28:06 2009 From: ken at linux.vnet.ibm.com (Ken Werner) Date: Thu, 17 Dec 2009 13:28:06 +0100 Subject: [llvm-commits] [patch] fptr support for PPC64 In-Reply-To: References: <200912151255.29701.ken@linux.vnet.ibm.com> <200912162045.15956.ken@linux.vnet.ibm.com> Message-ID: <200912171328.06756.ken@linux.vnet.ibm.com> On Thursday 17 December 2009 02:22:22 am you wrote: > >> It's preferable to get isPPC64 from PPCSubTarget, as elsewhere > >> (inconsistently). Here that seems to mean an extra parameter. > >> Why did you add a Chain to PPCISD::NOP? > >> Do you have write access? 
> > > > The updated version of the patch obtains isPPC64 from PPCSubTarget > > Um, no, you changed one of the existing places where it was obtained > otherwise (which is fine), but not the place you added it. > Whoops, my bad. What is the preferred way to get PPCSubTarget from a static method? Thanks -ken From ken.dyck at onsemi.com Thu Dec 17 09:31:53 2009 From: ken.dyck at onsemi.com (Ken Dyck) Date: Thu, 17 Dec 2009 15:31:53 -0000 Subject: [llvm-commits] [llvm] r91602 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <200912171531.nBHFVr8o024174@zion.cs.uiuc.edu> Author: kjdyck Date: Thu Dec 17 09:31:52 2009 New Revision: 91602 URL: http://llvm.org/viewvc/llvm-project?rev=91602&view=rev Log: In LowerEXTRACT_VECTOR_ELT, force an i32 value type for PEXTRW instead of incrementing the simple value type of the 16-bit type, which would give the wrong type if an intermediate MVT (such as i24) were introduced. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=91602&r1=91601&r2=91602&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 17 09:31:52 2009 @@ -4584,7 +4584,7 @@ MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. 
- EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1); + EVT EltVT = MVT::i32; SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, From Ken.Dyck at onsemi.com Thu Dec 17 10:07:45 2009 From: Ken.Dyck at onsemi.com (Ken Dyck) Date: Thu, 17 Dec 2009 09:07:45 -0700 Subject: [llvm-commits] [Review] Preparing for non-power-of-2 machine value types in X86ISelLowering and LegalizeDAG In-Reply-To: <8FECBCA3-DE6F-42E4-B067-7E234A31FBFF@apple.com> References: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> <8FECBCA3-DE6F-42E4-B067-7E234A31FBFF@apple.com> Message-ID: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEB38A@NAMAIL.ad.onsemi.com> On Thursday, December 17, 2009 3:04 AM, Chris Lattner wrote: > > On Dec 16, 2009, at 7:33 AM, Ken Dyck wrote: > > > The attached patches prepare for the introduction of non-power-of-2 > > machine value types (as recently discussed [1]). They contain no > > functional changes. They merely eliminate assumptions that > > incrementing/decrementing a SimpleValueType doubles/halves its size > > and that all non-power-of-2 types are extended. > > The change in LowerEXTRACT_VECTOR_ELT can be simplified: just > force i32 there, that is the only valid valuetype for PEXTRW. Okay. Committed as 91602. > The legalize change looks reasonable to me, but please change > the for loop to be a while loop (I think it will be clearer) > and make it be a method on EVT instead of inline in the legalizer. In moving it to EVT, the knowledge that the source type is simple is lost and the loop boundaries change. So a for loop seems to make more sense now. What do you think of the attached updated patch? -Ken -------------- next part -------------- A non-text attachment was scrubbed... 
Name: non-po2.LegalizeDAG.diff Type: application/octet-stream Size: 3044 bytes Desc: non-po2.LegalizeDAG.diff Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091217/6208f196/attachment.obj From grosbach at apple.com Thu Dec 17 11:18:12 2009 From: grosbach at apple.com (Jim Grosbach) Date: Thu, 17 Dec 2009 17:18:12 -0000 Subject: [llvm-commits] [llvm] r91603 - /llvm/trunk/docs/GettingStarted.html Message-ID: <200912171718.nBHHICut028463@zion.cs.uiuc.edu> Author: grosbach Date: Thu Dec 17 11:18:11 2009 New Revision: 91603 URL: http://llvm.org/viewvc/llvm-project?rev=91603&view=rev Log: Add more detail for getting started on Windows. Patch from jon.forums at gmail.com Modified: llvm/trunk/docs/GettingStarted.html Modified: llvm/trunk/docs/GettingStarted.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/GettingStarted.html?rev=91603&r1=91602&r2=91603&view=diff ============================================================================== --- llvm/trunk/docs/GettingStarted.html (original) +++ llvm/trunk/docs/GettingStarted.html Thu Dec 17 11:18:11 2009 @@ -114,13 +114,15 @@
  • Read the documentation.
  • Read the documentation.
  • Remember that you were warned twice about reading the documentation.
  • -
  • Install the llvm-gcc-4.2 front end if you intend to compile C or C++: +
  • Install the llvm-gcc-4.2 front end if you intend to compile C or C++ + (see Install the GCC Front End for details):
    1. cd where-you-want-the-C-front-end-to-live
    2. -
    3. gunzip --stdout llvm-gcc-4.2-version-platform.tar.gz | tar -xvf - -
    4. -
    5. Note: If the binary extension is ".bz" use bunzip2 instead of gunzip.
    6. -
    7. Add llvm-gcc's "bin" directory to your PATH variable.
    8. +
    9. gunzip --stdout llvm-gcc-4.2-version-platform.tar.gz | tar -xvf -
    10. +
    11. install-binutils-binary-from-MinGW (Windows only)
    12. +
    13. Note: If the binary extension is ".bz" use bunzip2 instead of gunzip.
    14. +
    15. Note: On Windows, use 7-Zip or a similar archiving tool.
    16. +
    17. Add llvm-gcc's "bin" directory to your PATH environment variable.
  • Get the LLVM Source Code @@ -774,13 +776,14 @@
    -

    Before configuring and compiling the LLVM suite, you can optionally extract the -LLVM GCC front end from the binary distribution. It is used for running the -llvm-test testsuite and for compiling C/C++ programs. Note that you can optionally -build llvm-gcc yourself after building the +

    Before configuring and compiling the LLVM suite (or if you want to use just the LLVM +GCC front end) you can optionally extract the front end from the binary distribution. +It is used for running the llvm-test testsuite and for compiling C/C++ programs. Note that +you can optionally build llvm-gcc yourself after building the main LLVM repository.

    -

    To install the GCC front end, do the following:

    +

    To install the GCC front end, do the following (on Windows, use an archival tool +like 7-zip that understands gzipped tars):

    1. cd where-you-want-the-front-end-to-live
    2. @@ -788,22 +791,51 @@ -
    -

    Once the binary is uncompressed, you should add a symlink for llvm-gcc and -llvm-g++ to some directory in your path. When you configure LLVM, it will -automatically detect llvm-gcc's presence (if it is in your path) enabling its -use in llvm-test. Note that you can always build or install llvm-gcc at any -pointer after building the main LLVM repository: just reconfigure llvm and +

    Once the binary is uncompressed, if you're using a *nix-based system, add a symlink for +llvm-gcc and llvm-g++ to some directory in your path. If you're using a +Windows-based system, add the bin subdirectory of your front end installation directory +to your PATH environment variable. For example, if you uncompressed the binary to +c:\llvm-gcc, add c:\llvm-gcc\bin to your PATH.

    + +

    If you now want to build LLVM from source, when you configure LLVM, it will +automatically detect llvm-gcc's presence (if it is in your path) enabling its +use in llvm-test. Note that you can always build or install llvm-gcc at any +point after building the main LLVM repository: just reconfigure llvm and llvm-test will pick it up.

    -

    The binary versions of the GCC front end may not suit all of your needs. For -example, the binary distribution may include an old version of a system header -file, not "fix" a header file that needs to be fixed for GCC, or it may be -linked with libraries not available on your system.

    +

    As a convenience for Windows users, the front end binaries for MinGW/x86 include +versions of the required w32api and mingw-runtime binaries. The last remaining step for +Windows users is to simply uncompress the binary binutils package from +MinGW into your front end installation directory. While the +front end installation steps are not quite the same as a typical manual MinGW installation, +they should be similar enough to those who have previously installed MinGW on Windows systems.

    + +

    To install binutils on Windows:

    -

    In cases like these, you may want to try building the GCC front end from source. This is -much easier now than it was in the past.

    +
      +
    1. download GNU Binutils from MinGW Downloads
    2. +
    3. cd where-you-uncompressed-the-front-end
    4. +
    5. uncompress archived binutils directories (not the tar file) into the current directory
    6. +
    + +

    The binary versions of the LLVM GCC front end may not suit all of your needs. For +example, the binary distribution may include an old version of a system header +file, not "fix" a header file that needs to be fixed for GCC, or it may be linked with +libraries not available on your system. In cases like these, you may want to try +building the GCC front end from source. Thankfully, +this is much easier now than it was in the past.

    + +

    We also do not currently support updating of the GCC front end by manually overlaying +newer versions of the w32api and mingw-runtime binary packages that may become available +from MinGW. At this time, it's best to think of the MinGW LLVM GCC front end binary as +a self-contained convenience package that requires Windows users to simply download and +uncompress the GNU Binutils binary package from the MinGW project.

    + +

    Regardless of your platform, if you discover that installing the LLVM GCC front end +binaries is not as easy as previously described, or you would like to suggest improvements, +please let us know how you would like to see things improved by dropping us a note on our +mailing list.

    @@ -1171,7 +1203,6 @@
  • -
    Program Layout From evan.cheng at apple.com Thu Dec 17 12:03:13 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 17 Dec 2009 18:03:13 -0000 Subject: [llvm-commits] [llvm] r91604 - /llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Message-ID: <200912171803.nBHI3DeK030062@zion.cs.uiuc.edu> Author: evancheng Date: Thu Dec 17 12:03:12 2009 New Revision: 91604 URL: http://llvm.org/viewvc/llvm-project?rev=91604&view=rev Log: Remove debugging code. Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=91604&r1=91603&r2=91604&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original) +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Thu Dec 17 12:03:12 2009 @@ -31,10 +31,6 @@ bool EnableFastISel; } -static cl::opt X1("x1"); -static cl::opt X2("x2"); -static cl::opt X3("x3"); -static cl::opt X4("x4"); static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -243,11 +239,6 @@ PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); } - if (X1) - PM.add(createPrintFunctionPass("\n\n" - "*** Before LSR ***\n", - &errs())); - // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -255,11 +246,6 @@ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs())); } - if (X2) - PM.add(createPrintFunctionPass("\n\n" - "*** After LSR ***\n", - &errs())); - // Turn exception handling constructs into something the code generators can // handle. switch (getMCAsmInfo()->getExceptionHandlingType()) @@ -282,19 +268,9 @@ // Make sure that no unreachable blocks are instruction selected. 
PM.add(createUnreachableBlockEliminationPass()); - if (X3) - PM.add(createPrintFunctionPass("\n\n" - "*** Before CGP ***\n", - &errs())); - if (OptLevel != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); - if (X4) - PM.add(createPrintFunctionPass("\n\n" - "*** After CGP ***\n", - &errs())); - PM.add(createStackProtectorPass(getTargetLowering())); if (PrintISelInput) From sabre at nondot.org Thu Dec 17 12:17:58 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 17 Dec 2009 18:17:58 -0000 Subject: [llvm-commits] [test-suite] r91605 - /test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Message-ID: <200912171817.nBHIHww9030529@zion.cs.uiuc.edu> Author: lattner Date: Thu Dec 17 12:17:58 2009 New Revision: 91605 URL: http://llvm.org/viewvc/llvm-project?rev=91605&view=rev Log: change this benchmark to actually produce some output, preventing the compiler from DCE'ing major parts of it. This will cause a big spike in the execution time of the test. 
Modified: test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Modified: test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c?rev=91605&r1=91604&r2=91605&view=diff ============================================================================== --- test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c (original) +++ test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Thu Dec 17 12:17:58 2009 @@ -372,6 +372,8 @@ { planetpv(jd,p,pv); radecdist(pv,position); + + printf("%f %f %f\n", position[0], position[1], position[2]); } } } From sabre at nondot.org Thu Dec 17 12:19:59 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 17 Dec 2009 18:19:59 -0000 Subject: [llvm-commits] [test-suite] r91606 - /test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Message-ID: <200912171819.nBHIJxJj030610@zion.cs.uiuc.edu> Author: lattner Date: Thu Dec 17 12:19:59 2009 New Revision: 91606 URL: http://llvm.org/viewvc/llvm-project?rev=91606&view=rev Log: ok, don't make it completely bound by printf time, just print out some of the redundant output at the end. Modified: test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Modified: test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c?rev=91606&r1=91605&r2=91606&view=diff ============================================================================== --- test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c (original) +++ test-suite/trunk/SingleSource/Benchmarks/CoyoteBench/almabench.c Thu Dec 17 12:19:59 2009 @@ -340,7 +340,7 @@ int i, n, p; double jd[2]; double pv[2][3]; - double position[3]; + double position[8][3]; bool ga_testing = false; // do we have verbose output? 
@@ -371,13 +371,15 @@ for (p = 0; p < 8; ++p) { planetpv(jd,p,pv); - radecdist(pv,position); - - printf("%f %f %f\n", position[0], position[1], position[2]); + radecdist(pv,position[p]); } } } + for (p = 0; p < 8; ++p) + printf("%f %f %f\n", position[p][0], position[p][1], position[p][2]); + + // get final time // report runtime From bob.wilson at apple.com Thu Dec 17 12:34:25 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 17 Dec 2009 18:34:25 -0000 Subject: [llvm-commits] [llvm] r91607 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Message-ID: <200912171834.nBHIYPH7031147@zion.cs.uiuc.edu> Author: bwilson Date: Thu Dec 17 12:34:24 2009 New Revision: 91607 URL: http://llvm.org/viewvc/llvm-project?rev=91607&view=rev Log: Re-revert 91459. It's breaking the x86_64 darwin bootstrap. Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=91607&r1=91606&r2=91607&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Thu Dec 17 12:34:24 2009 @@ -74,10 +74,6 @@ private: TargetData *TD; - /// DeadInsts - Keep track of instructions we have made dead, so that - /// we can remove them after we are done working. - SmallVector DeadInsts; - /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. 
@@ -106,30 +102,25 @@ int isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, AllocaInfo &Info); - void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, - uint64_t &ArrayOffset, AllocaInfo &Info); - void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset, - uint64_t MemSize, const Type *MemOpType, bool isStore, - AllocaInfo &Info); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset); + void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, + AllocaInfo &Info); + void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, + AllocaInfo &Info); + void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, + unsigned OpNo, AllocaInfo &Info); + void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, + AllocaInfo &Info); void DoScalarReplacement(AllocaInst *AI, std::vector &WorkList); - void DeleteDeadInstructions(); void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(Value *V); + void CleanupAllocaUsers(AllocaInst *AI); AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts); - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, + SmallVector &NewElts); + + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocaInst *AI, SmallVector &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -369,37 +360,176 @@ } } - // Now that we have created the new alloca 
instructions, rewrite all the - // uses of the old alloca. - DeadInsts.push_back(AI); - RewriteForScalarRepl(AI, AI, 0, ElementAllocas); + // Now that we have created the alloca instructions that we want to use, + // expand the getelementptr instructions to use them. + while (!AI->use_empty()) { + Instruction *User = cast(AI->use_back()); + if (BitCastInst *BCInst = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); + BCInst->eraseFromParent(); + continue; + } + + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + if (LoadInst *LI = dyn_cast(User)) { + Value *Insert = UndefValue::get(LI->getType()); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Load = new LoadInst(ElementAllocas[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + LI->eraseFromParent(); + continue; + } - // Now erase any instructions that were made dead while rewriting the alloca. 
- DeleteDeadInstructions(); + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, ElementAllocas[i], SI); + } + SI->eraseFromParent(); + continue; + } + + GetElementPtrInst *GEPI = cast(User); + // We now know that the GEP is of the form: GEP , 0, + unsigned Idx = + (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); + + assert(Idx < ElementAllocas.size() && "Index out of range?"); + AllocaInst *AllocaToUse = ElementAllocas[Idx]; + + Value *RepValue; + if (GEPI->getNumOperands() == 3) { + // Do not insert a new getelementptr instruction with zero indices, only + // to have it optimized out later. + RepValue = AllocaToUse; + } else { + // We are indexing deeply into the structure, so we still need a + // getelement ptr instruction to finish the indexing. This may be + // expanded itself once the worklist is rerun. + // + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue( + Type::getInt32Ty(AI->getContext()))); + NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); + RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), + NewArgs.end(), "", GEPI); + RepValue->takeName(GEPI); + } + + // If this GEP is to the start of the aggregate, check for memcpys. + if (Idx == 0 && GEPI->hasAllZeroIndices()) + RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); + + // Move all of the users over to the new GEP. 
+ GEPI->replaceAllUsesWith(RepValue); + // Delete the old GEP + GEPI->eraseFromParent(); + } + // Finally, delete the Alloca instruction + AI->eraseFromParent(); NumReplaced++; } -/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, -/// recursively including all their operands that become trivially dead. -void SROA::DeleteDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); - if (I == 0) - continue; - - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast(*OI)) { - // Zero out the operand and see if it becomes trivially dead. - *OI = 0; - if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); - } +/// isSafeElementUse - Check to see if this use is an allowed use for a +/// getelementptr instruction of an array aggregate allocation. isFirstElt +/// indicates whether Ptr is known to the start of the aggregate. +void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); + I != E; ++I) { + Instruction *User = cast(*I); + switch (User->getOpcode()) { + case Instruction::Load: break; + case Instruction::Store: + // Store is ok if storing INTO the pointer, not storing the pointer + if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); + break; + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast(User); + bool AreAllZeroIndices = isFirstElt; + if (GEP->getNumOperands() > 1 && + (!isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero())) + // Using pointer arithmetic to navigate the array. + return MarkUnsafe(Info); + + // Verify that any array subscripts are in range. + for (gep_type_iterator GEPIt = gep_type_begin(GEP), + E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { + // Ignore struct elements, no extra checking needed for these. 
+ if (isa(*GEPIt)) + continue; - I->eraseFromParent(); + // This GEP indexes an array. Verify that this is an in-range + // constant integer. Specifically, consider A[0][i]. We cannot know that + // the user isn't doing invalid things like allowing i to index an + // out-of-range subscript that accesses A[1]. Because of this, we have + // to reject SROA of any accesses into structs where any of the + // components are variables. + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); + + // Are all indices still zero? + AreAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= AT->getNumElements()) + return MarkUnsafe(Info); + } else if (const VectorType *VT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= VT->getNumElements()) + return MarkUnsafe(Info); + } + } + + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); + if (Info.isUnsafe) return; + break; + } + case Instruction::BitCast: + if (isFirstElt) { + isSafeUseOfBitCastedAllocation(cast(User), AI, Info); + if (Info.isUnsafe) return; + break; + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + case Instruction::Call: + if (MemIntrinsic *MI = dyn_cast(User)) { + if (isFirstElt) { + isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); + if (Info.isUnsafe) return; + break; + } + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + default: + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + } } + return; // All users look ok :) } - + /// AllUsersAreLoads - Return true if all users of this value are loads. 
static bool AllUsersAreLoads(Value *Ptr) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -409,116 +539,72 @@ return true; } -/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to -/// performing scalar replacement of alloca AI. The results are flagged in -/// the Info parameter. Offset and ArrayOffset indicate the position within -/// AI that is referenced by this instruction. -void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, AllocaInfo &Info) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast(*UI); - - if (BitCastInst *BC = dyn_cast(User)) { - isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - uint64_t GEPArrayOffset = ArrayOffset; - uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - if (Length) - isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); - else - MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(User)) { - if (!LI->isVolatile()) { - const Type *LIType = LI->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), - LIType, false, Info); - } else - MarkUnsafe(Info); - } else if (StoreInst *SI = dyn_cast(User)) { - // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isVolatile() && SI->getOperand(0) != I) { - const Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), - SIType, true, Info); - } else - MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. 
- if (OnlyUsedByDbgInfoIntrinsics(I)) { - Info.needsCleanup = true; - return; - } - MarkUnsafe(Info); - } else { - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - MarkUnsafe(Info); - } - if (Info.isUnsafe) return; - } -} +/// isSafeUseOfAllocation - Check if this user is an allowed use for an +/// aggregate allocation. +void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, + AllocaInfo &Info) { + if (BitCastInst *C = dyn_cast(User)) + return isSafeUseOfBitCastedAllocation(C, AI, Info); + + if (LoadInst *LI = dyn_cast(User)) + if (!LI->isVolatile()) + return;// Loads (returning a first class aggregrate) are always rewritable + + if (StoreInst *SI = dyn_cast(User)) + if (!SI->isVolatile() && SI->getOperand(0) != AI) + return;// Store is ok if storing INTO the pointer, not storing the pointer + + GetElementPtrInst *GEPI = dyn_cast(User); + if (GEPI == 0) + return MarkUnsafe(Info); -/// isSafeGEP - Check if a GEP instruction can be handled for scalar -/// replacement. It is safe when all the indices are constant, in-bounds -/// references, and when the resulting offset corresponds to an element within -/// the alloca type. The results are flagged in the Info parameter. Upon -/// return, Offset is adjusted as specified by the GEP indices. For the -/// special case of a variable index to a 2-element array, ArrayOffset is set -/// to the array element size. -void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, - uint64_t &Offset, uint64_t &ArrayOffset, - AllocaInfo &Info) { - gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); - if (GEPIt == E) - return; + gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - // The first GEP index must be zero. - if (!isa(GEPIt.getOperand()) || - !cast(GEPIt.getOperand())->isZero()) + // The GEP is not safe to transform if not of the form "GEP , 0, ". 
+ if (I == E || + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); - if (++GEPIt == E) - return; + } + ++I; + if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? + + bool IsAllZeroIndices = true; + // If the first index is a non-constant index into an array, see if we can // handle it as a special case. - const Type *ArrayEltTy = 0; - if (ArrayOffset == 0 && Offset == 0) { - if (const ArrayType *AT = dyn_cast(*GEPIt)) { - if (!isa(GEPIt.getOperand())) { - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements - // in it, in which case we CAN promote it, but we have to canonicalize - // this out if this is the only problem. - if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) - return MarkUnsafe(Info); + if (const ArrayType *AT = dyn_cast(*I)) { + if (!isa(I.getOperand())) { + IsAllZeroIndices = 0; + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements in + // it, in which case we CAN promote it, but we have to canonicalize this + // out if this is the only problem. + if ((NumElements == 1 || NumElements == 2) && + AllUsersAreLoads(GEPI)) { Info.needsCleanup = true; - ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); - ArrayEltTy = AT->getElementType(); - ++GEPIt; + return; // Canonicalization required! } + return MarkUnsafe(Info); } } - + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; GEPIt != E; ++GEPIt) { + for (; I != E; ++I) { // Ignore struct elements, no extra checking needed for these. 
- if (isa(*GEPIt)) + if (isa(*I)) continue; + + ConstantInt *IdxVal = dyn_cast(I.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info); - - if (const ArrayType *AT = dyn_cast(*GEPIt)) { + // Are all indices still zero? + IsAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*I)) { // This GEP indexes an array. Verify that this is an in-range constant // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range @@ -526,255 +612,147 @@ // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); - } else { - const VectorType *VT = dyn_cast(*GEPIt); - assert(VT && "unexpected type in GEP type iterator"); + } else if (const VectorType *VT = dyn_cast(*I)) { if (IdxVal->getZExtValue() >= VT->getNumElements()) return MarkUnsafe(Info); } } - - // All the indices are safe. Now compute the offset due to this GEP and - // check if the alloca has a component element at that offset. - if (ArrayOffset == 0) { - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - } else { - // Both array elements have the same type, so it suffices to check one of - // them. Copy the GEP indices starting from the array index, but replace - // that variable index with a constant zero. 
- SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); - Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); - } - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) - MarkUnsafe(Info); -} - -/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI -/// alloca or has an offset and size that corresponds to a component element -/// within it. The offset checked here may have been formed from a GEP with a -/// pointer bitcasted to a different type. -void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, uint64_t MemSize, - const Type *MemOpType, bool isStore, - AllocaInfo &Info) { - // Check if this is a load/store of the entire alloca. - if (Offset == 0 && ArrayOffset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. - if (!MemOpType || isa(MemOpType) || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } - return; - } - } - // Check if the offset/size correspond to a component within the alloca type. - const Type *T = AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize) && - (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) - return; - - return MarkUnsafe(Info); + + // If there are any non-simple uses of this getelementptr, make sure to reject + // them. 
+ return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// TypeHasComponent - Return true if T has a component type with the -/// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; - uint64_t EltSize; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - unsigned EltIdx = Layout->getElementContainingOffset(Offset); - EltTy = ST->getContainedType(EltIdx); - EltSize = TD->getTypeAllocSize(EltTy); - Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast(T)) { - EltTy = AT->getElementType(); - EltSize = TD->getTypeAllocSize(EltTy); - Offset %= EltSize; - } else { - return false; +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory +/// intrinsic can be promoted by SROA. At this point, we know that the operand +/// of the memintrinsic is a pointer to the beginning of the allocation. +void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, + unsigned OpNo, AllocaInfo &Info) { + // If not constant length, give up. + ConstantInt *Length = dyn_cast(MI->getLength()); + if (!Length) return MarkUnsafe(Info); + + // If not the whole aggregate, give up. + if (Length->getZExtValue() != + TD->getTypeAllocSize(AI->getType()->getElementType())) + return MarkUnsafe(Info); + + // We only know about memcpy/memset/memmove. + if (!isa(MI)) + return MarkUnsafe(Info); + + // Otherwise, we can transform it. Determine whether this is a memcpy/set + // into or out of the aggregate. + if (OpNo == 1) + Info.isMemCpyDst = true; + else { + assert(OpNo == 2); + Info.isMemCpySrc = true; } - if (Offset == 0 && (Size == 0 || EltSize == Size)) - return true; - // Check if the component spans multiple elements. 
- if (Offset + Size > EltSize) - return false; - return TypeHasComponent(EltTy, Offset, Size); } -/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite -/// the instruction I, which references it, to use the separate elements. -/// Offset indicates the position within AI that is referenced by this -/// instruction. -void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast(*UI); +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. +void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCU = dyn_cast(UI)) { + isSafeUseOfBitCastedAllocation(BCU, AI, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); + } else if (StoreInst *SI = dyn_cast(UI)) { + if (SI->isVolatile()) + return MarkUnsafe(Info); + + // If storing the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and store through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. 
+ if (isa(SI->getOperand(0)->getType()) && + TD->getTypeAllocSize(SI->getOperand(0)->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpyDst = true; + continue; + } + return MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(UI)) { + if (LI->isVolatile()) + return MarkUnsafe(Info); - if (BitCastInst *BC = dyn_cast(User)) { - RewriteBitCast(BC, AI, Offset, NewElts); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - RewriteGEP(GEPI, AI, Offset, NewElts); - } else if (MemIntrinsic *MI = dyn_cast(User)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - uint64_t MemSize = Length->getZExtValue(); - if (Offset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) - RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); - } else if (LoadInst *LI = dyn_cast(User)) { - const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - Value *Insert = UndefValue::get(LIType); - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - DeadInsts.push_back(LI); - } else if (isa(LIType) && - TD->getTypeAllocSize(LIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + // If loading the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and load through it. 
This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa(LI->getType()) && + TD->getTypeAllocSize(LI->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpySrc = true; + continue; } - } else if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, NewElts[i], SI); - } - DeadInsts.push_back(SI); - } else if (isa(SIType) && - TD->getTypeAllocSize(SIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + return MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(BC)) { + Info.needsCleanup = true; + return; } + else + MarkUnsafe(Info); } + else { + return MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } } -/// RewriteBitCast - Update a bitcast reference to the alloca being replaced -/// and recursively continue updating all of its uses. -void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - RewriteForScalarRepl(BC, AI, Offset, NewElts); - if (BC->getOperand(0) != AI) - return; +/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes +/// to its first element. Transform users of the cast to use the new values +/// instead. 
+void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, + SmallVector &NewElts) { + Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); + while (UI != UE) { + Instruction *User = cast(*UI++); + if (BitCastInst *BCU = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCU, AI, NewElts); + if (BCU->use_empty()) BCU->eraseFromParent(); + continue; + } - // The bitcast references the original alloca. Replace its uses with - // references to the first new element alloca. - Instruction *Val = NewElts[0]; - if (Val->getType() != BC->getDestTy()) { - Val = new BitCastInst(Val, BC->getDestTy(), "", BC); - Val->takeName(BC); - } - BC->replaceAllUsesWith(Val); - DeadInsts.push_back(BC); -} - -/// FindElementAndOffset - Return the index of the element containing Offset -/// within the specified type, which must be either a struct or an array. -/// Sets T to the type of the element and Offset to the offset within that -/// element. -unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { - unsigned Idx = 0; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - Idx = Layout->getElementContainingOffset(Offset); - T = ST->getContainedType(Idx); - Offset -= Layout->getElementOffset(Idx); - } else { - const ArrayType *AT = dyn_cast(T); - assert(AT && "unexpected type for scalar replacement"); - T = AT->getElementType(); - uint64_t EltSize = TD->getTypeAllocSize(T); - Idx = (unsigned)(Offset / EltSize); - Offset -= Idx * EltSize; - } - return Idx; -} - -/// RewriteGEP - Check if this GEP instruction moves the pointer across -/// elements of the alloca that are being split apart, and if so, rewrite -/// the GEP to be relative to the new element. 
-void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - uint64_t OldOffset = Offset; - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - - RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - - const Type *T = AI->getAllocatedType(); - unsigned OldIdx = FindElementAndOffset(T, OldOffset); - if (GEPI->getOperand(0) == AI) - OldIdx = ~0U; // Force the GEP to be rewritten. - - T = AI->getAllocatedType(); - uint64_t EltOffset = Offset; - unsigned Idx = FindElementAndOffset(T, EltOffset); - - // If this GEP does not move the pointer across elements of the alloca - // being split, then it does not needs to be rewritten. - if (Idx == OldIdx) - return; + if (MemIntrinsic *MI = dyn_cast(User)) { + // This must be memcpy/memmove/memset of the entire aggregate. + // Split into one per element. + RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); + continue; + } + + if (StoreInst *SI = dyn_cast(User)) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + continue; + } - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(i32Ty)); - while (EltOffset != 0) { - unsigned EltIdx = FindElementAndOffset(T, EltOffset); - NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); - } - Instruction *Val = NewElts[Idx]; - if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), "", GEPI); - Val->takeName(GEPI); - } - if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); - GEPI->replaceAllUsesWith(Val); - DeadInsts.push_back(GEPI); + if (LoadInst *LI = dyn_cast(User)) { + // If this is a load of the entire alloca to an integer, rewrite it. 
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + continue; + } + + // Otherwise it must be some other user of a gep of the first pointer. Just + // leave these alone. + continue; + } } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocaInst *AI, SmallVector &NewElts) { + // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For @@ -783,41 +761,28 @@ LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (Inst == MTI->getRawDest()) + if (BCInst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(Inst == MTI->getRawSource()); + assert(BCInst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } + // Keep track of the other intrinsic argument, so it can be removed if it + // is dead when the intrinsic is replaced. + Value *PossiblyDead = OtherPtr; + // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - - // Remove bitcasts and all-zero GEPs from OtherPtr. This is an - // optimization, but it's also required to detect the corner case where - // both pointer operands are referencing the same memory, and where - // OtherPtr may be a bitcast or GEP that currently being rewritten. (This - // function is only called for mem intrinsics that access the whole - // aggregate, so non-zero GEPs are not an issue here.) 
- while (1) { - if (BitCastInst *BC = dyn_cast(OtherPtr)) { - OtherPtr = BC->getOperand(0); - continue; - } - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { - // All zero GEPs are effectively bitcasts. - if (GEP->hasAllZeroIndices()) { - OtherPtr = GEP->getOperand(0); - continue; - } - } - break; - } - // If OtherPtr has already been rewritten, this intrinsic will be dead. - if (OtherPtr == NewElts[0]) - return; + // It is likely that OtherPtr is a bitcast, if so, remove it. + if (BitCastInst *BC = dyn_cast(OtherPtr)) + OtherPtr = BC->getOperand(0); + // All zero GEPs are effectively bitcasts. + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) + if (GEP->hasAllZeroIndices()) + OtherPtr = GEP->getOperand(0); if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -833,7 +798,7 @@ // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == Inst; + bool SROADest = MI->getRawDest() == BCInst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -842,15 +807,12 @@ Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - if (OtherPtr == AI) { - OtherElt = NewElts[i]; - OtherEltAlign = 0; - } else if (OtherPtr) { + if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -962,7 +924,9 @@ CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - DeadInsts.push_back(MI); + MI->eraseFromParent(); + if (PossiblyDead) + RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); } /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that @@ -973,9 +937,15 @@ // 
Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a store of an integer to the whole alloca, it may be a store + // to the first element. Just ignore the store in this case and normal SROA + // will handle it. + if (!isa(SrcVal->getType()) || + TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) + return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1080,7 +1050,7 @@ } } - DeadInsts.push_back(SI); + SI->eraseFromParent(); } /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to @@ -1089,9 +1059,16 @@ SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a load of the whole alloca to an integer, it may be a load + // of the first element. Just ignore the load in this case and normal SROA + // will handle it. + if (!isa(LI->getType()) || + TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) + return; + DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1162,9 +1139,10 @@ ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); - DeadInsts.push_back(LI); + LI->eraseFromParent(); } + /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. 
static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1214,10 +1192,14 @@ // the users are safe to transform. AllocaInfo Info; - isSafeForScalarRepl(AI, AI, 0, 0, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << '\n'); - return 0; + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); + I != E; ++I) { + isSafeUseOfAllocation(cast(*I), AI, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " + << **I << '\n'); + return 0; + } } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1226,7 +1208,7 @@ // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getAllocatedType(), *TD)) + HasPadding(AI->getType()->getElementType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. @@ -1263,15 +1245,15 @@ // Insert the new GEP instructions, which are properly indexed. SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0",GEPI); + Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".0", GEPI); Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); + Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".1", GEPI); // Replace all loads of the variable index GEP with loads from both // indexes and a select. 
while (!GEPI->use_empty()) { @@ -1282,24 +1264,22 @@ LI->replaceAllUsesWith(R); LI->eraseFromParent(); } + GEPI->eraseFromParent(); } + /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(Value *V) { +void SROA::CleanupAllocaUsers(AllocaInst *AI) { // At this point, we know that the end result will be SROA'd and promoted, so // we can insert ugly code if required so long as sroa+mem2reg will clean it // up. - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { User *U = *UI++; - if (isa(U)) { - CleanupAllocaUsers(U); - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GetElementPtrInst *GEPI = dyn_cast(U)) CleanupGEP(GEPI); - CleanupAllocaUsers(GEPI); - if (GEPI->use_empty()) GEPI->eraseFromParent(); - } else { + else { Instruction *I = cast(U); SmallVector DbgInUses; if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { @@ -1415,7 +1395,7 @@ // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1477,7 +1457,7 @@ if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); Removed: llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll?rev=91606&view=auto ============================================================================== --- llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (original) +++ llvm/trunk/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll (removed) @@ -1,89 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; Radar 7441282 - -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" - -%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } -%struct.int16x8_t = type { <8 x i16> } -%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } -%union..0anon = type { %struct.int16x8x2_t } - -define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { -; CHECK: @test -; CHECK-NOT: alloca -; CHECK: "alloca point" -entry: - %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] - %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] - %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] - %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] 
- store <8 x i16> %tmp.0, <8 x i16>* %1 - store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr - %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] - store <8 x i16> %4, <8 x i16>* %2, align 16 - %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1] - store <8 x i16> %7, <8 x i16>* %5, align 16 - %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2] - %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2] - %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] - %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2] - %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] - %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - store <8 x i16> %14, <8 x i16>* %15 - %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> ; <<8 x i16>> [#uses=1] - %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1] - store <8 x i16> %16, <8 x i16>* %17 - %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1] - %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] - %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16) - %tmp21 = bitcast 
%struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] - %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16) - %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1] - %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1] - %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16) - br label %return - -; CHECK: store <8 x i16> -; CHECK: store <8 x i16> - -return: ; preds = %entry - ret void -} - -; Radar 7466574 -%struct._NSRange = type { i64 } - -define arm_apcscc void @test_memcpy_self() nounwind { -; CHECK: @test_memcpy_self -; CHECK-NOT: alloca -; CHECK: br i1 -entry: - %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2] - br i1 undef, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %tmp3 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] - %tmp4 = bitcast %struct._NSRange* %range to i8* ; [#uses=1] - call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8) - ret void - -cond.false: ; preds = %entry - ret void -} - -declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind From echristo at apple.com Thu Dec 17 13:07:20 2009 From: echristo at apple.com (Eric Christopher) Date: Thu, 17 Dec 2009 19:07:20 -0000 Subject: [llvm-commits] [llvm] r91609 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912171907.nBHJ7Kqj032155@zion.cs.uiuc.edu> Author: echristo Date: Thu Dec 17 13:07:19 2009 New Revision: 91609 URL: http://llvm.org/viewvc/llvm-project?rev=91609&view=rev Log: Fix unused variable warning. 
Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91609&r1=91608&r2=91609&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Thu Dec 17 13:07:19 2009 @@ -232,7 +232,7 @@ SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} // No need to do a destroy loop for POD's. - static void destroy_range(T *S, T *E) {} + static void destroy_range(T *, T *) {} /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory /// starting with "Dest", constructing elements into it as needed. From clattner at apple.com Thu Dec 17 13:24:55 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:24:55 -0800 Subject: [llvm-commits] [Review] Preparing for non-power-of-2 machine value types in X86ISelLowering and LegalizeDAG In-Reply-To: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEB38A@NAMAIL.ad.onsemi.com> References: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> <8FECBCA3-DE6F-42E4-B067-7E234A31FBFF@apple.com> <8F2E4A8BCDA0B84DA6C9088EB5B27747CEB38A@NAMAIL.ad.onsemi.com> Message-ID: On Dec 17, 2009, at 8:07 AM, Ken Dyck wrote: > On Thursday, December 17, 2009 3:04 AM, Chris Lattner wrote: >> >> On Dec 16, 2009, at 7:33 AM, Ken Dyck wrote: >> >>> The attached patches prepare for the introduction of non-power-of-2 >>> machine value types (as recently discussed [1]). They contain no >>> functional changes. They merely eliminate assumptions that >>> incrementing/decrementing a SimpleValueType doubles/halves its size >>> and that all non-power-of-2 types are extended. >> >> The change in LowerEXTRACT_VECTOR_ELT can be simplified: just >> force i32 there, that is the only valid valuetype for PEXTRW. > > Okay. Committed as 91602. 
> >> The legalize change looks reasonable to me, but please change >> the for loop to be a while loop (I think it will be clearer) >> and make it be a method on EVT instead of inline in the legalizer. > > In moving it to EVT, the knowledge that the source type is simple is > lost and the loop boundaries change. So a for loop seems to make more > sense now. What do you think of the attached updated patch? Looks good to me, thanks Ken! -Chris From clattner at apple.com Thu Dec 17 13:25:46 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:25:46 -0800 Subject: [llvm-commits] [PATCH] Fix a memory leak in OpaqueType (issue180073) In-Reply-To: <00163630ead9316d10047ae12316@google.com> References: <00163630ead9316d10047ae12316@google.com> Message-ID: <3A8560EC-89BA-4AC4-83EE-52D5114755F2@apple.com> On Dec 16, 2009, at 3:48 PM, collinwinter at google.com wrote: > Reviewers: , > > Message: > Please take a look. > > Description: > This fixes a memory leak found by Google's internal heapchecker. > LLVM/Clang tests pass. > > Please review this at http://codereview.appspot.com/180073 Looks great to me, please apply. Nice catch! -Chris From clattner at apple.com Thu Dec 17 13:26:59 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:26:59 -0800 Subject: [llvm-commits] [PATCH] Fix http://llvm.org/PR5735: available_externally codegen In-Reply-To: References: Message-ID: <48984198-AC74-4973-899D-F9FE5ADA7852@apple.com> On Dec 14, 2009, at 5:25 PM, Jeffrey Yasskin wrote: > Patch at http://llvm.org/bugs/attachment.cgi?id=3948 or > http://codereview.appspot.com/179048. Seems reasonable to me, though I'm not a great expert in this area anymore. 
-Chris From clattner at apple.com Thu Dec 17 13:38:33 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:38:33 -0800 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp In-Reply-To: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> References: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> Message-ID: <36ED7EB5-3000-4621-80A1-3B5EC80E965C@apple.com> On Dec 14, 2009, at 5:54 PM, Bill Wendling wrote: > Author: void > Date: Mon Dec 14 19:54:51 2009 > New Revision: 91392 > > URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev > Log: > Initial work on disabling the scheduler. This is a work in progress, and this > stuff isn't used just yet. Ok, cool. Here are some comments out of order. > +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 > @@ -110,6 +110,46 @@ > /// SelectionDAG. > BumpPtrAllocator Allocator; > > + /// NodeOrdering - Assigns a "line number" value to each SDNode that > + /// corresponds to the "line number" of the original LLVM instruction. This > + /// used for turning off scheduling, because we'll forgo the normal scheduling > + /// algorithm and output the instructions according to this ordering. > + class NodeOrdering { SelectionDAG.h already has a ton of stuff in it. Please pull this out into its own SDNodeOrdering.h file. SelectionDAG.h can just forward declare the class. > + /// LineNo - The line of the instruction the node corresponds to. A value of > + /// `0' means it's not assigned. > + unsigned LineNo; I don't understand how tis is a "line number". It seems like these are really unique instruction ID's? Does this have any correspondence at all to source line numbers? > + std::map Order; This should use DenseMap. > + /// NewInst - Tell the ordering object that we're processing a new > + /// instruction. 
> + void NewInst() { > + if (Ordering) > + Ordering->newInst(); > + } This should be in SelectionDAGBuilder, not SelectionDAG. Likewise, the "current instruction" state should be split out from the NodeOrdering class. > ++ public: > + NodeOrdering() : LineNo(0) {} > + > + void add(const SDNode *Node) { > + assert(LineNo && "Invalid line number!"); > + Order[Node] = LineNo; > + } > + void remove(const SDNode *Node) { > + std::map::iterator Itr = Order.find(Node); > + if (Itr != Order.end()) > + Order.erase(Itr); > + } > + void clear() { > + Order.clear(); > + LineNo = 1; > + } > + unsigned getLineNo(const SDNode *Node) { > + unsigned LN = Order[Node]; > + assert(LN && "Node isn't in ordering map!"); > + return LN; > + } > + void newInst() { > + ++LineNo; > + } As above, I don't like the style of this API: you're mixing the construction of the datastructure with the storage of it. SDBuilder should just have the unsigned counter that it maintains, and this class should just be a thin wrapper around a densemap. > +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 > @@ -20,10 +20,16 @@ > #include "llvm/Target/TargetInstrInfo.h" > #include "llvm/Target/TargetRegisterInfo.h" > #include "llvm/Target/TargetSubtarget.h" > +#include "llvm/Support/CommandLine.h" > #include "llvm/Support/Debug.h" > #include "llvm/Support/raw_ostream.h" > using namespace llvm; > > +cl::opt > +DisableInstScheduling("disable-inst-scheduling", > + cl::init(false), > + cl::desc("Disable instruction scheduling")); This be in TargetOptions.h, not a command line option. 
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009@@ -778,8 +795,13 @@ > @@ -877,14 +904,17 @@ > ID.AddPointer(&Val); > void *IP = 0; > SDNode *N = NULL; > - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) > + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { > + if (Ordering) Ordering->add(N); > if (!VT.isVector()) > return SDValue(N, 0); > + } > if (!N) { > N = NodeAllocator.Allocate(); > new (N) ConstantSDNode(isT, &Val, EltVT); > CSEMap.InsertNode(N, IP); > AllNodes.push_back(N); > + if (Ordering) Ordering->add(N); I don't think that this is the right layer to do this at. The "Ordering" of nodes is only defined at Builder time, not in general when instructions are randomly created by other parts of SD machinery. The various calls to Ordering->add should only happen from the builder. OTOH, calls to *remove* a node from the ordering *should* happen from the common SD code when the node is about to be deleted. > +void SelectionDAG::NodeOrdering::dump() const { > +} Please implement or remove this. -Chris From clattner at apple.com Thu Dec 17 13:41:15 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:41:15 -0800 Subject: [llvm-commits] [llvm] r91273 - in /llvm/trunk: include/llvm/CodeGen/CalcSpillWeights.h lib/CodeGen/CalcSpillWeights.cpp lib/CodeGen/PreAllocSplitting.cpp lib/CodeGen/RegAllocLinearScan.cpp lib/CodeGen/RegAllocPBQP.cpp lib/CodeGen/SimpleRegisterCoalescing.cpp lib/CodeGen/SimpleRegisterCoalescing.h In-Reply-To: <200912140649.nBE6ngqC000877@zion.cs.uiuc.edu> References: <200912140649.nBE6ngqC000877@zion.cs.uiuc.edu> Message-ID: On Dec 13, 2009, at 10:49 PM, Lang Hames wrote: > Author: lhames > Date: Mon Dec 14 00:49:42 2009 > New Revision: 91273 > > URL: http://llvm.org/viewvc/llvm-project?rev=91273&view=rev > Log: > Moved spill weight calculation out of SimpleRegisterCoalescing and into its own pass: CalculateSpillWeights. Hi Lang, I'm all for better factoring! 
However, does this really make sense to be a pass? Wouldn't it make more sense to just be an object that the various register allocators can hold? -Chris > > Added: > llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h > llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp > Modified: > llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp > llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp > llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp > llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp > llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h > > Added: llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h?rev=91273&view=auto > > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h (added) > +++ llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h Mon Dec 14 00:49:42 2009 > @@ -0,0 +1,39 @@ > +//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > + > + > +#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H > +#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H > + > +#include "llvm/CodeGen/MachineFunctionPass.h" > + > +namespace llvm { > + > + class LiveInterval; > + > + /// CalculateSpillWeights - Compute spill weights for all virtual register > + /// live intervals. > + class CalculateSpillWeights : public MachineFunctionPass { > + public: > + static char ID; > + > + CalculateSpillWeights() : MachineFunctionPass(&ID) {} > + > + virtual void getAnalysisUsage(AnalysisUsage &au) const; > + > + virtual bool runOnMachineFunction(MachineFunction &fn); > + > + private: > + /// Returns true if the given live interval is zero length. 
> + bool isZeroLengthInterval(LiveInterval *li) const; > + }; > + > +} > + > +#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H > > Added: llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp?rev=91273&view=auto > > ============================================================================== > --- llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp (added) > +++ llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp Mon Dec 14 00:49:42 2009 > @@ -0,0 +1,154 @@ > +//===------------------------ CalcSpillWeights.cpp ------------------------===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > + > +#define DEBUG_TYPE "calcspillweights" > + > +#include "llvm/Function.h" > +#include "llvm/ADT/SmallSet.h" > +#include "llvm/CodeGen/CalcSpillWeights.h" > +#include "llvm/CodeGen/LiveIntervalAnalysis.h" > +#include "llvm/CodeGen/MachineFunction.h" > +#include "llvm/CodeGen/MachineLoopInfo.h" > +#include "llvm/CodeGen/MachineRegisterInfo.h" > +#include "llvm/CodeGen/SlotIndexes.h" > +#include "llvm/Support/Debug.h" > +#include "llvm/Support/raw_ostream.h" > +#include "llvm/Target/TargetInstrInfo.h" > +#include "llvm/Target/TargetRegisterInfo.h" > + > +using namespace llvm; > + > +char CalculateSpillWeights::ID = 0; > +static RegisterPass X("calcspillweights", > + "Calculate spill weights"); > + > +void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { > + au.addRequired(); > + au.addRequired(); > + au.setPreservesAll(); > + MachineFunctionPass::getAnalysisUsage(au); > +} > + > +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { > + > + DEBUG(errs() << "********** Compute Spill Weights **********\n" > + << "********** Function: " > + << fn.getFunction()->getName() << '\n'); > + 
> + LiveIntervals *lis = &getAnalysis(); > + MachineLoopInfo *loopInfo = &getAnalysis(); > + const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); > + MachineRegisterInfo *mri = &fn.getRegInfo(); > + > + SmallSet processed; > + for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); > + mbbi != mbbe; ++mbbi) { > + MachineBasicBlock* mbb = mbbi; > + SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); > + MachineLoop* loop = loopInfo->getLoopFor(mbb); > + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; > + bool isExiting = loop ? loop->isLoopExiting(mbb) : false; > + > + for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); > + mii != mie; ++mii) { > + const MachineInstr *mi = mii; > + if (tii->isIdentityCopy(*mi)) > + continue; > + > + if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) > + continue; > + > + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { > + const MachineOperand &mopi = mi->getOperand(i); > + if (!mopi.isReg() || mopi.getReg() == 0) > + continue; > + unsigned reg = mopi.getReg(); > + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) > + continue; > + // Multiple uses of reg by the same instruction. It should not > + // contribute to spill weight again. > + if (!processed.insert(reg)) > + continue; > + > + bool hasDef = mopi.isDef(); > + bool hasUse = !hasDef; > + for (unsigned j = i+1; j != e; ++j) { > + const MachineOperand &mopj = mi->getOperand(j); > + if (!mopj.isReg() || mopj.getReg() != reg) > + continue; > + hasDef |= mopj.isDef(); > + hasUse |= mopj.isUse(); > + if (hasDef && hasUse) > + break; > + } > + > + LiveInterval ®Int = lis->getInterval(reg); > + float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); > + if (hasDef && isExiting) { > + // Looks like this is a loop count variable update. 
> + SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); > + const LiveRange *dlr = > + lis->getInterval(reg).getLiveRangeContaining(defIdx); > + if (dlr->end > mbbEnd) > + weight *= 3.0F; > + } > + regInt.weight += weight; > + } > + processed.clear(); > + } > + } > + > + for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { > + LiveInterval &li = *I->second; > + if (TargetRegisterInfo::isVirtualRegister(li.reg)) { > + // If the live interval length is essentially zero, i.e. in every live > + // range the use follows def immediately, it doesn't make sense to spill > + // it and hope it will be easier to allocate for this li. > + if (isZeroLengthInterval(&li)) { > + li.weight = HUGE_VALF; > + continue; > + } > + > + bool isLoad = false; > + SmallVector spillIs; > + if (lis->isReMaterializable(li, spillIs, isLoad)) { > + // If all of the definitions of the interval are re-materializable, > + // it is a preferred candidate for spilling. If non of the defs are > + // loads, then it's potentially very cheap to re-materialize. > + // FIXME: this gets much more complicated once we support non-trivial > + // re-materialization. > + if (isLoad) > + li.weight *= 0.9F; > + else > + li.weight *= 0.5F; > + } > + > + // Slightly prefer live interval that has been assigned a preferred reg. > + std::pair Hint = mri->getRegAllocationHint(li.reg); > + if (Hint.first || Hint.second) > + li.weight *= 1.01F; > + > + // Divide the weight of the interval by its size. This encourages > + // spilling of intervals that are large and have few uses, and > + // discourages spilling of small intervals with many uses. > + li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; > + } > + } > + > + return false; > +} > + > +/// Returns true if the given live interval is zero length. 
> +bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { > + for (LiveInterval::Ranges::const_iterator > + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) > + if (i->end.getPrevIndex() > i->start) > + return false; > + return true; > +} > > Modified: llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp?rev=91273&r1=91272&r2=91273&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp (original) > +++ llvm/trunk/lib/CodeGen/PreAllocSplitting.cpp Mon Dec 14 00:49:42 2009 > @@ -16,6 +16,7 @@ > > #define DEBUG_TYPE "pre-alloc-split" > #include "VirtRegMap.h" > +#include "llvm/CodeGen/CalcSpillWeights.h" > #include "llvm/CodeGen/LiveIntervalAnalysis.h" > #include "llvm/CodeGen/LiveStackAnalysis.h" > #include "llvm/CodeGen/MachineDominators.h" > @@ -104,6 +105,7 @@ > AU.addRequired(); > AU.addPreserved(); > AU.addPreserved(); > + AU.addPreserved(); > if (StrongPHIElim) > AU.addPreservedID(StrongPHIEliminationID); > else > > Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=91273&r1=91272&r2=91273&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original) > +++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Mon Dec 14 00:49:42 2009 > @@ -16,6 +16,7 @@ > #include "VirtRegRewriter.h" > #include "Spiller.h" > #include "llvm/Function.h" > +#include "llvm/CodeGen/CalcSpillWeights.h" > #include "llvm/CodeGen/LiveIntervalAnalysis.h" > #include "llvm/CodeGen/LiveStackAnalysis.h" > #include "llvm/CodeGen/MachineFunctionPass.h" > @@ -187,6 +188,7 @@ > // Make sure PassManager knows which analyses to make available > // to coalescing and which analyses coalescing invalidates. 
> AU.addRequiredTransitive(); > + AU.addRequired(); > if (PreSplitIntervals) > AU.addRequiredID(PreAllocSplittingID); > AU.addRequired(); > > Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=91273&r1=91272&r2=91273&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original) > +++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Mon Dec 14 00:49:42 2009 > @@ -36,6 +36,7 @@ > #include "PBQP/Heuristics/Briggs.h" > #include "VirtRegMap.h" > #include "VirtRegRewriter.h" > +#include "llvm/CodeGen/CalcSpillWeights.h" > #include "llvm/CodeGen/LiveIntervalAnalysis.h" > #include "llvm/CodeGen/LiveStackAnalysis.h" > #include "llvm/CodeGen/MachineFunctionPass.h" > @@ -90,6 +91,7 @@ > au.addRequired(); > //au.addRequiredID(SplitCriticalEdgesID); > au.addRequired(); > + au.addRequired(); > au.addRequired(); > au.addPreserved(); > au.addRequired(); > > Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=91273&r1=91272&r2=91273&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original) > +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Dec 14 00:49:42 2009 > @@ -2622,114 +2622,6 @@ > ReMatDefs.clear(); > } > > -/// Returns true if the given live interval is zero length. 
> -static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { > - for (LiveInterval::Ranges::const_iterator > - i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) > - if (i->end.getPrevIndex() > i->start) > - return false; > - return true; > -} > - > - > -void SimpleRegisterCoalescing::CalculateSpillWeights() { > - SmallSet Processed; > - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); > - mbbi != mbbe; ++mbbi) { > - MachineBasicBlock* MBB = mbbi; > - SlotIndex MBBEnd = li_->getMBBEndIdx(MBB); > - MachineLoop* loop = loopInfo->getLoopFor(MBB); > - unsigned loopDepth = loop ? loop->getLoopDepth() : 0; > - bool isExiting = loop ? loop->isLoopExiting(MBB) : false; > - > - for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end(); > - mii != mie; ++mii) { > - const MachineInstr *MI = mii; > - if (tii_->isIdentityCopy(*MI)) > - continue; > - > - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) > - continue; > - > - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { > - const MachineOperand &mopi = MI->getOperand(i); > - if (!mopi.isReg() || mopi.getReg() == 0) > - continue; > - unsigned Reg = mopi.getReg(); > - if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) > - continue; > - // Multiple uses of reg by the same instruction. It should not > - // contribute to spill weight again. > - if (!Processed.insert(Reg)) > - continue; > - > - bool HasDef = mopi.isDef(); > - bool HasUse = !HasDef; > - for (unsigned j = i+1; j != e; ++j) { > - const MachineOperand &mopj = MI->getOperand(j); > - if (!mopj.isReg() || mopj.getReg() != Reg) > - continue; > - HasDef |= mopj.isDef(); > - HasUse |= mopj.isUse(); > - if (HasDef && HasUse) > - break; > - } > - > - LiveInterval &RegInt = li_->getInterval(Reg); > - float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); > - if (HasDef && isExiting) { > - // Looks like this is a loop count variable update. 
> - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); > - const LiveRange *DLR = > - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); > - if (DLR->end > MBBEnd) > - Weight *= 3.0F; > - } > - RegInt.weight += Weight; > - } > - Processed.clear(); > - } > - } > - > - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { > - LiveInterval &LI = *I->second; > - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { > - // If the live interval length is essentially zero, i.e. in every live > - // range the use follows def immediately, it doesn't make sense to spill > - // it and hope it will be easier to allocate for this li. > - if (isZeroLengthInterval(&LI, li_)) { > - LI.weight = HUGE_VALF; > - continue; > - } > - > - bool isLoad = false; > - SmallVector SpillIs; > - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { > - // If all of the definitions of the interval are re-materializable, > - // it is a preferred candidate for spilling. If non of the defs are > - // loads, then it's potentially very cheap to re-materialize. > - // FIXME: this gets much more complicated once we support non-trivial > - // re-materialization. > - if (isLoad) > - LI.weight *= 0.9F; > - else > - LI.weight *= 0.5F; > - } > - > - // Slightly prefer live interval that has been assigned a preferred reg. > - std::pair Hint = mri_->getRegAllocationHint(LI.reg); > - if (Hint.first || Hint.second) > - LI.weight *= 1.01F; > - > - // Divide the weight of the interval by its size. This encourages > - // spilling of intervals that are large and have few uses, and > - // discourages spilling of small intervals with many uses. 
> - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; > - } > - } > -} > - > - > bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { > mf_ = &fn; > mri_ = &fn.getRegInfo(); > @@ -2860,8 +2752,6 @@ > } > } > > - CalculateSpillWeights(); > - > DEBUG(dump()); > return true; > } > > Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h?rev=91273&r1=91272&r2=91273&view=diff > > ============================================================================== > --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h (original) > +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Mon Dec 14 00:49:42 2009 > @@ -244,10 +244,6 @@ > MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, > unsigned Reg, SlotIndex &LastUseIdx) const; > > - /// CalculateSpillWeights - Compute spill weights for all virtual register > - /// live intervals. > - void CalculateSpillWeights(); > - > void printRegName(unsigned reg) const; > }; > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Thu Dec 17 13:47:00 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:47:00 -0800 Subject: [llvm-commits] [PATCH] Make Path use StringRef instead of std::string where possible (issue161054) In-Reply-To: <0016e6d2849355ce81047a2b80d2@google.com> References: <0016e6d2849355ce81047a2b80d2@google.com> Message-ID: <06685DA5-FCD0-41A8-8571-5BCDD080F9DD@apple.com> On Dec 7, 2009, at 3:06 PM, jyasskin at gmail.com wrote: > Reviewers: , > > Message: > This depends on the patch I just sent to cfe-commits. I wanted to run it > by the list in case there are objections to the interface change (that, > for example, Path.getSuffix().c_str() no longer works). 
This looks like a huge improvement to me, please apply! -Chris From clattner at apple.com Thu Dec 17 13:48:43 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:48:43 -0800 Subject: [llvm-commits] [patch] Change the alignment in the X86.Windows subtarget In-Reply-To: <20091206012255.145970@gmx.net> References: <20091206012255.145970@gmx.net> Message-ID: On Dec 5, 2009, at 5:22 PM, Michael Beck wrote: > Hi all, > > the following patch changes the data-alignment of the 32bit Windows subtarget to values used by the MSVC compiler. > I assume this is the right subtarget for MSVC builds, as isCygwin and isMingw exists and at least cmake build have it ... This patch looks fine to me, but I can't vouch for its correctness. Anton, what is your position on it? -Chris From clattner at apple.com Thu Dec 17 13:52:17 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:52:17 -0800 Subject: [llvm-commits] [llvm] r90656 - /llvm/trunk/lib/Target/TargetData.cpp In-Reply-To: <200912050759.nB57x42v023280@zion.cs.uiuc.edu> References: <200912050759.nB57x42v023280@zion.cs.uiuc.edu> Message-ID: On Dec 4, 2009, at 11:59 PM, Bill Wendling wrote: > Author: void > Date: Sat Dec 5 01:59:04 2009 > New Revision: 90656 > > URL: http://llvm.org/viewvc/llvm-project?rev=90656&view=rev > Log: > Calling InvalidateEntry during the refinement was breaking the bootstrap. How? They do the same thing? 
-Chris > > Modified: > llvm/trunk/lib/Target/TargetData.cpp > > Modified: llvm/trunk/lib/Target/TargetData.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetData.cpp?rev=90656&r1=90655&r2=90656&view=diff > > ============================================================================== > --- llvm/trunk/lib/Target/TargetData.cpp (original) > +++ llvm/trunk/lib/Target/TargetData.cpp Sat Dec 5 01:59:04 2009 > @@ -327,7 +327,12 @@ > /// > virtual void refineAbstractType(const DerivedType *OldTy, > const Type *) { > - InvalidateEntry(cast(OldTy)); > + const StructType *STy = cast(OldTy); > + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); > + Iter->second->~StructLayout(); > + free(Iter->second); > + LayoutInfo.erase(Iter); > + OldTy->removeAbstractTypeUser(this); > } > > /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware > @@ -336,7 +341,12 @@ > /// This method notifies ATU's when this occurs for a type. > /// > virtual void typeBecameConcrete(const DerivedType *AbsTy) { > - InvalidateEntry(cast(AbsTy)); > + const StructType *STy = cast(AbsTy); > + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); > + Iter->second->~StructLayout(); > + free(Iter->second); > + LayoutInfo.erase(Iter); > + AbsTy->removeAbstractTypeUser(this); > } > > public: > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From asl at math.spbu.ru Thu Dec 17 13:53:24 2009 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Thu, 17 Dec 2009 22:53:24 +0300 Subject: [llvm-commits] [patch] Change the alignment in the X86.Windows subtarget In-Reply-To: References: <20091206012255.145970@gmx.net> Message-ID: Hi, Chris > This patch looks fine to me, but I can't vouch for its correctness. Anton, what is your position on it? We discussed it a bit, let me find the log.
:) I think we agreed to modify mingw target data (which serves as "windows"), since we in any case need to be compatible with vcpp there. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From jyasskin at google.com Thu Dec 17 13:55:07 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 17 Dec 2009 19:55:07 -0000 Subject: [llvm-commits] [llvm] r91611 - in /llvm/trunk: include/llvm/DerivedTypes.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Type.cpp unittests/VMCore/DerivedTypesTest.cpp Message-ID: <200912171955.nBHJt7F3001242@zion.cs.uiuc.edu> Author: jyasskin Date: Thu Dec 17 13:55:06 2009 New Revision: 91611 URL: http://llvm.org/viewvc/llvm-project?rev=91611&view=rev Log: This fixes a memory leak in OpaqueType found by Google's internal heapchecker. Added: llvm/trunk/unittests/VMCore/DerivedTypesTest.cpp Modified: llvm/trunk/include/llvm/DerivedTypes.h llvm/trunk/lib/VMCore/LLVMContextImpl.h llvm/trunk/lib/VMCore/Type.cpp Modified: llvm/trunk/include/llvm/DerivedTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DerivedTypes.h?rev=91611&r1=91610&r2=91611&view=diff ============================================================================== --- llvm/trunk/include/llvm/DerivedTypes.h (original) +++ llvm/trunk/include/llvm/DerivedTypes.h Thu Dec 17 13:55:06 2009 @@ -502,9 +502,7 @@ public: /// OpaqueType::get - Static factory method for the OpaqueType class... 
/// - static OpaqueType *get(LLVMContext &C) { - return new OpaqueType(C); // All opaque types are distinct - } + static OpaqueType *get(LLVMContext &C); // Implement support for type inquiry through isa, cast, and dyn_cast: static inline bool classof(const OpaqueType *) { return true; } Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=91611&r1=91610&r2=91611&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Thu Dec 17 13:55:06 2009 @@ -27,6 +27,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include @@ -159,6 +160,11 @@ TypeMap StructTypes; TypeMap IntegerTypes; + // Opaque types are not structurally uniqued, so don't use TypeMap. + typedef SmallPtrSet OpaqueTypesTy; + OpaqueTypesTy OpaqueTypes; + + /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know // whether or not a value has an entry in this map. 
@@ -201,6 +207,11 @@ delete I->second; } MDNodeSet.clear(); + for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); + I != E; ++I) { + (*I)->AbstractTypeUsers.clear(); + delete *I; + } } }; Modified: llvm/trunk/lib/VMCore/Type.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Type.cpp?rev=91611&r1=91610&r2=91611&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Type.cpp (original) +++ llvm/trunk/lib/VMCore/Type.cpp Thu Dec 17 13:55:06 2009 @@ -79,6 +79,9 @@ operator delete(const_cast(this)); return; + } else if (const OpaqueType *opaque_this = dyn_cast(this)) { + LLVMContextImpl *pImpl = this->getContext().pImpl; + pImpl->OpaqueTypes.erase(opaque_this); } // For all the other type subclasses, there is either no contained types or @@ -955,6 +958,20 @@ //===----------------------------------------------------------------------===// +// Opaque Type Factory... +// + +OpaqueType *OpaqueType::get(LLVMContext &C) { + OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct + + LLVMContextImpl *pImpl = C.pImpl; + pImpl->OpaqueTypes.insert(OT); + return OT; +} + + + +//===----------------------------------------------------------------------===// // Derived Type Refinement Functions //===----------------------------------------------------------------------===// Added: llvm/trunk/unittests/VMCore/DerivedTypesTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/VMCore/DerivedTypesTest.cpp?rev=91611&view=auto ============================================================================== --- llvm/trunk/unittests/VMCore/DerivedTypesTest.cpp (added) +++ llvm/trunk/unittests/VMCore/DerivedTypesTest.cpp Thu Dec 17 13:55:06 2009 @@ -0,0 +1,31 @@ +//===- llvm/unittest/VMCore/DerivedTypesTest.cpp - Types unit tests -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "../lib/VMCore/LLVMContextImpl.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +using namespace llvm; + +namespace { + +TEST(OpaqueTypeTest, RegisterWithContext) { + LLVMContext C; + LLVMContextImpl *pImpl = C.pImpl; + + EXPECT_EQ(0u, pImpl->OpaqueTypes.size()); + { + PATypeHolder Type = OpaqueType::get(C); + EXPECT_EQ(1u, pImpl->OpaqueTypes.size()); + } + EXPECT_EQ(0u, pImpl->OpaqueTypes.size()); +} + +} // namespace From clattner at apple.com Thu Dec 17 13:58:24 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:58:24 -0800 Subject: [llvm-commits] [llvm] r90656 - /llvm/trunk/lib/Target/TargetData.cpp In-Reply-To: References: <200912050759.nB57x42v023280@zion.cs.uiuc.edu> Message-ID: On Dec 17, 2009, at 11:52 AM, Chris Lattner wrote: > > On Dec 4, 2009, at 11:59 PM, Bill Wendling wrote: > >> Author: void >> Date: Sat Dec 5 01:59:04 2009 >> New Revision: 90656 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=90656&view=rev >> Log: >> Calling InvalidateEntry during the refinement was breaking the bootstrap. > > How? They do the same thing? Ah, the answer is that ->isAbstract() is returning false in the 'typeBecameConcrete' case, but we still need to remove the type from the LayoutInfo map. I'll fix this. -Chris From clattner at apple.com Thu Dec 17 13:58:54 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 11:58:54 -0800 Subject: [llvm-commits] [patch] Change the alignment in the X86.Windows subtarget In-Reply-To: References: <20091206012255.145970@gmx.net> Message-ID: <997D070A-CC0A-4576-B483-CF4F38D51D4E@apple.com> On Dec 17, 2009, at 11:53 AM, Anton Korobeynikov wrote: > Hi, Chris > >> This patch looks fine to me, but I can't vouch for its correctness. Anton, what is your position on it? 
> We discussed it a bit, let me find the log. :) > I think we agreed to modify mingw target data (which serves as > "windows"), since we in any case need to be compatible with vcpp > there. Ok. I just wanted to make sure the patch didn't get dropped, -Chris From espindola at google.com Thu Dec 17 13:59:54 2009 From: espindola at google.com (Rafael Espindola) Date: Thu, 17 Dec 2009 14:59:54 -0500 Subject: [llvm-commits] [patch] More strict checking in LeakDetectorImpl::addGarbage Message-ID: <38a0d8450912171159l399974e2o5261ac810f44bc77@mail.gmail.com> Currently we assert in LeakDetectorImpl::addGarbage that we don't mark something as garbage twice. Unfortunately, as the comments in the file say, we do so only in the uncommon case "by far" :-( The attached patch adds a similar check for the common case. This makes PR5790 much easier to reproduce and probably helps PR5770 too. Tested with a "make check-lit". No unexpected failures. OK for trunk? Cheers, -- Rafael Ávila de Espíndola -------------- next part -------------- A non-text attachment was scrubbed... Name: assert.patch Type: text/x-patch Size: 415 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091217/d88be232/attachment.bin From sabre at nondot.org Thu Dec 17 14:00:23 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 17 Dec 2009 20:00:23 -0000 Subject: [llvm-commits] [llvm] r91612 - /llvm/trunk/lib/Target/TargetData.cpp Message-ID: <200912172000.nBHK0Nh9001407@zion.cs.uiuc.edu> Author: lattner Date: Thu Dec 17 14:00:21 2009 New Revision: 91612 URL: http://llvm.org/viewvc/llvm-project?rev=91612&view=rev Log: finish cleaning up StructLayoutMap.
Modified: llvm/trunk/lib/Target/TargetData.cpp Modified: llvm/trunk/lib/Target/TargetData.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetData.cpp?rev=91612&r1=91611&r2=91612&view=diff ============================================================================== --- llvm/trunk/lib/Target/TargetData.cpp (original) +++ llvm/trunk/lib/Target/TargetData.cpp Thu Dec 17 14:00:21 2009 @@ -321,18 +321,24 @@ typedef DenseMap LayoutInfoTy; LayoutInfoTy LayoutInfo; + void RemoveEntry(LayoutInfoTy::iterator I, bool WasAbstract) { + I->second->~StructLayout(); + free(I->second); + if (WasAbstract) + I->first->removeAbstractTypeUser(this); + LayoutInfo.erase(I); + } + + /// refineAbstractType - The callback method invoked when an abstract type is /// resolved to another type. An object must override this method to update /// its internal state to reference NewType instead of OldType. /// virtual void refineAbstractType(const DerivedType *OldTy, const Type *) { - const StructType *STy = cast(OldTy); - LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); - Iter->second->~StructLayout(); - free(Iter->second); - LayoutInfo.erase(Iter); - OldTy->removeAbstractTypeUser(this); + LayoutInfoTy::iterator I = LayoutInfo.find(cast(OldTy)); + assert(I != LayoutInfo.end() && "Using type but not in map?"); + RemoveEntry(I, true); } /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware @@ -341,12 +347,9 @@ /// This method notifies ATU's when this occurs for a type. 
/// virtual void typeBecameConcrete(const DerivedType *AbsTy) { - const StructType *STy = cast(AbsTy); - LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); - Iter->second->~StructLayout(); - free(Iter->second); - LayoutInfo.erase(Iter); - AbsTy->removeAbstractTypeUser(this); + LayoutInfoTy::iterator I = LayoutInfo.find(cast(AbsTy)); + assert(I != LayoutInfo.end() && "Using type but not in map?"); + RemoveEntry(I, true); } public: @@ -368,13 +371,7 @@ void InvalidateEntry(const StructType *Ty) { LayoutInfoTy::iterator I = LayoutInfo.find(Ty); if (I == LayoutInfo.end()) return; - - I->second->~StructLayout(); - free(I->second); - LayoutInfo.erase(I); - - if (Ty->isAbstract()) - Ty->removeAbstractTypeUser(this); + RemoveEntry(I, Ty->isAbstract()); } StructLayout *&operator[](const StructType *STy) { @@ -424,8 +421,7 @@ void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. - StructLayoutMap *STM = static_cast(LayoutMap); - STM->InvalidateEntry(Ty); + static_cast(LayoutMap)->InvalidateEntry(Ty); } std::string TargetData::getStringRepresentation() const { From ken.dyck at onsemi.com Thu Dec 17 14:09:43 2009 From: ken.dyck at onsemi.com (Ken Dyck) Date: Thu, 17 Dec 2009 20:09:43 -0000 Subject: [llvm-commits] [llvm] r91614 - in /llvm/trunk: include/llvm/CodeGen/ValueTypes.h lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Message-ID: <200912172009.nBHK9hPQ001888@zion.cs.uiuc.edu> Author: kjdyck Date: Thu Dec 17 14:09:43 2009 New Revision: 91614 URL: http://llvm.org/viewvc/llvm-project?rev=91614&view=rev Log: Introduce EVT::getHalfSizedIntegerVT() for use in ExpandUnalignedStore() in LegalizeDAG.cpp. Unlike the code it replaces, which simply decrements the simple type by one, getHalfSizedIntegerVT() searches for the smallest simple integer type that is at least half the size of the type it is called on. This approach has the advantage that it will continue working if a new value type (such as i24) is added to MVT. 
Also, in preparation for new value types, remove the assertions that non-power-of-2 8-bit-mutiple types are Extended when legalizing extload and truncstore operations. Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=91614&r1=91613&r2=91614&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Thu Dec 17 14:09:43 2009 @@ -589,7 +589,25 @@ return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth)); } - /// isPow2VectorType - Retuns true if the given vector is a power of 2. + /// getHalfSizedIntegerVT - Finds the smallest simple value type that is + /// greater than or equal to half the width of this EVT. If no simple + /// value type can be found, an extended integer value type of half the + /// size (rounded up) is returned. + EVT getHalfSizedIntegerVT(LLVMContext &Context) const { + assert(isInteger() && !isVector() && "Invalid integer type!"); + unsigned EVTSize = getSizeInBits(); + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE; + ++IntVT) { + EVT HalfVT = EVT((MVT::SimpleValueType)IntVT); + if(HalfVT.getSizeInBits() * 2 >= EVTSize) { + return HalfVT; + } + } + return getIntegerVT(Context, (EVTSize + 1) / 2); + } + + /// isPow2VectorType - Returns true if the given vector is a power of 2. 
bool isPow2VectorType() const { unsigned NElts = getVectorNumElements(); return !(NElts & (NElts - 1)); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=91614&r1=91613&r2=91614&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Thu Dec 17 14:09:43 2009 @@ -461,8 +461,7 @@ !ST->getMemoryVT().isVector() && "Unaligned store of unknown type."); // Get the half-size VT - EVT NewStoredVT = - (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT().SimpleTy - 1); + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); int NumBits = NewStoredVT.getSizeInBits(); int IncrementSize = NumBits / 8; @@ -1170,8 +1169,7 @@ Tmp2 = LegalizeOp(Ch); } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. - assert(SrcVT.isExtended() && !SrcVT.isVector() && - "Unsupported extload!"); + assert(!SrcVT.isVector() && "Unsupported extload!"); unsigned RoundWidth = 1 << Log2_32(SrcWidth); assert(RoundWidth < SrcWidth); unsigned ExtraWidth = SrcWidth - RoundWidth; @@ -1384,8 +1382,7 @@ SVOffset, NVT, isVolatile, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. 
- assert(StVT.isExtended() && !StVT.isVector() && - "Unsupported truncstore!"); + assert(!StVT.isVector() && "Unsupported truncstore!"); unsigned RoundWidth = 1 << Log2_32(StWidth); assert(RoundWidth < StWidth); unsigned ExtraWidth = StWidth - RoundWidth; From Ken.Dyck at onsemi.com Thu Dec 17 14:16:17 2009 From: Ken.Dyck at onsemi.com (Ken Dyck) Date: Thu, 17 Dec 2009 13:16:17 -0700 Subject: [llvm-commits] [Review] Preparing for non-power-of-2 machine value types in X86ISelLowering and LegalizeDAG In-Reply-To: References: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEAF38@NAMAIL.ad.onsemi.com> <8FECBCA3-DE6F-42E4-B067-7E234A31FBFF@apple.com> <8F2E4A8BCDA0B84DA6C9088EB5B27747CEB38A@NAMAIL.ad.onsemi.com> Message-ID: <8F2E4A8BCDA0B84DA6C9088EB5B27747CEB521@NAMAIL.ad.onsemi.com> On Thursday, December 17, 2009 2:25 PM, Chris Lattner wrote: > > On Dec 17, 2009, at 8:07 AM, Ken Dyck wrote: > > > On Thursday, December 17, 2009 3:04 AM, Chris Lattner wrote: > > > > > > On Dec 16, 2009, at 7:33 AM, Ken Dyck wrote: > > > > > > > The attached patches prepare for the > > > > introduction of non-power-of-2 machine > > > > value types (as recently discussed [1]). > > > > They contain no functional changes. They > > > > merely eliminate assumptions that > > > > incrementing/decrementing a > > > > SimpleValueType doubles/halves its size > > > > and that all non-power-of-2 types are > > > > extended. > > > > > > The legalize change looks reasonable to me, > > > but please change the for loop to be a while > > > loop (I think it will be clearer) and make > > > it be a method on EVT instead of inline in > > > the legalizer. > > > > In moving it to EVT, the knowledge that the > > source type is simple is lost and the loop > > boundaries change. So a for loop seems to make > > more sense now. What do you think of the > > attached updated patch? > > Looks good to me, thanks Ken! Committed in 91614. 
From espindola at google.com Thu Dec 17 14:30:05 2009 From: espindola at google.com (Rafael Espindola) Date: Thu, 17 Dec 2009 15:30:05 -0500 Subject: [llvm-commits] [patch] More strict checking in LeakDetectorImpl::addGarbage In-Reply-To: <38a0d8450912171159l399974e2o5261ac810f44bc77@mail.gmail.com> References: <38a0d8450912171159l399974e2o5261ac810f44bc77@mail.gmail.com> Message-ID: <38a0d8450912171230r5b951e70h6c25e49c3a4cd76e@mail.gmail.com> > OK for trunk? Actually, I think the attached patch is better. It catches more cases and is simpler. Cheers, -- Rafael Ávila de Espíndola -------------- next part -------------- A non-text attachment was scrubbed... Name: assert.patch Type: text/x-patch Size: 609 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091217/5cd693df/attachment.bin From mrs at apple.com Thu Dec 17 14:38:20 2009 From: mrs at apple.com (Mike Stump) Date: Thu, 17 Dec 2009 12:38:20 -0800 Subject: [llvm-commits] [llvm] r91337 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp In-Reply-To: <200912142149.nBELnijF018208@zion.cs.uiuc.edu> References: <200912142149.nBELnijF018208@zion.cs.uiuc.edu> Message-ID: <16F8D403-0ADF-474C-9080-746B3BAB58DE@apple.com> On Dec 14, 2009, at 1:49 PM, Bill Wendling wrote: > Author: void > Date: Mon Dec 14 15:49:44 2009 > New Revision: 91337 > > URL: http://llvm.org/viewvc/llvm-project?rev=91337&view=rev > Log: > The CIE says that the LSDA point in the FDE section is an "sdata4". That's fine, > but we need it to actually be 4-bytes in the FDE. I think this will also need: Index: lib/CodeGen/AsmPrinter/DwarfException.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfException.cpp (revision 91608) +++ lib/CodeGen/AsmPrinter/DwarfException.cpp (working copy) @@ -289,7 +289,8 @@ if (MMI->getPersonalities()[0] != NULL) { bool is4Byte = TD->getPointerSize() == sizeof(int32_t); - Asm->EmitULEB128Bytes(is4Byte ?
4 : 8); + // Below we choose a 4-byte encoding. + Asm->EmitULEB128Bytes(4); Asm->EOL("Augmentation size"); // We force 32-bits here because we've encoded our LSDA in the CIE with but even that didn't seem to fix all the problems... From snaroff at apple.com Thu Dec 17 14:39:34 2009 From: snaroff at apple.com (Steve Naroff) Date: Thu, 17 Dec 2009 20:39:34 -0000 Subject: [llvm-commits] [llvm] r91617 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912172039.nBHKdYKI003088@zion.cs.uiuc.edu> Author: snaroff Date: Thu Dec 17 14:39:34 2009 New Revision: 91617 URL: http://llvm.org/viewvc/llvm-project?rev=91617&view=rev Log: Fix Windows build breakage... Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91617&r1=91616&r2=91617&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Thu Dec 17 14:39:34 2009 @@ -209,7 +209,7 @@ T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); // Copy the elements over. - uninitialized_copy(this->begin(), this->end(), NewElts); + this->uninitialized_copy(this->begin(), this->end(), NewElts); // Destroy the original elements. destroy_range(this->begin(), this->end()); @@ -495,15 +495,15 @@ // Copy over the elements that we're about to overwrite. T *OldEnd = this->end(); - setEnd(this->end() + NumToInsert); + this->setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); // Replace the overwritten part. std::copy(From, From+NumOverwritten, I); // Insert the non-overwritten middle part. 
- uninitialized_copy(From+NumOverwritten, To, OldEnd); + this->uninitialized_copy(From+NumOverwritten, To, OldEnd); return I; } From isanbard at gmail.com Thu Dec 17 14:39:56 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 17 Dec 2009 12:39:56 -0800 Subject: [llvm-commits] [llvm] r91337 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp In-Reply-To: <06BEB5FC-FA16-4619-86F4-A15CCC1EDCE9@apple.com> References: <200912142149.nBELnijF018208@zion.cs.uiuc.edu> <06BEB5FC-FA16-4619-86F4-A15CCC1EDCE9@apple.com> Message-ID: <164BA461-85DC-4B9F-B491-42DEDD71CDB4@gmail.com> On Dec 16, 2009, at 7:22 PM, Mike Stump wrote: > On Dec 14, 2009, at 1:49 PM, Bill Wendling wrote: >> Author: void >> Date: Mon Dec 14 15:49:44 2009 >> New Revision: 91337 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91337&view=rev >> Log: >> The CIE says that the LSDA point in the FDE section is an "sdata4". That's fine, >> but we need it to actually be 4-bytes in the FDE. > > This causes: > > Tests that now fail, but worked before: > > g++.old-deja/g++.mike/eh10.C execution test > g++.old-deja/g++.mike/eh2.C execution test > g++.old-deja/g++.mike/eh23.C execution test > g++.old-deja/g++.mike/eh25.C execution test > g++.old-deja/g++.mike/eh3.C execution test > g++.old-deja/g++.mike/eh33.C execution test > g++.old-deja/g++.mike/eh39.C execution test > g++.old-deja/g++.mike/eh40.C execution test > g++.old-deja/g++.mike/eh41.C execution test > g++.old-deja/g++.mike/eh42.C execution test > g++.old-deja/g++.mike/eh44.C execution test > g++.old-deja/g++.mike/eh49.C execution test > g++.old-deja/g++.mike/eh5.C execution test > g++.old-deja/g++.mike/eh50.C execution test > g++.old-deja/g++.mike/eh51.C execution test > g++.old-deja/g++.mike/eh6.C execution test > g++.old-deja/g++.robertl/eh990323-3.C execution test > > from the g++ EH testsuite under clang. Can you revert this until this problem is tracked down and fixed? Are you testing this on Barolo or SnowLeopard? 
It works on Barolo, but not SnowLeopard. I suspect it's a linker issue. -bw From isanbard at gmail.com Thu Dec 17 14:41:01 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 17 Dec 2009 20:41:01 -0000 Subject: [llvm-commits] [llvm] r91618 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Message-ID: <200912172041.nBHKf1V4003149@zion.cs.uiuc.edu> Author: void Date: Thu Dec 17 14:41:01 2009 New Revision: 91618 URL: http://llvm.org/viewvc/llvm-project?rev=91618&view=rev Log: Temporarily revert 91337. It's causing testcase failures. $ svn merge -c -91337 https://llvm.org/svn/llvm-project/llvm/trunk --- Reverse-merging r91337 into '.': U lib/CodeGen/AsmPrinter/DwarfException.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=91618&r1=91617&r2=91618&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Thu Dec 17 14:41:01 2009 @@ -292,13 +292,14 @@ Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); - // We force 32-bits here because we've encoded our LSDA in the CIE with - // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree. 
if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->EmitInt32((int)0); - + EmitReference("exception", EHFrameInfo.Number, true, false); + else { + if (is4Byte) + Asm->EmitInt32((int)0); + else + Asm->EmitInt64((int)0); + } Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); From jyasskin at google.com Thu Dec 17 15:02:40 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 17 Dec 2009 21:02:40 -0000 Subject: [llvm-commits] [llvm] r91620 - in /llvm/trunk: include/llvm/System/Path.h lib/CompilerDriver/CompilationGraph.cpp lib/System/Path.cpp lib/System/Unix/Path.inc lib/System/Win32/Path.inc Message-ID: <200912172102.nBHL2eYc003946@zion.cs.uiuc.edu> Author: jyasskin Date: Thu Dec 17 15:02:39 2009 New Revision: 91620 URL: http://llvm.org/viewvc/llvm-project?rev=91620&view=rev Log: Make Path use StringRef instead of std::string where possible. Modified: llvm/trunk/include/llvm/System/Path.h llvm/trunk/lib/CompilerDriver/CompilationGraph.cpp llvm/trunk/lib/System/Path.cpp llvm/trunk/lib/System/Unix/Path.inc llvm/trunk/lib/System/Win32/Path.inc Modified: llvm/trunk/include/llvm/System/Path.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/System/Path.h?rev=91620&r1=91619&r2=91620&view=diff ============================================================================== --- llvm/trunk/include/llvm/System/Path.h (original) +++ llvm/trunk/include/llvm/System/Path.h Thu Dec 17 15:02:39 2009 @@ -14,6 +14,7 @@ #ifndef LLVM_SYSTEM_PATH_H #define LLVM_SYSTEM_PATH_H +#include "llvm/ADT/StringRef.h" #include "llvm/System/TimeValue.h" #include #include @@ -159,7 +160,7 @@ /// between processes. /// @returns The dynamic link library suffix for the current platform. /// @brief Return the dynamic link library suffix. 
- static std::string GetDLLSuffix(); + static StringRef GetDLLSuffix(); /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup and the address of main itself. @@ -174,12 +175,12 @@ Path() : path() {} Path(const Path &that) : path(that.path) {} - /// This constructor will accept a std::string as a path. No checking is - /// done on this path to determine if it is valid. To determine validity - /// of the path, use the isValid method. + /// This constructor will accept a char* or std::string as a path. No + /// checking is done on this path to determine if it is valid. To + /// determine validity of the path, use the isValid method. /// @param p The path to assign. /// @brief Construct a Path from a string. - explicit Path(const std::string& p); + explicit Path(StringRef p); /// This constructor will accept a character range as a path. No checking /// is done on this path to determine if it is valid. To determine @@ -202,10 +203,10 @@ } /// Makes a copy of \p that to \p this. - /// @param \p that A std::string denoting the path + /// @param \p that A StringRef denoting the path /// @returns \p this /// @brief Assignment Operator - Path &operator=(const std::string &that); + Path &operator=(StringRef that); /// Compares \p this Path with \p that Path for equality. /// @returns true if \p this and \p that refer to the same thing. @@ -251,28 +252,28 @@ /// component is the file or directory name occuring after the last /// directory separator. If no directory separator is present, the entire /// path name is returned (i.e. same as toString). - /// @returns std::string containing the last component of the path name. + /// @returns StringRef containing the last component of the path name. /// @brief Returns the last component of the path name. 
- std::string getLast() const; + StringRef getLast() const; /// This function strips off the path and suffix of the file or directory /// name and returns just the basename. For example /a/foo.bar would cause /// this function to return "foo". - /// @returns std::string containing the basename of the path + /// @returns StringRef containing the basename of the path /// @brief Get the base name of the path - std::string getBasename() const; + StringRef getBasename() const; /// This function strips off the suffix of the path beginning with the /// path separator ('/' on Unix, '\' on Windows) and returns the result. - std::string getDirname() const; + StringRef getDirname() const; /// This function strips off the path and basename(up to and /// including the last dot) of the file or directory name and /// returns just the suffix. For example /a/foo.bar would cause /// this function to return "bar". - /// @returns std::string containing the suffix of the path + /// @returns StringRef containing the suffix of the path /// @brief Get the suffix of the path - std::string getSuffix() const; + StringRef getSuffix() const; /// Obtain a 'C' string for the path name. /// @returns a 'C' string containing the path name. @@ -315,7 +316,7 @@ /// cases (file not found, file not accessible, etc.) it returns false. /// @returns true if the magic number of the file matches \p magic. /// @brief Determine if file has a specific magic number - bool hasMagicNumber(const std::string& magic) const; + bool hasMagicNumber(StringRef magic) const; /// This function retrieves the first \p len bytes of the file associated /// with \p this. These bytes are returned as the "magic number" in the @@ -422,8 +423,8 @@ /// Path object takes on the path value of \p unverified_path /// @returns true if the path was set, false otherwise. /// @param unverified_path The path to be set in Path object. 
- /// @brief Set a full path from a std::string - bool set(const std::string& unverified_path); + /// @brief Set a full path from a StringRef + bool set(StringRef unverified_path); /// One path component is removed from the Path. If only one component is /// present in the path, the Path object becomes empty. If the Path object @@ -437,7 +438,7 @@ /// needed. /// @returns false if the path component could not be added. /// @brief Appends one path component to the Path. - bool appendComponent( const std::string& component ); + bool appendComponent(StringRef component); /// A period and the \p suffix are appended to the end of the pathname. /// The precondition for this function is that the Path reference a file @@ -446,7 +447,7 @@ /// become invalid for the host operating system, false is returned. /// @returns false if the suffix could not be added, true if it was. /// @brief Adds a period and the \p suffix to the end of the pathname. - bool appendSuffix(const std::string& suffix); + bool appendSuffix(StringRef suffix); /// The suffix of the filename is erased. The suffix begins with and /// includes the last . character in the filename after the last directory @@ -620,12 +621,12 @@ PathWithStatus(const Path &other) : Path(other), status(), fsIsValid(false) {} - /// This constructor will accept a std::string as a path. No checking is - /// done on this path to determine if it is valid. To determine validity - /// of the path, use the isValid method. + /// This constructor will accept a char* or std::string as a path. No + /// checking is done on this path to determine if it is valid. To + /// determine validity of the path, use the isValid method. /// @brief Construct a Path from a string. explicit PathWithStatus( - const std::string& p ///< The path to assign. + StringRef p ///< The path to assign. ) : Path(p), status(), fsIsValid(false) {} /// This constructor will accept a character range as a path. 
No checking Modified: llvm/trunk/lib/CompilerDriver/CompilationGraph.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CompilerDriver/CompilationGraph.cpp?rev=91620&r1=91619&r2=91620&view=diff ============================================================================== --- llvm/trunk/lib/CompilerDriver/CompilationGraph.cpp (original) +++ llvm/trunk/lib/CompilerDriver/CompilationGraph.cpp Thu Dec 17 15:02:39 2009 @@ -35,7 +35,7 @@ const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { LanguageMap::const_iterator Lang = this->find(File.getSuffix()); if (Lang == this->end()) - throw std::runtime_error("Unknown suffix: " + File.getSuffix()); + throw std::runtime_error(("Unknown suffix: " + File.getSuffix()).str()); return Lang->second; } } Modified: llvm/trunk/lib/System/Path.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Path.cpp?rev=91620&r1=91619&r2=91620&view=diff ============================================================================== --- llvm/trunk/lib/System/Path.cpp (original) +++ llvm/trunk/lib/System/Path.cpp Thu Dec 17 15:02:39 2009 @@ -176,7 +176,7 @@ return sys::Path(); } -std::string Path::GetDLLSuffix() { +StringRef Path::GetDLLSuffix() { return LTDL_SHLIB_EXT; } @@ -191,7 +191,7 @@ return FT == Bitcode_FileType; } -bool Path::hasMagicNumber(const std::string &Magic) const { +bool Path::hasMagicNumber(StringRef Magic) const { std::string actualMagic; if (getMagicNumber(actualMagic, static_cast(Magic.size()))) return Magic == actualMagic; @@ -217,8 +217,9 @@ Paths.push_back(tmpPath); } -static std::string getDirnameCharSep(const std::string& path, char Sep) { - +static StringRef getDirnameCharSep(StringRef path, const char *Sep) { + assert(Sep[0] != '\0' && Sep[1] == '\0' && + "Sep must be a 1-character string literal."); if (path.empty()) return "."; @@ -227,31 +228,31 @@ signed pos = static_cast(path.size()) - 1; - while (pos >= 0 && path[pos] == Sep) + while (pos >= 0 && path[pos] == Sep[0]) 
--pos; if (pos < 0) - return path[0] == Sep ? std::string(1, Sep) : std::string("."); + return path[0] == Sep[0] ? Sep : "."; // Any slashes left? signed i = 0; - while (i < pos && path[i] != Sep) + while (i < pos && path[i] != Sep[0]) ++i; if (i == pos) // No slashes? Return "." return "."; // There is at least one slash left. Remove all trailing non-slashes. - while (pos >= 0 && path[pos] != Sep) + while (pos >= 0 && path[pos] != Sep[0]) --pos; // Remove any trailing slashes. - while (pos >= 0 && path[pos] == Sep) + while (pos >= 0 && path[pos] == Sep[0]) --pos; if (pos < 0) - return path[0] == Sep ? std::string(1, Sep) : std::string("."); + return path[0] == Sep[0] ? Sep : "."; return path.substr(0, pos+1); } Modified: llvm/trunk/lib/System/Unix/Path.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Unix/Path.inc?rev=91620&r1=91619&r2=91620&view=diff ============================================================================== --- llvm/trunk/lib/System/Unix/Path.inc (original) +++ llvm/trunk/lib/System/Unix/Path.inc Thu Dec 17 15:02:39 2009 @@ -78,15 +78,15 @@ const char sys::PathSeparator = ':'; -Path::Path(const std::string& p) +Path::Path(StringRef p) : path(p) {} Path::Path(const char *StrStart, unsigned StrLen) : path(StrStart, StrLen) {} Path& -Path::operator=(const std::string &that) { - path = that; +Path::operator=(StringRef that) { + path.assign(that.data(), that.size()); return *this; } @@ -377,11 +377,11 @@ } -std::string Path::getDirname() const { - return getDirnameCharSep(path, '/'); +StringRef Path::getDirname() const { + return getDirnameCharSep(path, "/"); } -std::string +StringRef Path::getBasename() const { // Find the last slash std::string::size_type slash = path.rfind('/'); @@ -392,12 +392,12 @@ std::string::size_type dot = path.rfind('.'); if (dot == std::string::npos || dot < slash) - return path.substr(slash); + return StringRef(path).substr(slash); else - return path.substr(slash, dot - slash); + return 
StringRef(path).substr(slash, dot - slash); } -std::string +StringRef Path::getSuffix() const { // Find the last slash std::string::size_type slash = path.rfind('/'); @@ -408,9 +408,9 @@ std::string::size_type dot = path.rfind('.'); if (dot == std::string::npos || dot < slash) - return std::string(); + return StringRef(""); else - return path.substr(dot + 1); + return StringRef(path).substr(dot + 1); } bool Path::getMagicNumber(std::string &Magic, unsigned len) const { @@ -478,7 +478,7 @@ return true; } -std::string +StringRef Path::getLast() const { // Find the last slash size_t pos = path.rfind('/'); @@ -492,12 +492,12 @@ // Find the second to last slash size_t pos2 = path.rfind('/', pos-1); if (pos2 == std::string::npos) - return path.substr(0,pos); + return StringRef(path).substr(0,pos); else - return path.substr(pos2+1,pos-pos2-1); + return StringRef(path).substr(pos2+1,pos-pos2-1); } // Return everything after the last slash - return path.substr(pos+1); + return StringRef(path).substr(pos+1); } const FileStatus * @@ -589,7 +589,7 @@ } bool -Path::set(const std::string& a_path) { +Path::set(StringRef a_path) { if (a_path.empty()) return false; std::string save(path); @@ -602,7 +602,7 @@ } bool -Path::appendComponent(const std::string& name) { +Path::appendComponent(StringRef name) { if (name.empty()) return false; std::string save(path); @@ -634,7 +634,7 @@ } bool -Path::appendSuffix(const std::string& suffix) { +Path::appendSuffix(StringRef suffix) { std::string save(path); path.append("."); path.append(suffix); Modified: llvm/trunk/lib/System/Win32/Path.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/System/Win32/Path.inc?rev=91620&r1=91619&r2=91620&view=diff ============================================================================== --- llvm/trunk/lib/System/Win32/Path.inc (original) +++ llvm/trunk/lib/System/Win32/Path.inc Thu Dec 17 15:02:39 2009 @@ -58,8 +58,8 @@ } Path& -Path::operator=(const std::string &that) { - path = that; 
+Path::operator=(StringRef that) { + path.assign(that.data(), that.size()); FlipBackSlashes(path); return *this; } @@ -287,11 +287,11 @@ return len > 0 && path[len-1] == '/'; } -std::string Path::getDirname() const { - return getDirnameCharSep(path, '/'); +StringRef Path::getDirname() const { + return getDirnameCharSep(path, "/"); } -std::string +StringRef Path::getBasename() const { // Find the last slash size_t slash = path.rfind('/'); @@ -302,12 +302,12 @@ size_t dot = path.rfind('.'); if (dot == std::string::npos || dot < slash) - return path.substr(slash); + return StringRef(path).substr(slash); else - return path.substr(slash, dot - slash); + return StringRef(path).substr(slash, dot - slash); } -std::string +StringRef Path::getSuffix() const { // Find the last slash size_t slash = path.rfind('/'); @@ -318,9 +318,9 @@ size_t dot = path.rfind('.'); if (dot == std::string::npos || dot < slash) - return std::string(); + return StringRef(""); else - return path.substr(dot + 1); + return StringRef(path).substr(dot + 1); } bool @@ -364,7 +364,7 @@ return true; } -std::string +StringRef Path::getLast() const { // Find the last slash size_t pos = path.rfind('/'); @@ -378,7 +378,7 @@ return path; // Return everything after the last slash - return path.substr(pos+1); + return StringRef(path).substr(pos+1); } const FileStatus * @@ -490,7 +490,7 @@ } bool -Path::set(const std::string& a_path) { +Path::set(StringRef a_path) { if (a_path.empty()) return false; std::string save(path); @@ -504,7 +504,7 @@ } bool -Path::appendComponent(const std::string& name) { +Path::appendComponent(StringRef name) { if (name.empty()) return false; std::string save(path); @@ -536,7 +536,7 @@ } bool -Path::appendSuffix(const std::string& suffix) { +Path::appendSuffix(StringRef suffix) { std::string save(path); path.append("."); path.append(suffix); From eli.friedman at gmail.com Thu Dec 17 15:07:32 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Thu, 17 Dec 2009 21:07:32 -0000 
Subject: [llvm-commits] [llvm] r91621 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/memcpy.ll Message-ID: <200912172107.nBHL7WFB004095@zion.cs.uiuc.edu> Author: efriedma Date: Thu Dec 17 15:07:31 2009 New Revision: 91621 URL: http://llvm.org/viewvc/llvm-project?rev=91621&view=rev Log: Slightly generalize transformation of memmove(a,a,n) so that it also applies to memcpy. (Such a memcpy is technically illegal, but in practice is safe and is generated by struct self-assignment in C code.) Added: llvm/trunk/test/Transforms/InstCombine/memcpy.ll Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91621&r1=91620&r2=91621&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Thu Dec 17 15:07:31 2009 @@ -9896,9 +9896,11 @@ Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); Changed = true; } + } + if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove(x,x,size) -> noop. 
- if (MMI->getSource() == MMI->getDest()) + if (MTI->getSource() == MTI->getDest()) return EraseInstFromFunction(CI); } Added: llvm/trunk/test/Transforms/InstCombine/memcpy.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/memcpy.ll?rev=91621&view=auto ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/memcpy.ll (added) +++ llvm/trunk/test/Transforms/InstCombine/memcpy.ll Thu Dec 17 15:07:31 2009 @@ -0,0 +1,10 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) + +define void @test4(i8* %a) { + tail call void @llvm.memcpy.i32( i8* %a, i8* %a, i32 100, i32 1 ) + ret void +} +; CHECK: define void @test4 +; CHECK-NEXT: ret void From sabre at nondot.org Thu Dec 17 15:23:46 2009 From: sabre at nondot.org (Chris Lattner) Date: Thu, 17 Dec 2009 21:23:46 -0000 Subject: [llvm-commits] [llvm] r91622 - /llvm/trunk/include/llvm/ADT/SmallVector.h Message-ID: <200912172123.nBHLNl0v004705@zion.cs.uiuc.edu> Author: lattner Date: Thu Dec 17 15:23:46 2009 New Revision: 91622 URL: http://llvm.org/viewvc/llvm-project?rev=91622&view=rev Log: tabs -> spaces. Modified: llvm/trunk/include/llvm/ADT/SmallVector.h Modified: llvm/trunk/include/llvm/ADT/SmallVector.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91622&r1=91621&r2=91622&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) +++ llvm/trunk/include/llvm/ADT/SmallVector.h Thu Dec 17 15:23:46 2009 @@ -495,15 +495,15 @@ // Copy over the elements that we're about to overwrite. 
T *OldEnd = this->end(); - this->setEnd(this->end() + NumToInsert); + this->setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; - this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); // Replace the overwritten part. std::copy(From, From+NumOverwritten, I); // Insert the non-overwritten middle part. - this->uninitialized_copy(From+NumOverwritten, To, OldEnd); + this->uninitialized_copy(From+NumOverwritten, To, OldEnd); return I; } From evan.cheng at apple.com Thu Dec 17 15:23:59 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 17 Dec 2009 21:23:59 -0000 Subject: [llvm-commits] [llvm] r91623 - /llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200912172123.nBHLNxqW004724@zion.cs.uiuc.edu> Author: evancheng Date: Thu Dec 17 15:23:58 2009 New Revision: 91623 URL: http://llvm.org/viewvc/llvm-project?rev=91623&view=rev Log: Remove an unused option. Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=91623&r1=91622&r2=91623&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Dec 17 15:23:58 2009 @@ -50,9 +50,6 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#include "llvm/Support/CommandLine.h" -static cl::opt AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); - STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -1276,7 +1273,7 @@ SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; bool Done = false; - if (AvoidDupAddrCompute && !N.hasOneUse()) { + if (!N.hasOneUse()) { unsigned Opcode = N.getOpcode(); if (Opcode != ISD::Constant && Opcode 
!= ISD::FrameIndex && Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { From eli.friedman at gmail.com Thu Dec 17 15:27:47 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Thu, 17 Dec 2009 21:27:47 -0000 Subject: [llvm-commits] [llvm] r91624 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/cast_ptr.ll Message-ID: <200912172127.nBHLRlB9004841@zion.cs.uiuc.edu> Author: efriedma Date: Thu Dec 17 15:27:47 2009 New Revision: 91624 URL: http://llvm.org/viewvc/llvm-project?rev=91624&view=rev Log: Make the ptrtoint comparison simplification work if one side is a global. Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp llvm/trunk/test/Transforms/InstCombine/cast_ptr.ll Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91624&r1=91623&r2=91624&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Thu Dec 17 15:27:47 2009 @@ -6452,7 +6452,7 @@ // if (X) ... // For generality, we handle any zero-extension of any operand comparison // with a constant or another cast from the same type. - if (isa(Op1) || isa(Op1)) + if (isa(Op1) || isa(Op1)) if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } Modified: llvm/trunk/test/Transforms/InstCombine/cast_ptr.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/cast_ptr.ll?rev=91624&r1=91623&r2=91624&view=diff ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/cast_ptr.ll (original) +++ llvm/trunk/test/Transforms/InstCombine/cast_ptr.ll Thu Dec 17 15:27:47 2009 @@ -27,3 +27,12 @@ ret i1 %r } +; These casts should also be folded away. 
+; CHECK: @test3 +; CHECK: icmp eq i8* %a, @global + at global = global i8 0 +define i1 @test3(i8* %a) { + %tmpa = ptrtoint i8* %a to i32 + %r = icmp eq i32 %tmpa, ptrtoint (i8* @global to i32) + ret i1 %r +} From jyasskin at google.com Thu Dec 17 15:35:30 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 17 Dec 2009 21:35:30 -0000 Subject: [llvm-commits] [llvm] r91626 - in /llvm/trunk: lib/ExecutionEngine/JIT/JIT.cpp lib/ExecutionEngine/JIT/JIT.h lib/ExecutionEngine/JIT/JITEmitter.cpp unittests/ExecutionEngine/JIT/JITTest.cpp Message-ID: <200912172135.nBHLZU71005142@zion.cs.uiuc.edu> Author: jyasskin Date: Thu Dec 17 15:35:29 2009 New Revision: 91626 URL: http://llvm.org/viewvc/llvm-project?rev=91626&view=rev Log: Don't codegen available_externally functions. Fixes http://llvm.org/PR5735. Modified: llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp llvm/trunk/lib/ExecutionEngine/JIT/JIT.h llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Modified: llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp?rev=91626&r1=91625&r2=91626&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JIT.cpp Thu Dec 17 15:35:29 2009 @@ -366,6 +366,32 @@ } } +/// materializeFunction - make sure the given function is fully read. If the +/// module is corrupt, this returns true and fills in the optional string with +/// information about the problem. If successful, this returns false. +bool JIT::materializeFunction(Function *F, std::string *ErrInfo) { + // Read in the function if it exists in this Module. + if (F->hasNotBeenReadFromBitcode()) { + // Determine the module provider this function is provided by. 
+ Module *M = F->getParent(); + ModuleProvider *MP = 0; + for (unsigned i = 0, e = Modules.size(); i != e; ++i) { + if (Modules[i]->getModule() == M) { + MP = Modules[i]; + break; + } + } + if (MP) + return MP->materializeFunction(F, ErrInfo); + + if (ErrInfo) + *ErrInfo = "Function isn't in a module we know about!"; + return true; + } + // Succeed if the function is already read. + return false; +} + /// run - Start execution with the specified function and arguments. /// GenericValue JIT::runFunction(Function *F, @@ -607,6 +633,9 @@ Function *PF = jitstate->getPendingFunctions(locked).back(); jitstate->getPendingFunctions(locked).pop_back(); + assert(!PF->hasAvailableExternallyLinkage() && + "Externally-defined function should not be in pending list."); + // JIT the function isAlreadyCodeGenerating = true; jitstate->getPM(locked).run(*PF); @@ -627,36 +656,19 @@ return Addr; // Check if function already code gen'd MutexGuard locked(lock); - - // Now that this thread owns the lock, check if another thread has already - // code gen'd the function. - if (void *Addr = getPointerToGlobalIfAvailable(F)) - return Addr; - // Make sure we read in the function if it exists in this Module. - if (F->hasNotBeenReadFromBitcode()) { - // Determine the module provider this function is provided by. - Module *M = F->getParent(); - ModuleProvider *MP = 0; - for (unsigned i = 0, e = Modules.size(); i != e; ++i) { - if (Modules[i]->getModule() == M) { - MP = Modules[i]; - break; - } - } - assert(MP && "Function isn't in a module we know about!"); - - std::string ErrorMsg; - if (MP->materializeFunction(F, &ErrorMsg)) { - llvm_report_error("Error reading function '" + F->getName()+ - "' from bitcode file: " + ErrorMsg); - } - - // Now retry to get the address. - if (void *Addr = getPointerToGlobalIfAvailable(F)) - return Addr; + // Now that this thread owns the lock, make sure we read in the function if it + // exists in this Module. 
+ std::string ErrorMsg; + if (materializeFunction(F, &ErrorMsg)) { + llvm_report_error("Error reading function '" + F->getName()+ + "' from bitcode file: " + ErrorMsg); } + // ... and check if another thread has already code gen'd the function. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); Modified: llvm/trunk/lib/ExecutionEngine/JIT/JIT.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JIT.h?rev=91626&r1=91625&r2=91626&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JIT.h (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JIT.h Thu Dec 17 15:35:29 2009 @@ -104,6 +104,12 @@ /// the underlying module. virtual void deleteModuleProvider(ModuleProvider *P,std::string *ErrInfo = 0); + /// materializeFunction - make sure the given function is fully read. If the + /// module is corrupt, this returns true and fills in the optional string with + /// information about the problem. If successful, this returns false. + /// + bool materializeFunction(Function *F, std::string *ErrInfo = 0); + /// runFunction - Start execution with the specified function and arguments. /// virtual GenericValue runFunction(Function *F, Modified: llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp?rev=91626&r1=91625&r2=91626&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/JIT/JITEmitter.cpp Thu Dec 17 15:35:29 2009 @@ -517,9 +517,15 @@ void *Actual = TheJIT->isCompilingLazily() ? 
(void *)(intptr_t)LazyResolverFn : (void *)0; + // TODO: Delete this when PR5737 is fixed. + std::string ErrorMsg; + if (TheJIT->materializeFunction(F, &ErrorMsg)) { + llvm_report_error("Error reading function '" + F->getName()+ + "' from bitcode file: " + ErrorMsg); + } // If this is an external declaration, attempt to resolve the address now // to place in the stub. - if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) { + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { Actual = TheJIT->getPointerToFunction(F); // If we resolved the symbol to a null address (eg. a weak external) @@ -552,7 +558,7 @@ // exist yet, add it to the JIT's work list so that we can fill in the stub // address later. if (!Actual && !TheJIT->isCompilingLazily()) - if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode()) + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) TheJIT->addPendingFunction(F); return Stub; @@ -755,9 +761,16 @@ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); if (ResultPtr) return ResultPtr; + // TODO: Delete this when PR5737 is fixed. + std::string ErrorMsg; + if (TheJIT->materializeFunction(F, &ErrorMsg)) { + llvm_report_error("Error reading function '" + F->getName()+ + "' from bitcode file: " + ErrorMsg); + } + // If this is an external function pointer, we can force the JIT to // 'compile' it, which really just adds it to the map. - if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) return TheJIT->getPointerToFunction(F); } @@ -1562,6 +1575,7 @@ JITEmitter *JE = cast(getCodeEmitter()); void *Stub = JE->getJITResolver().getLazyFunctionStub(F); void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr != Stub && "Function must have non-stub address to be updated."); // Tell the target jit info to rewrite the stub at the specified address, // rather than creating a new one. 
Modified: llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp?rev=91626&r1=91625&r2=91626&view=diff ============================================================================== --- llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp (original) +++ llvm/trunk/unittests/ExecutionEngine/JIT/JITTest.cpp Thu Dec 17 15:35:29 2009 @@ -559,6 +559,35 @@ << " not 7 from the IR version."; } +} // anonymous namespace +// This function is intentionally defined differently in the statically-compiled +// program from the IR input to the JIT to assert that the JIT doesn't use its +// definition. +extern "C" int32_t JITTest_AvailableExternallyFunction() { + return 42; +} +namespace { + +TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) { + TheJIT->DisableLazyCompilation(true); + LoadAssembly("define available_externally i32 " + " @JITTest_AvailableExternallyFunction() { " + " ret i32 7 " + "} " + " " + "define i32 @func() { " + " %result = tail call i32 " + " @JITTest_AvailableExternallyFunction() " + " ret i32 %result " + "} "); + Function *funcIR = M->getFunction("func"); + + int32_t (*func)() = reinterpret_cast( + (intptr_t)TheJIT->getPointerToFunction(funcIR)); + EXPECT_EQ(42, func()) << "func should return 42 from the static version," + << " not 7 from the IR version."; +} + // This code is copied from JITEventListenerTest, but it only runs once for all // the tests in this directory. Everything seems fine, but that's strange // behavior. 
From jyasskin at google.com Thu Dec 17 15:46:44 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 17 Dec 2009 13:46:44 -0800 Subject: [llvm-commits] [PATCH] Fix http://llvm.org/PR5735: available_externally codegen In-Reply-To: <48984198-AC74-4973-899D-F9FE5ADA7852@apple.com> References: <48984198-AC74-4973-899D-F9FE5ADA7852@apple.com> Message-ID: On Thu, Dec 17, 2009 at 11:26 AM, Chris Lattner wrote: > > On Dec 14, 2009, at 5:25 PM, Jeffrey Yasskin wrote: > >> Patch at http://llvm.org/bugs/attachment.cgi?id=3948 or >> http://codereview.appspot.com/179048. > > Seems reasonable to me, though I'm not a great expert in this area anymore. Thanks for looking at it. Hopefully one of the real experts will have a chance to look over the commit. From clattner at apple.com Thu Dec 17 16:19:13 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 14:19:13 -0800 Subject: [llvm-commits] [llvm] r91184 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <200912112347.nBBNleh0002511@zion.cs.uiuc.edu> References: <200912112347.nBBNleh0002511@zion.cs.uiuc.edu> Message-ID: <1A988EB1-8A06-4461-8DC0-8D59DA2BBA31@apple.com> On Dec 11, 2009, at 3:47 PM, Bob Wilson wrote: > Author: bwilson > Date: Fri Dec 11 17:47:40 2009 > New Revision: 91184 > > URL: http://llvm.org/viewvc/llvm-project?rev=91184&view=rev > Log: > Revise scalar replacement to be more flexible about handle bitcasts and GEPs. > While scanning through the uses of an alloca, keep track of the current offset > relative to the start of the alloca, and check memory references to see if > the offset & size correspond to a component within the alloca. This has the > nice benefit of unifying much of the code from isSafeUseOfAllocation, > isSafeElementUse, and isSafeUseOfBitCastedAllocation. The code to rewrite > the uses of a promoted alloca, after it is determined to be safe, is > reorganized in the same way. 
> > Also, when rewriting GEP instructions, mark them as "in-bounds" since all the > indices are known to be safe. Very nice! This is a great improvement over the old code. Here are a couple of thoughts: > +/// FindElementAndOffset - Return the index of the element containing Offset > +/// within the specified type, which must be either a struct or an array. > +/// Sets T to the type of the element and Offset to the offset within that > +/// element. > +unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { > + unsigned Idx = 0; > + if (const StructType *ST = dyn_cast<StructType>(T)) { > + const StructLayout *Layout = TD->getStructLayout(ST); > + Idx = Layout->getElementContainingOffset(Offset); > + T = ST->getContainedType(Idx); > + Offset -= Layout->getElementOffset(Idx); Please early exit here to unnest the else. > + } else { > + const ArrayType *AT = dyn_cast<ArrayType>(T); > + assert(AT && "unexpected type for scalar replacement"); Please use cast<> if you know that it has to be an array. However, is this really true? Can't it be a vector? If so, use SequentialType and explicitly assert it isn't a pointer. > + T = AT->getElementType(); > + uint64_t EltSize = TD->getTypeAllocSize(T); > + Idx = (unsigned)(Offset / EltSize); The returned Idx should be a uint64_t for large indexes into large arrays. > +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to > +/// performing scalar replacement of alloca AI. The results are flagged in > +/// the Info parameter. Offset and ArrayOffset indicate the position within > +/// AI that is referenced by this instruction. > +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + uint64_t ArrayOffset, AllocaInfo &Info) { I'm not really sure what the difference between ArrayOffset and Offset is. Later I see: > For the > +/// special case of a variable index to a 2-element array, ArrayOffset is set > +/// to the array element size.
But what are the semantics of ArrayOffset at this level? What does it mean and how does it impact safety, what is the relation between it and Offset? I think the code handles it correctly, but the comment needs to be improved to explain these points. > +/// isSafeGEP - Check if a GEP instruction can be handled for scalar > +/// replacement. It is safe when all the indices are constant, in-bounds > +/// references, and when the resulting offset corresponds to an element within > +/// the alloca type. The results are flagged in the Info parameter. Upon > +/// return, Offset is adjusted as specified by the GEP indices. For the > +/// special case of a variable index to a 2-element array, ArrayOffset is set > +/// to the array element size. > +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, > + uint64_t &Offset, uint64_t &ArrayOffset, > + AllocaInfo &Info) { > + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); > + if (GEPIt == E) > + return; > + > + // The first GEP index must be zero. > + if (!isa<ConstantInt>(GEPIt.getOperand()) || > + !cast<ConstantInt>(GEPIt.getOperand())->isZero()) > + return MarkUnsafe(Info); > + if (++GEPIt == E) > + return; With your rewrite, I think this constraint could be relaxed, allowing non-zero constant ints as the first index. SRoA should be able to handle stuff like this pretty easily: A = alloca { i32, [4 x i32] } B = gep A, 0, 1, 0 C = gep B, 2 > // If the first index is a non-constant index into an array, see if we can > // handle it as a special case. > + const Type *ArrayEltTy = 0; > + if (ArrayOffset == 0 && Offset == 0) { > + if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) { > + if (!isa<ConstantInt>(GEPIt.getOperand())) { Please check the !isa as the first predicate for this big "if" block. > + uint64_t NumElements = AT->getNumElements(); > + > + // If this is an array index and the index is not constant, we cannot > + // promote...
that is unless the array has exactly one or two elements > + // in it, in which case we CAN promote it, but we have to canonicalize > + // this out if this is the only problem. > + if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) > + return MarkUnsafe(Info); > Info.needsCleanup = true; > + ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); > + ArrayEltTy = AT->getElementType(); > + ++GEPIt; Actually, on reflection, I'm not sure that this transformation is safe without more checking. Consider indexing into the array with a variable index in a type like {[2 x i32], i32, i32}. SRoA shouldn't try to canonicalize this because the index might be '3' (even though that is gross). I'm not really sure how useful "variable indexes in two element arrays" really is anyway though. It might be better to just remove this transformation. That would simplify the code quite a bit too. > @@ -612,144 +494,254 @@ > // of any accesses into structs where any of the components are variables. > if (IdxVal->getZExtValue() >= AT->getNumElements()) > return MarkUnsafe(Info); > - } else if (const VectorType *VT = dyn_cast(*I)) { > + } else { > + const VectorType *VT = dyn_cast(*GEPIt); > + assert(VT && "unexpected type in GEP type iterator"); Please use cast instead of dyn_cast + assert. The cast<> turns into a noop in non-assert builds. > +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite > +/// the instruction I, which references it, to use the separate elements. > +/// Offset indicates the position within AI that is referenced by this > +/// instruction. > +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, > + SmallVector &NewElts) { > + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ) { > + Instruction *User = cast(*UI++); > > + if (BitCastInst *BC = dyn_cast(User)) { > + if (BC->getOperand(0) == AI) > + BC->setOperand(0, NewElts[0]); What does this setOperand do? 
If the invariant is that RewriteForScalarRepl leaves I with zero uses after the transformation, then this isn't needed. If I does have uses, this seems like potentially the wrong thing to do. At the very least, this would benefit from a comment. > + // If the bitcast type now matches the operand type, it will be removed > + // after processing its uses. > + RewriteForScalarRepl(BC, AI, Offset, NewElts); > + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { > + RewriteGEP(GEPI, AI, Offset, NewElts); > + } else if (MemIntrinsic *MI = dyn_cast(User)) { > + ConstantInt *Length = dyn_cast(MI->getLength()); > + uint64_t MemSize = Length->getZExtValue(); > + if (Offset == 0 && > + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) > + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); What is the 'else' case here? If Offset != 0 it seems like a failure of the safety checking code? Should the 'if' be turned into an assert? > > + // Delete unused instructions and identity bitcasts. > + if (I->use_empty()) > + I->eraseFromParent(); > + else if (BitCastInst *BC = dyn_cast(I)) { > + if (BC->getDestTy() == BC->getSrcTy()) { > + BC->replaceAllUsesWith(BC->getOperand(0)); > + BC->eraseFromParent(); This code (zapping identity bitcasts) seems both unneeded and harmful: this will add another use of the parent instruction whose use list is being iterated over. Was this something that existed before or did you add it? It is probably best to just remove this. 
-Chris From clattner at apple.com Thu Dec 17 16:22:09 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 14:22:09 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> Message-ID: <4607AAC4-5E76-4502-88E2-FE586BAFB296@apple.com> On Dec 15, 2009, at 2:00 PM, Bob Wilson wrote: > Author: bwilson > Date: Tue Dec 15 16:00:51 2009 > New Revision: 91459 > > URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev > Log: > Reapply 91184 with fixes and an addition to the testcase to cover the problem > found last time. Instead of trying to modify the IR while iterating over it, > I've change it to keep a list of WeakVH references to dead instructions, and > then delete those instructions later. I also added some special case code to > detect and handle the situation when both operands of a memcpy intrinsic are > referencing the same alloca. This is a pretty big hammer. Can you explain in what circumstance this happens? The only case I can think of where this would occur is when one instruction uses two pointers derived from the same alloca. This can only really happen for phis, calls, and yes, memset/cpy/move. However, the only time that memset/cpy/move could do this is when they are a noop copy from the start of the alloca to the end of the alloca. Is this the only case that you're interested in here? If so, there are probably lighter weight ways to solve this problem. 
-Chris From eli.friedman at gmail.com Thu Dec 17 16:42:30 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Thu, 17 Dec 2009 22:42:30 -0000 Subject: [llvm-commits] [llvm] r91631 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll Message-ID: <200912172242.nBHMgUPE007935@zion.cs.uiuc.edu> Author: efriedma Date: Thu Dec 17 16:42:29 2009 New Revision: 91631 URL: http://llvm.org/viewvc/llvm-project?rev=91631&view=rev Log: Allow instcombine to combine "sext(a) >u const" to "a >u trunc(const)". Removed: llvm/trunk/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp llvm/trunk/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91631&r1=91630&r2=91631&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Thu Dec 17 16:42:29 2009 @@ -7299,19 +7299,17 @@ // If the re-extended constant didn't change... if (Res2 == CI) { - // Make sure that sign of the Cmp and the sign of the Cast are the same. - // For example, we might have: - // %A = sext i16 %X to i32 - // %B = icmp ugt i32 %A, 1330 - // It is incorrect to transform this into - // %B = icmp ugt i16 %X, 1330 - // because %A may have negative value. - // - // However, we allow this when the compare is EQ/NE, because they are - // signless. - if (isSignedExt == isSignedCmp || ICI.isEquality()) + // Deal with equality cases early. 
+ if (ICI.isEquality()) return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - return 0; + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedExt && isSignedCmp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } // The re-extended constant changed so the constant cannot be represented Modified: llvm/trunk/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll?rev=91631&r1=91630&r2=91631&view=diff ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll (original) +++ llvm/trunk/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll Thu Dec 17 16:42:29 2009 @@ -33,6 +33,14 @@ ; CHECK: ret i1 false } +define i1 @lt_signed_to_small_unsigned(i8 %SB) { + %Y = sext i8 %SB to i32 + %C = icmp ult i32 %Y, 17 + ret i1 %C +; CHECK: %C = icmp ult i8 %SB, 17 +; CHECK: ret i1 %C +} + define i1 @lt_signed_to_small_signed(i8 %SB) { %Y = sext i8 %SB to i32 ; [#uses=1] %C = icmp slt i32 %Y, 17 ; [#uses=1] @@ -77,6 +85,14 @@ ; CHECK: ret i1 %C } +define i1 @lt_unsigned_to_small_signed(i8 %SB) { + %Y = zext i8 %SB to i32 + %C = icmp slt i32 %Y, 17 + ret i1 %C +; CHECK: %C = icmp ult i8 %SB, 17 +; CHECK: ret i1 %C +} + define i1 @lt_unsigned_to_small_negative(i8 %SB) { %Y = zext i8 %SB to i32 ; [#uses=1] %C = icmp slt i32 %Y, -17 ; [#uses=1] @@ -106,6 +122,14 @@ ; CHECK: ret i1 true } +define i1 @gt_signed_to_small_unsigned(i8 %SB) { + %Y = sext i8 %SB to i32 + %C = icmp ugt i32 %Y, 17 + ret i1 %C +; CHECK: %C = icmp ugt i8 %SB, 17 +; CHECK: ret i1 %C +} + define i1 @gt_signed_to_small_signed(i8 %SB) { %Y = sext i8 %SB to i32 ; 
[#uses=1] %C = icmp sgt i32 %Y, 17 ; [#uses=1] @@ -151,6 +175,14 @@ ; CHECK: ret i1 %C } +define i1 @gt_unsigned_to_small_signed(i8 %SB) { + %Y = zext i8 %SB to i32 + %C = icmp sgt i32 %Y, 17 + ret i1 %C +; CHECK: %C = icmp ugt i8 %SB, 17 +; CHECK: ret i1 %C +} + define i1 @gt_unsigned_to_small_negative(i8 %SB) { %Y = zext i8 %SB to i32 ; [#uses=1] %C = icmp sgt i32 %Y, -17 ; [#uses=1] Removed: llvm/trunk/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll?rev=91630&view=auto ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll (original) +++ llvm/trunk/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst.ll (removed) @@ -1,12 +0,0 @@ -; This test case is reduced from llvmAsmParser.cpp -; The optimizer should not remove the cast here. -; RUN: opt < %s -instcombine -S | \ -; RUN: grep sext.*i32 - - -define i1 @test(i16 %X) { - %A = sext i16 %X to i32 ; [#uses=1] - %B = icmp ugt i32 %A, 1330 ; [#uses=1] - ret i1 %B -} - From jyasskin at google.com Thu Dec 17 16:44:34 2009 From: jyasskin at google.com (Jeffrey Yasskin) Date: Thu, 17 Dec 2009 22:44:34 -0000 Subject: [llvm-commits] [llvm] r91632 - /llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200912172244.nBHMiYIN008033@zion.cs.uiuc.edu> Author: jyasskin Date: Thu Dec 17 16:44:34 2009 New Revision: 91632 URL: http://llvm.org/viewvc/llvm-project?rev=91632&view=rev Log: Revert r91623 to unbreak the buildbots. 
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=91632&r1=91631&r2=91632&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Dec 17 16:44:34 2009 @@ -50,6 +50,9 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; +#include "llvm/Support/CommandLine.h" +static cl::opt AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); + STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -1273,7 +1276,7 @@ SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; bool Done = false; - if (!N.hasOneUse()) { + if (AvoidDupAddrCompute && !N.hasOneUse()) { unsigned Opcode = N.getOpcode(); if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { From foom at fuhm.net Thu Dec 17 17:05:09 2009 From: foom at fuhm.net (James Y Knight) Date: Thu, 17 Dec 2009 18:05:09 -0500 Subject: [llvm-commits] llvm-c/Target.h SWIG friendliness Message-ID: <5713ED16-A811-45D9-94A5-B6FF6B994B7C@fuhm.net> Without this patch, SWIG chokes on llvm-c/Target.h, as, if you don't use #include files, it appears to have duplicate #defines. (SWIG doesn't follow #include when looking for functions/enums/etc to wrap). Since LLVM-C's main purpose is to generate bindings, it seems like a good idea to have it not cause SWIG to barf. Of course, even with this patch, SWIG won't notice the LLVMInitialize*Target and LLVMInitialize*TargetInfo functions, but at least it's not completely failing. 
Perhaps it would be a better idea to pre-expand all these definitions, rather than using fancy #include tricks with llvm/Config/Targets.def at all, but this works well enough for me. --- Target.h~ 2009-11-17 13:58:45.000000000 -0500 +++ Target.h 2009-12-17 17:36:55.675913000 -0500 @@ -35,9 +35,11 @@ /* Declare all of the target-initialization functions that are available. */ #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(); #include "llvm/Config/Targets.def" +#undef LLVM_TARGET #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" +#undef LLVM_TARGET /** LLVMInitializeAllTargetInfos - The main program should call this function if it wants access to all available targets that LLVM is configured to @@ -45,6 +47,7 @@ static inline void LLVMInitializeAllTargetInfos() { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); #include "llvm/Config/Targets.def" +#undef LLVM_TARGET } /** LLVMInitializeAllTargets - The main program should call this function if it @@ -53,6 +56,7 @@ static inline void LLVMInitializeAllTargets() { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" +#undef LLVM_TARGET } /** LLVMInitializeNativeTarget - The main program should call this function to From isanbard at gmail.com Thu Dec 17 17:37:05 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 17 Dec 2009 15:37:05 -0800 Subject: [llvm-commits] [llvm] r91392 - in /llvm/trunk: include/llvm/CodeGen/SelectionDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp In-Reply-To: <36ED7EB5-3000-4621-80A1-3B5EC80E965C@apple.com> References: <200912150154.nBF1spun028126@zion.cs.uiuc.edu> <36ED7EB5-3000-4621-80A1-3B5EC80E965C@apple.com> Message-ID: On Dec 17, 2009, at 11:38 AM, Chris Lattner wrote: > On Dec 14, 2009, at 5:54 PM, Bill Wendling wrote: > >> 
Author: void >> Date: Mon Dec 14 19:54:51 2009 >> New Revision: 91392 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91392&view=rev >> Log: >> Initial work on disabling the scheduler. This is a work in progress, and this >> stuff isn't used just yet. > > Ok, cool. Here are some comments out of order. > >> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Dec 14 19:54:51 2009 >> @@ -110,6 +110,46 @@ >> /// SelectionDAG. >> BumpPtrAllocator Allocator; >> >> + /// NodeOrdering - Assigns a "line number" value to each SDNode that >> + /// corresponds to the "line number" of the original LLVM instruction. This >> + /// used for turning off scheduling, because we'll forgo the normal scheduling >> + /// algorithm and output the instructions according to this ordering. >> + class NodeOrdering { > > SelectionDAG.h already has a ton of stuff in it. Please pull this out into its own SDNodeOrdering.h file. SelectionDAG.h can just forward declare the class. > Sure. >> + /// LineNo - The line of the instruction the node corresponds to. A value of >> + /// `0' means it's not assigned. >> + unsigned LineNo; > > I don't understand how this is a "line number". It seems like these are really unique instruction IDs? Does this have any correspondence at all to source line numbers? > Nope. :-) They're just unique IDs. I'll change the name to make it clearer. >> + std::map Order; > > This should use DenseMap. > Okay. >> + /// NewInst - Tell the ordering object that we're processing a new >> + /// instruction. >> + void NewInst() { >> + if (Ordering) >> + Ordering->newInst(); >> + } > > This should be in SelectionDAGBuilder, not SelectionDAG. Likewise, the "current instruction" state should be split out from the NodeOrdering class. 
> >> ++ public: >> + NodeOrdering() : LineNo(0) {} >> + >> + void add(const SDNode *Node) { >> + assert(LineNo && "Invalid line number!"); >> + Order[Node] = LineNo; >> + } >> + void remove(const SDNode *Node) { >> + std::map::iterator Itr = Order.find(Node); >> + if (Itr != Order.end()) >> + Order.erase(Itr); >> + } >> + void clear() { >> + Order.clear(); >> + LineNo = 1; >> + } >> + unsigned getLineNo(const SDNode *Node) { >> + unsigned LN = Order[Node]; >> + assert(LN && "Node isn't in ordering map!"); >> + return LN; >> + } >> + void newInst() { >> + ++LineNo; >> + } > > As above, I don't like the style of this API: you're mixing the construction of the data structure with the storage of it. SDBuilder should just have the unsigned counter that it maintains, and this class should just be a thin wrapper around a densemap. > >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Mon Dec 14 19:54:51 2009 >> @@ -20,10 +20,16 @@ >> #include "llvm/Target/TargetInstrInfo.h" >> #include "llvm/Target/TargetRegisterInfo.h" >> #include "llvm/Target/TargetSubtarget.h" >> +#include "llvm/Support/CommandLine.h" >> #include "llvm/Support/Debug.h" >> #include "llvm/Support/raw_ostream.h" >> using namespace llvm; >> >> +cl::opt >> +DisableInstScheduling("disable-inst-scheduling", >> + cl::init(false), >> + cl::desc("Disable instruction scheduling")); > > This should be in TargetOptions.h, not a command line option. 
> >> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 14 19:54:51 2009@@ -778,8 +795,13 @@ >> @@ -877,14 +904,17 @@ >> ID.AddPointer(&Val); >> void *IP = 0; >> SDNode *N = NULL; >> - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) >> + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { >> + if (Ordering) Ordering->add(N); >> if (!VT.isVector()) >> return SDValue(N, 0); >> + } >> if (!N) { >> N = NodeAllocator.Allocate(); >> new (N) ConstantSDNode(isT, &Val, EltVT); >> CSEMap.InsertNode(N, IP); >> AllNodes.push_back(N); >> + if (Ordering) Ordering->add(N); > > I don't think that this is the right layer to do this at. The "Ordering" of nodes is only defined at Builder time, not in general when instructions are randomly created by other parts of SD machinery. The various calls to Ordering->add should only happen from the builder. > > OTOH, calls to *remove* a node from the ordering *should* happen from the common SD code when the node is about to be deleted. > Hmm...I was hoping to make this a very low-level thing so that the higher levels wouldn't even know about it. There may be a problem because node creation & modification doesn't stop at the builder, but could happen when lowering to target nodes. I'll look into it, though. -bw -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20091217/ec6a82c0/attachment.html From isanbard at gmail.com Thu Dec 17 17:42:32 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 17 Dec 2009 23:42:32 -0000 Subject: [llvm-commits] [llvm] r91634 - in /llvm/trunk/lib/CodeGen: PHIElimination.cpp StrongPHIElimination.cpp Message-ID: <200912172342.nBHNgW6V010207@zion.cs.uiuc.edu> Author: void Date: Thu Dec 17 17:42:32 2009 New Revision: 91634 URL: http://llvm.org/viewvc/llvm-project?rev=91634&view=rev Log: Turn off critical edge splitting for landing pads. 
The introduction of a non-landing pad basic block as the successor to a block that ends in an unconditional jump will cause block folding to remove the added block as a successor. Thus eventually removing it AND the landing pad entirely. Critical edge splitting is an optimization, so we can safely turn it off when dealing with landing pads. Modified: llvm/trunk/lib/CodeGen/PHIElimination.cpp llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp Modified: llvm/trunk/lib/CodeGen/PHIElimination.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PHIElimination.cpp?rev=91634&r1=91633&r2=91634&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PHIElimination.cpp (original) +++ llvm/trunk/lib/CodeGen/PHIElimination.cpp Thu Dec 17 17:42:32 2009 @@ -71,7 +71,7 @@ Changed |= EliminatePHINodes(Fn, *I); // Remove dead IMPLICIT_DEF instructions. - for (SmallPtrSet::iterator I = ImpDefs.begin(), + for (SmallPtrSet::iterator I = ImpDefs.begin(), E = ImpDefs.end(); I != E; ++I) { MachineInstr *DefMI = *I; unsigned DefReg = DefMI->getOperand(0).getReg(); @@ -83,8 +83,8 @@ for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); I != E; ++I) Fn.DeleteMachineInstr(I->first); - LoweredPHIs.clear(); + LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); return Changed; @@ -384,7 +384,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, LiveVariables &LV) { - if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI || + MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. 
for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); Modified: llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp?rev=91634&r1=91633&r2=91634&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp (original) +++ llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp Thu Dec 17 17:42:32 2009 @@ -907,7 +907,7 @@ // Determine which phi node operands need copies for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && + if (!I->empty() && !I->isLandingPad() && I->begin()->getOpcode() == TargetInstrInfo::PHI) processBlock(I); From isanbard at gmail.com Thu Dec 17 17:45:18 2009 From: isanbard at gmail.com (Bill Wendling) Date: Thu, 17 Dec 2009 23:45:18 -0000 Subject: [llvm-commits] [llvm] r91635 - /llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp Message-ID: <200912172345.nBHNjIGR010293@zion.cs.uiuc.edu> Author: void Date: Thu Dec 17 17:45:18 2009 New Revision: 91635 URL: http://llvm.org/viewvc/llvm-project?rev=91635&view=rev Log: Revert accidental commit. 
Modified: llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp Modified: llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp?rev=91635&r1=91634&r2=91635&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp (original) +++ llvm/trunk/lib/CodeGen/StrongPHIElimination.cpp Thu Dec 17 17:45:18 2009 @@ -907,7 +907,7 @@ // Determine which phi node operands need copies for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && !I->isLandingPad() && + if (!I->empty() && I->begin()->getOpcode() == TargetInstrInfo::PHI) processBlock(I); From rjmccall at apple.com Thu Dec 17 17:49:16 2009 From: rjmccall at apple.com (John McCall) Date: Thu, 17 Dec 2009 23:49:16 -0000 Subject: [llvm-commits] [llvm] r91636 - in /llvm/trunk: include/llvm/CodeGen/MachinePassRegistry.h lib/CodeGen/PBQP/AnnotatedGraph.h Message-ID: <200912172349.nBHNnHWJ010427@zion.cs.uiuc.edu> Author: rjmccall Date: Thu Dec 17 17:49:16 2009 New Revision: 91636 URL: http://llvm.org/viewvc/llvm-project?rev=91636&view=rev Log: Sundry dependent-name fixes flagged by clang++. Modified: llvm/trunk/include/llvm/CodeGen/MachinePassRegistry.h llvm/trunk/lib/CodeGen/PBQP/AnnotatedGraph.h Modified: llvm/trunk/include/llvm/CodeGen/MachinePassRegistry.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachinePassRegistry.h?rev=91636&r1=91635&r2=91636&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachinePassRegistry.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachinePassRegistry.h Thu Dec 17 17:49:16 2009 @@ -129,9 +129,9 @@ // Add existing passes to option. 
for (RegistryClass *Node = RegistryClass::getList(); Node; Node = Node->getNext()) { - addLiteralOption(Node->getName(), + this->addLiteralOption(Node->getName(), (typename RegistryClass::FunctionPassCtor)Node->getCtor(), - Node->getDescription()); + Node->getDescription()); } // Make sure we listen for list changes. Modified: llvm/trunk/lib/CodeGen/PBQP/AnnotatedGraph.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PBQP/AnnotatedGraph.h?rev=91636&r1=91635&r2=91636&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PBQP/AnnotatedGraph.h (original) +++ llvm/trunk/lib/CodeGen/PBQP/AnnotatedGraph.h Thu Dec 17 17:49:16 2009 @@ -132,19 +132,19 @@ } NodeData& getNodeData(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).getNodeData(); + return PGraph::getNodeEntry(nodeItr).getNodeData(); } const NodeData& getNodeData(const NodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).getNodeData(); + return PGraph::getNodeEntry(nodeItr).getNodeData(); } EdgeData& getEdgeData(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getEdgeData(); + return PGraph::getEdgeEntry(edgeItr).getEdgeData(); } const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getEdgeData(); + return PGraph::getEdgeEntry(edgeItr).getEdgeData(); } SimpleGraph toSimpleGraph() const { From bob.wilson at apple.com Thu Dec 17 17:51:53 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 17 Dec 2009 15:51:53 -0800 Subject: [llvm-commits] [llvm] r91459 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll In-Reply-To: <4607AAC4-5E76-4502-88E2-FE586BAFB296@apple.com> References: <200912152200.nBFM0qqN023245@zion.cs.uiuc.edu> <4607AAC4-5E76-4502-88E2-FE586BAFB296@apple.com> Message-ID: <54082996-0500-4418-8FD7-C278DE6BBAC8@apple.com> On Dec 17, 2009, at 2:22 PM, Chris Lattner wrote: > > On Dec 
15, 2009, at 2:00 PM, Bob Wilson wrote: > >> Author: bwilson >> Date: Tue Dec 15 16:00:51 2009 >> New Revision: 91459 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=91459&view=rev >> Log: >> Reapply 91184 with fixes and an addition to the testcase to cover the problem >> found last time. Instead of trying to modify the IR while iterating over it, >> I've change it to keep a list of WeakVH references to dead instructions, and >> then delete those instructions later. I also added some special case code to >> detect and handle the situation when both operands of a memcpy intrinsic are >> referencing the same alloca. > > This is a pretty big hammer. Can you explain in what circumstance this happens? The only case I can think of where this would occur is when one instruction uses two pointers derived from the same alloca. This can only really happen for phis, calls, and yes, memset/cpy/move. > > However, the only time that memset/cpy/move could do this is when they are a noop copy from the start of the alloca to the end of the alloca. Is this the only case that you're interested in here? If so, there are probably lighter weight ways to solve this problem. The immediate problem that prompted this change was indeed the no-op memcpy. But, while investigating it, I realized that I hadn't thought at all about the potential perils of modifying the instructions while iterating on their use lists. Apparently I didn't think hard enough because the x86_64 self-host buildbot just hit a similar problem.... I assume the big hammer you're referring to is the use of the WeakVH references? I introduced that with one of my earlier attempts at solving this problem, and I don't think it's even necessary anymore. I will try to replace it. 
The code for handling the no-op memcpy is actually very lightweight: In RewriteMemIntrinsicUserOfAlloca, I changed the code to remove bitcasts and all-zero GEPs from "OtherPtr" so that it iterates instead of just removing one bitcast and one GEP, and then after that loop, I added this check: // If OtherPtr has already been rewritten, this intrinsic will be dead. if (OtherPtr == NewElts[0]) return; From scallanan at apple.com Thu Dec 17 18:01:26 2009 From: scallanan at apple.com (Sean Callanan) Date: Fri, 18 Dec 2009 00:01:26 -0000 Subject: [llvm-commits] [llvm] r91638 - in /llvm/trunk: lib/Target/X86/ test/CodeGen/X86/ Message-ID: <200912180001.nBI01TbZ010821@zion.cs.uiuc.edu> Author: spyffe Date: Thu Dec 17 18:01:26 2009 New Revision: 91638 URL: http://llvm.org/viewvc/llvm-project?rev=91638&view=rev Log: Instruction fixes, added instructions, and AsmString changes in the X86 instruction tables. Also (while I was at it) cleaned up the X86 tables, removing tabs and 80-line violations. This patch was reviewed by Chris Lattner, but please let me know if there are any problems. 
* X86*.td Removed tabs and fixed 80-line violations * X86Instr64bit.td (IRET, POPCNT, BT_, LSL, SWPGS, PUSH_S, POP_S, L_S, SMSW) Added (CALL, CMOV) Added qualifiers (JMP) Added PC-relative jump instruction (POPFQ/PUSHFQ) Added qualifiers; renamed PUSHFQ to indicate that it is 64-bit only (ambiguous since it has no REX prefix) (MOV) Added rr form going the other way, which is encoded differently (MOV) Changed immediates to offsets, which is more correct; also fixed MOV64o64a to have a 64-bit offset (MOV) Fixed qualifiers (MOV) Added debug-register and condition-register moves (MOVZX) Added more forms (ADC, SUB, SBB, AND, OR, XOR) Added reverse forms, which (as with MOV) are encoded differently (ROL) Made REX.W required (BT) Uncommented mr form for disassembly only (CVT__2__) Added several missing non-intrinsic forms (LXADD, XCHG) Reordered operands to make more sense for MRMSrcMem (XCHG) Added register-to-register forms (XADD, CMPXCHG, XCHG) Added non-locked forms * X86InstrSSE.td (CVTSS2SI, COMISS, CVTTPS2DQ, CVTPS2PD, CVTPD2PS, MOVQ) Added * X86InstrFPStack.td (COM_FST0, COMP_FST0, COM_FI, COM_FIP, FFREE, FNCLEX, FNOP, FXAM, FLDL2T, FLDL2E, FLDPI, FLDLG2, FLDLN2, F2XM1, FYL2X, FPTAN, FPATAN, FXTRACT, FPREM1, FDECSTP, FINCSTP, FPREM, FYL2XP1, FSINCOS, FRNDINT, FSCALE, FCOMPP, FXSAVE, FXRSTOR) Added (FCOM, FCOMP) Added qualifiers (FSTENV, FSAVE, FSTSW) Fixed opcode names (FNSTSW) Added implicit register operand * X86InstrInfo.td (opaque512mem) Added for FXSAVE/FXRSTOR (offset8, offset16, offset32, offset64) Added for MOV (NOOPW, IRET, POPCNT, IN, BTC, BTR, BTS, LSL, INVLPG, STR, LTR, PUSHFS, PUSHGS, POPFS, POPGS, LDS, LSS, LES, LFS, LGS, VERR, VERW, SGDT, SIDT, SLDT, LGDT, LIDT, LLDT, LODSD, OUTSB, OUTSW, OUTSD, HLT, RSM, FNINIT, CLC, STC, CLI, STI, CLD, STD, CMC, CLTS, XLAT, WRMSR, RDMSR, RDPMC, SMSW, LMSW, CPUID, INVD, WBINVD, INVEPT, INVVPID, VMCALL, VMCLEAR, VMLAUNCH, VMRESUME, VMPTRLD, VMPTRST, VMREAD, VMWRITE, VMXOFF, VMXON) Added (NOOPL, POPF, POPFD, 
PUSHF, PUSHFD) Added qualifier (JO, JNO, JB, JAE, JE, JNE, JBE, JA, JS, JNS, JP, JNP, JL, JGE, JLE, JG, JCXZ) Added 32-bit forms (MOV) Changed some immediate forms to offset forms (MOV) Added reversed reg-reg forms, which are encoded differently (MOV) Added debug-register and condition-register moves (CMOV) Added qualifiers (AND, OR, XOR, ADC, SUB, SBB) Added reverse forms, like MOV (BT) Uncommented memory-register forms for disassembler (MOVSX, MOVZX) Added forms (XCHG, LXADD) Made operand order make sense for MRMSrcMem (XCHG) Added register-register forms (XADD, CMPXCHG) Added unlocked forms * X86InstrMMX.td (MMX_MOVD, MMV_MOVQ) Added forms * X86InstrInfo.cpp: Changed PUSHFQ to PUSHFQ64 to reflect table change * X86RegisterInfo.td: Added debug and condition register sets * x86-64-pic-3.ll: Fixed testcase to reflect call qualifier * peep-test-3.ll: Fixed testcase to reflect test qualifier * cmov.ll: Fixed testcase to reflect cmov qualifier * loop-blocks.ll: Fixed testcase to reflect call qualifier * x86-64-pic-11.ll: Fixed testcase to reflect call qualifier * 2009-11-04-SubregCoalescingBug.ll: Fixed testcase to reflect call qualifier * x86-64-pic-2.ll: Fixed testcase to reflect call qualifier * live-out-reg-info.ll: Fixed testcase to reflect test qualifier * tail-opts.ll: Fixed testcase to reflect call qualifiers * x86-64-pic-10.ll: Fixed testcase to reflect call qualifier * bss-pagealigned.ll: Fixed testcase to reflect call qualifier * x86-64-pic-1.ll: Fixed testcase to reflect call qualifier * widen_load-1.ll: Fixed testcase to reflect call qualifier Modified: llvm/trunk/lib/Target/X86/X86.td llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrFPStack.td llvm/trunk/lib/Target/X86/X86InstrFormats.td llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86InstrMMX.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/lib/Target/X86/X86RegisterInfo.td 
llvm/trunk/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll llvm/trunk/test/CodeGen/X86/abi-isel.ll llvm/trunk/test/CodeGen/X86/bss_pagealigned.ll llvm/trunk/test/CodeGen/X86/cmov.ll llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll llvm/trunk/test/CodeGen/X86/loop-blocks.ll llvm/trunk/test/CodeGen/X86/peep-test-3.ll llvm/trunk/test/CodeGen/X86/select-aggregate.ll llvm/trunk/test/CodeGen/X86/tail-opts.ll llvm/trunk/test/CodeGen/X86/widen_load-1.ll llvm/trunk/test/CodeGen/X86/x86-64-pic-1.ll llvm/trunk/test/CodeGen/X86/x86-64-pic-10.ll llvm/trunk/test/CodeGen/X86/x86-64-pic-11.ll llvm/trunk/test/CodeGen/X86/x86-64-pic-2.ll llvm/trunk/test/CodeGen/X86/x86-64-pic-3.ll Modified: llvm/trunk/lib/Target/X86/X86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.td (original) +++ llvm/trunk/lib/Target/X86/X86.td Thu Dec 17 18:01:26 2009 @@ -63,7 +63,7 @@ def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", - "Enable three-operand fused multiple-add">; + "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Thu Dec 17 18:01:26 2009 @@ -111,6 +111,9 @@ Requires<[In64BitMode]>; } +// Interrupt Instructions +def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>; + //===----------------------------------------------------------------------===// // Call 
Instructions... // @@ -131,20 +134,21 @@ // the 32-bit pcrel field that we have. def CALL64pcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call\t$dst", []>, + "call{q}\t$dst", []>, Requires<[In64BitMode, NotWin64]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR64:$dst)]>, + "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, Requires<[NotWin64]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, Requires<[NotWin64]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), "lcall{q}\t{*}$dst", []>; } - // FIXME: We need to teach codegen about single list of call-clobbered registers. + // FIXME: We need to teach codegen about single list of call-clobbered + // registers. let isCall = 1 in // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately @@ -162,9 +166,10 @@ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), "call\t{*}$dst", [(X86call GR64:$dst)]>, Requires<[IsWin64]>; - def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call\t{*}$dst", - [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>; + def WINCALL64m : I<0xFF, MRM2m, (outs), + (ins i64mem:$dst, variable_ops), "call\t{*}$dst", + [(X86call (loadi64 addr:$dst))]>, + Requires<[IsWin64]>; } @@ -188,6 +193,8 @@ // Branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp{q}\t$dst", []>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", @@ -210,6 +217,12 @@ //===----------------------------------------------------------------------===// // Miscellaneous 
Instructions... // + +def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; +def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; + let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", []>; @@ -238,9 +251,9 @@ } let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in -def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W; +def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in -def PUSHFQ : I<0x9C, RawFrm, (outs), (ins), "pushf", []>; +def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -309,6 +322,9 @@ [(set GR64:$dst, i64immSExt32:$src)]>; } +def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; + let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", @@ -321,24 +337,36 @@ "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)]>; -def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src), +def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src), +def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins), +def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; -def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins), +def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; // 
Moves to and from segment registers def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", []>; def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", []>; def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", []>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", []>; + +// Moves to and from debug registers +def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +// Moves to and from control registers +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Sign/Zero extenders @@ -365,6 +393,16 @@ "movs{lq|xd}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; +// movzbq and movzwq encodings for the disassembler +def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + // Use movzbl instead of movzbq when the destination 
is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), @@ -430,31 +468,36 @@ let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in // Register-Register Addition -def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; // Register-Integer Addition -def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)), (implicit EFLAGS)]>; -def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), +def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), + (ins GR64:$src1, i64i32imm:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)), (implicit EFLAGS)]>; } // isConvertibleToThreeAddress // Register-Memory Addition -def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; // Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but // differently encoded. 
-def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", []>; } // isTwoAddress @@ -480,18 +523,26 @@ let isTwoAddress = 1 in { let isCommutable = 1 in -def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; -def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), + (ins GR64:$src1, GR64:$src2), + "adc{q}\t{$src2, $dst|$dst, $src2}", []>; + +def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>; -def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>; -def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), +def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), + (ins GR64:$src1, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress @@ -501,21 +552,29 @@ [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>; def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + [(store (adde (load addr:$dst), i64immSExt8:$src2), + addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde 
(load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + [(store (adde (load addr:$dst), i64immSExt8:$src2), + addr:$dst)]>; } // Uses = [EFLAGS] let isTwoAddress = 1 in { // Register-Register Subtraction -def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; +def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "sub{q}\t{$src2, $dst|$dst, $src2}", []>; + // Register-Memory Subtraction -def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; @@ -556,18 +615,26 @@ let Uses = [EFLAGS] in { let isTwoAddress = 1 in { -def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; -def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; + +def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>; -def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>; -def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins 
GR64:$src1, i64i32imm:$src2), +def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), + (ins GR64:$src1, i64i32imm:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress @@ -652,15 +719,19 @@ // Unsigned division / remainder let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in { -def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX +// RDX:RAX/r64 = RAX,RDX +def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", []>; // Signed division / remainder -def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX +// RDX:RAX/r64 = RAX,RDX +def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", []>; let mayLoad = 1 in { -def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX +// RDX:RAX/[mem64] = RAX,RDX +def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), "div{q}\t$src", []>; -def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX +// RDX:RAX/[mem64] = RAX,RDX +def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), "idiv{q}\t$src", []>; } } @@ -694,19 +765,23 @@ // In 64-bit mode, single byte INC and DEC cannot be encoded. let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", +def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), + "inc{w}\t$dst", [(set GR16:$dst, (add GR16:$src, 1)), (implicit EFLAGS)]>, OpSize, Requires<[In64BitMode]>; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), + "inc{l}\t$dst", [(set GR32:$dst, (add GR32:$src, 1)), (implicit EFLAGS)]>, Requires<[In64BitMode]>; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), + "dec{w}\t$dst", [(set GR16:$dst, (add GR16:$src, -1)), (implicit EFLAGS)]>, OpSize, Requires<[In64BitMode]>; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), + "dec{l}\t$dst", [(set GR32:$dst, (add GR32:$src, -1)), (implicit EFLAGS)]>, Requires<[In64BitMode]>; @@ -743,13 +818,14 @@ "shl{q}\t{%cl, $dst|$dst, %CL}", [(set GR64:$dst, (shl GR64:$src, CL))]>; let isConvertibleToThreeAddress = 1 in // Can transform into LEA. -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; // NOTE: We don't include patterns for shifts of a register by one, because // 'add reg,reg' is cheaper. 
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t$dst", []>; + "shl{q}\t$dst", []>; } // isTwoAddress let Uses = [CL] in @@ -792,9 +868,10 @@ def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src), "sar{q}\t{%cl, $dst|$dst, %CL}", [(set GR64:$dst, (sra GR64:$src, CL))]>; -def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), - "sar{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; +def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "sar{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; @@ -826,7 +903,8 @@ } def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), +def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), + (ins i64mem:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), @@ -841,7 +919,8 @@ } def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), +def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), + (ins i64mem:$src, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; } @@ -850,7 +929,8 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src), "rol{q}\t{%cl, $dst|$dst, %CL}", [(set GR64:$dst, (rotl GR64:$src, CL))]>; -def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), +def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; def ROL64r1 : 
RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), @@ -859,9 +939,9 @@ } // isTwoAddress let Uses = [CL] in -def ROL64mCL : I<0xD3, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; +def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), + "rol{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src), "rol{q}\t{$src, $dst|$dst, $src}", [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; @@ -874,7 +954,8 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src), "ror{q}\t{%cl, $dst|$dst, %CL}", [(set GR64:$dst, (rotr GR64:$src, CL))]>; -def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), +def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), @@ -896,23 +977,29 @@ // Double shift instructions (generalizations of rotate) let isTwoAddress = 1 in { let Uses = [CL] in { -def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, TB; -def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, + TB; +def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, TB; + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, + TB; } let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction def SHLD64rri8 : 
RIi8<0xA4, MRMDestReg, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, (i8 imm:$src3)))]>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, (i8 imm:$src3)))]>, @@ -965,6 +1052,9 @@ "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; +def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "and{q}\t{$src2, $dst|$dst, $src2}", []>; def AND64rm : RI<0x23, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", @@ -1000,19 +1090,26 @@ let isTwoAddress = 1 in { let isCommutable = 1 in -def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; -def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "or{q}\t{$src2, $dst|$dst, $src2}", []>; +def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), (implicit EFLAGS)]>; -def 
OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), +def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), + (ins GR64:$src1, i64i32imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), (implicit EFLAGS)]>; @@ -1036,15 +1133,21 @@ let isTwoAddress = 1 in { let isCommutable = 1 in -def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), +def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; -def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), +def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "xor{q}\t{$src2, $dst|$dst, $src2}", []>; +def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), +def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), + (ins GR64:$src1, i64i8imm:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)), (implicit EFLAGS)]>; @@ -1148,10 +1251,12 @@ // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Disable these instructions for now. 
-//def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), -// "bt{q}\t{$src2, $src1|$src1, $src2}", +def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "bt{q}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi64 addr:$src1), GR64:$src2), -// (implicit EFLAGS)]>, TB; +// (implicit EFLAGS)] + [] + >, TB; def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", @@ -1164,6 +1269,33 @@ "bt{q}\t{$src2, $src1|$src1, $src2}", [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2), (implicit EFLAGS)]>, TB; + +def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), + "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), + "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + +def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + +def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), + "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), + "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS64mi8 : 
RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), + "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; } // Defs = [EFLAGS] // Conditional moves @@ -1171,164 +1303,164 @@ let isCommutable = 1 in { def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_B, EFLAGS))]>, TB; def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_AE, EFLAGS))]>, TB; def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_E, EFLAGS))]>, TB; def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_NE, EFLAGS))]>, TB; def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_BE, EFLAGS))]>, TB; def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_A, EFLAGS))]>, TB; def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_L, EFLAGS))]>, TB; def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_GE, EFLAGS))]>, TB; def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, 
GR64:$src2), - "cmovle\t{$src2, $dst|$dst, $src2}", + "cmovle{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_LE, EFLAGS))]>, TB; def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", + "cmovg{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_G, EFLAGS))]>, TB; def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_S, EFLAGS))]>, TB; def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_NS, EFLAGS))]>, TB; def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_P, EFLAGS))]>, TB; def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", + "cmovnp{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_NP, EFLAGS))]>, TB; def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, X86_COND_O, EFLAGS))]>, TB; def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovno\t{$src2, $dst|$dst, $src2}", + "cmovno{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, 
(X86cmov GR64:$src1, GR64:$src2, X86_COND_NO, EFLAGS))]>, TB; } // isCommutable = 1 def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_B, EFLAGS))]>, TB; def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_AE, EFLAGS))]>, TB; def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_E, EFLAGS))]>, TB; def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NE, EFLAGS))]>, TB; def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_BE, EFLAGS))]>, TB; def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_A, EFLAGS))]>, TB; def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_L, EFLAGS))]>, TB; def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_GE, EFLAGS))]>, TB; def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovle\t{$src2, 
$dst|$dst, $src2}", + "cmovle{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_LE, EFLAGS))]>, TB; def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", + "cmovg{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_G, EFLAGS))]>, TB; def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_S, EFLAGS))]>, TB; def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NS, EFLAGS))]>, TB; def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_P, EFLAGS))]>, TB; def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", + "cmovnp{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NP, EFLAGS))]>, TB; def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_O, EFLAGS))]>, TB; def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovno\t{$src2, $dst|$dst, 
$src2}", + "cmovno{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NO, EFLAGS))]>, TB; } // isTwoAddress @@ -1347,11 +1479,16 @@ // // f64 -> signed i64 +def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), + "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; +def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), + "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "cvtsd2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse2_cvtsd2si64 VR128:$src))]>; -def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src), +def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), + (ins f128mem:$src), "cvtsd2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse2_cvtsd2si64 (load addr:$src)))]>; @@ -1365,7 +1502,8 @@ "cvttsd2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse2_cvttsd2si64 VR128:$src))]>; -def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src), +def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), + (ins f128mem:$src), "cvttsd2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse2_cvttsd2si64 @@ -1410,7 +1548,8 @@ (int_x86_sse_cvtsi642ss VR128:$src1, GR64:$src2))]>; def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), + (outs VR128:$dst), + (ins VR128:$src1, i64mem:$src2), "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse_cvtsi642ss VR128:$src1, @@ -1418,6 +1557,10 @@ } // f32 -> signed i64 +def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), + "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; +def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), + "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), 
"cvtss2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, @@ -1436,10 +1579,20 @@ "cvttss2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse_cvttss2si64 VR128:$src))]>; -def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), +def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), + (ins f32mem:$src), "cvttss2si{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (int_x86_sse_cvttss2si64 (load addr:$src)))]>; + +// Descriptor-table support instructions + +// LLDT is not interpreted specially in 64-bit mode because there is no sign +// extension. +def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), + "sldt{q}\t$dst", []>, TB; +def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{q}\t$dst", []>, TB; //===----------------------------------------------------------------------===// // Alias Instructions @@ -1505,17 +1658,37 @@ let Constraints = "$val = $dst" in { let Defs = [EFLAGS] in -def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), +def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), "lock\n\t" "xadd\t$val, $ptr", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; -def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), - "xchg\t$val, $ptr", +def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val,i64mem:$ptr), + "xchg{q}\t{$val, $ptr|$ptr, $val}", [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; + +def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), + "xchg{q}\t{$val, $src|$src, $val}", []>; } +def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; 
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), + "cmpxchg16b\t$dst", []>, TB; + +def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), + "xchg{q}\t{$src, %rax|%rax, $src}", []>; + // Optimized codegen when the non-memory output is not used. let Defs = [EFLAGS] in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. @@ -1585,6 +1758,36 @@ def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; + +def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), + "push{q}\t%fs", []>, TB; +def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), + "push{q}\t%gs", []>, TB; + +def POPFS64 : I<0xa1, RawFrm, (outs), (ins), + "pop{q}\t%fs", []>, TB; +def POPGS64 : I<0xa9, RawFrm, (outs), (ins), + "pop{q}\t%gs", []>, TB; + +def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; + +// Specialized register support + +// no m form encodable; use SMSW16m +def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), + "smsw{q}\t$dst", []>, TB; + // String manipulation instructions def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; @@ -1722,9 +1925,9 @@ def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; // extload -// When extloading from 16-bit and smaller memory 
locations into 64-bit registers, -// use zero-extending loads so that the entire 64-bit register is defined, avoiding -// partial-register updates. +// When extloading from 16-bit and smaller memory locations into 64-bit +// registers, use zero-extending loads so that the entire 64-bit register is +// defined, avoiding partial-register updates. def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; @@ -1995,7 +2198,8 @@ addr:$dst), (implicit EFLAGS)), (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt32:$src2), +def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), + i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; @@ -2025,11 +2229,13 @@ (SUB64mr addr:$dst, GR64:$src2)>; // Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt8:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), + i64immSExt8:$src2), addr:$dst), (implicit EFLAGS)), (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt32:$src2), +def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), + i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; @@ -2153,7 +2359,8 @@ addr:$dst), (implicit EFLAGS)), (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2), +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), + i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; @@ -2185,7 +2392,8 @@ addr:$dst), (implicit EFLAGS)), (AND64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2), +def : 
Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), + i64immSExt32:$src2), addr:$dst), (implicit EFLAGS)), (AND64mi32 addr:$dst, i64immSExt32:$src2)>; Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Thu Dec 17 18:01:26 2009 @@ -195,48 +195,67 @@ // These instructions cannot address 80-bit memory. multiclass FPBinary { // ST(0) = ST(0) + [mem] -def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, +def _Fp32m : FpIf32<(outs RFP32:$dst), + (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, [(set RFP32:$dst, (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>; -def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW, +def _Fp64m : FpIf64<(outs RFP64:$dst), + (ins RFP64:$src1, f64mem:$src2), OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>; -def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW, +def _Fp64m32: FpIf64<(outs RFP64:$dst), + (ins RFP64:$src1, f32mem:$src2), OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>; -def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW, +def _Fp80m32: FpI_<(outs RFP80:$dst), + (ins RFP80:$src1, f32mem:$src2), OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>; -def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW, +def _Fp80m64: FpI_<(outs RFP80:$dst), + (ins RFP80:$src1, f64mem:$src2), OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad 
= 1; } + !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { + let mayLoad = 1; +} def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; } + !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { + let mayLoad = 1; +} // ST(0) = ST(0) + [memint] -def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW, +def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), + OneArgFPRW, [(set RFP32:$dst, (OpNode RFP32:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW, +def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), + OneArgFPRW, [(set RFP32:$dst, (OpNode RFP32:$src1, (X86fild addr:$src2, i32)))]>; -def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW, +def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), + OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW, +def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), + OneArgFPRW, [(set RFP64:$dst, (OpNode RFP64:$src1, (X86fild addr:$src2, i32)))]>; -def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW, +def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), + OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i16)))]>; -def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), OneArgFPRW, +def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), + OneArgFPRW, [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i32)))]>; def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; } + !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { + let 
mayLoad = 1; +} def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; } + !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { + let mayLoad = 1; +} } defm ADD : FPBinary_rr; @@ -279,6 +298,9 @@ def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">; def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">; +def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">; +def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">; + // Unary operations. multiclass FPUnary opcode, string asmstring> { def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW, @@ -305,22 +327,22 @@ // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom\t$src">; -def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp\t$src">; +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{l}\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{l}\t$src">; def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; -def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fstenv\t$dst">; +def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; -def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom\t$src">; -def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp\t$src">; +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; -def FSAVEm : FPI<0xDD, MRM6m, 
(outs f32mem:$dst), (ins), "fsave\t$dst">; -def FSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fstsw\t$dst">; +def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; +def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; @@ -493,7 +515,8 @@ let mayStore = 1 in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">; -def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">; +def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), + "fisttp{ll}\t$dst">; } // FP Stack manipulation instructions. @@ -561,10 +584,15 @@ "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF; } +def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), + "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB; +def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), + "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF; + // Floating point flag ops. 
let Defs = [AX] in def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags - (outs), (ins), "fnstsw", []>, DF; + (outs), (ins), "fnstsw %ax", []>, DF; def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word (outs), (ins i16mem:$dst), "fnstcw\t$dst", @@ -574,6 +602,44 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16] (outs), (ins i16mem:$dst), "fldcw\t$dst", []>; +// Register free + +def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), + "ffree\t$reg">, DD; + +// Clear exceptions + +def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB; + +// Operandless floating-point instructions for the disassembler + +def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9; +def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9; +def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9; +def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9; +def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9; +def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9; +def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9; +def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9; +def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9; +def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9; +def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9; +def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9; +def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9; +def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9; +def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9; +def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9; +def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9; +def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9; +def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9; +def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9; +def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE; + +def FXSAVE : I<0xAE, 
MRM0m, (outs opaque512mem:$dst), (ins), + "fxsave\t$dst", []>, TB; +def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor\t$src", []>, TB; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -585,11 +651,15 @@ // Required for CALL which return f32 / f64 / f80 values. def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>; -def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>; +def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, + RFP64:$src)>; def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>; -def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, RFP80:$src)>; -def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, RFP80:$src)>; -def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>; +def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, + RFP80:$src)>; +def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, + RFP80:$src)>; +def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, + RFP80:$src)>; // Floating point constant -0.0 and -1.0 def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>; Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Thu Dec 17 18:01:26 2009 @@ -115,17 +115,20 @@ let Pattern = pattern; let CodeSize = 3; } -class Ii8 o, Format f, dag outs, dag ins, string asm, list pattern> +class Ii8 o, Format f, dag outs, dag ins, string asm, + list pattern> : X86Inst { let Pattern = 
pattern; let CodeSize = 3; } -class Ii16 o, Format f, dag outs, dag ins, string asm, list pattern> +class Ii16 o, Format f, dag outs, dag ins, string asm, + list pattern> : X86Inst { let Pattern = pattern; let CodeSize = 3; } -class Ii32 o, Format f, dag outs, dag ins, string asm, list pattern> +class Ii32 o, Format f, dag outs, dag ins, string asm, + list pattern> : X86Inst { let Pattern = pattern; let CodeSize = 3; @@ -169,7 +172,8 @@ class SSI o, Format F, dag outs, dag ins, string asm, list pattern> : I, XS, Requires<[HasSSE1]>; -class SSIi8 o, Format F, dag outs, dag ins, string asm, list pattern> +class SSIi8 o, Format F, dag outs, dag ins, string asm, + list pattern> : Ii8, XS, Requires<[HasSSE1]>; class PSI o, Format F, dag outs, dag ins, string asm, list pattern> : I, TB, Requires<[HasSSE1]>; @@ -205,9 +209,11 @@ // S3SI - SSE3 instructions with XS prefix. // S3DI - SSE3 instructions with XD prefix. -class S3SI o, Format F, dag outs, dag ins, string asm, list pattern> +class S3SI o, Format F, dag outs, dag ins, string asm, + list pattern> : I, XS, Requires<[HasSSE3]>; -class S3DI o, Format F, dag outs, dag ins, string asm, list pattern> +class S3DI o, Format F, dag outs, dag ins, string asm, + list pattern> : I, XD, Requires<[HasSSE3]>; class S3I o, Format F, dag outs, dag ins, string asm, list pattern> : I, TB, OpSize, Requires<[HasSSE3]>; @@ -255,7 +261,7 @@ // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI o, Format F, dag outs, dag ins, string asm, - list pattern> + list pattern> : I, TA, Requires<[HasSSE42]>; // X86-64 Instruction templates... @@ -297,17 +303,24 @@ // MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix. // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. 
-class MMXI o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXI o, Format F, dag outs, dag ins, string asm, + list pattern> : I, TB, Requires<[HasMMX]>; -class MMXI64 o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXI64 o, Format F, dag outs, dag ins, string asm, + list pattern> : I, TB, Requires<[HasMMX,In64BitMode]>; -class MMXRI o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXRI o, Format F, dag outs, dag ins, string asm, + list pattern> : I, TB, REX_W, Requires<[HasMMX]>; -class MMX2I o, Format F, dag outs, dag ins, string asm, list pattern> +class MMX2I o, Format F, dag outs, dag ins, string asm, + list pattern> : I, TB, OpSize, Requires<[HasMMX]>; -class MMXIi8 o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXIi8 o, Format F, dag outs, dag ins, string asm, + list pattern> : Ii8, TB, Requires<[HasMMX]>; -class MMXID o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXID o, Format F, dag outs, dag ins, string asm, + list pattern> : Ii8, XD, Requires<[HasMMX]>; -class MMXIS o, Format F, dag outs, dag ins, string asm, list pattern> +class MMXIS o, Format F, dag outs, dag ins, string asm, + list pattern> : Ii8, XS, Requires<[HasMMX]>; Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Dec 17 18:01:26 2009 @@ -1880,7 +1880,7 @@ if (SrcReg != X86::EFLAGS) return false; if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFQ)); + BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; } else if (DestRC == &X86::GR32RegClass || Modified: 
llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Thu Dec 17 18:01:26 2009 @@ -1,4 +1,4 @@ -//===- X86InstrInfo.td - Describe the X86 Instruction Set --*- tablegen -*-===// + // // The LLVM Compiler Infrastructure // @@ -199,6 +199,12 @@ def opaque32mem : X86MemOperand<"printopaquemem">; def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; +def opaque512mem : X86MemOperand<"printopaquemem">; + +def offset8 : Operand { let PrintMethod = "print_pcrel_imm"; } +def offset16 : Operand { let PrintMethod = "print_pcrel_imm"; } +def offset32 : Operand { let PrintMethod = "print_pcrel_imm"; } +def offset64 : Operand { let PrintMethod = "print_pcrel_imm"; } def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; @@ -354,7 +360,8 @@ return false; }]>; -def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ +def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), +[{ LoadSDNode *LD = cast(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast(Src->getType())) @@ -542,13 +549,17 @@ // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; + def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero), + "nop{w}\t$zero", []>, TB, OpSize; def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero), - "nopl\t$zero", []>, TB; + "nop{l}\t$zero", []>, TB; } // Trap def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; +def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; 
// PIC base construction. This expands to code that looks like this: // call $next_inst @@ -712,12 +723,14 @@ // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in -def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops), +def TCRETURNdi : I<0, Pseudo, (outs), + (ins i32imm:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in -def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_ops), +def TCRETURNri : I<0, Pseudo, (outs), + (ins GR32:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -725,7 +738,8 @@ def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL", + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), + "jmp{l}\t{*}$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), @@ -738,6 +752,15 @@ def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>; +def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; + let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { let mayLoad = 1 in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, @@ -773,10 +796,14 @@ "push{l}\t$imm", []>; } -let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, 
neverHasSideEffects=1 in -def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>; -let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in -def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf", []>; +let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { +def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>; +} +let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { +def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; +def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>; +} let isTwoAddress = 1 in // GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, @@ -918,6 +945,13 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), "out{l}\t{%eax, $port|$port, %EAX}", []>; +def IN8 : I<0x6C, RawFrm, (outs), (ins), + "ins{b}", []>; +def IN16 : I<0x6D, RawFrm, (outs), (ins), + "ins{w}", []>, OpSize; +def IN32 : I<0x6D, RawFrm, (outs), (ins), + "ins{l}", []>; + //===----------------------------------------------------------------------===// // Move Instructions... 
// @@ -950,18 +984,18 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins i8imm:$src), +def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; -def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins i16imm:$src), +def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; -def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins i32imm:$src), +def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|%eax, $src}", []>; -def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs i8imm:$dst), (ins), +def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, %al}", []>; -def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs i16imm:$dst), (ins), +def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; -def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs i32imm:$dst), (ins), +def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; // Moves to and from segment registers @@ -974,6 +1008,13 @@ def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), + "mov{b}\t{$src, $dst|$dst, $src}", []>; +def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; + let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1013,6 +1054,18 @@ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +// Moves to and from debug registers +def MOV32rd : I<0x21, 
MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; + +// Moves to and from control registers +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... // @@ -1082,45 +1135,47 @@ // unsigned division/remainder let Defs = [AL,AH,EFLAGS], Uses = [AX] in -def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH +def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX +def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX "div{w}\t$src", []>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX +def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { let Defs = [AL,AH,EFLAGS], Uses = [AX] in -def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH +def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX +def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX "div{w}\t$src", []>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX + // EDX:EAX/[mem32] = 
EAX,EDX +def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), "div{l}\t$src", []>; } // Signed division/remainder. let Defs = [AL,AH,EFLAGS], Uses = [AX] in -def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH +def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX +def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX "idiv{w}\t$src", []>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX +def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { let Defs = [AL,AH,EFLAGS], Uses = [AX] in -def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH +def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in -def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX +def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX "idiv{w}\t$src", []>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in -def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX +def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), + // EDX:EAX/[mem32] = EAX,EDX "idiv{l}\t$src", []>; } @@ -1148,193 +1203,193 @@ let isCommutable = 1 in { def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_B, EFLAGS))]>, TB, OpSize; def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_B, EFLAGS))]>, TB; def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_AE, EFLAGS))]>, TB, OpSize; def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32 (outs
GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_AE, EFLAGS))]>, TB; def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_E, EFLAGS))]>, TB, OpSize; def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_E, EFLAGS))]>, TB; def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_NE, EFLAGS))]>, TB, OpSize; def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_NE, EFLAGS))]>, TB; def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_BE, EFLAGS))]>, TB, OpSize; def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_BE, EFLAGS))]>, TB; def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{w}\t{$src2, $dst|$dst, $src2}", 
[(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_A, EFLAGS))]>, TB, OpSize; def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_A, EFLAGS))]>, TB; def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_L, EFLAGS))]>, TB, OpSize; def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_L, EFLAGS))]>, TB; def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_GE, EFLAGS))]>, TB, OpSize; def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_GE, EFLAGS))]>, TB; def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovle\t{$src2, $dst|$dst, $src2}", + "cmovle{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_LE, EFLAGS))]>, TB, OpSize; def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovle\t{$src2, $dst|$dst, $src2}", + "cmovle{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_LE, EFLAGS))]>, TB; def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", + "cmovg{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_G, EFLAGS))]>, TB, OpSize; def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", + "cmovg{l}\t{$src2, $dst|$dst, $src2}", [(set
GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_G, EFLAGS))]>, TB; def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_S, EFLAGS))]>, TB, OpSize; def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_S, EFLAGS))]>, TB; def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_NS, EFLAGS))]>, TB, OpSize; def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_NS, EFLAGS))]>, TB; def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_P, EFLAGS))]>, TB, OpSize; def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_P, EFLAGS))]>, TB; def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", + "cmovnp{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_NP, EFLAGS))]>, TB, OpSize; def CMOVNP32rr : 
I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", + "cmovnp{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_NP, EFLAGS))]>, TB; def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_O, EFLAGS))]>, TB, OpSize; def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_O, EFLAGS))]>, TB; def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16 (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovno\t{$src2, $dst|$dst, $src2}", + "cmovno{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, X86_COND_NO, EFLAGS))]>, TB, OpSize; def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32 (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovno\t{$src2, $dst|$dst, $src2}", + "cmovno{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, X86_COND_NO, EFLAGS))]>, TB; @@ -1342,193 +1397,193 @@ def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_B, EFLAGS))]>, TB, OpSize; def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovb\t{$src2, $dst|$dst, $src2}", + "cmovb{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_B, EFLAGS))]>, TB; def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_AE, EFLAGS))]>, TB, OpSize; def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovae\t{$src2, $dst|$dst, $src2}", + "cmovae{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1,
(loadi32 addr:$src2), X86_COND_AE, EFLAGS))]>, TB; def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_E, EFLAGS))]>, TB, OpSize; def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmove\t{$src2, $dst|$dst, $src2}", + "cmove{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_E, EFLAGS))]>, TB; def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_NE, EFLAGS))]>, TB, OpSize; def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovne\t{$src2, $dst|$dst, $src2}", + "cmovne{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_NE, EFLAGS))]>, TB; def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_BE, EFLAGS))]>, TB, OpSize; def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovbe\t{$src2, $dst|$dst, $src2}", + "cmovbe{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_BE, EFLAGS))]>, TB; def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), 
X86_COND_A, EFLAGS))]>, TB, OpSize; def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmova\t{$src2, $dst|$dst, $src2}", + "cmova{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_A, EFLAGS))]>, TB; def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_L, EFLAGS))]>, TB, OpSize; def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovl\t{$src2, $dst|$dst, $src2}", + "cmovl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_L, EFLAGS))]>, TB; def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_GE, EFLAGS))]>, TB, OpSize; def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovge\t{$src2, $dst|$dst, $src2}", + "cmovge{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_GE, EFLAGS))]>, TB; def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovle\t{$src2, $dst|$dst, $src2}", + "cmovle{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_LE, EFLAGS))]>, TB, OpSize; def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovle\t{$src2, $dst|$dst, $src2}", + "cmovle{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_LE, EFLAGS))]>, TB; def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", + "cmovg{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_G, EFLAGS))]>, TB, OpSize; def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovg\t{$src2, $dst|$dst, $src2}", +
"cmovg{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_G, EFLAGS))]>, TB; def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_S, EFLAGS))]>, TB, OpSize; def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovs\t{$src2, $dst|$dst, $src2}", + "cmovs{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_S, EFLAGS))]>, TB; def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_NS, EFLAGS))]>, TB, OpSize; def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovns\t{$src2, $dst|$dst, $src2}", + "cmovns{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_NS, EFLAGS))]>, TB; def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_P, EFLAGS))]>, TB, OpSize; def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovp\t{$src2, $dst|$dst, $src2}", + "cmovp{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_P, EFLAGS))]>, TB; def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", 
+ "cmovnp{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_NP, EFLAGS))]>, TB, OpSize; def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovnp\t{$src2, $dst|$dst, $src2}", + "cmovnp{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_NP, EFLAGS))]>, TB; def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_O, EFLAGS))]>, TB, OpSize; def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovo\t{$src2, $dst|$dst, $src2}", + "cmovo{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_O, EFLAGS))]>, TB; def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16] (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovno\t{$src2, $dst|$dst, $src2}", + "cmovno{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), X86_COND_NO, EFLAGS))]>, TB, OpSize; def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovno\t{$src2, $dst|$dst, $src2}", + "cmovno{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), X86_COND_NO, EFLAGS))]>, TB; @@ -1586,11 +1641,13 @@ [(set GR8:$dst, (add GR8:$src, 1)), (implicit EFLAGS)]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), + "inc{w}\t$dst", [(set GR16:$dst, (add GR16:$src, 1)), (implicit EFLAGS)]>, OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), + "inc{l}\t$dst", [(set GR32:$dst, (add GR32:$src, 1)), (implicit EFLAGS)]>, Requires<[In32BitMode]>; } @@ -1613,11 +1670,13 @@ [(set GR8:$dst, (add GR8:$src, -1)), (implicit EFLAGS)]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", +def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), + "dec{w}\t$dst", [(set GR16:$dst, (add GR16:$src, -1)), (implicit EFLAGS)]>, OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), + "dec{l}\t$dst", [(set GR32:$dst, (add GR32:$src, -1)), (implicit EFLAGS)]>, Requires<[In32BitMode]>; } @@ -1657,6 +1716,17 @@ (implicit EFLAGS)]>; } +// AND instructions with the destination register in REG and the source register +// in R/M. Included for the disassembler. 
+def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "and{b}\t{$src2, $dst|$dst, $src2}", []>; +def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", []>; + def AND8rm : I<0x22, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "and{b}\t{$src2, $dst|$dst, $src2}", @@ -1756,50 +1826,73 @@ let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y -def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), +def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), + (ins GR8 :$src1, GR8 :$src2), "or{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (or GR8:$src1, GR8:$src2)), (implicit EFLAGS)]>; -def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), +def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (or GR16:$src1, GR16:$src2)), (implicit EFLAGS)]>, OpSize; -def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), +def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, GR32:$src2)), (implicit EFLAGS)]>; } -def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), + +// OR instructions with the destination register in REG and the source register +// in R/M. Included for the disassembler. 
+def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "or{b}\t{$src2, $dst|$dst, $src2}", []>; +def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "or{l}\t{$src2, $dst|$dst, $src2}", []>; + +def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), + (ins GR8 :$src1, i8mem :$src2), "or{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), +def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), + (ins GR16:$src1, i16mem:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))), (implicit EFLAGS)]>, OpSize; -def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), +def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), + (ins GR8 :$src1, i8imm:$src2), "or{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (or GR8:$src1, imm:$src2)), (implicit EFLAGS)]>; -def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), +def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), + (ins GR16:$src1, i16imm:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (or GR16:$src1, imm:$src2)), (implicit EFLAGS)]>, OpSize; -def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), +def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), + (ins GR32:$src1, i32imm:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, imm:$src2)), (implicit EFLAGS)]>; -def OR16ri8 : Ii8<0x83, MRM1r, 
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), +def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), + (ins GR16:$src1, i16i8imm:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)), (implicit EFLAGS)]>, OpSize; -def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), +def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), + (ins GR32:$src1, i32i8imm:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)), (implicit EFLAGS)]>; @@ -1866,6 +1959,17 @@ (implicit EFLAGS)]>; } // isCommutable = 1 +// XOR instructions with the destination register in REG and the source register +// in R/M. Included for the disassembler. +def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", []>; +def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "xor{l}\t{$src2, $dst|$dst, $src2}", []>; + def XOR8rm : I<0x32, MRMSrcMem , (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", @@ -2205,7 +2309,8 @@ } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt), +def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), + (ins i16mem:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), @@ -2220,7 +2325,8 @@ } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt), +def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), + (ins i32mem:$src, i8imm:$cnt), 
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), @@ -2250,7 +2356,8 @@ } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt), +def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), + (ins i16mem:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), @@ -2265,7 +2372,8 @@ } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt), +def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), + (ins i32mem:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; // FIXME: provide shorter instructions when imm8 == 1 @@ -2286,7 +2394,8 @@ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "rol{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, + OpSize; def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; @@ -2355,7 +2464,8 @@ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "ror{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, + OpSize; def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; @@ -2411,17 +2521,21 @@ // Double shift instructions (generalizations of 
rotate) let Uses = [CL] in { -def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), +def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; -def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), +def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; -def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), +def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; -def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), +def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; @@ -2429,25 +2543,29 @@ let isCommutable = 1 in { // These instructions commute to each other. 
def SHLD32rri8 : Ii8<0xA4, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, (i8 imm:$src3)))]>, TB; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, (i8 imm:$src3)))]>, TB; def SHLD16rri8 : Ii8<0xA4, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, (i8 imm:$src3)))]>, TB, OpSize; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, (i8 imm:$src3)))]>, @@ -2645,6 +2763,16 @@ "adc{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; } + +def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "adc{b}\t{$src2, $dst|$dst, $src2}", []>; +def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "adc{l}\t{$src2, $dst|$dst, $src2}", []>; + def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", @@ -2731,6 +2859,15 @@ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)), (implicit EFLAGS)]>; +def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + 
"sub{b}\t{$src2, $dst|$dst, $src2}", []>; +def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "sub{l}\t{$src2, $dst|$dst, $src2}", []>; + // Register-Memory Subtraction def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -2872,6 +3009,16 @@ def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), "sbb{l}\t{$src, %eax|%eax, $src}", []>; } + +def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", []>; +def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "sbb{l}\t{$src2, $dst|$dst, $src2}", []>; + def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>; @@ -2926,7 +3073,8 @@ "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))), (implicit EFLAGS)]>, TB, OpSize; -def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), +def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))), (implicit EFLAGS)]>, TB; @@ -2958,12 +3106,12 @@ (implicit EFLAGS)]>; // Memory-Integer Signed Integer Multiply -def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 +def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)), (implicit EFLAGS)]>, OpSize; -def IMUL32rmi : Ii32<0x69, 
MRMSrcMem, // GR32 = [mem32]*I32 +def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)), @@ -3374,15 +3522,21 @@ // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's -// perspective, this is pretty bizarre. Disable these instructions for now. -//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), -// "bt{w}\t{$src2, $src1|$src1, $src2}", +// perspective, this is pretty bizarre. Make these instructions disassembly +// only for now. + +def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), -// (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>; -//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), -// "bt{l}\t{$src2, $src1|$src1, $src2}", +// (implicit EFLAGS)] + [] + >, OpSize, TB, Requires<[FastBTMem]>; +def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi32 addr:$src1), GR32:$src2), -// (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>; +// (implicit EFLAGS)] + [] + >, TB, Requires<[FastBTMem]>; def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", @@ -3403,12 +3557,67 @@ "bt{l}\t{$src2, $src1|$src1, $src2}", [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2), (implicit EFLAGS)]>, TB; + +def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), + "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), + "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "btc{w}\t{$src2, 
$src1|$src1, $src2}", []>, OpSize, TB; +def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), + "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), + "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), + "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), + "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + +def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), + "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), + "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), + "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), + "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), + "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), + "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + +def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), + "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), + "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS16mr : I<0xAB, 
MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), + "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), + "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; +def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), + "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; +def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), + "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; } // Defs = [EFLAGS] // Sign/Zero extenders // Use movsbl intead of movsbw; we don't care about the high 16 bits // of the register here. This has a smaller encoding and avoids a -// partial-register update. +// partial-register update. Actual movsbw included for the disassembler. +def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), "", [(set GR16:$dst, (sext GR8:$src))]>, TB; def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), @@ -3428,7 +3637,11 @@ // Use movzbl intead of movzbw; we don't care about the high 16 bits // of the register here. This has a smaller encoding and avoids a -// partial-register update. +// partial-register update. Actual movzbw included for the disassembler. 
+def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), + "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), "", [(set GR16:$dst, (zext GR8:$src))]>, TB; def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), @@ -3541,18 +3754,32 @@ // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. let Constraints = "$val = $dst" in { -def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), +def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), "xchg{l}\t{$val, $ptr|$ptr, $val}", [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; -def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), +def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), "xchg{w}\t{$val, $ptr|$ptr, $val}", [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, OpSize; -def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), +def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), "xchg{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>; + +def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), + "xchg{l}\t{$val, $src|$src, $val}", []>; +def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), + "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; +def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), + "xchg{b}\t{$val, $src|$src, $val}", []>; } +def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), + "xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), + "xchg{l}\t{$src, 
%eax|%eax, $src}", []>; + // Atomic compare and swap. let Defs = [EAX, EFLAGS], Uses = [EAX] in { def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), @@ -3582,23 +3809,54 @@ // Atomic exchange and add let Constraints = "$val = $dst", Defs = [EFLAGS] in { -def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), +def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), "lock\n\t" "xadd{l}\t{$val, $ptr|$ptr, $val}", [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, TB, LOCK; -def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), +def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), "lock\n\t" "xadd{w}\t{$val, $ptr|$ptr, $val}", [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, TB, OpSize, LOCK; -def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), +def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), "lock\n\t" "xadd{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, TB, LOCK; } +def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), + "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), + "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), + "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), + "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG16rr 
: I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), + "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), + "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), + "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), + "cmpxchg8b\t$dst", []>, TB; + // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. let Defs = [EFLAGS] in { @@ -3655,7 +3913,7 @@ def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), "lock\n\t" "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), +def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), "lock\n\t" "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), @@ -3780,12 +4038,193 @@ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lsl{l}\t{$src, $dst|$dst, 
$src}", []>, TB; + +def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB; + +def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), + "ltr{w}\t{$src}", []>, TB; +def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), + "ltr{w}\t{$src}", []>, TB; + +def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), + "push{w}\t%fs", []>, OpSize, TB; +def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), + "push{l}\t%fs", []>, TB; +def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), + "push{w}\t%gs", []>, OpSize, TB; +def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), + "push{l}\t%gs", []>, TB; + +def POPFS16 : I<0xa1, RawFrm, (outs), (ins), + "pop{w}\t%fs", []>, OpSize, TB; +def POPFS32 : I<0xa1, RawFrm, (outs), (ins), + "pop{l}\t%fs", []>, TB; +def POPGS16 : I<0xa9, RawFrm, (outs), (ins), + "pop{w}\t%gs", []>, OpSize, TB; +def POPGS32 : I<0xa9, RawFrm, (outs), (ins), + "pop{l}\t%gs", []>, TB; + +def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lds{l}\t{$src, $dst|$dst, $src}", []>; +def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lss{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "les{l}\t{$src, $dst|$dst, $src}", []>; +def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lfs{l}\t{$src, 
$dst|$dst, $src}", []>, TB; +def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), + "verr\t$seg", []>, TB; +def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), + "verr\t$seg", []>, TB; +def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), + "verw\t$seg", []>, TB; +def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), + "verw\t$seg", []>, TB; + +// Descriptor-table support instructions + +def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), + "sgdt\t$dst", []>, TB; +def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), + "sidt\t$dst", []>, TB; +def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), + "sldt{w}\t$dst", []>, TB; +def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{w}\t$dst", []>, TB; +def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), + "lgdt\t$src", []>, TB; +def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), + "lidt\t$src", []>, TB; +def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), + "lldt{w}\t$src", []>, TB; +def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), + "lldt{w}\t$src", []>, TB; // String manipulation instructions def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; -def LODSD : I<0xAD, RawFrm, (outs), (ins), "lodsd", []>; +def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>; + +def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>; +def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize; +def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>; + +// CPU flow control instructions + +def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; +def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; + +// FPU control instructions + +def FNINIT : I<0xE3, RawFrm, 
(outs), (ins), "fninit", []>, DB; + +// Flag instructions + +def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>; +def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>; +def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>; +def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>; +def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>; +def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>; +def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>; + +def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB; + +// Table lookup instructions + +def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>; + +// Specialized register support + +def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; +def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; +def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; + +def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), + "smsw{w}\t$dst", []>, OpSize, TB; +def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), + "smsw{l}\t$dst", []>, TB; +// For memory operands, there is only a 16-bit form +def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), + "smsw{w}\t$dst", []>, TB; + +def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), + "lmsw{w}\t$src", []>, TB; +def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), + "lmsw{w}\t$src", []>, TB; + +def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; + +// Cache instructions + +def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; +def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; + +// VMX instructions + +// 66 0F 38 80 +def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +// 66 0F 38 81 +def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +// 0F 01 C1 +def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmclear\t$vmcs", []>, OpSize, TB; +// 0F 01 C2 +def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +// 0F 01 C3 +def VMRESUME : 
I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmptrld\t$vmcs", []>, TB; +def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), + "vmptrst\t$vmcs", []>, TB; +def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +// 0F 01 C4 +def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), + "vmxon\t{$vmxon}", []>, XD; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -4031,15 +4470,18 @@ x86_subreg_16bit)>, Requires<[In32BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS 
GR16:$src, + GR16_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Thu Dec 17 18:01:26 2009 @@ -72,13 +72,13 @@ multiclass MMXI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { def rr : MMXI { let isCommutable = Commutable; } def rm : MMXI opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { def rr : MMXI { let isCommutable = Commutable; } def rm : MMXI; @@ -144,9 +144,9 @@ //===----------------------------------------------------------------------===// def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", - [(int_x86_mmx_emms)]>; + [(int_x86_mmx_emms)]>; def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", - [(int_x86_mmx_femms)]>; + [(int_x86_mmx_femms)]>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions @@ -155,16 +155,21 @@ // Data Transfer Instructions def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (v2i32 (scalar_to_vector GR32:$src)))]>; + [(set VR64:$dst, + (v2i32 (scalar_to_vector GR32:$src)))]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; + (v2i32 (scalar_to_vector (loadi32 
addr:$src))))]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), + "movd\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVQ64gmr : MMXRI<0x7E, MRMDestMem, (outs), + (ins i64mem:$dst, VR64:$src), + "movq\t{$src, $dst|$dst, $src}", []>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -181,7 +186,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), @@ -223,7 +228,7 @@ (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), - (ins i32mem:$src), + (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (v2i32 (X86vzmovl (v2i32 @@ -432,21 +437,21 @@ "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), + (ins f128mem:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), + (ins i64mem:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), + (ins i64mem:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs 
VR64:$dst), (ins VR128:$src), @@ -459,7 +464,7 @@ "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), + (ins f128mem:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), @@ -481,14 +486,14 @@ (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, - i16i8imm:$src3), + (outs VR64:$dst), + (ins VR64:$src1, GR32:$src2,i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), GR32:$src2,(iPTR imm:$src3))))]>; def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, - i16i8imm:$src3), + (outs VR64:$dst), + (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Dec 17 18:01:26 2009 @@ -70,7 +70,7 @@ def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, - SDTCisVT<1, v4f32>]>; + SDTCisVT<1, v4f32>]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; //===----------------------------------------------------------------------===// @@ -116,12 +116,18 @@ return cast(N)->getAlignment() >= 16; }]>; -def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; -def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; -def alignedloadv4f32 : 
PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; -def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>; -def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; +def alignedloadfsf32 : PatFrag<(ops node:$ptr), + (f32 (alignedload node:$ptr))>; +def alignedloadfsf64 : PatFrag<(ops node:$ptr), + (f64 (alignedload node:$ptr))>; +def alignedloadv4f32 : PatFrag<(ops node:$ptr), + (v4f32 (alignedload node:$ptr))>; +def alignedloadv2f64 : PatFrag<(ops node:$ptr), + (v2f64 (alignedload node:$ptr))>; +def alignedloadv4i32 : PatFrag<(ops node:$ptr), + (v4i32 (alignedload node:$ptr))>; +def alignedloadv2i64 : PatFrag<(ops node:$ptr), + (v2i64 (alignedload node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to @@ -363,6 +369,11 @@ [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; // Match intrinsics which expect XMM operand(s). +def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), + "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; +def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), + "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; + def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "cvtss2si\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>; @@ -441,19 +452,26 @@ "ucomiss\t{$src2, $src1|$src1, $src2}", [(X86cmp FR32:$src1, (loadf32 addr:$src2)), (implicit EFLAGS)]>; + +def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "comiss\t{$src2, $src1|$src1, $src2}", []>; +def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "comiss\t{$src2, $src1|$src1, $src2}", []>; + } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). 
let Constraints = "$src1 = $dst" in { def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, - SSECC:$cc), + (outs VR128:$dst), + (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - VR128:$src, imm:$cc))]>; + [(set VR128:$dst, (int_x86_sse_cmp_ss + VR128:$src1, + VR128:$src, imm:$cc))]>; def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, - SSECC:$cc), + (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -1205,14 +1223,14 @@ // Aliases to match intrinsics which expect XMM operand(s). let Constraints = "$src1 = $dst" in { def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, - SSECC:$cc), + (outs VR128:$dst), + (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, - SSECC:$cc), + (outs VR128:$dst), + (ins VR128:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -1542,9 +1560,15 @@ [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; // SSE2 packed instructions with XS prefix +def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>, + [(set VR128:$dst, + 
(int_x86_sse2_cvttps2dq VR128:$src))]>, XS, Requires<[HasSSE2]>; def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1572,6 +1596,11 @@ (memop addr:$src)))]>; // SSE2 instructions without OpSize prefix +def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; +def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; + def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1582,6 +1611,12 @@ (load addr:$src)))]>, TB, Requires<[HasSSE2]>; +def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; +def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; + + def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1856,31 +1891,34 @@ multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : PDI { let isCommutable = Commutable; } - def rm : PDI; + (bitconvert (memopv2i64 + addr:$src2))))]>; } multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI; - def rm : PDI; - def ri : PDIi8; } @@ -1888,14 +1926,14 @@ /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI { let isCommutable = Commutable; } - def rm : PDI; @@ -1909,16 +1947,16 @@ multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, bit Commutable = 0> { def rr : PDI { let isCommutable = Commutable; } def rm : PDI; + (memopv2i64 addr:$src2)))]>; } } // Constraints = "$src1 = $dst" @@ -2455,6 +2493,13 @@ (MOVZPQILo2PQIrm addr:$src)>; } +// Instructions for the disassembler +// xr = XMM register +// xm = mem64 + +def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movq\t{$src, $dst|$dst, $src}", []>, XS; + //===---------------------------------------------------------------------===// // SSE3 Instructions //===---------------------------------------------------------------------===// @@ -3661,7 +3706,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; + OpSize; def rm : SS4AIi8, OpSize; + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpistrm128 VR128:$src1, - (load addr:$src2), - imm:$src3))]>, OpSize; + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), + imm:$src3))]>, OpSize; } let Defs = [XMM0, EFLAGS] in { def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", - []>, OpSize; + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, 
(outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", - []>, OpSize; + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; } -let Defs = [EFLAGS], Uses = [EAX, EDX], - usesCustomInserter = 1 in { +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, - VR128:$src3, - EDX, imm:$src5))]>, OpSize; + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; + def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, - (load addr:$src3), - EDX, imm:$src5))]>, OpSize; + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + OpSize; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", - []>, OpSize; + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", - []>, OpSize; + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; } let Defs = [ECX, EFLAGS] in { multiclass SS42AI_pcmpistri { - def rr : 
SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, - OpSize; + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, OpSize; def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, - (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, - OpSize; + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, OpSize; } } @@ -3870,20 +3902,16 @@ let Uses = [EAX, EDX] in { multiclass SS42AI_pcmpestri { def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, - OpSize; + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), - EDX, imm:$src5)), - (implicit EFLAGS)]>, - OpSize; + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; } } } Modified: 
llvm/trunk/lib/Target/X86/X86RegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Thu Dec 17 18:01:26 2009 @@ -195,6 +195,36 @@ def ES : Register<"es">; def FS : Register<"fs">; def GS : Register<"gs">; + + // Debug registers + def DR0 : Register<"dr0">; + def DR1 : Register<"dr1">; + def DR2 : Register<"dr2">; + def DR3 : Register<"dr3">; + def DR4 : Register<"dr4">; + def DR5 : Register<"dr5">; + def DR6 : Register<"dr6">; + def DR7 : Register<"dr7">; + + // Condition registers + def ECR0 : Register<"ecr0">; + def ECR1 : Register<"ecr1">; + def ECR2 : Register<"ecr2">; + def ECR3 : Register<"ecr3">; + def ECR4 : Register<"ecr4">; + def ECR5 : Register<"ecr5">; + def ECR6 : Register<"ecr6">; + def ECR7 : Register<"ecr7">; + + def RCR0 : Register<"rcr0">; + def RCR1 : Register<"rcr1">; + def RCR2 : Register<"rcr2">; + def RCR3 : Register<"rcr3">; + def RCR4 : Register<"rcr4">; + def RCR5 : Register<"rcr5">; + def RCR6 : Register<"rcr6">; + def RCR7 : Register<"rcr7">; + def RCR8 : Register<"rcr8">; } @@ -446,6 +476,22 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { } +// Debug registers. +def DEBUG_REG : RegisterClass<"X86", [i32], 32, + [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { +} + +// Control registers. +def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32, + [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6, + ECR7]> { +} + +def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64, + [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6, + RCR7, RCR8]> { +} + // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" // registers. 
On x86-32, GR16_ABCD and GR32_ABCD are classes for registers @@ -661,7 +707,8 @@ }]; let MethodBodies = [{ GR64_NOREX_NOSPClass::iterator - GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const { + GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const + { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); // Does the function dedicate RBP to being a frame ptr? Modified: llvm/trunk/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll Thu Dec 17 18:01:26 2009 @@ -5,7 +5,7 @@ entry: ; CHECK: leal 15(%rsi), %edi ; CHECK-NOT: movl -; CHECK: call _foo +; CHECK: callq _foo %0 = add i32 %a, 15 ; [#uses=1] %1 = zext i32 %0 to i64 ; [#uses=1] tail call void @foo(i64 %1) nounwind Modified: llvm/trunk/test/CodeGen/X86/abi-isel.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/abi-isel.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/abi-isel.ll (original) +++ llvm/trunk/test/CodeGen/X86/abi-isel.ll Thu Dec 17 18:01:26 2009 @@ -8365,13 +8365,13 @@ tail call void @x() nounwind ret void ; LINUX-64-STATIC: lcallee: -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x -; LINUX-64-STATIC: call x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x +; LINUX-64-STATIC: callq x ; LINUX-64-STATIC: ret ; 
LINUX-32-STATIC: lcallee: @@ -8400,13 +8400,13 @@ ; LINUX-64-PIC: lcallee: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT -; LINUX-64-PIC-NEXT: call x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT +; LINUX-64-PIC-NEXT: callq x at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -8448,37 +8448,37 @@ ; DARWIN-64-STATIC: _lcallee: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x -; DARWIN-64-STATIC-NEXT: call _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x +; DARWIN-64-STATIC-NEXT: callq _x ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _lcallee: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x -; DARWIN-64-DYNAMIC-NEXT: call _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x +; DARWIN-64-DYNAMIC-NEXT: callq _x ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _lcallee: ; DARWIN-64-PIC: subq 
$8, %rsp -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x -; DARWIN-64-PIC-NEXT: call _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x +; DARWIN-64-PIC-NEXT: callq _x ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -8496,13 +8496,13 @@ tail call void @y() nounwind ret void ; LINUX-64-STATIC: dcallee: -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y -; LINUX-64-STATIC: call y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y +; LINUX-64-STATIC: callq y ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: dcallee: @@ -8531,13 +8531,13 @@ ; LINUX-64-PIC: dcallee: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT -; LINUX-64-PIC-NEXT: call y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT +; LINUX-64-PIC-NEXT: callq y at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -8579,37 +8579,37 @@ ; DARWIN-64-STATIC: _dcallee: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _y -; DARWIN-64-STATIC-NEXT: call _y -; DARWIN-64-STATIC-NEXT: call _y -; DARWIN-64-STATIC-NEXT: call _y -; DARWIN-64-STATIC-NEXT: call _y -; DARWIN-64-STATIC-NEXT: call _y -; 
DARWIN-64-STATIC-NEXT: call _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y +; DARWIN-64-STATIC-NEXT: callq _y ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _dcallee: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y -; DARWIN-64-DYNAMIC-NEXT: call _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y +; DARWIN-64-DYNAMIC-NEXT: callq _y ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _dcallee: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y -; DARWIN-64-PIC-NEXT: call _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y +; DARWIN-64-PIC-NEXT: callq _y ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -8765,8 +8765,8 @@ tail call void @callee() nounwind ret void ; LINUX-64-STATIC: caller: -; LINUX-64-STATIC: call callee -; LINUX-64-STATIC: call callee +; LINUX-64-STATIC: callq callee +; LINUX-64-STATIC: callq callee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: caller: @@ -8785,8 +8785,8 @@ ; LINUX-64-PIC: caller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call callee at PLT -; LINUX-64-PIC-NEXT: call callee at PLT +; LINUX-64-PIC-NEXT: callq 
callee at PLT +; LINUX-64-PIC-NEXT: callq callee at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -8813,22 +8813,22 @@ ; DARWIN-64-STATIC: _caller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _callee -; DARWIN-64-STATIC-NEXT: call _callee +; DARWIN-64-STATIC-NEXT: callq _callee +; DARWIN-64-STATIC-NEXT: callq _callee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _caller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _callee -; DARWIN-64-DYNAMIC-NEXT: call _callee +; DARWIN-64-DYNAMIC-NEXT: callq _callee +; DARWIN-64-DYNAMIC-NEXT: callq _callee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _caller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _callee -; DARWIN-64-PIC-NEXT: call _callee +; DARWIN-64-PIC-NEXT: callq _callee +; DARWIN-64-PIC-NEXT: callq _callee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -8839,8 +8839,8 @@ tail call void @dcallee() nounwind ret void ; LINUX-64-STATIC: dcaller: -; LINUX-64-STATIC: call dcallee -; LINUX-64-STATIC: call dcallee +; LINUX-64-STATIC: callq dcallee +; LINUX-64-STATIC: callq dcallee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: dcaller: @@ -8859,8 +8859,8 @@ ; LINUX-64-PIC: dcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call dcallee -; LINUX-64-PIC-NEXT: call dcallee +; LINUX-64-PIC-NEXT: callq dcallee +; LINUX-64-PIC-NEXT: callq dcallee ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -8887,22 +8887,22 @@ ; DARWIN-64-STATIC: _dcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _dcallee -; DARWIN-64-STATIC-NEXT: call _dcallee +; DARWIN-64-STATIC-NEXT: callq _dcallee +; DARWIN-64-STATIC-NEXT: callq _dcallee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _dcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _dcallee -; 
DARWIN-64-DYNAMIC-NEXT: call _dcallee +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _dcaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _dcallee -; DARWIN-64-PIC-NEXT: call _dcallee +; DARWIN-64-PIC-NEXT: callq _dcallee +; DARWIN-64-PIC-NEXT: callq _dcallee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -8913,8 +8913,8 @@ tail call void @lcallee() nounwind ret void ; LINUX-64-STATIC: lcaller: -; LINUX-64-STATIC: call lcallee -; LINUX-64-STATIC: call lcallee +; LINUX-64-STATIC: callq lcallee +; LINUX-64-STATIC: callq lcallee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: lcaller: @@ -8933,8 +8933,8 @@ ; LINUX-64-PIC: lcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call lcallee at PLT -; LINUX-64-PIC-NEXT: call lcallee at PLT +; LINUX-64-PIC-NEXT: callq lcallee at PLT +; LINUX-64-PIC-NEXT: callq lcallee at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -8961,22 +8961,22 @@ ; DARWIN-64-STATIC: _lcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _lcallee -; DARWIN-64-STATIC-NEXT: call _lcallee +; DARWIN-64-STATIC-NEXT: callq _lcallee +; DARWIN-64-STATIC-NEXT: callq _lcallee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _lcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _lcallee -; DARWIN-64-DYNAMIC-NEXT: call _lcallee +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _lcaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _lcallee -; DARWIN-64-PIC-NEXT: call _lcallee +; DARWIN-64-PIC-NEXT: callq _lcallee +; DARWIN-64-PIC-NEXT: callq _lcallee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -8986,7 +8986,7 @@ tail call void @callee() nounwind 
ret void ; LINUX-64-STATIC: tailcaller: -; LINUX-64-STATIC: call callee +; LINUX-64-STATIC: callq callee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: tailcaller: @@ -9003,7 +9003,7 @@ ; LINUX-64-PIC: tailcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call callee at PLT +; LINUX-64-PIC-NEXT: callq callee at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9027,19 +9027,19 @@ ; DARWIN-64-STATIC: _tailcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _callee +; DARWIN-64-STATIC-NEXT: callq _callee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _tailcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _callee +; DARWIN-64-DYNAMIC-NEXT: callq _callee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _tailcaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _callee +; DARWIN-64-PIC-NEXT: callq _callee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -9049,7 +9049,7 @@ tail call void @dcallee() nounwind ret void ; LINUX-64-STATIC: dtailcaller: -; LINUX-64-STATIC: call dcallee +; LINUX-64-STATIC: callq dcallee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: dtailcaller: @@ -9066,7 +9066,7 @@ ; LINUX-64-PIC: dtailcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call dcallee +; LINUX-64-PIC-NEXT: callq dcallee ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9090,19 +9090,19 @@ ; DARWIN-64-STATIC: _dtailcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _dcallee +; DARWIN-64-STATIC-NEXT: callq _dcallee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _dtailcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _dcallee +; DARWIN-64-DYNAMIC-NEXT: callq _dcallee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _dtailcaller: ; DARWIN-64-PIC: subq $8, %rsp -; 
DARWIN-64-PIC-NEXT: call _dcallee +; DARWIN-64-PIC-NEXT: callq _dcallee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -9112,7 +9112,7 @@ tail call void @lcallee() nounwind ret void ; LINUX-64-STATIC: ltailcaller: -; LINUX-64-STATIC: call lcallee +; LINUX-64-STATIC: callq lcallee ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: ltailcaller: @@ -9129,7 +9129,7 @@ ; LINUX-64-PIC: ltailcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call lcallee at PLT +; LINUX-64-PIC-NEXT: callq lcallee at PLT ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9153,19 +9153,19 @@ ; DARWIN-64-STATIC: _ltailcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call _lcallee +; DARWIN-64-STATIC-NEXT: callq _lcallee ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _ltailcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call _lcallee +; DARWIN-64-DYNAMIC-NEXT: callq _lcallee ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _ltailcaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call _lcallee +; DARWIN-64-PIC-NEXT: callq _lcallee ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -9178,8 +9178,8 @@ tail call void %1() nounwind ret void ; LINUX-64-STATIC: icaller: -; LINUX-64-STATIC: call *ifunc -; LINUX-64-STATIC: call *ifunc +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: callq *ifunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: icaller: @@ -9199,8 +9199,8 @@ ; LINUX-64-PIC: icaller: ; LINUX-64-PIC: pushq %rbx ; LINUX-64-PIC-NEXT: movq ifunc at GOTPCREL(%rip), %rbx -; LINUX-64-PIC-NEXT: call *(%rbx) -; LINUX-64-PIC-NEXT: call *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx ; LINUX-64-PIC-NEXT: ret @@ -9237,24 +9237,24 @@ ; DARWIN-64-STATIC: _icaller: ; DARWIN-64-STATIC: pushq %rbx ; DARWIN-64-STATIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; 
DARWIN-64-STATIC-NEXT: call *(%rbx) -; DARWIN-64-STATIC-NEXT: call *(%rbx) +; DARWIN-64-STATIC-NEXT: callq *(%rbx) +; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: popq %rbx ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _icaller: ; DARWIN-64-DYNAMIC: pushq %rbx ; DARWIN-64-DYNAMIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; DARWIN-64-DYNAMIC-NEXT: call *(%rbx) -; DARWIN-64-DYNAMIC-NEXT: call *(%rbx) +; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) +; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: popq %rbx ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _icaller: ; DARWIN-64-PIC: pushq %rbx ; DARWIN-64-PIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; DARWIN-64-PIC-NEXT: call *(%rbx) -; DARWIN-64-PIC-NEXT: call *(%rbx) +; DARWIN-64-PIC-NEXT: callq *(%rbx) +; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: popq %rbx ; DARWIN-64-PIC-NEXT: ret } @@ -9267,8 +9267,8 @@ tail call void %1() nounwind ret void ; LINUX-64-STATIC: dicaller: -; LINUX-64-STATIC: call *difunc -; LINUX-64-STATIC: call *difunc +; LINUX-64-STATIC: callq *difunc +; LINUX-64-STATIC: callq *difunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: dicaller: @@ -9288,8 +9288,8 @@ ; LINUX-64-PIC: dicaller: ; LINUX-64-PIC: pushq %rbx ; LINUX-64-PIC-NEXT: movq difunc at GOTPCREL(%rip), %rbx -; LINUX-64-PIC-NEXT: call *(%rbx) -; LINUX-64-PIC-NEXT: call *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx ; LINUX-64-PIC-NEXT: ret @@ -9321,22 +9321,22 @@ ; DARWIN-64-STATIC: _dicaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call *_difunc(%rip) -; DARWIN-64-STATIC-NEXT: call *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _dicaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip) -; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip) 
+; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _dicaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call *_difunc(%rip) -; DARWIN-64-PIC-NEXT: call *_difunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -9349,8 +9349,8 @@ tail call void %1() nounwind ret void ; LINUX-64-STATIC: licaller: -; LINUX-64-STATIC: call *lifunc -; LINUX-64-STATIC: call *lifunc +; LINUX-64-STATIC: callq *lifunc +; LINUX-64-STATIC: callq *lifunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: licaller: @@ -9369,8 +9369,8 @@ ; LINUX-64-PIC: licaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call *lifunc(%rip) -; LINUX-64-PIC-NEXT: call *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9402,22 +9402,22 @@ ; DARWIN-64-STATIC: _licaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip) -; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _licaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip) -; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _licaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call *_lifunc(%rip) -; DARWIN-64-PIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; 
DARWIN-64-PIC-NEXT: ret } @@ -9430,8 +9430,8 @@ tail call void %1() nounwind ret void ; LINUX-64-STATIC: itailcaller: -; LINUX-64-STATIC: call *ifunc -; LINUX-64-STATIC: call *ifunc +; LINUX-64-STATIC: callq *ifunc +; LINUX-64-STATIC: callq *ifunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: itailcaller: @@ -9451,8 +9451,8 @@ ; LINUX-64-PIC: itailcaller: ; LINUX-64-PIC: pushq %rbx ; LINUX-64-PIC-NEXT: movq ifunc at GOTPCREL(%rip), %rbx -; LINUX-64-PIC-NEXT: call *(%rbx) -; LINUX-64-PIC-NEXT: call *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) +; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx ; LINUX-64-PIC-NEXT: ret @@ -9489,24 +9489,24 @@ ; DARWIN-64-STATIC: _itailcaller: ; DARWIN-64-STATIC: pushq %rbx ; DARWIN-64-STATIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; DARWIN-64-STATIC-NEXT: call *(%rbx) -; DARWIN-64-STATIC-NEXT: call *(%rbx) +; DARWIN-64-STATIC-NEXT: callq *(%rbx) +; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: popq %rbx ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _itailcaller: ; DARWIN-64-DYNAMIC: pushq %rbx ; DARWIN-64-DYNAMIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; DARWIN-64-DYNAMIC-NEXT: call *(%rbx) -; DARWIN-64-DYNAMIC-NEXT: call *(%rbx) +; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) +; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: popq %rbx ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _itailcaller: ; DARWIN-64-PIC: pushq %rbx ; DARWIN-64-PIC-NEXT: movq _ifunc at GOTPCREL(%rip), %rbx -; DARWIN-64-PIC-NEXT: call *(%rbx) -; DARWIN-64-PIC-NEXT: call *(%rbx) +; DARWIN-64-PIC-NEXT: callq *(%rbx) +; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: popq %rbx ; DARWIN-64-PIC-NEXT: ret } @@ -9517,7 +9517,7 @@ tail call void %0() nounwind ret void ; LINUX-64-STATIC: ditailcaller: -; LINUX-64-STATIC: call *difunc +; LINUX-64-STATIC: callq *difunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: ditailcaller: @@ -9535,7 +9535,7 @@ ; LINUX-64-PIC: ditailcaller: ; LINUX-64-PIC: subq $8, %rsp ; 
LINUX-64-PIC-NEXT: movq difunc at GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: call *(%rax) +; LINUX-64-PIC-NEXT: callq *(%rax) ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9562,18 +9562,18 @@ ; DARWIN-64-STATIC: _ditailcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _ditailcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _ditailcaller: -; DARWIN-64-PIC: call *_difunc(%rip) +; DARWIN-64-PIC: callq *_difunc(%rip) ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } @@ -9584,7 +9584,7 @@ tail call void %0() nounwind ret void ; LINUX-64-STATIC: litailcaller: -; LINUX-64-STATIC: call *lifunc +; LINUX-64-STATIC: callq *lifunc ; LINUX-64-STATIC: ret ; LINUX-32-STATIC: litailcaller: @@ -9601,7 +9601,7 @@ ; LINUX-64-PIC: litailcaller: ; LINUX-64-PIC: subq $8, %rsp -; LINUX-64-PIC-NEXT: call *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *lifunc(%rip) ; LINUX-64-PIC-NEXT: addq $8, %rsp ; LINUX-64-PIC-NEXT: ret @@ -9628,19 +9628,19 @@ ; DARWIN-64-STATIC: _litailcaller: ; DARWIN-64-STATIC: subq $8, %rsp -; DARWIN-64-STATIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-STATIC-NEXT: addq $8, %rsp ; DARWIN-64-STATIC-NEXT: ret ; DARWIN-64-DYNAMIC: _litailcaller: ; DARWIN-64-DYNAMIC: subq $8, %rsp -; DARWIN-64-DYNAMIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-DYNAMIC-NEXT: addq $8, %rsp ; DARWIN-64-DYNAMIC-NEXT: ret ; DARWIN-64-PIC: _litailcaller: ; DARWIN-64-PIC: subq $8, %rsp -; DARWIN-64-PIC-NEXT: call *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) ; DARWIN-64-PIC-NEXT: addq $8, %rsp ; DARWIN-64-PIC-NEXT: ret } 
Modified: llvm/trunk/test/CodeGen/X86/bss_pagealigned.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bss_pagealigned.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/bss_pagealigned.ll (original) +++ llvm/trunk/test/CodeGen/X86/bss_pagealigned.ll Thu Dec 17 18:01:26 2009 @@ -10,7 +10,7 @@ ; CHECK: movq $bm_pte, %rdi ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: movl $4096, %edx -; CHECK-NEXT: call memset +; CHECK-NEXT: callq memset ret void } @bm_pte = internal global [512 x %struct.kmem_cache_order_objects] zeroinitializer, section ".bss.page_aligned", align 4096 Modified: llvm/trunk/test/CodeGen/X86/cmov.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cmov.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/cmov.ll (original) +++ llvm/trunk/test/CodeGen/X86/cmov.ll Thu Dec 17 18:01:26 2009 @@ -6,7 +6,7 @@ ; CHECK: test1: ; CHECK: btl ; CHECK-NEXT: movl $12, %eax -; CHECK-NEXT: cmovae (%rcx), %eax +; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret %0 = lshr i32 %x, %n ; [#uses=1] @@ -21,7 +21,7 @@ ; CHECK: test2: ; CHECK: btl ; CHECK-NEXT: movl $12, %eax -; CHECK-NEXT: cmovb (%rcx), %eax +; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret %0 = lshr i32 %x, %n ; [#uses=1] @@ -41,7 +41,7 @@ define void @test3(i64 %a, i64 %b, i1 %p) nounwind { ; CHECK: test3: -; CHECK: cmovne %edi, %esi +; CHECK: cmovnel %edi, %esi ; CHECK-NEXT: movl %esi, %edi %c = trunc i64 %a to i32 Modified: llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll (original) +++ 
llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll Thu Dec 17 18:01:26 2009 @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep {testb \[$\]1,} +; RUN: llc < %s -march=x86-64 | grep testb ; Make sure dagcombine doesn't eliminate the comparison due ; to an off-by-one bug with ComputeMaskedBits information. Modified: llvm/trunk/test/CodeGen/X86/loop-blocks.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-blocks.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/loop-blocks.ll (original) +++ llvm/trunk/test/CodeGen/X86/loop-blocks.ll Thu Dec 17 18:01:26 2009 @@ -10,9 +10,9 @@ ; CHECK: jmp .LBB1_1 ; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: call loop_latch +; CHECK-NEXT: callq loop_latch ; CHECK-NEXT: .LBB1_1: -; CHECK-NEXT: call loop_header +; CHECK-NEXT: callq loop_header define void @simple() nounwind { entry: @@ -40,9 +40,9 @@ ; CHECK: jmp .LBB2_1 ; CHECK-NEXT: align ; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: call bar99 +; CHECK-NEXT: callq bar99 ; CHECK-NEXT: .LBB2_1: -; CHECK-NEXT: call body +; CHECK-NEXT: callq body define void @slightly_more_involved() nounwind { entry: @@ -75,18 +75,18 @@ ; CHECK: jmp .LBB3_1 ; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: call bar99 -; CHECK-NEXT: call get +; CHECK-NEXT: callq bar99 +; CHECK-NEXT: callq get ; CHECK-NEXT: cmpl $2999, %eax ; CHECK-NEXT: jg .LBB3_6 -; CHECK-NEXT: call block_a_true_func +; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: jmp .LBB3_7 ; CHECK-NEXT: .LBB3_6: -; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: callq block_a_false_func ; CHECK-NEXT: .LBB3_7: -; CHECK-NEXT: call block_a_merge_func +; CHECK-NEXT: callq block_a_merge_func ; CHECK-NEXT: .LBB3_1: -; CHECK-NEXT: call body +; CHECK-NEXT: callq body define void @yet_more_involved() nounwind { entry: @@ -134,18 +134,18 @@ ; CHECK: jmp .LBB4_1 ; CHECK-NEXT: align ; CHECK-NEXT: .LBB4_7: -; 
CHECK-NEXT: call bar100 +; CHECK-NEXT: callq bar100 ; CHECK-NEXT: jmp .LBB4_1 ; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: call bar101 +; CHECK-NEXT: callq bar101 ; CHECK-NEXT: jmp .LBB4_1 ; CHECK-NEXT: .LBB4_9: -; CHECK-NEXT: call bar102 +; CHECK-NEXT: callq bar102 ; CHECK-NEXT: jmp .LBB4_1 ; CHECK-NEXT: .LBB4_5: -; CHECK-NEXT: call loop_latch +; CHECK-NEXT: callq loop_latch ; CHECK-NEXT: .LBB4_1: -; CHECK-NEXT: call loop_header +; CHECK-NEXT: callq loop_header define void @cfg_islands() nounwind { entry: Modified: llvm/trunk/test/CodeGen/X86/peep-test-3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peep-test-3.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/peep-test-3.ll (original) +++ llvm/trunk/test/CodeGen/X86/peep-test-3.ll Thu Dec 17 18:01:26 2009 @@ -65,7 +65,7 @@ ret void } -; Just like @and, but without the trunc+store. This should use a testl +; Just like @and, but without the trunc+store. This should use a testb ; instead of an andl. 
; CHECK: test: define void @test(float* %A, i32 %IA, i32 %N, i8* %p) nounwind { Modified: llvm/trunk/test/CodeGen/X86/select-aggregate.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-aggregate.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/select-aggregate.ll (original) +++ llvm/trunk/test/CodeGen/X86/select-aggregate.ll Thu Dec 17 18:01:26 2009 @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86-64 | FileCheck %s ; PR5757 -; CHECK: cmovne %rdi, %rsi +; CHECK: cmovneq %rdi, %rsi ; CHECK: movl (%rsi), %eax %0 = type { i64, i32 } Modified: llvm/trunk/test/CodeGen/X86/tail-opts.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tail-opts.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/tail-opts.ll (original) +++ llvm/trunk/test/CodeGen/X86/tail-opts.ll Thu Dec 17 18:01:26 2009 @@ -274,7 +274,7 @@ ; one ret instruction. 
; CHECK: foo: -; CHECK: call func +; CHECK: callq func ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: ret Modified: llvm/trunk/test/CodeGen/X86/widen_load-1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-1.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/widen_load-1.ll (original) +++ llvm/trunk/test/CodeGen/X86/widen_load-1.ll Thu Dec 17 18:01:26 2009 @@ -5,7 +5,7 @@ ; CHECK: movq compl+128(%rip), %xmm0 ; CHECK: movaps %xmm0, (%rsp) -; CHECK: call killcommon +; CHECK: callq killcommon @compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1] Modified: llvm/trunk/test/CodeGen/X86/x86-64-pic-1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-pic-1.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-pic-1.ll (original) +++ llvm/trunk/test/CodeGen/X86/x86-64-pic-1.ll Thu Dec 17 18:01:26 2009 @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {call f at PLT} %t1 +; RUN: grep {callq f at PLT} %t1 define void @g() { entry: Modified: llvm/trunk/test/CodeGen/X86/x86-64-pic-10.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-pic-10.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-pic-10.ll (original) +++ llvm/trunk/test/CodeGen/X86/x86-64-pic-10.ll Thu Dec 17 18:01:26 2009 @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {call g at PLT} %t1 +; RUN: grep {callq g at PLT} %t1 @g = alias weak i32 ()* @f Modified: llvm/trunk/test/CodeGen/X86/x86-64-pic-11.ll URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-pic-11.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-pic-11.ll (original) +++ llvm/trunk/test/CodeGen/X86/x86-64-pic-11.ll Thu Dec 17 18:01:26 2009 @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {call __fixunsxfti at PLT} %t1 +; RUN: grep {callq __fixunsxfti at PLT} %t1 define i128 @f(x86_fp80 %a) nounwind { entry: Modified: llvm/trunk/test/CodeGen/X86/x86-64-pic-2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-pic-2.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-pic-2.ll (original) +++ llvm/trunk/test/CodeGen/X86/x86-64-pic-2.ll Thu Dec 17 18:01:26 2009 @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {call f} %t1 -; RUN: not grep {call f at PLT} %t1 +; RUN: grep {callq f} %t1 +; RUN: not grep {callq f at PLT} %t1 define void @g() { entry: Modified: llvm/trunk/test/CodeGen/X86/x86-64-pic-3.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-64-pic-3.ll?rev=91638&r1=91637&r2=91638&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/x86-64-pic-3.ll (original) +++ llvm/trunk/test/CodeGen/X86/x86-64-pic-3.ll Thu Dec 17 18:01:26 2009 @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {call f} %t1 -; RUN: not grep {call f at PLT} %t1 +; RUN: grep {callq f} %t1 +; RUN: not grep {callq f at PLT} %t1 define void @g() { entry: From gohman at apple.com Thu Dec 17 18:03:58 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 00:03:58 -0000 Subject: [llvm-commits] [llvm] r91639 - 
/llvm/trunk/test/Transforms/ABCD/ Message-ID: <200912180003.nBI03wet010955@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 18:03:58 2009 New Revision: 91639 URL: http://llvm.org/viewvc/llvm-project?rev=91639&view=rev Log: Add an svn:ignore. Modified: llvm/trunk/test/Transforms/ABCD/ (props changed) Propchange: llvm/trunk/test/Transforms/ABCD/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Thu Dec 17 18:03:58 2009 @@ -0,0 +1,3 @@ +Output +*.log +*.sum From gohman at apple.com Thu Dec 17 18:06:20 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 00:06:20 -0000 Subject: [llvm-commits] [llvm] r91641 - in /llvm/trunk: include/llvm/Analysis/IVUsers.h include/llvm/Analysis/LoopInfo.h lib/Analysis/IVUsers.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912180006.nBI06KKR011124@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 18:06:20 2009 New Revision: 91641 URL: http://llvm.org/viewvc/llvm-project?rev=91641&view=rev Log: Reapply LoopStrengthReduce and IVUsers cleanups, excluding the part of 91296 that caused trouble -- the Processed list needs to be preserved for the lifetime of the pass, as AddUsersIfInteresting is called from other passes. 
Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h llvm/trunk/include/llvm/Analysis/LoopInfo.h llvm/trunk/lib/Analysis/IVUsers.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/include/llvm/Analysis/IVUsers.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/IVUsers.h?rev=91641&r1=91640&r2=91641&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/IVUsers.h (original) +++ llvm/trunk/include/llvm/Analysis/IVUsers.h Thu Dec 17 18:06:20 2009 @@ -175,11 +175,11 @@ ScalarEvolution *SE; SmallPtrSet Processed; -public: /// IVUses - A list of all tracked IV uses of induction variable expressions /// we are interested in. ilist IVUses; +public: /// IVUsesByStride - A mapping from the strides in StrideOrder to the /// uses in IVUses. std::map IVUsesByStride; Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopInfo.h?rev=91641&r1=91640&r2=91641&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/LoopInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/LoopInfo.h Thu Dec 17 18:06:20 2009 @@ -976,13 +976,6 @@ void removeBlock(BasicBlock *BB) { LI.removeBlock(BB); } - - static bool isNotAlreadyContainedIn(const Loop *SubLoop, - const Loop *ParentLoop) { - return - LoopInfoBase::isNotAlreadyContainedIn(SubLoop, - ParentLoop); - } }; Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91641&r1=91640&r2=91641&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Thu Dec 17 18:06:20 2009 @@ -53,7 +53,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. 
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -370,6 +370,7 @@ IVUsesByStride.clear(); StrideOrder.clear(); Processed.clear(); + IVUses.clear(); } void IVStrideUse::deleted() { Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91641&r1=91640&r2=91641&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Dec 17 18:06:20 2009 @@ -24,18 +24,14 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -85,8 +81,6 @@ class LoopStrengthReduce : public LoopPass { IVUsers *IU; - LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -94,10 +88,6 @@ /// particular stride. std::map IVsByStride; - /// StrideNoReuse - Keep track of all the strides whose ivs cannot be - /// reused (nor should they be rewritten to reuse other strides). - SmallSet StrideNoReuse; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. 
SmallVector DeadInsts; @@ -109,8 +99,7 @@ public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) { - } + LoopPass(&ID), TLI(tli) {} bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -118,13 +107,11 @@ // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved("loops"); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); - AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -228,19 +215,17 @@ if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null(DeadInsts.back()); - DeadInsts.pop_back(); + Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } - } I->eraseFromParent(); Changed = true; @@ -300,9 +285,6 @@ /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { - /// SE - The current ScalarEvolution object. - ScalarEvolution *SE; - /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). 
BasedUser values are sorted by this @@ -334,9 +316,9 @@ bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(se->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -345,14 +327,14 @@ void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts, + ScalarEvolution *SE); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI); + Instruction *IP, + ScalarEvolution *SE); void dump() const; }; } @@ -366,27 +348,12 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI) { - // Figure out where we *really* want to insert this code. In particular, if - // the user is inside of a loop that is nested inside of L, we really don't - // want to insert this expression before the user, we'd rather pull it out as - // many loops as possible. - Instruction *BaseInsertPt = IP; - - // Figure out the most-nested loop that IP is in. - Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - - // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out - // the preheader of the outer-most loop where NewBase is not loop invariant. 
- if (L->contains(IP->getParent())) - while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { - BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); - InsertLoop = InsertLoop->getParentLoop(); - } - - Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + Instruction *IP, + ScalarEvolution *SE) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -405,8 +372,8 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts, + ScalarEvolution *SE) { if (!isa(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -435,7 +402,7 @@ } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); // Replace the use of the operand Value with the new Phi we just created. 
Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -497,7 +464,7 @@ PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -973,17 +940,13 @@ const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { - if (StrideNoReuse.count(Stride)) - return SE->getIntegerSCEV(0, Stride->getType()); - if (const SCEVConstant *SC = dyn_cast(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa(SI->first) || - StrideNoReuse.count(SI->first)) + if (SI == IVsByStride.end() || !isa(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map::iterator UI = @@ -1742,8 +1705,8 @@ RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, *LI, - DeadInsts); + Rewriter, L, this, + DeadInsts, SE); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. @@ -2707,8 +2670,6 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis(); - LI = &getAnalysis(); - DT = &getAnalysis(); SE = &getAnalysis(); Changed = false; @@ -2754,15 +2715,14 @@ // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - } - // We're done analyzing this loop; release all the state we built up for it. 
- IVsByStride.clear(); - StrideNoReuse.clear(); - - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); + + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + } // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. From stoklund at 2pi.dk Thu Dec 17 18:11:44 2009 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 18 Dec 2009 00:11:44 -0000 Subject: [llvm-commits] [llvm] r91642 - /llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll Message-ID: <200912180011.nBI0Bi2q011388@zion.cs.uiuc.edu> Author: stoklund Date: Thu Dec 17 18:11:44 2009 New Revision: 91642 URL: http://llvm.org/viewvc/llvm-project?rev=91642&view=rev Log: Add test case for the phi reuse patch. Added: llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll Added: llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll?rev=91642&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll (added) +++ llvm/trunk/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll Thu Dec 17 18:11:44 2009 @@ -0,0 +1,66 @@ +; RUN: llc -O3 -pre-regalloc-taildup < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This test should not produce any spills, even when tail duplication creates lots of phi nodes. 
+; CHECK-NOT: push +; CHECK-NOT: pop +; CHECK: bx lr + + at codetable.2928 = internal constant [5 x i8*] [i8* blockaddress(@interpret_threaded, %RETURN), i8* blockaddress(@interpret_threaded, %INCREMENT), i8* blockaddress(@interpret_threaded, %DECREMENT), i8* blockaddress(@interpret_threaded, %DOUBLE), i8* blockaddress(@interpret_threaded, %SWAPWORD)] ; <[5 x i8*]*> [#uses=5] + at llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i8*)* @interpret_threaded to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define arm_apcscc i32 @interpret_threaded(i8* nocapture %opcodes) nounwind readonly optsize { +entry: + %0 = load i8* %opcodes, align 1 ; [#uses=1] + %1 = zext i8 %0 to i32 ; [#uses=1] + %2 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; [#uses=1] + br label %bb + +bb: ; preds = %bb.backedge, %entry + %indvar = phi i32 [ %phitmp, %bb.backedge ], [ 1, %entry ] ; [#uses=2] + %gotovar.22.0.in = phi i8** [ %gotovar.22.0.in.be, %bb.backedge ], [ %2, %entry ] ; [#uses=1] + %result.0 = phi i32 [ %result.0.be, %bb.backedge ], [ 0, %entry ] ; [#uses=6] + %opcodes_addr.0 = getelementptr i8* %opcodes, i32 %indvar ; [#uses=4] + %gotovar.22.0 = load i8** %gotovar.22.0.in, align 4 ; [#uses=1] + indirectbr i8* %gotovar.22.0, [label %RETURN, label %INCREMENT, label %DECREMENT, label %DOUBLE, label %SWAPWORD] + +RETURN: ; preds = %bb + ret i32 %result.0 + +INCREMENT: ; preds = %bb + %3 = add nsw i32 %result.0, 1 ; [#uses=1] + %4 = load i8* %opcodes_addr.0, align 1 ; [#uses=1] + %5 = zext i8 %4 to i32 ; [#uses=1] + %6 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %5 ; [#uses=1] + br label %bb.backedge + +bb.backedge: ; preds = %SWAPWORD, %DOUBLE, %DECREMENT, %INCREMENT + %gotovar.22.0.in.be = phi i8** [ %20, %SWAPWORD ], [ %14, %DOUBLE ], [ %10, %DECREMENT ], [ %6, %INCREMENT ] ; [#uses=1] + %result.0.be = phi i32 [ %17, %SWAPWORD ], [ %11, %DOUBLE ], [ %7, %DECREMENT ], [ %3, %INCREMENT ] ; [#uses=1] + %phitmp = add i32 
%indvar, 1 ; [#uses=1] + br label %bb + +DECREMENT: ; preds = %bb + %7 = add i32 %result.0, -1 ; [#uses=1] + %8 = load i8* %opcodes_addr.0, align 1 ; [#uses=1] + %9 = zext i8 %8 to i32 ; [#uses=1] + %10 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; [#uses=1] + br label %bb.backedge + +DOUBLE: ; preds = %bb + %11 = shl i32 %result.0, 1 ; [#uses=1] + %12 = load i8* %opcodes_addr.0, align 1 ; [#uses=1] + %13 = zext i8 %12 to i32 ; [#uses=1] + %14 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; [#uses=1] + br label %bb.backedge + +SWAPWORD: ; preds = %bb + %15 = shl i32 %result.0, 16 ; [#uses=1] + %16 = ashr i32 %result.0, 16 ; [#uses=1] + %17 = or i32 %15, %16 ; [#uses=1] + %18 = load i8* %opcodes_addr.0, align 1 ; [#uses=1] + %19 = zext i8 %18 to i32 ; [#uses=1] + %20 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; [#uses=1] + br label %bb.backedge +} From rjmccall at apple.com Thu Dec 17 18:27:19 2009 From: rjmccall at apple.com (John McCall) Date: Fri, 18 Dec 2009 00:27:19 -0000 Subject: [llvm-commits] [llvm] r91644 - /llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp Message-ID: <200912180027.nBI0RJvx012173@zion.cs.uiuc.edu> Author: rjmccall Date: Thu Dec 17 18:27:18 2009 New Revision: 91644 URL: http://llvm.org/viewvc/llvm-project?rev=91644&view=rev Log: Pass the error string directly to llvm_unreachable instead of the residual (0 && "error"). Rough consensus seems to be that g++ *should* be diagnosing this because the pointer makes it not an ICE in c++03. Everyone agrees that the current standard is silly and null-pointer-ness should not be based on ICE-ness. Excellent fight scene in Act II, denouement weak, two stars. 
Modified: llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp Modified: llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp?rev=91644&r1=91643&r2=91644&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp (original) +++ llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp Thu Dec 17 18:27:18 2009 @@ -34,7 +34,7 @@ Mang.getNameWithPrefix(Name, GV, false); switch (MO.getTargetFlags()) { - default: llvm_unreachable(0 && "Unknown target flag on GV operand"); + default: llvm_unreachable("Unknown target flag on GV operand"); case 0: break; } From gohman at apple.com Thu Dec 17 18:28:43 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 00:28:43 -0000 Subject: [llvm-commits] [llvm] r91645 - /llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp Message-ID: <200912180028.nBI0ShlR012236@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 18:28:43 2009 New Revision: 91645 URL: http://llvm.org/viewvc/llvm-project?rev=91645&view=rev Log: Update a comment. 
Modified: llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp Modified: llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp?rev=91645&r1=91644&r2=91645&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp Thu Dec 17 18:28:43 2009 @@ -109,7 +109,7 @@ const PassInfo *const llvm::LoopSimplifyID = &X; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } -/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// runOnLoop - Run down all loops in the CFG (recursively, but we could do /// it in any convenient order) inserting preheaders... /// bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { From gohman at apple.com Thu Dec 17 18:38:08 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 00:38:08 -0000 Subject: [llvm-commits] [llvm] r91647 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912180038.nBI0c8GK012612@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 18:38:08 2009 New Revision: 91647 URL: http://llvm.org/viewvc/llvm-project?rev=91647&view=rev Log: Don't pass const pointers by reference. Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91647&r1=91646&r2=91647&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Dec 17 18:38:08 2009 @@ -144,7 +144,7 @@ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a /// single stride of IV. 
All of the users may have different starting /// values, and this may not be the only stride. - void StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + void StrengthReduceIVUsersOfStride(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L); void StrengthReduceIVUsers(Loop *L); @@ -157,14 +157,14 @@ bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEV* &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, + const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, IVExpr&, const Type*, const std::vector& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector& UsersToProcess); bool ValidOffset(bool, int64_t, int64_t, const std::vector& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *const &Stride, + const SCEV *CollectIVUsers(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -324,13 +324,13 @@ // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. - void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, + void RewriteInstructionToUseNewBase(const SCEV *NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, SmallVectorImpl &DeadInsts, ScalarEvolution *SE); - Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, + Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, @@ -345,7 +345,7 @@ errs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, @@ -369,7 +369,7 @@ // value of NewBase in the case that it's a diffferent instruction from // the PHI that NewBase is computed from, or null otherwise. 
// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, SmallVectorImpl &DeadInsts, @@ -485,7 +485,7 @@ /// fitsInAddressMode - Return true if V can be subsumed within an addressing /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, const TargetLowering *TLI, bool HasBaseReg) { if (const SCEVConstant *SC = dyn_cast(V)) { int64_t VC = SC->getValue()->getSExtValue(); @@ -937,7 +937,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV *const &Stride, + const SCEV *Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { if (const SCEVConstant *SC = dyn_cast(Stride)) { @@ -1050,7 +1050,7 @@ /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. -static bool isNonConstantNegative(const SCEV *const &Expr) { +static bool isNonConstantNegative(const SCEV *Expr) { const SCEVMulExpr *Mul = dyn_cast(Expr); if (!Mul) return false; @@ -1067,7 +1067,7 @@ /// base of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, +const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -1444,7 +1444,7 @@ /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. 
void -LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, +LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L) { // If all the users are moved to another stride, then there is nothing to do. @@ -1777,7 +1777,7 @@ const ScalarEvolution *SE; explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) { + bool operator()(const SCEV *LHS, const SCEV *RHS) { const SCEVConstant *LHSC = dyn_cast(LHS); const SCEVConstant *RHSC = dyn_cast(RHS); if (LHSC && RHSC) { From gohman at apple.com Thu Dec 17 19:02:18 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 01:02:18 -0000 Subject: [llvm-commits] [llvm] r91648 - /llvm/trunk/test/CodeGen/X86/abi-isel.ll Message-ID: <200912180102.nBI12J2v013364@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 19:02:18 2009 New Revision: 91648 URL: http://llvm.org/viewvc/llvm-project?rev=91648&view=rev Log: Remove "tail" keywords. These calls are not intended to be tail calls. This protects this test from depending on codegen not performing the tail call optimization by default. 
Modified: llvm/trunk/test/CodeGen/X86/abi-isel.ll Modified: llvm/trunk/test/CodeGen/X86/abi-isel.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/abi-isel.ll?rev=91648&r1=91647&r2=91648&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/abi-isel.ll (original) +++ llvm/trunk/test/CodeGen/X86/abi-isel.ll Thu Dec 17 19:02:18 2009 @@ -8356,13 +8356,13 @@ define void @lcallee() nounwind { entry: - tail call void @x() nounwind - tail call void @x() nounwind - tail call void @x() nounwind - tail call void @x() nounwind - tail call void @x() nounwind - tail call void @x() nounwind - tail call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind + call void @x() nounwind ret void ; LINUX-64-STATIC: lcallee: ; LINUX-64-STATIC: callq x @@ -8487,13 +8487,13 @@ define internal void @dcallee() nounwind { entry: - tail call void @y() nounwind - tail call void @y() nounwind - tail call void @y() nounwind - tail call void @y() nounwind - tail call void @y() nounwind - tail call void @y() nounwind - tail call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind + call void @y() nounwind ret void ; LINUX-64-STATIC: dcallee: ; LINUX-64-STATIC: callq y @@ -8761,8 +8761,8 @@ define void @caller() nounwind { entry: - tail call void @callee() nounwind - tail call void @callee() nounwind + call void @callee() nounwind + call void @callee() nounwind ret void ; LINUX-64-STATIC: caller: ; LINUX-64-STATIC: callq callee @@ -8835,8 +8835,8 @@ define void @dcaller() nounwind { entry: - tail call void @dcallee() nounwind - tail call void @dcallee() nounwind + call void @dcallee() nounwind + call void @dcallee() nounwind ret void ; LINUX-64-STATIC: dcaller: ; 
LINUX-64-STATIC: callq dcallee @@ -8909,8 +8909,8 @@ define void @lcaller() nounwind { entry: - tail call void @lcallee() nounwind - tail call void @lcallee() nounwind + call void @lcallee() nounwind + call void @lcallee() nounwind ret void ; LINUX-64-STATIC: lcaller: ; LINUX-64-STATIC: callq lcallee @@ -8983,7 +8983,7 @@ define void @tailcaller() nounwind { entry: - tail call void @callee() nounwind + call void @callee() nounwind ret void ; LINUX-64-STATIC: tailcaller: ; LINUX-64-STATIC: callq callee @@ -9046,7 +9046,7 @@ define void @dtailcaller() nounwind { entry: - tail call void @dcallee() nounwind + call void @dcallee() nounwind ret void ; LINUX-64-STATIC: dtailcaller: ; LINUX-64-STATIC: callq dcallee @@ -9109,7 +9109,7 @@ define void @ltailcaller() nounwind { entry: - tail call void @lcallee() nounwind + call void @lcallee() nounwind ret void ; LINUX-64-STATIC: ltailcaller: ; LINUX-64-STATIC: callq lcallee @@ -9173,9 +9173,9 @@ define void @icaller() nounwind { entry: %0 = load void ()** @ifunc, align 8 - tail call void %0() nounwind + call void %0() nounwind %1 = load void ()** @ifunc, align 8 - tail call void %1() nounwind + call void %1() nounwind ret void ; LINUX-64-STATIC: icaller: ; LINUX-64-STATIC: callq *ifunc @@ -9262,9 +9262,9 @@ define void @dicaller() nounwind { entry: %0 = load void ()** @difunc, align 8 - tail call void %0() nounwind + call void %0() nounwind %1 = load void ()** @difunc, align 8 - tail call void %1() nounwind + call void %1() nounwind ret void ; LINUX-64-STATIC: dicaller: ; LINUX-64-STATIC: callq *difunc @@ -9344,9 +9344,9 @@ define void @licaller() nounwind { entry: %0 = load void ()** @lifunc, align 8 - tail call void %0() nounwind + call void %0() nounwind %1 = load void ()** @lifunc, align 8 - tail call void %1() nounwind + call void %1() nounwind ret void ; LINUX-64-STATIC: licaller: ; LINUX-64-STATIC: callq *lifunc @@ -9425,9 +9425,9 @@ define void @itailcaller() nounwind { entry: %0 = load void ()** @ifunc, align 8 - 
tail call void %0() nounwind + call void %0() nounwind %1 = load void ()** @ifunc, align 8 - tail call void %1() nounwind + call void %1() nounwind ret void ; LINUX-64-STATIC: itailcaller: ; LINUX-64-STATIC: callq *ifunc @@ -9514,7 +9514,7 @@ define void @ditailcaller() nounwind { entry: %0 = load void ()** @difunc, align 8 - tail call void %0() nounwind + call void %0() nounwind ret void ; LINUX-64-STATIC: ditailcaller: ; LINUX-64-STATIC: callq *difunc @@ -9581,7 +9581,7 @@ define void @litailcaller() nounwind { entry: %0 = load void ()** @lifunc, align 8 - tail call void %0() nounwind + call void %0() nounwind ret void ; LINUX-64-STATIC: litailcaller: ; LINUX-64-STATIC: callq *lifunc From bob.wilson at apple.com Thu Dec 17 19:03:30 2009 From: bob.wilson at apple.com (Bob Wilson) Date: Fri, 18 Dec 2009 01:03:30 -0000 Subject: [llvm-commits] [llvm] r91649 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/inlineasm3.ll Message-ID: <200912180103.nBI13VKc013422@zion.cs.uiuc.edu> Author: bwilson Date: Thu Dec 17 19:03:29 2009 New Revision: 91649 URL: http://llvm.org/viewvc/llvm-project?rev=91649&view=rev Log: Handle ARM inline asm "w" constraints with 64-bit ("d") registers. The change in SelectionDAGBuilder is needed to allow using bitcasts to convert between f64 (the default type for ARM "d" registers) and 64-bit Neon vector types. Radar 7457110. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/test/CodeGen/ARM/inlineasm3.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=91649&r1=91648&r2=91649&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Dec 17 19:03:29 2009 @@ -443,7 +443,7 @@ // Vector ValueVT. if (NumParts == 1) { if (PartVT != ValueVT) { - if (PartVT.isVector()) { + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); } else { assert(ValueVT.getVectorElementType() == PartVT && Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=91649&r1=91648&r2=91649&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Dec 17 19:03:29 2009 @@ -4265,7 +4265,7 @@ case 'w': if (VT == MVT::f32) return std::make_pair(0U, ARM::SPRRegisterClass); - if (VT == MVT::f64) + if (VT.getSizeInBits() == 64) return std::make_pair(0U, ARM::DPRRegisterClass); if (VT.getSizeInBits() == 128) return std::make_pair(0U, ARM::QPRRegisterClass); @@ -4302,7 +4302,7 @@ ARM::S20,ARM::S21,ARM::S22,ARM::S23, ARM::S24,ARM::S25,ARM::S26,ARM::S27, ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); - if (VT == MVT::f64) + if (VT.getSizeInBits() == 64) return make_vector(ARM::D0, ARM::D1, ARM::D2, ARM::D3, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10,ARM::D11, Modified: llvm/trunk/test/CodeGen/ARM/inlineasm3.ll URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/inlineasm3.ll?rev=91649&r1=91648&r2=91649&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/inlineasm3.ll (original) +++ llvm/trunk/test/CodeGen/ARM/inlineasm3.ll Thu Dec 17 19:03:29 2009 @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; Radar 7449043 %struct.int32x4_t = type { <4 x i32> } define arm_apcscc void @t() nounwind { @@ -11,3 +12,14 @@ call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind ret void } + +; Radar 7457110 +%struct.int32x2_t = type { <4 x i32> } + +define arm_apcscc void @t2() nounwind { +entry: +; CHECK: vmov d30, d0 +; CHECK: vmov.32 r0, d30[0] + %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind + ret void +} From gohman at apple.com Thu Dec 17 19:05:06 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 01:05:06 -0000 Subject: [llvm-commits] [llvm] r91650 - /llvm/trunk/test/CodeGen/X86/tailcall1.ll Message-ID: <200912180105.nBI156HO013481@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 19:05:06 2009 New Revision: 91650 URL: http://llvm.org/viewvc/llvm-project?rev=91650&view=rev Log: Tidy up this testcase and add test for tailcall optimization with unreachable. 
Modified: llvm/trunk/test/CodeGen/X86/tailcall1.ll Modified: llvm/trunk/test/CodeGen/X86/tailcall1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tailcall1.ll?rev=91650&r1=91649&r2=91650&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/tailcall1.ll (original) +++ llvm/trunk/test/CodeGen/X86/tailcall1.ll Thu Dec 17 19:05:06 2009 @@ -1,12 +1,10 @@ -; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 4 -define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { -entry: - ret i32 %a3 -} +; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL | count 5 + +declare fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) -define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) nounwind { entry: - %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + %tmp11 = tail call fastcc i32 @tailcallee(i32 %in1, i32 %in2, i32 %in1, i32 %in2) ret i32 %tmp11 } @@ -30,3 +28,10 @@ %p = tail call fastcc i32 @i32_callee() ret i32 undef } + +declare fastcc void @does_not_return() + +define fastcc i32 @noret() nounwind { + tail call fastcc void @does_not_return() + unreachable +} From gohman at apple.com Thu Dec 17 19:14:11 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 01:14:11 -0000 Subject: [llvm-commits] [llvm] r91651 - /llvm/trunk/lib/Analysis/ScalarEvolution.cpp Message-ID: <200912180114.nBI1EBK4013729@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 19:14:11 2009 New Revision: 91651 URL: http://llvm.org/viewvc/llvm-project?rev=91651&view=rev Log: Whitespace cleanups. 
Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=91651&r1=91650&r2=91651&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Dec 17 19:14:11 2009 @@ -1838,10 +1838,10 @@ // Canonicalize nested AddRecs in by nesting them in order of loop depth. if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { - const Loop* NestedLoop = NestedAR->getLoop(); + const Loop *NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { SmallVector NestedOperands(NestedAR->op_begin(), - NestedAR->op_end()); + NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this @@ -2441,7 +2441,7 @@ Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map::iterator It = + std::map::iterator It = Scalars.find(static_cast(I)); if (It != Scalars.end()) { // Short-circuit the def-use traversal if the symbolic name @@ -3241,7 +3241,7 @@ // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. 
- std::pair::iterator, bool> Pair = + std::pair::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (Pair.second) { BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L); @@ -3276,7 +3276,7 @@ Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map::iterator It = + std::map::iterator It = Scalars.find(static_cast(I)); if (It != Scalars.end()) { // SCEVUnknown for a PHI either means that it has an unrecognized @@ -3316,7 +3316,7 @@ Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map::iterator It = + std::map::iterator It = Scalars.find(static_cast(I)); if (It != Scalars.end()) { ValuesAtScopes.erase(It->second); @@ -3333,7 +3333,7 @@ /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { - SmallVector ExitingBlocks; + SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. @@ -3839,7 +3839,7 @@ /// involving constants, fold it. 
Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, - const APInt& BEs, + const APInt &BEs, const Loop *L) { std::map::iterator I = ConstantEvolutionLoopExitValue.find(PN); @@ -4008,7 +4008,7 @@ if (!isSCEVable(Op->getType())) return V; - const SCEV* OpV = getSCEVAtScope(Op, L); + const SCEV *OpV = getSCEVAtScope(Op, L); if (const SCEVConstant *SC = dyn_cast(OpV)) { Constant *C = SC->getValue(); if (C->getType() != Op->getType()) @@ -5183,7 +5183,7 @@ OS << "Loop " << L->getHeader()->getName() << ": "; - SmallVector ExitBlocks; + SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); if (ExitBlocks.size() != 1) OS << " "; @@ -5206,14 +5206,14 @@ OS << "\n"; } -void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { +void ScalarEvolution::print(raw_ostream &OS, const Module *) const { // ScalarEvolution's implementaiton of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't // observable from outside the class though, so casting away the // const isn't dangerous. - ScalarEvolution &SE = *const_cast(this); + ScalarEvolution &SE = *const_cast(this); OS << "Classifying expressions for: " << F->getName() << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) From gohman at apple.com Thu Dec 17 19:20:44 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 01:20:44 -0000 Subject: [llvm-commits] [llvm] r91653 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Message-ID: <200912180120.nBI1KiN2013920@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 19:20:44 2009 New Revision: 91653 URL: http://llvm.org/viewvc/llvm-project?rev=91653&view=rev Log: Minor code simplification. 
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91653&r1=91652&r2=91653&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Dec 17 19:20:44 2009 @@ -212,8 +212,6 @@ /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - if (DeadInsts.empty()) return; - while (!DeadInsts.empty()) { Instruction *I = dyn_cast_or_null(DeadInsts.pop_back_val()); @@ -2720,8 +2718,7 @@ IVsByStride.clear(); // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + DeleteTriviallyDeadInstructions(); } // At this point, it is worth checking to see if any recurrence PHIs are also From gohman at apple.com Thu Dec 17 19:24:10 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 01:24:10 -0000 Subject: [llvm-commits] [llvm] r91654 - in /llvm/trunk: include/llvm/Analysis/LoopInfo.h lib/Analysis/IVUsers.cpp lib/Analysis/LoopInfo.cpp lib/Analysis/ScalarEvolution.cpp lib/CodeGen/MachineLICM.cpp lib/Transforms/Scalar/IndVarSimplify.cpp lib/Transforms/Scalar/LICM.cpp lib/Transforms/Scalar/LoopIndexSplit.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp lib/Transforms/Scalar/LoopUnswitch.cpp lib/Transforms/Utils/BreakCriticalEdges.cpp lib/Transforms/Utils/LoopUnroll.cpp Message-ID: <200912180124.nBI1OA74014035@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 19:24:09 2009 New Revision: 91654 URL: http://llvm.org/viewvc/llvm-project?rev=91654&view=rev Log: Add Loop contains utility methods for testing whether a loop contains another loop, or an instruction. 
The loop form is substantially more efficient on large loops than the typical code it replaces. Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h llvm/trunk/lib/Analysis/IVUsers.cpp llvm/trunk/lib/Analysis/LoopInfo.cpp llvm/trunk/lib/Analysis/ScalarEvolution.cpp llvm/trunk/lib/CodeGen/MachineLICM.cpp llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp llvm/trunk/lib/Transforms/Scalar/LICM.cpp llvm/trunk/lib/Transforms/Scalar/LoopIndexSplit.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp llvm/trunk/lib/Transforms/Utils/BreakCriticalEdges.cpp llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Modified: llvm/trunk/include/llvm/Analysis/LoopInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopInfo.h?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/LoopInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/LoopInfo.h Thu Dec 17 19:24:09 2009 @@ -93,12 +93,28 @@ BlockT *getHeader() const { return Blocks.front(); } LoopT *getParentLoop() const { return ParentLoop; } - /// contains - Return true if the specified basic block is in this loop + /// contains - Return true if the specified loop is contained within in + /// this loop. + /// + bool contains(const LoopT *L) const { + if (L == this) return true; + if (L == 0) return false; + return contains(L->getParentLoop()); + } + + /// contains - Return true if the specified basic block is in this loop. /// bool contains(const BlockT *BB) const { return std::find(block_begin(), block_end(), BB) != block_end(); } + /// contains - Return true if the specified instruction is in this loop. + /// + template + bool contains(const InstT *Inst) const { + return contains(Inst->getParent()); + } + /// iterator/begin/end - Return the loops contained entirely within this loop. 
/// const std::vector &getSubLoops() const { return SubLoops; } Modified: llvm/trunk/lib/Analysis/IVUsers.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/IVUsers.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/IVUsers.cpp (original) +++ llvm/trunk/lib/Analysis/IVUsers.cpp Thu Dec 17 19:24:09 2009 @@ -53,7 +53,7 @@ if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L->getHeader())) + if (newLoop->contains(L)) return false; } return true; @@ -148,7 +148,7 @@ Loop *L, LoopInfo *LI, DominatorTree *DT, Pass *P) { // If the user is in the loop, use the preinc value. - if (L->contains(User->getParent())) return false; + if (L->contains(User)) return false; BasicBlock *LatchBlock = L->getLoopLatch(); if (!LatchBlock) @@ -209,7 +209,7 @@ return false; // Non-reducible symbolic expression, bail out. // Keep things simple. Don't touch loop-variant strides. - if (!Stride->isLoopInvariant(L) && L->contains(I->getParent())) + if (!Stride->isLoopInvariant(L) && L->contains(I)) return false; SmallPtrSet UniqueUsers; @@ -324,7 +324,7 @@ if (U.isUseOfPostIncrementedValue()) RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); // Evaluate the expression out of the loop, if possible. - if (!L->contains(U.getUser()->getParent())) { + if (!L->contains(U.getUser())) { const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; Modified: llvm/trunk/lib/Analysis/LoopInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopInfo.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/LoopInfo.cpp (original) +++ llvm/trunk/lib/Analysis/LoopInfo.cpp Thu Dec 17 19:24:09 2009 @@ -56,7 +56,7 @@ /// loop-invariant. 
/// bool Loop::isLoopInvariant(Instruction *I) const { - return !contains(I->getParent()); + return !contains(I); } /// makeLoopInvariant - If the given value is an instruciton inside of the Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Dec 17 19:24:09 2009 @@ -298,7 +298,7 @@ return false; // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L. - if (QueryLoop->contains(L->getHeader())) + if (QueryLoop->contains(L)) return false; // This recurrence is variant w.r.t. QueryLoop if any of its operands @@ -333,7 +333,7 @@ // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". if (Instruction *I = dyn_cast(V)) - return L && !L->contains(I->getParent()); + return L && !L->contains(I); return true; } @@ -3774,7 +3774,7 @@ // If this is not an instruction, or if this is an instruction outside of the // loop, it can't be derived from a loop PHI. Instruction *I = dyn_cast(V); - if (I == 0 || !L->contains(I->getParent())) return 0; + if (I == 0 || !L->contains(I)) return 0; if (PHINode *PN = dyn_cast(I)) { if (L->getHeader() == I->getParent()) @@ -4091,7 +4091,7 @@ // If this is a loop recurrence for a loop that does not contain L, then we // are dealing with the final value computed by the loop. if (const SCEVAddRecExpr *AddRec = dyn_cast(V)) { - if (!L || !AddRec->getLoop()->contains(L->getHeader())) { + if (!L || !AddRec->getLoop()->contains(L)) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. 
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Thu Dec 17 19:24:09 2009 @@ -322,7 +322,7 @@ // If the loop contains the definition of an operand, then the instruction // isn't loop invariant. - if (CurLoop->contains(RegInfo->getVRegDef(Reg)->getParent())) + if (CurLoop->contains(RegInfo->getVRegDef(Reg))) return false; } Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Thu Dec 17 19:24:09 2009 @@ -258,7 +258,7 @@ // Check that InVal is defined in the loop. 
Instruction *Inst = cast(InVal); - if (!L->contains(Inst->getParent())) + if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Thu Dec 17 19:24:09 2009 @@ -433,7 +433,7 @@ if (PN->getIncomingValue(i) == &I) if (CurLoop->contains(PN->getIncomingBlock(i))) return false; - } else if (CurLoop->contains(User->getParent())) { + } else if (CurLoop->contains(User)) { return false; } } @@ -831,7 +831,7 @@ UI != UE; ++UI) { // Ignore instructions not in this loop. Instruction *Use = dyn_cast(*UI); - if (!Use || !CurLoop->contains(Use->getParent())) + if (!Use || !CurLoop->contains(Use)) continue; if (!isa(Use) && !isa(Use)) { Modified: llvm/trunk/lib/Transforms/Scalar/LoopIndexSplit.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIndexSplit.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopIndexSplit.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopIndexSplit.cpp Thu Dec 17 19:24:09 2009 @@ -288,7 +288,7 @@ // isUsedOutsideLoop - Returns true iff V is used outside the loop L. 
static bool isUsedOutsideLoop(Value *V, Loop *L) { for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (!L->contains(cast(*UI)->getParent())) + if (!L->contains(cast(*UI))) return true; return false; } @@ -842,7 +842,7 @@ for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); UI != E; ++UI) if (PHINode *U = dyn_cast(*UI)) - if (LP->contains(U->getParent())) { + if (LP->contains(U)) { NewV = U; break; } Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Dec 17 19:24:09 2009 @@ -388,7 +388,7 @@ // If this is a use outside the loop (which means after, since it is based // on a loop indvar) we use the post-incremented value, so that we don't // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) { + if (!isUseOfPostIncrementedValue && L->contains(Inst)) { if (NewBasePt && isa(OperandValToReplace)) { InsertPt = NewBasePt; ++InsertPt; @@ -429,7 +429,7 @@ // that case(?). Instruction *OldLoc = dyn_cast(OperandValToReplace); BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc->getParent())) { + if (L->contains(OldLoc)) { // If this is a critical edge, split the edge so that we do not insert // the code on all predecessor/successor paths. We do this unless this // is the canonical backedge for this loop, as this can make some @@ -446,7 +446,7 @@ // is outside of the loop, and PredTI is in the loop, we want to // move the block to be immediately before the PHI block, not // immediately after PredTI. 
- if (L->contains(PHIPred) && !L->contains(PN->getParent())) + if (L->contains(PHIPred) && !L->contains(PN)) NewBB->moveBefore(PN->getParent()); // Splitting the edge can reduce the number of PHI entries we have. @@ -458,7 +458,7 @@ Value *&Code = InsertedCode[PHIPred]; if (!Code) { // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? + Instruction *InsertPt = (L->contains(OldLoc)) ? PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), @@ -697,7 +697,7 @@ // it is clearly shared across all the IV's. If the use is outside the loop // (which means after it) we don't want to factor anything *into* the loop, // so just use 0 as the base. - if (L->contains(Uses[0].Inst->getParent())) + if (L->contains(Uses[0].Inst)) std::swap(Result, Uses[0].Base); return Result; } @@ -722,7 +722,7 @@ // after the loop to affect base computation of values *inside* the loop, // because we can always add their offsets to the result IV after the loop // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst->getParent())) + if (!L->contains(Uses[i].Inst)) continue; NumUsesInsideLoop++; @@ -778,7 +778,7 @@ // and a Result in the same instruction (for example because it would // require too many registers). Check this. for (unsigned i=0; icontains(Uses[i].Inst->getParent())) + if (!L->contains(Uses[i].Inst)) continue; // We know this is an addressing mode use; if there are any uses that // are not, FreeResult would be Zero. @@ -814,7 +814,7 @@ // the final IV value coming into those uses does. Instead of trying to // remove the pieces of the common base, which might not be there, // subtract off the base to compensate for this. 
- if (!L->contains(Uses[i].Inst->getParent())) { + if (!L->contains(Uses[i].Inst)) { Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); continue; } @@ -1109,7 +1109,7 @@ // If the user is not in the current loop, this means it is using the exit // value of the IV. Do not put anything in the base, make sure it's all in // the immediate field to allow as much factoring as possible. - if (!L->contains(UsersToProcess[i].Inst->getParent())) { + if (!L->contains(UsersToProcess[i].Inst)) { UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, UsersToProcess[i].Base); UsersToProcess[i].Base = @@ -1353,7 +1353,7 @@ const Loop *L) { if (UsersToProcess.size() == 1 && UsersToProcess[0].isUseOfPostIncrementedValue && - L->contains(UsersToProcess[0].Inst->getParent())) + L->contains(UsersToProcess[0].Inst)) return UsersToProcess[0].Inst; return L->getLoopLatch()->getTerminator(); } @@ -1626,7 +1626,7 @@ // loop to ensure it is dominated by the increment. In case it's the // only use of the iv, the increment instruction is already before the // use. - if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt) + if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) User.Inst->moveBefore(IVIncInsertPt); } @@ -1688,7 +1688,7 @@ // common base, and are adding it back here. Use the same expression // as before, rather than CommonBaseV, so DAGCombiner will zap it. 
if (!CommonExprs->isZero()) { - if (L->contains(User.Inst->getParent())) + if (L->contains(User.Inst)) RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(CommonBaseV)); else @@ -2363,7 +2363,7 @@ static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, ScalarEvolution *SE, Loop *L, const TargetLowering *TLI = 0) { - if (!L->contains(Cond->getParent())) + if (!L->contains(Cond)) return false; if (!isa(CondUse->getOffset())) Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Thu Dec 17 19:24:09 2009 @@ -877,7 +877,7 @@ for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast(Users[i])) { - if (!L->contains(U->getParent())) + if (!L->contains(U)) continue; U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); @@ -888,7 +888,7 @@ // can. This case occurs when we unswitch switch statements. for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast(Users[i])) { - if (!L->contains(U->getParent())) + if (!L->contains(U)) continue; Worklist.push_back(U); Modified: llvm/trunk/lib/Transforms/Utils/BreakCriticalEdges.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BreakCriticalEdges.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/BreakCriticalEdges.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/BreakCriticalEdges.cpp Thu Dec 17 19:24:09 2009 @@ -309,10 +309,10 @@ if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. 
DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } else if (TIL->contains(DestLoop->getHeader())) { + } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); - } else if (DestLoop->contains(TIL->getHeader())) { + } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=91654&r1=91653&r2=91654&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Thu Dec 17 19:24:09 2009 @@ -194,7 +194,7 @@ OrigPHINode.push_back(PN); if (Instruction *I = dyn_cast(PN->getIncomingValueForBlock(LatchBlock))) - if (L->contains(I->getParent())) + if (L->contains(I)) LastValueMap[I] = I; } @@ -222,7 +222,7 @@ PHINode *NewPHI = cast(ValueMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast(InVal)) - if (It > 1 && L->contains(InValI->getParent())) + if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; ValueMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); @@ -244,7 +244,7 @@ UI != UE;) { Instruction *UseInst = cast(*UI); ++UI; - if (isa(UseInst) && !L->contains(UseInst->getParent())) { + if (isa(UseInst) && !L->contains(UseInst)) { PHINode *phi = cast(UseInst); Value *Incoming = phi->getIncomingValueForBlock(*BB); phi->addIncoming(Incoming, New); @@ -295,7 +295,7 @@ // If this value was defined in the loop, take the value defined by the // last iteration of the loop. 
if (Instruction *InValI = dyn_cast(InVal)) { - if (L->contains(InValI->getParent())) + if (L->contains(InValI)) InVal = LastValueMap[InVal]; } PN->addIncoming(InVal, LastIterationBB); From evan.cheng at apple.com Thu Dec 17 19:59:21 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 18 Dec 2009 01:59:21 -0000 Subject: [llvm-commits] [llvm] r91655 - /llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Message-ID: <200912180159.nBI1xLna015298@zion.cs.uiuc.edu> Author: evancheng Date: Thu Dec 17 19:59:21 2009 New Revision: 91655 URL: http://llvm.org/viewvc/llvm-project?rev=91655&view=rev Log: Re-apply 91623 now that I actually know what I was trying to do. Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=91655&r1=91654&r2=91655&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Dec 17 19:59:21 2009 @@ -50,9 +50,6 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -#include "llvm/Support/CommandLine.h" -static cl::opt AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); - STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -1275,28 +1272,7 @@ SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - bool Done = false; - if (AvoidDupAddrCompute && !N.hasOneUse()) { - unsigned Opcode = N.getOpcode(); - if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && - Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { - // If we are able to fold N into addressing mode, then we'll allow it even - // if N has multiple uses. In general, addressing computation is used as - // addresses by all of its uses. 
But watch out for CopyToReg uses, that - // means the address computation is liveout. It will be computed by a LEA - // so we want to avoid computing the address twice. - for (SDNode::use_iterator UI = N.getNode()->use_begin(), - UE = N.getNode()->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - MatchAddressBase(N, AM); - Done = true; - break; - } - } - } - } - - if (!Done && MatchAddress(N, AM)) + if (MatchAddress(N, AM)) return false; EVT VT = N.getValueType(); From gohman at apple.com Thu Dec 17 20:09:29 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 02:09:29 -0000 Subject: [llvm-commits] [llvm] r91656 - in /llvm/trunk: include/llvm/Analysis/ScalarEvolution.h lib/Analysis/ScalarEvolution.cpp Message-ID: <200912180209.nBI29Tu9015710@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 20:09:29 2009 New Revision: 91656 URL: http://llvm.org/viewvc/llvm-project?rev=91656&view=rev Log: Preserve NSW information in more places. Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h llvm/trunk/lib/Analysis/ScalarEvolution.cpp Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=91656&r1=91655&r2=91656&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original) +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Thu Dec 17 20:09:29 2009 @@ -243,7 +243,7 @@ /// createNodeForGEP - Provide the special handling we need to analyze GEP /// SCEVs. - const SCEV *createNodeForGEP(Operator *GEP); + const SCEV *createNodeForGEP(GEPOperator *GEP); /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called /// at most once for each SCEV+Loop pair. 
Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=91656&r1=91655&r2=91656&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Dec 17 20:09:29 2009 @@ -1460,7 +1460,9 @@ AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + AddRec->hasNoUnsignedWrap() && HasNUW, + AddRec->hasNoSignedWrap() && HasNSW); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1636,7 +1638,9 @@ } } - const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), + AddRec->hasNoUnsignedWrap() && HasNUW, + AddRec->hasNoSignedWrap() && HasNSW); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -2592,8 +2596,9 @@ /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// -const SCEV *ScalarEvolution::createNodeForGEP(Operator *GEP) { +const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + bool InBounds = GEP->isInBounds(); const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. @@ -2610,18 +2615,24 @@ // For a struct, add the member offset. unsigned FieldNo = cast(Index)->getZExtValue(); TotalOffset = getAddExpr(TotalOffset, - getFieldOffsetExpr(STy, FieldNo)); + getFieldOffsetExpr(STy, FieldNo), + /*HasNUW=*/false, /*HasNSW=*/InBounds); } else { // For an array, add the element offset, explicitly scaled. 
const SCEV *LocalOffset = getSCEV(Index); if (!isa<PointerType>(LocalOffset->getType())) // Getelementptr indicies are signed. LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); - LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI)); - TotalOffset = getAddExpr(TotalOffset, LocalOffset); + // Lower "inbounds" GEPs to NSW arithmetic. + bool HasNSW = GEP->isInBounds(); + LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI), + /*HasNUW=*/false, /*HasNSW=*/InBounds); + TotalOffset = getAddExpr(TotalOffset, LocalOffset, + /*HasNUW=*/false, /*HasNSW=*/InBounds); } } - return getAddExpr(getSCEV(Base), TotalOffset); + return getAddExpr(getSCEV(Base), TotalOffset, + /*HasNUW=*/false, /*HasNSW=*/InBounds); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3130,7 +3141,7 @@ // expressions we handle are GEPs and address literals. case Instruction::GetElementPtr: - return createNodeForGEP(U); + return createNodeForGEP(cast<GEPOperator>(U)); case Instruction::PHI: return createNodeForPHI(cast<PHINode>(U)); From echristo at apple.com Thu Dec 17 20:12:54 2009 From: echristo at apple.com (Eric Christopher) Date: Fri, 18 Dec 2009 02:12:54 -0000 Subject: [llvm-commits] [llvm] r91657 - /llvm/trunk/lib/Target/MSIL/MSILWriter.cpp Message-ID: <200912180212.nBI2Csnb015884@zion.cs.uiuc.edu> Author: echristo Date: Thu Dec 17 20:12:53 2009 New Revision: 91657 URL: http://llvm.org/viewvc/llvm-project?rev=91657&view=rev Log: Fix typo. 
Modified: llvm/trunk/lib/Target/MSIL/MSILWriter.cpp Modified: llvm/trunk/lib/Target/MSIL/MSILWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSIL/MSILWriter.cpp?rev=91657&r1=91656&r2=91657&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSIL/MSILWriter.cpp (original) +++ llvm/trunk/lib/Target/MSIL/MSILWriter.cpp Thu Dec 17 20:12:53 2009 @@ -1696,7 +1696,7 @@ if (FileType != TargetMachine::AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); PM.add(createGCLoweringPass()); - // FIXME: Handle switch trougth native IL instruction "switch" + // FIXME: Handle switch through native IL instruction "switch" PM.add(createLowerSwitchPass()); PM.add(createCFGSimplificationPass()); PM.add(new MSILModule(Writer->UsedTypes,Writer->TD)); From gohman at apple.com Thu Dec 17 20:14:38 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 02:14:38 -0000 Subject: [llvm-commits] [llvm] r91659 - /llvm/trunk/lib/Analysis/ScalarEvolution.cpp Message-ID: <200912180214.nBI2Ecqi015957@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 20:14:37 2009 New Revision: 91659 URL: http://llvm.org/viewvc/llvm-project?rev=91659&view=rev Log: Delete an unused variable. Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=91659&r1=91658&r2=91659&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Dec 17 20:14:37 2009 @@ -2624,7 +2624,6 @@ // Getelementptr indicies are signed. LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); // Lower "inbounds" GEPs to NSW arithmetic. 
- bool HasNSW = GEP->isInBounds(); LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI), /*HasNUW=*/false, /*HasNSW=*/InBounds); TotalOffset = getAddExpr(TotalOffset, LocalOffset, From gohman at apple.com Thu Dec 17 20:36:24 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 02:36:24 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91660 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200912180236.nBI2aO2Q016634@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 20:36:24 2009 New Revision: 91660 URL: http://llvm.org/viewvc/llvm-project?rev=91660&view=rev Log: Set the NSW flag on signed-integer subtract operators. Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91660&r1=91659&r2=91660&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Thu Dec 17 20:36:24 2009 @@ -3365,7 +3365,6 @@ bool RHSIsSigned = !TYPE_UNSIGNED(TREE_TYPE(TREE_OPERAND(exp, 1))); bool TyIsSigned = !TYPE_UNSIGNED(TREE_TYPE(exp)); bool IsExactDiv = TREE_CODE(exp) == EXACT_DIV_EXPR; - bool IsPlus = TREE_CODE(exp) == PLUS_EXPR; LHS = CastToAnyType(LHS, LHSIsSigned, Ty, TyIsSigned); RHS = CastToAnyType(RHS, RHSIsSigned, Ty, TyIsSigned); @@ -3387,8 +3386,10 @@ Value *V; if (Opc == Instruction::SDiv && IsExactDiv) V = Builder.CreateExactSDiv(LHS, RHS); - else if (Opc == Instruction::Add && IsPlus && TyIsSigned && !flag_wrapv) + else if (Opc == Instruction::Add && TyIsSigned && !flag_wrapv) V = Builder.CreateNSWAdd(LHS, RHS); + else if (Opc == Instruction::Sub && TyIsSigned && !flag_wrapv) + V = Builder.CreateNSWSub(LHS, RHS); else V = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS); if (ResTy != Ty) From gohman at apple.com Thu Dec 17 20:58:50 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 
Dec 2009 02:58:50 -0000 Subject: [llvm-commits] [llvm] r91662 - in /llvm/trunk: include/llvm/Constants.h include/llvm/InstrTypes.h include/llvm/Support/ConstantFolder.h include/llvm/Support/IRBuilder.h include/llvm/Support/NoFolder.h include/llvm/Support/TargetFolder.h lib/VMCore/Constants.cpp lib/VMCore/Instructions.cpp Message-ID: <200912180258.nBI2wop4017411@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 20:58:50 2009 New Revision: 91662 URL: http://llvm.org/viewvc/llvm-project?rev=91662&view=rev Log: Add utility routines for creating integer negation operators with NSW set. Integer negation only overflows with INT_MIN, but that's an important case. Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/include/llvm/InstrTypes.h llvm/trunk/include/llvm/Support/ConstantFolder.h llvm/trunk/include/llvm/Support/IRBuilder.h llvm/trunk/include/llvm/Support/NoFolder.h llvm/trunk/include/llvm/Support/TargetFolder.h llvm/trunk/lib/VMCore/Constants.cpp llvm/trunk/lib/VMCore/Instructions.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Thu Dec 17 20:58:50 2009 @@ -692,6 +692,7 @@ static Constant *getIntToPtr(Constant *C, const Type *Ty); static Constant *getBitCast (Constant *C, const Type *Ty); + static Constant *getNSWNeg(Constant *C); static Constant *getNSWAdd(Constant *C1, Constant *C2); static Constant *getNSWSub(Constant *C1, Constant *C2); static Constant *getExactSDiv(Constant *C1, Constant *C2); Modified: llvm/trunk/include/llvm/InstrTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InstrTypes.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/InstrTypes.h 
(original) +++ llvm/trunk/include/llvm/InstrTypes.h Thu Dec 17 20:58:50 2009 @@ -308,6 +308,10 @@ Instruction *InsertBefore = 0); static BinaryOperator *CreateNeg(Value *Op, const Twine &Name, BasicBlock *InsertAtEnd); + static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name = "", + Instruction *InsertBefore = 0); + static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name, + BasicBlock *InsertAtEnd); static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "", Instruction *InsertBefore = 0); static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name, Modified: llvm/trunk/include/llvm/Support/ConstantFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ConstantFolder.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/ConstantFolder.h (original) +++ llvm/trunk/include/llvm/Support/ConstantFolder.h Thu Dec 17 20:58:50 2009 @@ -109,6 +109,9 @@ Constant *CreateNeg(Constant *C) const { return ConstantExpr::getNeg(C); } + Constant *CreateNSWNeg(Constant *C) const { + return ConstantExpr::getNSWNeg(C); + } Constant *CreateFNeg(Constant *C) const { return ConstantExpr::getFNeg(C); } Modified: llvm/trunk/include/llvm/Support/IRBuilder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/IRBuilder.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/IRBuilder.h (original) +++ llvm/trunk/include/llvm/Support/IRBuilder.h Thu Dec 17 20:58:50 2009 @@ -478,6 +478,11 @@ return Folder.CreateNeg(VC); return Insert(BinaryOperator::CreateNeg(V), Name); } + Value *CreateNSWNeg(Value *V, const Twine &Name = "") { + if (Constant *VC = dyn_cast(V)) + return Folder.CreateNSWNeg(VC); + return Insert(BinaryOperator::CreateNSWNeg(V), Name); + } Value *CreateFNeg(Value *V, const Twine &Name = "") { if (Constant 
*VC = dyn_cast(V)) return Folder.CreateFNeg(VC); Modified: llvm/trunk/include/llvm/Support/NoFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/NoFolder.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/NoFolder.h (original) +++ llvm/trunk/include/llvm/Support/NoFolder.h Thu Dec 17 20:58:50 2009 @@ -115,6 +115,9 @@ Value *CreateNeg(Constant *C) const { return BinaryOperator::CreateNeg(C); } + Value *CreateNSWNeg(Constant *C) const { + return BinaryOperator::CreateNSWNeg(C); + } Value *CreateNot(Constant *C) const { return BinaryOperator::CreateNot(C); } Modified: llvm/trunk/include/llvm/Support/TargetFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetFolder.h?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/TargetFolder.h (original) +++ llvm/trunk/include/llvm/Support/TargetFolder.h Thu Dec 17 20:58:50 2009 @@ -122,6 +122,9 @@ Constant *CreateNeg(Constant *C) const { return Fold(ConstantExpr::getNeg(C)); } + Constant *CreateNSWNeg(Constant *C) const { + return Fold(ConstantExpr::getNSWNeg(C)); + } Constant *CreateFNeg(Constant *C) const { return Fold(ConstantExpr::getFNeg(C)); } Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Thu Dec 17 20:58:50 2009 @@ -627,6 +627,12 @@ return get(std::vector(Vals, Vals+NumVals)); } +Constant* ConstantExpr::getNSWNeg(Constant* C) { + assert(C->getType()->isIntOrIntVector() && + "Cannot NEG a nonintegral value!"); + return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), 
C); +} + Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Add, C1, C2, OverflowingBinaryOperator::NoSignedWrap); Modified: llvm/trunk/lib/VMCore/Instructions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Instructions.cpp?rev=91662&r1=91661&r2=91662&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Instructions.cpp (original) +++ llvm/trunk/lib/VMCore/Instructions.cpp Thu Dec 17 20:58:50 2009 @@ -1772,6 +1772,18 @@ Op->getType(), Name, InsertAtEnd); } +BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name, + Instruction *InsertBefore) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore); +} + +BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name, + BasicBlock *InsertAtEnd) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd); +} + BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, Instruction *InsertBefore) { Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); From gohman at apple.com Thu Dec 17 20:59:06 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 02:59:06 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91663 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200912180259.nBI2x6cP017428@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 20:59:05 2009 New Revision: 91663 URL: http://llvm.org/viewvc/llvm-project?rev=91663&view=rev Log: Add the NSW bit to integer negation operators. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91663&r1=91662&r2=91663&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Thu Dec 17 20:59:05 2009 @@ -3173,8 +3173,10 @@ Value *V = Emit(TREE_OPERAND(exp, 0), 0); if (V->getType()->isFPOrFPVector()) return Builder.CreateFNeg(V); - if (!isa<PointerType>(V->getType())) - return Builder.CreateNeg(V); + if (!isa<PointerType>(V->getType())) { + bool HasNSW = !TYPE_UNSIGNED(TREE_TYPE(exp)) && !flag_wrapv; + return HasNSW ? Builder.CreateNSWNeg(V) : Builder.CreateNeg(V); + } // GCC allows NEGATE_EXPR on pointers as well. Cast to int, negate, cast // back. From gohman at apple.com Thu Dec 17 21:10:27 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 03:10:27 -0000 Subject: [llvm-commits] [llvm] r91664 - in /llvm/trunk: include/llvm/Constants.h include/llvm/InstrTypes.h include/llvm/Support/ConstantFolder.h include/llvm/Support/IRBuilder.h include/llvm/Support/NoFolder.h include/llvm/Support/TargetFolder.h lib/VMCore/Constants.cpp Message-ID: <200912180310.nBI3AR1t017847@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 21:10:26 2009 New Revision: 91664 URL: http://llvm.org/viewvc/llvm-project?rev=91664&view=rev Log: Add utility routines for NSW multiply. 
Modified: llvm/trunk/include/llvm/Constants.h llvm/trunk/include/llvm/InstrTypes.h llvm/trunk/include/llvm/Support/ConstantFolder.h llvm/trunk/include/llvm/Support/IRBuilder.h llvm/trunk/include/llvm/Support/NoFolder.h llvm/trunk/include/llvm/Support/TargetFolder.h llvm/trunk/lib/VMCore/Constants.cpp Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Thu Dec 17 21:10:26 2009 @@ -695,6 +695,7 @@ static Constant *getNSWNeg(Constant *C); static Constant *getNSWAdd(Constant *C1, Constant *C2); static Constant *getNSWSub(Constant *C1, Constant *C2); + static Constant *getNSWMul(Constant *C1, Constant *C2); static Constant *getExactSDiv(Constant *C1, Constant *C2); /// Transparently provide more efficient getOperand methods. Modified: llvm/trunk/include/llvm/InstrTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InstrTypes.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/InstrTypes.h (original) +++ llvm/trunk/include/llvm/InstrTypes.h Thu Dec 17 21:10:26 2009 @@ -277,6 +277,27 @@ return BO; } + /// CreateNSWMul - Create a Mul operator with the NSW flag set. 
+ /// + static BinaryOperator *CreateNSWMul(Value *V1, Value *V2, + const Twine &Name = "") { + BinaryOperator *BO = CreateMul(V1, V2, Name); + BO->setHasNoSignedWrap(true); + return BO; + } + static BinaryOperator *CreateNSWMul(Value *V1, Value *V2, + const Twine &Name, BasicBlock *BB) { + BinaryOperator *BO = CreateMul(V1, V2, Name, BB); + BO->setHasNoSignedWrap(true); + return BO; + } + static BinaryOperator *CreateNSWMul(Value *V1, Value *V2, + const Twine &Name, Instruction *I) { + BinaryOperator *BO = CreateMul(V1, V2, Name, I); + BO->setHasNoSignedWrap(true); + return BO; + } + /// CreateExactSDiv - Create an SDiv operator with the exact flag set. /// static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2, Modified: llvm/trunk/include/llvm/Support/ConstantFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/ConstantFolder.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/ConstantFolder.h (original) +++ llvm/trunk/include/llvm/Support/ConstantFolder.h Thu Dec 17 21:10:26 2009 @@ -54,6 +54,9 @@ Constant *CreateMul(Constant *LHS, Constant *RHS) const { return ConstantExpr::getMul(LHS, RHS); } + Constant *CreateNSWMul(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getNSWMul(LHS, RHS); + } Constant *CreateFMul(Constant *LHS, Constant *RHS) const { return ConstantExpr::getFMul(LHS, RHS); } Modified: llvm/trunk/include/llvm/Support/IRBuilder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/IRBuilder.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/IRBuilder.h (original) +++ llvm/trunk/include/llvm/Support/IRBuilder.h Thu Dec 17 21:10:26 2009 @@ -353,6 +353,12 @@ return Folder.CreateMul(LC, RC); return Insert(BinaryOperator::CreateMul(LHS, RHS), Name); } + Value *CreateNSWMul(Value 
*LHS, Value *RHS, const Twine &Name = "") { + if (Constant *LC = dyn_cast(LHS)) + if (Constant *RC = dyn_cast(RHS)) + return Folder.CreateNSWMul(LC, RC); + return Insert(BinaryOperator::CreateNSWMul(LHS, RHS), Name); + } Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) Modified: llvm/trunk/include/llvm/Support/NoFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/NoFolder.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/NoFolder.h (original) +++ llvm/trunk/include/llvm/Support/NoFolder.h Thu Dec 17 21:10:26 2009 @@ -60,6 +60,9 @@ Value *CreateMul(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateMul(LHS, RHS); } + Value *CreateNSWMul(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWMul(LHS, RHS); + } Value *CreateFMul(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFMul(LHS, RHS); } Modified: llvm/trunk/include/llvm/Support/TargetFolder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetFolder.h?rev=91664&r1=91663&r2=91664&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/TargetFolder.h (original) +++ llvm/trunk/include/llvm/Support/TargetFolder.h Thu Dec 17 21:10:26 2009 @@ -67,6 +67,9 @@ Constant *CreateMul(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getMul(LHS, RHS)); } + Constant *CreateNSWMul(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getNSWMul(LHS, RHS)); + } Constant *CreateFMul(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getFMul(LHS, RHS)); } Modified: llvm/trunk/lib/VMCore/Constants.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Constants.cpp?rev=91664&r1=91663&r2=91664&view=diff 
============================================================================== --- llvm/trunk/lib/VMCore/Constants.cpp (original) +++ llvm/trunk/lib/VMCore/Constants.cpp Thu Dec 17 21:10:26 2009 @@ -643,6 +643,11 @@ OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Mul, C1, C2, + OverflowingBinaryOperator::NoSignedWrap); +} + Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::SDiv, C1, C2, SDivOperator::IsExact); From gohman at apple.com Thu Dec 17 21:11:27 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 03:11:27 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r91665 - /llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Message-ID: <200912180311.nBI3BR7A017892@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 21:11:27 2009 New Revision: 91665 URL: http://llvm.org/viewvc/llvm-project?rev=91665&view=rev Log: Emit signed multiply with the NSW flag. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp?rev=91665&r1=91664&r2=91665&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-convert.cpp Thu Dec 17 21:11:27 2009 @@ -3392,6 +3392,8 @@ V = Builder.CreateNSWAdd(LHS, RHS); else if (Opc == Instruction::Sub && TyIsSigned && !flag_wrapv) V = Builder.CreateNSWSub(LHS, RHS); + else if (Opc == Instruction::Mul && TyIsSigned && !flag_wrapv) + V = Builder.CreateNSWMul(LHS, RHS); else V = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS); if (ResTy != Ty) From gohman at apple.com Thu Dec 17 21:25:52 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 03:25:52 -0000 Subject: [llvm-commits] [llvm] r91666 - in /llvm/trunk/lib/Transforms/Scalar: GVN.cpp SCCVN.cpp Message-ID: <200912180325.nBI3Pqdd018453@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 21:25:51 2009 New Revision: 91666 URL: http://llvm.org/viewvc/llvm-project?rev=91666&view=rev Log: Eliminate unnecessary uses of <cstdio>. 
Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=91666&r1=91665&r2=91666&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Thu Dec 17 21:25:51 2009 @@ -48,7 +48,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" -#include using namespace llvm; STATISTIC(NumGVNInstr, "Number of instructions deleted"); @@ -733,13 +732,13 @@ "Global Value Numbering"); void GVN::dump(DenseMap& d) { - printf("{\n"); + errs() << "{\n"; for (DenseMap::iterator I = d.begin(), E = d.end(); I != E; ++I) { - printf("%d\n", I->first); + errs() << I->first << "\n"; I->second->dump(); } - printf("}\n"); + errs() << "}\n"; } static bool isSafeReplacement(PHINode* p, Instruction *inst) { Modified: llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp?rev=91666&r1=91665&r2=91666&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/SCCVN.cpp Thu Dec 17 21:25:51 2009 @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/SSAUpdater.h" -#include using namespace llvm; STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN"); From gohman at apple.com Thu Dec 17 21:57:04 2009 From: gohman at apple.com (Dan Gohman) Date: Fri, 18 Dec 2009 03:57:04 -0000 Subject: [llvm-commits] [llvm] r91667 - /llvm/trunk/lib/Analysis/ScalarEvolution.cpp Message-ID: <200912180357.nBI3v4V9019363@zion.cs.uiuc.edu> Author: djg Date: Thu Dec 17 
21:57:04 2009 New Revision: 91667 URL: http://llvm.org/viewvc/llvm-project?rev=91667&view=rev Log: Revert this use of NSW; this one isn't actually safe. NSW addition is not reassociative. Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=91667&r1=91666&r2=91667&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original) +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Thu Dec 17 21:57:04 2009 @@ -1457,12 +1457,10 @@ LIOps.push_back(AddRec->getStart()); SmallVector AddRecOps(AddRec->op_begin(), - AddRec->op_end()); + AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), - AddRec->hasNoUnsignedWrap() && HasNUW, - AddRec->hasNoSignedWrap() && HasNSW); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; From nicholas at mxc.ca Thu Dec 17 23:00:22 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 17 Dec 2009 21:00:22 -0800 Subject: [llvm-commits] [llvm] r91617 - /llvm/trunk/include/llvm/ADT/SmallVector.h In-Reply-To: <200912172039.nBHKdYKI003088@zion.cs.uiuc.edu> References: <200912172039.nBHKdYKI003088@zion.cs.uiuc.edu> Message-ID: <4B2B0C66.3050007@mxc.ca> Steve Naroff wrote: > Author: snaroff > Date: Thu Dec 17 14:39:34 2009 > New Revision: 91617 > > URL: http://llvm.org/viewvc/llvm-project?rev=91617&view=rev > Log: > Fix Windows build breakage... 
> > Modified: > llvm/trunk/include/llvm/ADT/SmallVector.h > > Modified: llvm/trunk/include/llvm/ADT/SmallVector.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/SmallVector.h?rev=91617&r1=91616&r2=91617&view=diff > > ============================================================================== > --- llvm/trunk/include/llvm/ADT/SmallVector.h (original) > +++ llvm/trunk/include/llvm/ADT/SmallVector.h Thu Dec 17 14:39:34 2009 > @@ -209,7 +209,7 @@ > T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); > > // Copy the elements over. > - uninitialized_copy(this->begin(), this->end(), NewElts); > + this->uninitialized_copy(this->begin(), this->end(), NewElts); > > // Destroy the original elements. > destroy_range(this->begin(), this->end()); > @@ -495,15 +495,15 @@ > > // Copy over the elements that we're about to overwrite. > T *OldEnd = this->end(); > - setEnd(this->end() + NumToInsert); > + this->setEnd(this->end() + NumToInsert); Tab. > size_t NumOverwritten = OldEnd-I; > - uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); > + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); Tab. > > // Replace the overwritten part. > std::copy(From, From+NumOverwritten, I); > > // Insert the non-overwritten middle part. > - uninitialized_copy(From+NumOverwritten, To, OldEnd); > + this->uninitialized_copy(From+NumOverwritten, To, OldEnd); TAAAAABB!!!! 
Nick > return I; > } > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From clattner at apple.com Thu Dec 17 23:34:36 2009 From: clattner at apple.com (Chris Lattner) Date: Thu, 17 Dec 2009 21:34:36 -0800 Subject: [llvm-commits] [llvm] r91617 - /llvm/trunk/include/llvm/ADT/SmallVector.h In-Reply-To: <4B2B0C66.3050007@mxc.ca> References: <200912172039.nBHKdYKI003088@zion.cs.uiuc.edu> <4B2B0C66.3050007@mxc.ca> Message-ID: On Dec 17, 2009, at 9:00 PM, Nick Lewycky wrote: >> // Copy over the elements that we're about to overwrite. >> T *OldEnd = this->end(); >> - setEnd(this->end() + NumToInsert); >> + this->setEnd(this->end() + NumToInsert); > > Tab. I already fixed these. Thanks Nick, -Chris > >> size_t NumOverwritten = OldEnd-I; >> - uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); >> + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); > > Tab. > >> >> // Replace the overwritten part. >> std::copy(From, From+NumOverwritten, I); >> >> // Insert the non-overwritten middle part. >> - uninitialized_copy(From+NumOverwritten, To, OldEnd); >> + this->uninitialized_copy(From+NumOverwritten, To, OldEnd); > > TAAAAABB!!!! 
> > Nick > >> return I; >> } >> >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From nicholas at mxc.ca Thu Dec 17 23:41:44 2009 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 17 Dec 2009 21:41:44 -0800 Subject: [llvm-commits] [llvm] r91617 - /llvm/trunk/include/llvm/ADT/SmallVector.h In-Reply-To: References: <200912172039.nBHKdYKI003088@zion.cs.uiuc.edu> <4B2B0C66.3050007@mxc.ca> Message-ID: <4B2B1618.6080103@mxc.ca> Chris Lattner wrote: > > On Dec 17, 2009, at 9:00 PM, Nick Lewycky wrote: >>> // Copy over the elements that we're about to overwrite. >>> T *OldEnd = this->end(); >>> - setEnd(this->end() + NumToInsert); >>> + this->setEnd(this->end() + NumToInsert); >> >> Tab. > > I already fixed these. Thanks Nick, So indeed! Thanks Chris! Nick > -Chris > >> >>> size_t NumOverwritten = OldEnd-I; >>> - uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); >>> + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); >> >> Tab. >> >>> >>> // Replace the overwritten part. >>> std::copy(From, From+NumOverwritten, I); >>> >>> // Insert the non-overwritten middle part. >>> - uninitialized_copy(From+NumOverwritten, To, OldEnd); >>> + this->uninitialized_copy(From+NumOverwritten, To, OldEnd); >> >> TAAAAABB!!!! 
>> >> Nick >> >>> return I; >>> } >>> >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From evan.cheng at apple.com Fri Dec 18 01:40:29 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 18 Dec 2009 07:40:29 -0000 Subject: [llvm-commits] [llvm] r91672 - in /llvm/trunk: lib/Target/X86/X86.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrSSE.td lib/Target/X86/X86Subtarget.cpp lib/Target/X86/X86Subtarget.h test/CodeGen/X86/break-sse-dep.ll Message-ID: <200912180740.nBI7eUQG026117@zion.cs.uiuc.edu> Author: evancheng Date: Fri Dec 18 01:40:29 2009 New Revision: 91672 URL: http://llvm.org/viewvc/llvm-project?rev=91672&view=rev Log: On recent Intel u-arch's, folding loads into some unary SSE instructions can be non-optimal. To be precise, we should avoid folding loads if the instructions only update part of the destination register, and the non-updated part is not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these instructions breaks the partial register dependency and it can improve performance. e.g. movss (%rdi), %xmm0 cvtss2sd %xmm0, %xmm0 instead of cvtss2sd (%rdi), %xmm0 An alternative method to break dependency is to clear the register first. e.g. 
xorps %xmm0, %xmm0 cvtss2sd (%rdi), %xmm0 Added: llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Modified: llvm/trunk/lib/Target/X86/X86.td llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/lib/Target/X86/X86Subtarget.cpp llvm/trunk/lib/Target/X86/X86Subtarget.h Modified: llvm/trunk/lib/Target/X86/X86.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=91672&r1=91671&r2=91672&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86.td (original) +++ llvm/trunk/lib/Target/X86/X86.td Fri Dec 18 01:40:29 2009 @@ -57,6 +57,8 @@ "Support 64-bit instructions">; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; +def FeatureBreakSSEDep : SubtargetFeature<"break-sse-dep", "BreakSSEDep","true", + "Should break SSE partial update dep with load / xorps">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions">; @@ -86,17 +88,27 @@ def : Proc<"pentium3", [FeatureSSE1]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; -def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem, + 
FeatureBreakSSEDep]>; +def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; +def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureBreakSSEDep]>; // Sandy Bridge does not have FMA -def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; +def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit, + FeatureBreakSSEDep]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=91672&r1=91671&r2=91672&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Fri Dec 18 01:40:29 2009 @@ -2370,6 +2370,23 @@ // Check switch flag if (NoFusing) return NULL; + if (TM.getSubtarget().shouldBreakSSEDep()) + switch (MI->getOpcode()) { + case X86::CVTSD2SSrr: + case X86::Int_CVTSD2SSrr: + case X86::CVTSS2SDrr: + case X86::Int_CVTSS2SDrr: + case X86::RCPSSr: + case X86::RCPSSr_Int: + case X86::ROUNDSDr_Int: + case X86::ROUNDSSr_Int: + case X86::RSQRTSSr: + case X86::RSQRTSSr_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + return 0; + } + const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Alignment = MFI->getObjectAlignment(FrameIndex); @@ -2405,6 +2422,23 @@ // Check switch flag if (NoFusing) return NULL; 
+ if (TM.getSubtarget().shouldBreakSSEDep()) + switch (MI->getOpcode()) { + case X86::CVTSD2SSrr: + case X86::Int_CVTSD2SSrr: + case X86::CVTSS2SDrr: + case X86::Int_CVTSS2SDrr: + case X86::RCPSSr: + case X86::RCPSSr_Int: + case X86::ROUNDSDr_Int: + case X86::ROUNDSSr_Int: + case X86::RSQRTSSr: + case X86::RSQRTSSr_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + return 0; + } + // Determine the alignment of the load. unsigned Alignment = 0; if (LoadMI->hasOneMemOperand()) Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=91672&r1=91671&r2=91672&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Dec 18 01:40:29 2009 @@ -301,6 +301,8 @@ def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def SSEBreakDep : Predicate<"Subtarget->shouldBreakSSEDep() && !OptForSize">; +def NoSSEBreakDep: Predicate<"!Subtarget->shouldBreakSSEDep() || OptForSize">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=91672&r1=91671&r2=91672&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec 18 01:40:29 2009 @@ -824,9 +824,10 @@ } // Scalar operation, mem. - def SSm : SSI; + [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, + Requires<[HasSSE1, NoSSEBreakDep]>; // Vector operation, reg. 
def PSr : PSI; -def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), +def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (fround (loadf64 addr:$src)))]>; + [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD, + Requires<[HasSSE2, NoSSEBreakDep]>; def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src), "cvtsi2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (sint_to_fp GR32:$src))]>; @@ -1155,7 +1157,10 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, - Requires<[HasSSE2]>; + Requires<[HasSSE2, NoSSEBreakDep]>; + +def : Pat<(extloadf32 addr:$src), + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>; // Match intrinsics which expect XMM operand(s). def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -3220,13 +3225,14 @@ OpSize; // Vector intrinsic operation, mem - def PSm_Int : SS4AIi8, - OpSize; + TA, OpSize, + Requires<[HasSSE41, NoSSEBreakDep]>; // Vector intrinsic operation, reg def PDr_Int : SS4AIi8= 13); + BreakSSEDep = IsIntel; GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; @@ -286,6 +287,7 @@ , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) + , BreakSSEDep(false) , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=91672&r1=91671&r2=91672&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Dec 18 01:40:29 2009 @@ -77,6 +77,14 @@ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. 
bool IsBTMemSlow; + + /// BreakSSEDep - True if codegen should unfold load or insert xorps / pxor + /// to break register dependency for a partial register update SSE + /// instruction. This is needed for instructions such as CVTSS2SD which + /// only update the lower part of the register, and the result of the updated + /// part does not depend on the contents of the destination before the + /// instruction, and the non-updated portion of the register is not used. + bool BreakSSEDep; /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. @@ -142,6 +150,7 @@ bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool shouldBreakSSEDep() const { return BreakSSEDep; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } Added: llvm/trunk/test/CodeGen/X86/break-sse-dep.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-sse-dep.ll?rev=91672&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/break-sse-dep.ll (added) +++ llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Fri Dec 18 01:40:29 2009 @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO + +define double @t1(float* nocapture %x) nounwind readonly ssp { +entry: +; YES: t1: +; YES: movss (%rdi), %xmm0 +; YES; cvtss2sd %xmm0, %xmm0 + +; NO: t1: +; NO; cvtss2sd (%rdi), %xmm0 + %0 = load float* %x, align 4 + %1 = fpext float %0 to double + ret double %1 +} + +define float @t2(double* nocapture %x) nounwind readonly ssp { +entry: +; YES: t2: +; YES: movsd (%rdi), %xmm0 +; YES; cvtsd2ss %xmm0, %xmm0 + +; NO: t2: +; NO; cvtsd2ss (%rdi), %xmm0 + %0 = load 
double* %x, align 8 + %1 = fptrunc double %0 to float + ret float %1 +} From evan.cheng at apple.com Fri Dec 18 01:48:44 2009 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 18 Dec 2009 07:48:44 -0000 Subject: [llvm-commits] [test-suite] r91673 - /test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c Message-ID: <200912180748.nBI7milI026365@zion.cs.uiuc.edu> Author: evancheng Date: Fri Dec 18 01:48:44 2009 New Revision: 91673 URL: http://llvm.org/viewvc/llvm-project?rev=91673&view=rev Log: Add a small test case to show the benefit of not folding load into cvtss2sd. Added: test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c Added: test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c?rev=91673&view=auto ============================================================================== --- test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c (added) +++ test-suite/trunk/SingleSource/Benchmarks/Misc/fp-convert.c Fri Dec 18 01:48:44 2009 @@ -0,0 +1,39 @@ +#include +#include + +double loop(float *x, float *y, long length) { + long i; + double accumulator = 0.0; + for (i=0; i On Thu, Dec 17, 2009 at 11:40 PM, Evan Cheng wrote: > --- llvm/trunk/test/CodeGen/X86/break-sse-dep.ll (added) > +++ llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Fri Dec 18 01:40:29 2009 > @@ -0,0 +1,28 @@ > +; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES > +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO > + > +define double @t1(float* nocapture %x) nounwind readonly ssp { > +entry: > +; YES: t1: > +; YES: movss (%rdi), %xmm0 > +; YES; cvtss2sd %xmm0, %xmm0 > + > +; NO: t1: > +; NO; cvtss2sd (%rdi), %xmm0 > + ?%0 = load float* %x, align 4 > + ?%1 = fpext float %0 to double > + ?ret double %1 > +} > + > +define float @t2(double* nocapture %x) nounwind readonly ssp { > +entry: > +; YES: t2: 
> +; YES: movsd (%rdi), %xmm0 > +; YES; cvtsd2ss %xmm0, %xmm0 > + > +; NO: t2: > +; NO; cvtsd2ss (%rdi), %xmm0 > + ?%0 = load double* %x, align 8 > + ?%1 = fptrunc double %0 to float > + ?ret float %1 > +} > There are some semi-colons where there should be colons. (It'd be nice if FileCheck gave an error for that...) -Eli From eli.friedman at gmail.com Fri Dec 18 02:22:36 2009 From: eli.friedman at gmail.com (Eli Friedman) Date: Fri, 18 Dec 2009 08:22:36 -0000 Subject: [llvm-commits] [llvm] r91675 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll Message-ID: <200912180822.nBI8Ma46027424@zion.cs.uiuc.edu> Author: efriedma Date: Fri Dec 18 02:22:35 2009 New Revision: 91675 URL: http://llvm.org/viewvc/llvm-project?rev=91675&view=rev Log: Optimize icmp of null and select of two constants even if the select has multiple uses. (The construct in question was found in gcc.) Added: llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91675&r1=91674&r2=91675&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Fri Dec 18 02:22:35 2009 @@ -6356,24 +6356,26 @@ // comparison into the select arms, which will cause one to be // constant folded and the select turned into a bitwise or. Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. 
- Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC, I.getName()); - } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. + if (Constant *C = dyn_cast(LHSI->getOperand(1))) + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + if (Constant *C = dyn_cast(LHSI->getOperand(2))) + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + + // We only want to perform this transformation if it will not lead to + // additional code. This is true if either both sides of the select + // fold to a constant (in which case the icmp is replaced with a select + // which will usually simplify) or this is the only user of the + // select (in which case we are trading a select+icmp for a simpler + // select+icmp). + if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { + if (!Op1) Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, I.getName()); - } - } - - if (Op1) + if (!Op2) + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + } break; } case Instruction::Call: Added: llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll?rev=91675&view=auto ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll (added) +++ llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll Fri Dec 18 02:22:35 2009 @@ -0,0 +1,16 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + + at .str254 = internal constant [2 x i8] c".\00" + at .str557 = internal constant [3 x i8] c"::\00" + +define i8* @demangle_qualified(i32 %isfuncname) nounwind { +entry: + %tobool272 = icmp ne i32 
%isfuncname, 0 + %cond276 = select i1 %tobool272, i8* getelementptr inbounds ([2 x i8]* @.str254, i32 0, i32 0), i8* getelementptr inbounds ([3 x i8]* @.str557, i32 0, i32 0) ; [#uses=4] + %cmp.i504 = icmp eq i8* %cond276, null + %rval = getelementptr i8* %cond276, i1 %cmp.i504 + ret i8* %rval +} + +; CHECK: %cond276 = select i1 +; CHECK: ret i8* %cond276 From chandlerc at google.com Fri Dec 18 02:31:05 2009 From: chandlerc at google.com (Chandler Carruth) Date: Fri, 18 Dec 2009 00:31:05 -0800 Subject: [llvm-commits] [llvm] r91552 - /llvm/trunk/utils/unittest/googletest/gtest.cc In-Reply-To: <1D581B9F-9A4A-42F5-874B-8E9C7ABBDAA0@gmail.com> References: <200912161936.nBGJaguJ028112@zion.cs.uiuc.edu> <1D581B9F-9A4A-42F5-874B-8E9C7ABBDAA0@gmail.com> Message-ID: <74c447500912180031g60199633mbc16f7fa6587606d@mail.gmail.com> On Wed, Dec 16, 2009 at 3:59 PM, Bill Wendling wrote: > On Dec 16, 2009, at 3:27 PM, Eli Friedman wrote: >> On Wed, Dec 16, 2009 at 11:36 AM, Bill Wendling wrote: >>> Author: void >>> Date: Wed Dec 16 13:36:42 2009 >>> New Revision: 91552 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=91552&view=rev >>> Log: >>> Remove superfluous 'extern' variable that was causing a warning with clang. >>> >>> Modified: >>> llvm/trunk/utils/unittest/googletest/gtest.cc >> >> Do we care whether fixes are sent upstream for googletest? >> > If they don't mind taking the patch, then sure. It has already been fixed upstream IIRC. We're parsing gtest and gmock with Clang regularly. 
> > -bw > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From clattner at apple.com Fri Dec 18 03:06:37 2009 From: clattner at apple.com (Chris Lattner) Date: Fri, 18 Dec 2009 01:06:37 -0800 Subject: [llvm-commits] [llvm] r91675 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll In-Reply-To: <200912180822.nBI8Ma46027424@zion.cs.uiuc.edu> References: <200912180822.nBI8Ma46027424@zion.cs.uiuc.edu> Message-ID: <9008A460-470C-4AC2-BB6E-74E3C930D6DB@apple.com> On Dec 18, 2009, at 12:22 AM, Eli Friedman wrote: Author: efriedma > Date: Fri Dec 18 02:22:35 2009 > New Revision: 91675 > > URL: http://llvm.org/viewvc/llvm-project?rev=91675&view=rev > Log: > Optimize icmp of null and select of two constants even if the select has > multiple uses. (The construct in question was found in gcc.) Nifty, this might be a good use for SimplifyCmpInst, instead of special casing constants. -Chris > > > Added: > llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll > Modified: > llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp > > Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=91675&r1=91674&r2=91675&view=diff > > ============================================================================== > --- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original) > +++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Fri Dec 18 02:22:35 2009 > @@ -6356,24 +6356,26 @@ > // comparison into the select arms, which will cause one to be > // constant folded and the select turned into a bitwise or. 
> Value *Op1 = 0, *Op2 = 0; > - if (LHSI->hasOneUse()) { > - if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { > - // Fold the known value into the constant operand. > - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); > - // Insert a new ICmp of the other select operand. > - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), > - RHSC, I.getName()); > - } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { > - // Fold the known value into the constant operand. > - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); > - // Insert a new ICmp of the other select operand. > + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) > + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); > + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) > + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); > + > + // We only want to perform this transformation if it will not lead to > + // additional code. This is true if either both sides of the select > + // fold to a constant (in which case the icmp is replaced with a select > + // which will usually simplify) or this is the only user of the > + // select (in which case we are trading a select+icmp for a simpler > + // select+icmp). 
> + if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { > + if (!Op1) > Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), > RHSC, I.getName()); > - } > - } > - > - if (Op1) > + if (!Op2) > + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), > + RHSC, I.getName()); > return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); > + } > break; > } > case Instruction::Call: > > Added: llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll?rev=91675&view=auto > > ============================================================================== > --- llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll (added) > +++ llvm/trunk/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll Fri Dec 18 02:22:35 2009 > @@ -0,0 +1,16 @@ > +; RUN: opt < %s -instcombine -S | FileCheck %s > + > + at .str254 = internal constant [2 x i8] c".\00" > + at .str557 = internal constant [3 x i8] c"::\00" > + > +define i8* @demangle_qualified(i32 %isfuncname) nounwind { > +entry: > + %tobool272 = icmp ne i32 %isfuncname, 0 > + %cond276 = select i1 %tobool272, i8* getelementptr inbounds ([2 x i8]* @.str254, i32 0, i32 0), i8* getelementptr inbounds ([3 x i8]* @.str557, i32 0, i32 0) ; [#uses=4] > + %cmp.i504 = icmp eq i8* %cond276, null > + %rval = getelementptr i8* %cond276, i1 %cmp.i504 > + ret i8* %rval > +} > + > +; CHECK: %cond276 = select i1 > +; CHECK: ret i8* %cond276 > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From foldr at codedgers.com Fri Dec 18 05:27:26 2009 From: foldr at codedgers.com (Mikhail Glushenkov) Date: Fri, 18 Dec 2009 11:27:26 -0000 Subject: [llvm-commits] [llvm] r91679 - in /llvm/trunk: test/LLVMC/OptionPreprocessor.td utils/TableGen/LLVMCConfigurationEmitter.cpp Message-ID: 
<200912181127.nBIBRQqW016576@zion.cs.uiuc.edu> Author: foldr Date: Fri Dec 18 05:27:26 2009 New Revision: 91679 URL: http://llvm.org/viewvc/llvm-project?rev=91679&view=rev Log: Make 'set_option' work with list options. This works now: (set_option "list_opt", ["val_1", "val_2", "val_3"]) Modified: llvm/trunk/test/LLVMC/OptionPreprocessor.td llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Modified: llvm/trunk/test/LLVMC/OptionPreprocessor.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/LLVMC/OptionPreprocessor.td?rev=91679&r1=91678&r2=91679&view=diff ============================================================================== --- llvm/trunk/test/LLVMC/OptionPreprocessor.td (original) +++ llvm/trunk/test/LLVMC/OptionPreprocessor.td Fri Dec 18 05:27:26 2009 @@ -27,8 +27,13 @@ // CHECK: W2 // CHECK: foo = true; // CHECK: foo_p = "asdf"; + // CHECK: foo_l.clear(); + // CHECK: foo_l.push_back("qwert"); + // CHECK: foo_l.push_back("yuiop"); + // CHECK: foo_l.push_back("asdf"); (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])), - [(warning "W2"), (set_option "foo"), (set_option "foo_p", "asdf")], + [(warning "W2"), (set_option "foo"), (set_option "foo_p", "asdf"), + (set_option "foo_l", ["qwert", "yuiop", "asdf"])], // CHECK: W3 // CHECK: foo = true; // CHECK: bar = true; Modified: llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp?rev=91679&r1=91678&r2=91679&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/LLVMCConfigurationEmitter.cpp Fri Dec 18 05:27:26 2009 @@ -2354,19 +2354,31 @@ d, IndentLevel, O); } - void onSetParameter(const DagInit& d, + void onSetListOrParameter(const DagInit& d, unsigned IndentLevel, raw_ostream& O) const { checkNumberOfArguments(d, 2); const std::string& OptName = 
InitPtrToString(d.getArg(0)); - const std::string& Value = InitPtrToString(d.getArg(1)); + const Init* Value = d.getArg(1); const OptionDescription& OptDesc = OptDescs_.FindOption(OptName); - if (OptDesc.isParameter()) + if (OptDesc.isList()) { + const ListInit& List = InitPtrToList(Value); + + O.indent(IndentLevel) << OptDesc.GenVariableName() << ".clear();\n"; + for (ListInit::const_iterator B = List.begin(), E = List.end(); + B != E; ++B) { + O.indent(IndentLevel) << OptDesc.GenVariableName() << ".push_back(\"" + << InitPtrToString(*B) << "\");\n"; + } + } + else if (OptDesc.isParameter()) { + const std::string& Str = InitPtrToString(Value); O.indent(IndentLevel) << OptDesc.GenVariableName() - << " = \"" << Value << "\";\n"; - else - throw "Two-argument 'set_option' " - "can be only applied to parameter options!"; + << " = \"" << Str << "\";\n"; + } + else { + throw "set_option: -" + OptName + ": is not a list or parameter option!"; + } } void onSetSwitch(const Init* I, @@ -2377,7 +2389,7 @@ if (OptDesc.isSwitch()) O.indent(IndentLevel) << OptDesc.GenVariableName() << " = true;\n"; else - throw "One-argument 'set_option' can be only applied to switch options!"; + throw "set_option: -" + OptName + " is not a switch option!"; } void onSetOption(const DagInit& d, @@ -2385,9 +2397,10 @@ { checkNumberOfArguments(d, 1); - // Two arguments: (set_option "parameter", "value") + // Two arguments: (set_option "parameter", VALUE), where VALUE is either a + // string or a string list. 
if (d.getNumArgs() > 1) - this->onSetParameter(d, IndentLevel, O); + this->onSetListOrParameter(d, IndentLevel, O); // One argument: (set_option "switch") // or (set_option ["switch1", "switch2", ...]) else From tilmann.scheller at googlemail.com Fri Dec 18 07:00:17 2009 From: tilmann.scheller at googlemail.com (Tilmann Scheller) Date: Fri, 18 Dec 2009 13:00:17 -0000 Subject: [llvm-commits] [llvm] r91680 - in /llvm/trunk/lib/Target/PowerPC: PPCISelLowering.cpp PPCISelLowering.h PPCInstr64Bit.td PPCInstrInfo.td Message-ID: <200912181300.nBID0HUA019692@zion.cs.uiuc.edu> Author: tilmann Date: Fri Dec 18 07:00:15 2009 New Revision: 91680 URL: http://llvm.org/viewvc/llvm-project?rev=91680&view=rev Log: Add support for calls through function pointers in the 64-bit PowerPC SVR4 ABI. Patch contributed by Ken Werner of IBM! Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=91680&r1=91679&r2=91680&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Dec 18 07:00:15 2009 @@ -419,6 +419,9 @@ case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; + case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE"; + case PPCISD::LOAD: return "PPCISD::LOAD"; + case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; @@ -2428,7 +2431,7 @@ SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, SmallVector, 8> 
&RegsToPass, SmallVector &Ops, std::vector &NodeTys, - bool isSVR4ABI) { + bool isPPC64, bool isSVR4ABI) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. @@ -2449,6 +2452,74 @@ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; + + if (isSVR4ABI && isPPC64) { + // Function pointers in the 64-bit SVR4 ABI do not point to the function + // entry point, but to the function descriptor (the function entry point + // address is part of the function descriptor though). + // The function descriptor is a three doubleword structure with the + // following fields: function entry point, TOC base address and + // environment pointer. + // Thus for a call through a function pointer, the following actions need + // to be performed: + // 1. Save the TOC of the caller in the TOC save area of its stack + // frame (this is done in LowerCall_Darwin()). + // 2. Load the address of the function entry point from the function + // descriptor. + // 3. Load the TOC of the callee from the function descriptor into r2. + // 4. Load the environment pointer from the function descriptor into + // r11. + // 5. Branch to the function entry point address. + // 6. On return of the callee, the TOC of the caller needs to be + // restored (this is done in FinishCall()). + // + // All those operations are flagged together to ensure that no other + // operations can be scheduled in between. E.g. without flagging the + // operations together, a TOC access in the caller could be scheduled + // between the load of the callee TOC and the branch to the callee, which + // results in the TOC access going through the TOC of the callee instead + // of going through the TOC of the caller, which leads to incorrect code. 
+ + // Load the address of the function entry point from the function + // descriptor. + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Flag); + SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, + InFlag.getNode() ? 3 : 2); + Chain = LoadFuncPtr.getValue(1); + InFlag = LoadFuncPtr.getValue(2); + + // Load environment pointer into r11. + // Offset of the environment pointer within the function descriptor. + SDValue PtrOff = DAG.getIntPtrConstant(16); + + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); + SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, + InFlag); + Chain = LoadEnvPtr.getValue(1); + InFlag = LoadEnvPtr.getValue(2); + + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + + // Load TOC of the callee into r2. We are using a target-specific load + // with r2 hard coded, because the result of a target-independent load + // would never go directly into r2, since r2 is a reserved register (which + // prevents the register allocator from allocating it), resulting in an + // additional register being allocated and an unnecessary move instruction + // being generated. + VTs = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, + Callee, InFlag); + Chain = LoadTOCPtr.getValue(0); + InFlag = LoadTOCPtr.getValue(1); + + MTCTROps[0] = Chain; + MTCTROps[1] = LoadFuncPtr; + MTCTROps[2] = InFlag; + } + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 2 + (InFlag.getNode() != 0)); InFlag = Chain.getValue(1); @@ -2523,6 +2594,7 @@ SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, isTailCall, RegsToPass, Ops, NodeTys, + PPCSubTarget.isPPC64(), PPCSubTarget.isSVR4ABI()); // When performing tail call optimization the callee pops its arguments off @@ -2569,8 +2641,23 @@ // stack frame. 
If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - // Insert NOP. - InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag); + if (CallOpc == PPCISD::BCTRL_SVR4) { + // This is a call through a function pointer. + // Restore the caller TOC from the save area into R2. + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. + // We are using a target-specific load with r2 hard coded, because the + // result of a target-independent load would never go directly into r2, + // since r2 is a reserved register (which prevents the register allocator + // from allocating it), resulting in an additional register being + // allocated and an unnecessary move instruction being generated. + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } else { + // Otherwise insert NOP. + InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag); + } } Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -3123,6 +3210,21 @@ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); + // Check if this is an indirect call (MTCTR/BCTRL). + // See PrepareCall() for more information about calls through function + // pointers in the 64-bit SVR4 ABI. + if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() && + !dyn_cast(Callee) && + !dyn_cast(Callee) && + !isBLACompatibleAddress(Callee, DAG)) { + // Load r2 into a virtual register and store it to the TOC save area. + SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); + // TOC save area offset. 
+ SDValue PtrOff = DAG.getIntPtrConstant(40); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + } + // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=91680&r1=91679&r2=91680&view=diff ============================================================================== --- llvm/trunk/