It can then use the following routines to access safe points.
LLVM allows a collector to print arbitrary assembly code before and after
the rest of a module's assembly code. From the latter callback, the collector
-can print stack maps from CollectorModuleMetadata populated by the code
-generator.
Note that LLVM does not currently support garbage collection code generation
-in the JIT, nor using the object writers.
Note that LLVM does not currently have analogous APIs to support code
+generation in the JIT, nor using the object writers.
. The collector itself
+contains the stack map for the entire module, and may access the
+CollectorMetadata using its own begin() and end()
+methods. Here's a realistic example:
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetAsmInfo.h"
-void MyCollector::finishAssembly(Module &M,
- CollectorModuleMetadata &MMD,
- std::ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) const {
+void MyCollector::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
+ // Nothing to do.
+}
+
+void MyCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
// Set up for emitting addresses.
const char *AddressDirective;
int AddressAlignLog;
- if (TAI.getAddressSize() == sizeof(int32_t)) {
+ if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
AddressDirective = TAI.getData32bitsDirective();
AddressAlignLog = 2;
} else {
@@ -1212,8 +1247,7 @@
AP.SwitchToDataSection(TAI.getDataSection());
// For each function...
- for (CollectorModuleMetadata::iterator FI = MMD.begin(),
- FE = MMD.end(); FI != FE; ++FI) {
+ for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
CollectorMetadata &MD = **FI;
// Emit this data structure:
Modified: llvm/trunk/include/llvm/CodeGen/Collector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Collector.h?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/Collector.h (original)
+++ llvm/trunk/include/llvm/CodeGen/Collector.h Mon Dec 10 18:30:17 2007
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// GCInfo records sufficient information about a machine function to enable
+// Collector records sufficient information about a machine function to enable
// accurate garbage collectors. Specifically:
//
// - Safe points
@@ -25,8 +25,8 @@
// This generic information should be used by ABI-specific passes to emit support
// tables for the runtime garbage collector.
//
-// GCSafePointPass identifies the GC safe points in the machine code. (Roots are
-// identified in SelectionDAGISel.)
+// MachineCodeAnalysis identifies the GC safe points in the machine code. (Roots
+// are identified in SelectionDAGISel.)
//
//===----------------------------------------------------------------------===//
@@ -35,19 +35,25 @@
#include "llvm/CodeGen/CollectorMetadata.h"
#include
+#include
namespace llvm {
- class AsmPrinter;
- class FunctionPassManager;
- class PassManager;
- class TargetAsmInfo;
-
-
/// Collector describes a garbage collector's code generation requirements,
/// and provides overridable hooks for those needs which cannot be abstractly
/// described.
class Collector {
+ public:
+ typedef std::vector list_type;
+ typedef list_type::iterator iterator;
+
+ private:
+ friend class CollectorModuleMetadata;
+ const Module *M;
+ std::string Name;
+
+ list_type Functions;
+
protected:
unsigned NeededSafePoints; //< Bitmask of required safe points.
bool CustomReadBarriers; //< Default is to insert loads.
@@ -55,16 +61,20 @@
bool CustomRoots; //< Default is to pass through to backend.
bool InitRoots; //< If set, roots are nulled during lowering.
- /// If any of the actions are set to Custom, this is expected to be
- /// overriden to create a transform to lower those actions to LLVM IR.
- virtual Pass *createCustomLoweringPass() const;
-
public:
Collector();
virtual ~Collector();
+ /// getName - The name of the collector, for debugging.
+ ///
+ const std::string &getName() const { return Name; }
+
+ /// getModule - The module upon which the collector is operating.
+ ///
+ const Module &getModule() const { return *M; }
+
/// True if this collector requires safe points of any kind. By default,
/// none are recorded.
bool needsSafePoints() const { return NeededSafePoints != 0; }
@@ -94,40 +104,30 @@
bool initializeRoots() const { return InitRoots; }
- /// Adds LLVM IR transforms to handle collection intrinsics. By default,
- /// read- and write barriers are replaced with direct memory accesses, and
- /// roots are passed on to the code generator.
- void addLoweringPasses(FunctionPassManager &PM) const;
-
- /// Same as addLoweringPasses(FunctionPassManager &), except uses a
- /// PassManager for compatibility with unusual backends (such as MSIL or
- /// CBackend).
- void addLoweringPasses(PassManager &PM) const;
-
- /// Adds target-independent MachineFunction pass to mark safe points. This
- /// is added very late during code generation, just prior to output, and
- /// importantly after all CFG transformations (like branch folding).
- void addGenericMachineCodePass(FunctionPassManager &PM,
- const TargetMachine &TM, bool Fast) const;
-
/// beginAssembly/finishAssembly - Emit module metadata as assembly code.
- virtual void beginAssembly(Module &M, std::ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) const;
- virtual void finishAssembly(Module &M, CollectorModuleMetadata &CMM,
- std::ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) const;
-
- private:
- bool NeedsDefaultLoweringPass() const;
- bool NeedsCustomLoweringPass() const;
-
+ virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+ virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI);
+
+ /// begin/end - Iterators for function metadata.
+ ///
+ iterator begin() { return Functions.begin(); }
+ iterator end() { return Functions.end(); }
+
+ /// insertFunctionMetadata - Creates metadata for a function.
+ ///
+ CollectorMetadata *insertFunctionMetadata(const Function &F);
+
+ /// initializeCustomLowering/performCustomLowering - If any of the actions
+ /// are set to custom, performCustomLowering must be overridden to create a
+ /// transform to lower those actions to LLVM IR. initializeCustomLowering
+ /// is optional to override. These are the only Collector methods through
+ /// which the LLVM IR can be modified.
+ virtual bool initializeCustomLowering(Module &F);
+ virtual bool performCustomLowering(Function &F);
};
-
- /// If set, the code generator should generate garbage collection as specified
- /// by the collector properties.
- extern const Collector *TheCollector; // FIXME: Find a better home!
-
}
#endif
Modified: llvm/trunk/include/llvm/CodeGen/CollectorMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/CollectorMetadata.h?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/CollectorMetadata.h (original)
+++ llvm/trunk/include/llvm/CodeGen/CollectorMetadata.h Mon Dec 10 18:30:17 2007
@@ -9,7 +9,7 @@
//
// This file declares the CollectorMetadata and CollectorModuleMetadata classes,
// which are used as a communication channel from the target code generator
-// to the target garbage collector. This interface allows code generators and
+// to the target garbage collectors. This interface allows code generators and
// garbage collectors to be developed independently.
//
// The CollectorMetadata class records the data necessary to build a type
@@ -37,19 +37,14 @@
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
namespace llvm {
+ class AsmPrinter;
+ class Collector;
class Constant;
-
-
- /// Creates a pass to print collector metadata.
- ///
- Pass *createCollectorMetadataPrinter(std::ostream &OS);
-
- /// Creates a pass to destroy collector metadata.
- ///
- Pass *createCollectorMetadataDeleter();
+ class TargetAsmInfo;
namespace GC {
@@ -77,7 +72,7 @@
struct GCRoot {
int Num; //< Usually a frame index.
int StackOffset; //< Offset from the stack pointer.
- Constant *Metadata; //< From the call to llvm.gcroot.
+ Constant *Metadata; //< Metadata straight from the call to llvm.gcroot.
GCRoot(int N, Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {}
};
@@ -93,6 +88,7 @@
private:
const Function &F;
+ Collector &C;
uint64_t FrameSize;
std::vector Roots;
std::vector SafePoints;
@@ -107,14 +103,18 @@
// The bit vector is the more compact representation where >3.2% of roots
// are live per safe point (1.5% on 64-bit hosts).
- friend class CollectorModuleMetadata;
- CollectorMetadata(const Function &F);
-
public:
+ CollectorMetadata(const Function &F, Collector &C);
~CollectorMetadata();
+ /// getFunction - Return the function to which this metadata applies.
+ ///
const Function &getFunction() const { return F; }
+ /// getCollector - Return the collector for the function.
+ ///
+ Collector &getCollector() { return C; }
+
/// addStackRoot - Registers a root that lives on the stack. Num is the
/// stack object ID for the alloca (if the code generator is using
/// MachineFrameInfo).
@@ -157,37 +157,36 @@
/// CollectorModuleMetadata - Garbage collection metadata for a whole module.
///
class CollectorModuleMetadata : public ImmutablePass {
- typedef std::vector list_type;
- typedef DenseMap map_type;
+ typedef StringMap collector_map_type;
+ typedef std::vector list_type;
+ typedef DenseMap function_map_type;
+
+ collector_map_type NameMap;
+ list_type Collectors;
+ function_map_type Map;
- Module *Mod;
- list_type Functions;
- map_type Map;
+ Collector *getOrCreateCollector(const Module *M, const std::string &Name);
public:
- typedef list_type::iterator iterator;
+ typedef list_type::const_iterator iterator;
static char ID;
CollectorModuleMetadata();
~CollectorModuleMetadata();
- /// clear - Used to delete module metadata. Collector invokes this as
- /// necessary.
+ /// clear - Used to delete module metadata. The metadata deleter pass calls
+ /// this.
void clear();
- /// begin/end - Iterators for function metadata.
- ///
- iterator begin() { return Functions.begin(); }
- iterator end() { return Functions.end(); }
-
- /// insert - Creates metadata for a function.
+ /// begin/end - Iterators for collectors.
///
- CollectorMetadata& insert(const Function *F);
+ iterator begin() const { return Collectors.begin(); }
+ iterator end() const { return Collectors.end(); }
- /// get - Looks up existing function metadata.
+ /// get - Look up function metadata.
///
- CollectorMetadata* get(const Function *F) const;
+ CollectorMetadata &get(const Function &F);
};
}
Modified: llvm/trunk/include/llvm/CodeGen/Collectors.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Collectors.h?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/Collectors.h (original)
+++ llvm/trunk/include/llvm/CodeGen/Collectors.h Mon Dec 10 18:30:17 2007
@@ -25,6 +25,9 @@
///
typedef Registry CollectorRegistry;
+ /// FIXME: Collector instances are not useful on their own. These no longer
+ /// serve any purpose except to link in the plugins.
+
/// Creates an ocaml-compatible garbage collector.
Collector *createOcamlCollector();
Modified: llvm/trunk/include/llvm/CodeGen/Passes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/Passes.h?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/Passes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/Passes.h Mon Dec 10 18:30:17 2007
@@ -135,6 +135,24 @@
/// for the Sparc.
FunctionPass *getRegisterAllocator(TargetMachine &T);
+ /// IntrinsicLowering Pass - Performs target-independent LLVM IR
+ /// transformations for highly portable collectors.
+ FunctionPass *createGCLoweringPass();
+
+ /// MachineCodeAnalysis Pass - Target-independent pass to mark safe points in
+ /// machine code. Must be added very late during code generation, just prior
+ /// to output, and importantly after all CFG transformations (such as branch
+ /// folding).
+ FunctionPass *createGCMachineCodeAnalysisPass();
+
+ /// Deleter Pass - Releases collector metadata.
+ ///
+ FunctionPass *createCollectorMetadataDeleter();
+
+ /// Creates a pass to print collector metadata.
+ ///
+ FunctionPass *createCollectorMetadataPrinter(std::ostream &OS);
+
/// createMachineLICMPass - This pass performs LICM on machine instructions.
///
FunctionPass *createMachineLICMPass();
Modified: llvm/trunk/lib/CodeGen/Collector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Collector.cpp?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/Collector.cpp (original)
+++ llvm/trunk/lib/CodeGen/Collector.cpp Mon Dec 10 18:30:17 2007
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Collector.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
@@ -29,38 +30,40 @@
namespace {
- /// This pass rewrites calls to the llvm.gcread or llvm.gcwrite intrinsics,
- /// replacing them with simple loads and stores as directed by the Collector.
- /// This is useful for most garbage collectors.
+ /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// directed by the Collector. It also performs automatic root initialization
+ /// and custom intrinsic lowering.
class VISIBILITY_HIDDEN LowerIntrinsics : public FunctionPass {
- const Collector &Coll;
-
/// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the
/// llvm.gc* intrinsics.
Function *GCRootInt, *GCReadInt, *GCWriteInt;
+ static bool NeedsDefaultLoweringPass(const Collector &C);
+ static bool NeedsCustomLoweringPass(const Collector &C);
static bool CouldBecomeSafePoint(Instruction *I);
- static void InsertRootInitializers(Function &F,
+ bool PerformDefaultLowering(Function &F, Collector &Coll);
+ static bool InsertRootInitializers(Function &F,
AllocaInst **Roots, unsigned Count);
public:
static char ID;
- LowerIntrinsics(const Collector &GC);
+ LowerIntrinsics();
const char *getPassName() const;
+ void getAnalysisUsage(AnalysisUsage &AU) const;
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
};
- /// This is a target-independent pass over the machine function representation
- /// to identify safe points for the garbage collector in the machine code. It
- /// inserts labels at safe points and populates the GCInfo class.
+ /// MachineCodeAnalysis - This is a target-independent pass over the machine
+ /// function representation to identify safe points for the garbage collector
+ /// in the machine code. It inserts labels at safe points and populates a
+ /// CollectorMetadata record for each function.
class VISIBILITY_HIDDEN MachineCodeAnalysis : public MachineFunctionPass {
- const Collector &Coll;
- const TargetMachine &Targ;
-
+ const TargetMachine *TM;
CollectorMetadata *MD;
MachineModuleInfo *MMI;
const TargetInstrInfo *TII;
@@ -76,7 +79,7 @@
public:
static char ID;
- MachineCodeAnalysis(const Collector &C, const TargetMachine &T);
+ MachineCodeAnalysis();
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -87,8 +90,6 @@
// -----------------------------------------------------------------------------
-const Collector *llvm::TheCollector = 0;
-
Collector::Collector() :
NeededSafePoints(0),
CustomReadBarriers(false),
@@ -97,85 +98,85 @@
InitRoots(true)
{}
-Collector::~Collector() {}
-
-void Collector::addLoweringPasses(FunctionPassManager &PM) const {
- if (NeedsDefaultLoweringPass())
- PM.add(new LowerIntrinsics(*this));
-
- if (NeedsCustomLoweringPass())
- PM.add(createCustomLoweringPass());
-}
-
-void Collector::addLoweringPasses(PassManager &PM) const {
- if (NeedsDefaultLoweringPass())
- PM.add(new LowerIntrinsics(*this));
-
- if (NeedsCustomLoweringPass())
- PM.add(createCustomLoweringPass());
-}
-
-void Collector::addGenericMachineCodePass(FunctionPassManager &PM,
- const TargetMachine &TM,
- bool Fast) const {
- if (needsSafePoints())
- PM.add(new MachineCodeAnalysis(*this, TM));
-}
-
-bool Collector::NeedsDefaultLoweringPass() const {
- // Default lowering is necessary only if read or write barriers have a default
- // action. The default for roots is no action.
- return !customWriteBarrier()
- || !customReadBarrier()
- || initializeRoots();
-}
-
-bool Collector::NeedsCustomLoweringPass() const {
- // Custom lowering is only necessary if enabled for some action.
- return customWriteBarrier()
- || customReadBarrier()
- || customRoots();
-}
-
-Pass *Collector::createCustomLoweringPass() const {
- cerr << "Collector must override createCustomLoweringPass.\n";
+Collector::~Collector() {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+
+ Functions.clear();
+}
+
+bool Collector::initializeCustomLowering(Module &M) { return false; }
+
+bool Collector::performCustomLowering(Function &F) {
+ cerr << "gc " << getName() << " must override performCustomLowering.\n";
abort();
return 0;
}
-void Collector::beginAssembly(Module &M, std::ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) const {
+void Collector::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
// Default is no action.
}
-void Collector::finishAssembly(Module &M, CollectorModuleMetadata &CMM,
- std::ostream &OS, AsmPrinter &AP,
- const TargetAsmInfo &TAI) const {
+void Collector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+ const TargetAsmInfo &TAI) {
// Default is no action.
}
+
+CollectorMetadata *Collector::insertFunctionMetadata(const Function &F) {
+ CollectorMetadata *CM = new CollectorMetadata(F, *this);
+ Functions.push_back(CM);
+ return CM;
+}
// -----------------------------------------------------------------------------
+FunctionPass *llvm::createGCLoweringPass() {
+ return new LowerIntrinsics();
+}
+
char LowerIntrinsics::ID = 0;
-LowerIntrinsics::LowerIntrinsics(const Collector &C)
- : FunctionPass((intptr_t)&ID), Coll(C),
+LowerIntrinsics::LowerIntrinsics()
+ : FunctionPass((intptr_t)&ID),
GCRootInt(0), GCReadInt(0), GCWriteInt(0) {}
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
-/// doInitialization - If this module uses the GC intrinsics, find them now. If
-/// not, this pass does not do anything.
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
bool LowerIntrinsics::doInitialization(Module &M) {
GCReadInt = M.getFunction("llvm.gcread");
GCWriteInt = M.getFunction("llvm.gcwrite");
GCRootInt = M.getFunction("llvm.gcroot");
- return false;
+
+ // FIXME: This is rather antisocial in the context of a JIT since it performs
+ // work against the entire module. But this cannot be done at
+ // runFunction time (initializeCustomLowering likely needs to change
+ // the module).
+ CollectorModuleMetadata *CMM = getAnalysisToUpdate();
+ assert(CMM && "LowerIntrinsics didn't require CollectorModuleMetadata!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasCollector())
+ CMM->get(*I); // Instantiate the Collector.
+
+ bool MadeChange = false;
+ for (CollectorModuleMetadata::iterator I = CMM->begin(),
+ E = CMM->end(); I != E; ++I)
+ if (NeedsCustomLoweringPass(**I))
+ if ((*I)->initializeCustomLowering(M))
+ MadeChange = true;
+
+ return MadeChange;
}
-void LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
unsigned Count) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
@@ -190,11 +191,32 @@
InitedRoots.insert(AI);
// Add root initializers.
+ bool MadeChange = false;
+
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
- if (!InitedRoots.count(*I))
+ if (!InitedRoots.count(*I)) {
new StoreInst(ConstantPointerNull::get(cast(
cast((*I)->getType())->getElementType())),
*I, IP);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const Collector &C) {
+ // Default lowering is necessary only if read or write barriers have a default
+ // action. The default for roots is no action.
+ return !C.customWriteBarrier()
+ || !C.customReadBarrier()
+ || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const Collector &C) {
+ // Custom lowering is only necessary if enabled for some action.
+ return C.customWriteBarrier()
+ || C.customReadBarrier()
+ || C.customRoots();
}
/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
@@ -228,9 +250,24 @@
/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
/// Leave gcroot intrinsics; the code generator needs to see those.
bool LowerIntrinsics::runOnFunction(Function &F) {
- // Quick exit for programs that do not declare the intrinsics.
- if (!GCReadInt && !GCWriteInt && !GCRootInt) return false;
+ // Quick exit for functions that do not use GC.
+ if (!F.hasCollector()) return false;
+
+ CollectorMetadata &MD = getAnalysis().get(F);
+ Collector &Coll = MD.getCollector();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(Coll))
+ MadeChange |= PerformDefaultLowering(F, Coll);
+
+ if (NeedsCustomLoweringPass(Coll))
+ MadeChange |= Coll.performCustomLowering(F);
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, Collector &Coll) {
bool LowerWr = !Coll.customWriteBarrier();
bool LowerRd = !Coll.customReadBarrier();
bool InitRoots = Coll.initializeRoots();
@@ -268,17 +305,21 @@
}
if (Roots.size())
- InsertRootInitializers(F, Roots.begin(), Roots.size());
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
return MadeChange;
}
// -----------------------------------------------------------------------------
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+ return new MachineCodeAnalysis();
+}
+
char MachineCodeAnalysis::ID = 0;
-MachineCodeAnalysis::MachineCodeAnalysis(const Collector &C, const TargetMachine &T)
- : MachineFunctionPass(intptr_t(&ID)), Coll(C), Targ(T) {}
+MachineCodeAnalysis::MachineCodeAnalysis()
+ : MachineFunctionPass(intptr_t(&ID)) {}
const char *MachineCodeAnalysis::getPassName() const {
return "Analyze Machine Code For Garbage Collection";
@@ -304,10 +345,10 @@
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (Coll.needsSafePoint(GC::PreCall))
+ if (MD->getCollector().needsSafePoint(GC::PreCall))
MD->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI));
- if (Coll.needsSafePoint(GC::PostCall))
+ if (MD->getCollector().needsSafePoint(GC::PostCall))
MD->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI));
}
@@ -323,7 +364,7 @@
void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
uint64_t StackSize = MFI->getStackSize();
uint64_t OffsetAdjustment = MFI->getOffsetAdjustment();
- uint64_t OffsetOfLocalArea = Targ.getFrameInfo()->getOffsetOfLocalArea();
+ uint64_t OffsetOfLocalArea = TM->getFrameInfo()->getOffsetOfLocalArea();
for (CollectorMetadata::roots_iterator RI = MD->roots_begin(),
RE = MD->roots_end(); RI != RE; ++RI)
@@ -332,12 +373,16 @@
}
bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
- if (!Coll.needsSafePoints())
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasCollector()) return false;
+
+ MD = &getAnalysis().get(*MF.getFunction());
+ if (!MD->getCollector().needsSafePoints())
return false;
- MD = getAnalysis().get(MF.getFunction());
+ TM = &MF.getTarget();
MMI = &getAnalysis();
- TII = MF.getTarget().getInstrInfo();
+ TII = TM->getInstrInfo();
MFI = MF.getFrameInfo();
// Find the size of the stack frame.
Modified: llvm/trunk/lib/CodeGen/CollectorMetadata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CollectorMetadata.cpp?rev=44827&r1=44826&r2=44827&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CollectorMetadata.cpp (original)
+++ llvm/trunk/lib/CodeGen/CollectorMetadata.cpp Mon Dec 10 18:30:17 2007
@@ -13,8 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CollectorMetadata.h"
+#include "llvm/CodeGen/Collector.h"
+#include "llvm/CodeGen/Collectors.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
@@ -22,7 +25,7 @@
namespace {
- class VISIBILITY_HIDDEN Printer : public MachineFunctionPass {
+ class VISIBILITY_HIDDEN Printer : public FunctionPass {
static char ID;
std::ostream &OS;
@@ -32,10 +35,10 @@
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnFunction(Function &F);
};
- class VISIBILITY_HIDDEN Deleter : public MachineFunctionPass {
+ class VISIBILITY_HIDDEN Deleter : public FunctionPass {
static char ID;
public:
@@ -44,7 +47,7 @@
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnFunction(Function &F);
bool doFinalization(Module &M);
};
@@ -55,8 +58,8 @@
// -----------------------------------------------------------------------------
-CollectorMetadata::CollectorMetadata(const Function &F)
- : F(F), FrameSize(~0LL) {}
+CollectorMetadata::CollectorMetadata(const Function &F, Collector &C)
+ : F(F), C(C), FrameSize(~0LL) {}
CollectorMetadata::~CollectorMetadata() {}
@@ -71,46 +74,71 @@
clear();
}
-CollectorMetadata& CollectorModuleMetadata::insert(const Function *F) {
- assert(Map.find(F) == Map.end() && "Function GC metadata already exists!");
- CollectorMetadata *FMD = new CollectorMetadata(*F);
- Functions.push_back(FMD);
- Map[F] = FMD;
- return *FMD;
+Collector *CollectorModuleMetadata::
+getOrCreateCollector(const Module *M, const std::string &Name) {
+ const char *Start = Name.c_str();
+
+ collector_map_type::iterator NMI = NameMap.find(Start, Start + Name.size());
+ if (NMI != NameMap.end())
+ return NMI->getValue();
+
+ for (CollectorRegistry::iterator I = CollectorRegistry::begin(),
+ E = CollectorRegistry::end(); I != E; ++I) {
+ if (strcmp(Start, I->getName()) == 0) {
+ Collector *C = I->instantiate();
+ C->M = M;
+ C->Name = Name;
+ NameMap.GetOrCreateValue(Start, Start + Name.size()).setValue(C);
+ Collectors.push_back(C);
+ return C;
+ }
+ }
+
+ cerr << "unsupported collector: " << Name << "\n";
+ abort();
}
-CollectorMetadata* CollectorModuleMetadata::get(const Function *F) const {
- map_type::iterator I = Map.find(F);
- if (I == Map.end())
- return 0;
- return I->second;
+CollectorMetadata &CollectorModuleMetadata::get(const Function &F) {
+ assert(F.hasCollector());
+ function_map_type::iterator I = Map.find(&F);
+ if (I != Map.end())
+ return *I->second;
+
+ Collector *C = getOrCreateCollector(F.getParent(), F.getCollector());
+ CollectorMetadata *MD = C->insertFunctionMetadata(F);
+ Map[&F] = MD;
+ return *MD;
}
void CollectorModuleMetadata::clear() {
+ Map.clear();
+
+ // TODO: StringMap should provide a clear method.
+ while (!NameMap.empty())
+ NameMap.erase(NameMap.begin());
+
for (iterator I = begin(), E = end(); I != E; ++I)
delete *I;
-
- Functions.clear();
- Map.clear();
+ Collectors.clear();
}
// -----------------------------------------------------------------------------
char Printer::ID = 0;
-Pass *llvm::createCollectorMetadataPrinter(std::ostream &OS) {
+FunctionPass *llvm::createCollectorMetadataPrinter(std::ostream &OS) {
return new Printer(OS);
}
Printer::Printer(std::ostream &OS)
- : MachineFunctionPass(intptr_t(&ID)), OS(OS) {}
+ : FunctionPass(intptr_t(&ID)), OS(OS) {}
const char *Printer::getPassName() const {
return "Print Garbage Collector Information";
}
void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
- MachineFunctionPass::getAnalysisUsage(AU);
+ FunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
AU.addRequired();
}
@@ -125,9 +153,9 @@
}
}
-bool Printer::runOnMachineFunction(MachineFunction &MF) {
- if (CollectorMetadata *FD =
- getAnalysis().get(MF.getFunction())) {
+bool Printer::runOnFunction(Function &F) {
+ if (F.hasCollector()) {
+ CollectorMetadata *FD = &getAnalysis().get(F);
OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n";
for (CollectorMetadata::roots_iterator RI = FD->roots_begin(),
@@ -160,11 +188,11 @@
char Deleter::ID = 0;
-Pass *llvm::createCollectorMetadataDeleter() {
+FunctionPass *llvm::createCollectorMetadataDeleter() {
return new Deleter();
}
-Deleter::Deleter() : MachineFunctionPass(intptr_t(&ID)) {}
+Deleter::Deleter() : FunctionPass(intptr_t(&ID)) {}
const char *Deleter::getPassName() const {
return "Delete Garbage Collector Information";
@@ -175,11 +203,13 @@
AU.addRequired();
}
-bool Deleter::runOnMachineFunction(MachineFunction &MF) {
+bool Deleter::runOnFunction(Function &MF) {
return false;
}
bool Deleter::doFinalization(Module &M) {
- getAnalysis().clear();
+ CollectorModuleMetadata *CMM = getAnalysisToUpdate();
+ assert(CMM && "Deleter didn't require CollectorModuleMetadata?!");
+ CMM->clear();
return false;
}
From isanbard at gmail.com Mon Dec 10 18:30:57 2007
From: isanbard at gmail.com (Bill Wendling)
Date: Mon, 10 Dec 2007 16:30:57 -0800
Subject: [llvm-commits] [llvm] r44687 - in /llvm/trunk:
include/llvm/CodeGen/Passes.h
lib/CodeGen/LLVMTargetMachine.cpp lib/CodeGen/MachineLICM.cpp
lib/Target/PowerPC/PPCInstrInfo.td
In-Reply-To: <4CAE2FC5-85F5-4C02-B7FD-F19F195C6931@apple.com>
References: <20071210144741.GX26680@gold.us.cray.com>
<5BD5B624-4DF2-4547-A2EC-A06D261D70DE@apple.com>
<16e5fdf90712101323m635126f8o30aa7fb98f4d77a0@mail.gmail.com>
<4CAE2FC5-85F5-4C02-B7FD-F19F195C6931@apple.com>
Message-ID: <16e5fdf90712101630v4a596617y16e1bb8d420e2fa3@mail.gmail.com>
On Dec 10, 2007 3:44 PM, Evan Cheng wrote:
> On Dec 10, 2007, at 1:23 PM, Bill Wendling wrote:
> > On Dec 10, 2007 11:36 AM, Evan Cheng wrote:
> >> I don't think lifting loop invariant from inner loop all the way out
> >> of outer-most loop is a good idea. That will increase register
> >> pressure in basic blocks where it is not used.
> >>
> > This is going to happen with the current pass, though. Each loop is
> > going to see the hoisted instructions from the previous iteration and
> > try to re-hoist them. Is there some heuristic we should apply to
> > prevent it from hoisting instructions too far?
> >
>
> I am not sure. :-)
>
> For innermost loops, hoisting invariants out into the preheader always
> makes sense. Intuitively, hoisting invariants from inner loops out of
> the outermost loop only makes sense when all (or a lot, whatever that
> means :-) of the inner loops use it. Or at least the first inner loop
> uses it.
>
> What does the LLVM level LICM do?
>
It does the same thing that Machine LICM does...tries to hoist things
as far as possible.
-bw
From clattner at apple.com Mon Dec 10 18:33:31 2007
From: clattner at apple.com (Chris Lattner)
Date: Mon, 10 Dec 2007 16:33:31 -0800
Subject: [llvm-commits] [llvm] r44825 - in /llvm/trunk:
include/llvm/Target/TargetData.h lib/Target/TargetData.cpp
In-Reply-To: <200712110028.lBB0Sxth016975@zion.cs.uiuc.edu>
References: <200712110028.lBB0Sxth016975@zion.cs.uiuc.edu>
Message-ID: <1EA97F9B-57BF-4D16-B67B-E10E94F93D5C@apple.com>
> URL: http://llvm.org/viewvc/llvm-project?rev=44825&view=rev
> Log:
> Move TargetData::hostIsLittleEndian out of line, which means we
> don't have to #include config.h in it. #including config.h breaks
> other projects that have their own autoconf stuff and try to #include
> the llvm headers. One obscure example is llvm-gcc.
Duncan, this patch gets llvm-gcc to build, but it still doesn't
answer the big question: why does TargetData contain information
about the host? I don't think this is the right approach. Also, you
can get the host endianness without autoconf by using something
simple like:
/// Returns true when the host is little-endian, i.e. the least significant
/// byte of an integer is stored at its lowest address; false otherwise.
bool islittleendian() {
  // Look at the first byte of a known integer through an unsigned char
  // pointer. Unlike writing one union member and reading another, accessing
  // an object's bytes this way is well-defined in C++.
  const int probe = 1;
  return *reinterpret_cast<const unsigned char *>(&probe) == 1;
}
-Chris
From scottm at aero.org Mon Dec 10 18:35:46 2007
From: scottm at aero.org (Scott Michel)
Date: Tue, 11 Dec 2007 00:35:46 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r44828 -
/llvm-gcc-4.2/trunk/README.LLVM
Message-ID: <200712110035.lBB0Zk78017299@zion.cs.uiuc.edu>
Author: pingbak
Date: Mon Dec 10 18:35:46 2007
New Revision: 44828
URL: http://llvm.org/viewvc/llvm-project?rev=44828&view=rev
Log:
Add a blurb about installing a link to libstdc++.6.dylib so that C++
executables link correctly when the compiler is installed in a nonstandard
place.
Modified:
llvm-gcc-4.2/trunk/README.LLVM
Modified: llvm-gcc-4.2/trunk/README.LLVM
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/README.LLVM?rev=44828&r1=44827&r2=44828&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/README.LLVM (original)
+++ llvm-gcc-4.2/trunk/README.LLVM Mon Dec 10 18:35:46 2007
@@ -126,6 +126,11 @@
--enable-llvm=$LLVMOBJDIR --enable-languages=c,c++$EXTRALANGS $TARGETOPTIONS
$ make $BUILDOPTIONS
$ make install
+$ ln -sf /usr/lib/libstdc++.6.dylib `pwd`/../install/lib
+
+That last step, "ln -sf ..." is required so that the linker (collect2) can find
+libstdc++ ('-lstdc++') and subsequently link C++ executables link correctly.
+
Note that if you prefer to bootstrap llvm-gcc (so that the final llvm-gcc
executables have been compiled with llvm-gcc itself), replace "make" with
From clattner at apple.com Mon Dec 10 18:38:18 2007
From: clattner at apple.com (Chris Lattner)
Date: Mon, 10 Dec 2007 16:38:18 -0800
Subject: [llvm-commits] [llvm] r44687 - in /llvm/trunk:
include/llvm/CodeGen/Passes.h lib/CodeGen/LLVMTargetMachine.cpp
lib/CodeGen/MachineLICM.cpp lib/Target/PowerPC/PPCInstrInfo.td
In-Reply-To: <4CAE2FC5-85F5-4C02-B7FD-F19F195C6931@apple.com>
References: <20071210144741.GX26680@gold.us.cray.com>
<5BD5B624-4DF2-4547-A2EC-A06D261D70DE@apple.com>
<16e5fdf90712101323m635126f8o30aa7fb98f4d77a0@mail.gmail.com>
<4CAE2FC5-85F5-4C02-B7FD-F19F195C6931@apple.com>
Message-ID: <824E8AB9-8189-44FF-B2B4-538324698B1E@apple.com>
On Dec 10, 2007, at 3:44 PM, Evan Cheng wrote:
>> This is going to happen with the current pass, though. Each loop is
>> going to see the hoisted instructions from the previous iteration and
>> try to re-hoist them. Is there some heuristic we should apply to
>> prevent it from hoisting instructions too far?
>>
>
> For innermost loops, hoisting invariants out into the preheader always
> makes sense. Intuitively, hoisting invariants from inner loops out of
> the outermost loop only makes sense when all (or a lot, whatever that
> means :-) of the inner loops use it. Or at least the first inner loop
> uses it.
We discussed this today: I'm strongly of the opinion that licm should
hoist aggressively and not "think" about register pressure. It
should assume that remat is capable of resinking stuff into the loop
when possible. This means that we shouldn't have ad-hoc hacks in
LICM to avoid "increasing register pressure", but I'm fine with
making LICM aware of what remat is able to sink, and having it not
hoist things that it can't handle yet.
With that said, licm should hoist things as far out as possible. The
LLVM LICM pass is structured the way it is in order to hoist loads
out, which requires checking alias information at each level of a loop
nest. We don't have short-term plans to hoist out loads (which will
require extensive machine aliasing support), so switching to a model
like Dan describes (single pass over all bb's in outermost loops,
hoisting instructions once instead of iteratively) makes sense to me.
-Chris
From scottm at aero.org Mon Dec 10 18:43:14 2007
From: scottm at aero.org (Scott Michel)
Date: Tue, 11 Dec 2007 00:43:14 -0000
Subject: [llvm-commits] [llvm-gcc-4.2] r44829 -
/llvm-gcc-4.2/trunk/README.LLVM
Message-ID: <200712110043.lBB0hE7b017597@zion.cs.uiuc.edu>
Author: pingbak
Date: Mon Dec 10 18:43:14 2007
New Revision: 44829
URL: http://llvm.org/viewvc/llvm-project?rev=44829&view=rev
Log:
Fix typo.
Modified:
llvm-gcc-4.2/trunk/README.LLVM
Modified: llvm-gcc-4.2/trunk/README.LLVM
URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/README.LLVM?rev=44829&r1=44828&r2=44829&view=diff
==============================================================================
--- llvm-gcc-4.2/trunk/README.LLVM (original)
+++ llvm-gcc-4.2/trunk/README.LLVM Mon Dec 10 18:43:14 2007
@@ -129,8 +129,7 @@
$ ln -sf /usr/lib/libstdc++.6.dylib `pwd`/../install/lib
That last step, "ln -sf ..." is required so that the linker (collect2) can find
-libstdc++ ('-lstdc++') and subsequently link C++ executables link correctly.
-
+libstdc++ ('-lstdc++') and subsequently link C++ executables correctly.
Note that if you prefer to bootstrap llvm-gcc (so that the final llvm-gcc
executables have been compiled with llvm-gcc itself), replace "make" with
From isanbard at gmail.com Mon Dec 10 18:50:03 2007
From: isanbard at gmail.com (Bill Wendling)
Date: Mon, 10 Dec 2007 16:50:03 -0800
Subject: [llvm-commits] [llvm] r44687 - in /llvm/trunk:
include/llvm/CodeGen/Passes.h
lib/CodeGen/LLVMTargetMachine.cpp lib/CodeGen/MachineLICM.cpp
lib/Target/PowerPC/PPCInstrInfo.td
In-Reply-To: <824E8AB9-8189-44FF-B2B4-538324698B1E@apple.com>
References: <20071210144741.GX26680@gold.us.cray.com>
<5BD5B624-4DF2-4547-A2EC-A06D261D70DE@apple.com>
<16e5fdf90712101323m635126f8o30aa7fb98f4d77a0@mail.gmail.com>
<4CAE2FC5-85F5-4C02-B7FD-F19F195C6931@apple.com>
<824E8AB9-8189-44FF-B2B4-538324698B1E@apple.com>
Message-ID: <16e5fdf90712101650o153eeff5ld780f8ae5f0bbfd3@mail.gmail.com>
On Dec 10, 2007 4:38 PM, Chris Lattner wrote:
> With that said, licm should hoist things as far out as possible. The
> LLVM LICM pass is structured the way it is in order to hoist loads
> out, which requires checking alias information at each level of a loop
> nest. We don't have short-term plans to hoist out loads (which will
> require extensive machine aliasing support), so switching to a model
> like Dan describes (single pass over all bb's in outermost loops,
> hoisting instructions once instead of iteratively) makes sense to me.
>
I'm confused. If we do that, we won't be able to hoist things in inner
loops into their pre-header blocks. Or are you suggesting that the
machine LICM pass look for loop-invariant instructions and, if they can
be hoisted all of the way out, do that; otherwise, find each
containing loop and try to hoist to that pre-header?
-bw
From isanbard at gmail.com Mon Dec 10 19:04:06 2007
From: isanbard at gmail.com (Bill Wendling)
Date: Tue, 11 Dec 2007 01:04:06 -0000
Subject: [llvm-commits] [llvm] r44832 - /llvm/tags/Apple/llvmCore-2004/
Message-ID: <200712110104.lBB1463d018608@zion.cs.uiuc.edu>
Author: void
Date: Mon Dec 10 19:04:05 2007
New Revision: 44832
URL: http://llvm.org/viewvc/llvm-project?rev=44832&view=rev
Log:
Retagging with the config.h fix.
Added:
llvm/tags/Apple/llvmCore-2004/
- copied from r44831, llvm/trunk/
From natebegeman at mac.com Mon Dec 10 19:41:33 2007
From: natebegeman at mac.com (Nate Begeman)
Date: Tue, 11 Dec 2007 01:41:33 -0000
Subject: [llvm-commits] [llvm] r44835 -
/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Message-ID: <200712110141.lBB1fXSm021405@zion.cs.uiuc.edu>
Author: sampo
Date: Mon Dec 10 19:41:33 2007
New Revision: 44835
URL: http://llvm.org/viewvc/llvm-project?rev=44835&view=rev
Log:
x86 doesn't actually want to custom lower v3i32
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=44835&r1=44834&r2=44835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 10 19:41:33 2007
@@ -600,6 +600,9 @@
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
+ continue;
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
From evan.cheng at apple.com Mon Dec 10 19:46:18 2007
From: evan.cheng at apple.com (Evan Cheng)
Date: Tue, 11 Dec 2007 01:46:18 -0000
Subject: [llvm-commits] [llvm] r44836 - in /llvm/trunk:
lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/vec_shuffle-12.ll
test/CodeGen/X86/vec_shuffle-13.ll
Message-ID: <200712110146.lBB1kJJP021643@zion.cs.uiuc.edu>
Author: evancheng
Date: Mon Dec 10 19:46:18 2007
New Revision: 44836
URL: http://llvm.org/viewvc/llvm-project?rev=44836&view=rev
Log:
- Improved v8i16 shuffle lowering. It now uses pshuflw and pshufhw as much as
possible before resorting to pextrw and pinsrw.
- Better codegen for v4i32 shuffles masquerading as v8i16 or v16i8 shuffles.
- Improves (i16 extract_vector_element 0) codegen by recognizing
(i32 extract_vector_element 0) does not require a pextrw.
Added:
llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=44836&r1=44835&r2=44836&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 10 19:46:18 2007
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;
@@ -2714,7 +2716,7 @@
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast(Arg)->getValue();
- if (Val > 4)
+ if (Val >= 4)
return false;
}
@@ -3130,6 +3132,8 @@
return V;
}
+/// is4WideVector - Returns true if the specific v8i16 or v16i8 vector is
+/// actually just a 4 wide vector. e.g.
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3154,7 +3158,7 @@
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
unsigned NumNonZeroImms = 0;
- std::set Values;
+ SmallSet Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::UNDEF) {
@@ -3314,59 +3318,179 @@
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
+ SDOperand NewV;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
- if (isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- // Handle v8i16 shuffle high / low shuffle node pair.
+ MVT::ValueType PtrVT = TLI.getPointerTy();
+ SmallVector MaskElts(PermMask.Val->op_begin(),
+ PermMask.Val->op_end());
+
+ // First record which half of which vector the low elements come from.
+ SmallVector LowQuad(4);
+ for (unsigned i = 0; i < 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++LowQuad[QuadIdx];
+ }
+ int BestLowQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LowQuad[i] > MaxQuad) {
+ BestLowQuad = i;
+ MaxQuad = LowQuad[i];
+ }
+ }
+
+ // Record which half of which vector the high elements come from.
+ SmallVector HighQuad(4);
+ for (unsigned i = 4; i < 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast(Elt)->getValue();
+ int QuadIdx = EltIdx / 4;
+ ++HighQuad[QuadIdx];
+ }
+ int BestHighQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HighQuad[i] > MaxQuad) {
+ BestHighQuad = i;
+ MaxQuad = HighQuad[i];
+ }
+ }
+
+ // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+ if (BestLowQuad != -1 || BestHighQuad != -1) {
+ // First sort the 4 chunks in order using shufpd.
SmallVector MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, MaskEVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, MaskEVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V2, Mask);
+ if (BestLowQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ if (BestHighQuad != -1)
+ MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+ SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+ // Now sort high and low parts separately.
+ BitVector InOrder(8);
+ if (BestLowQuad != -1) {
+ // Sort lower half in order using PSHUFLW.
+ MaskVec.clear();
+ bool AnyOutOrder = false;
+ for (unsigned i = 0; i != 4; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestLowQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ for (unsigned i = 4; i != 8; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ if (BestHighQuad != -1) {
+ // Sort high half in order using PSHUFHW if possible.
+ MaskVec.clear();
+ for (unsigned i = 0; i != 4; ++i)
+ MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+ bool AnyOutOrder = false;
+ for (unsigned i = 4; i != 8; ++i) {
+ SDOperand Elt = MaskElts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(Elt);
+ InOrder.set(i);
+ } else {
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx != i)
+ AnyOutOrder = true;
+ MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+ // If this element is in the right place after this shuffle, then
+ // remember it.
+ if ((int)(EltIdx / 4) == BestHighQuad)
+ InOrder.set(i);
+ }
+ }
+ if (AnyOutOrder) {
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+ }
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ SDOperand Elt = MaskElts[i];
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx == i)
+ continue;
+ SDOperand ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
}
- // Lower than into extracts and inserts but try to do as few as possible.
+ // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+ ///as few as possible.
// First, let's find out how many elements are already in the right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
- SmallVector V1Elts;
- SmallVector V2Elts;
+ SmallVector V1Elts;
+ SmallVector V2Elts;
for (unsigned i = 0; i < 8; ++i) {
- SDOperand Elt = PermMask.getOperand(i);
+ SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
- V1Elts.push_back(i);
- V2Elts.push_back(i);
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(Elt);
++V1InOrder;
++V2InOrder;
+ continue;
+ }
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx == i) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+ ++V1InOrder;
+ } else if (EltIdx == i+8) {
+ V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+ ++V2InOrder;
+ } else if (EltIdx < 8) {
+ V1Elts.push_back(Elt);
+ ++V1FromV1;
} else {
- unsigned EltIdx = cast(Elt)->getValue();
- if (EltIdx == i) {
- V1Elts.push_back(i);
- V2Elts.push_back(i+8);
- ++V1InOrder;
- } else if (EltIdx == i+8) {
- V1Elts.push_back(i+8);
- V2Elts.push_back(i);
- ++V2InOrder;
- } else {
- V1Elts.push_back(EltIdx);
- V2Elts.push_back(EltIdx);
- if (EltIdx < 8)
- ++V1FromV1;
- else
- ++V2FromV2;
- }
+ V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+ ++V2FromV2;
}
}
@@ -3377,33 +3501,92 @@
std::swap(V1FromV1, V2FromV2);
}
- MVT::ValueType PtrVT = TLI.getPointerTy();
- if (V1FromV1) {
- // If there are elements that are from V1 but out of place,
- // then first sort them in place
- SmallVector MaskVec;
+ if ((V1FromV1 + V1InOrder) != 8) {
+ // Some elements are from V2.
+ if (V1FromV1) {
+ // If there are elements that are from V1 but out of place,
+ // then first sort them in place
+ SmallVector MaskVec;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ continue;
+ }
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx >= 8)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+ else
+ MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ }
+ SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ }
+
+ NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
- unsigned EltIdx = V1Elts[i];
- if (EltIdx >= 8)
- MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
- else
- MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (EltIdx < 8)
+ continue;
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+ DAG.getConstant(EltIdx - 8, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
}
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+ return NewV;
+ } else {
+ // All elements are from V1.
+ NewV = V1;
+ for (unsigned i = 0; i < 8; ++i) {
+ SDOperand Elt = V1Elts[i];
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast(Elt)->getValue();
+ SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+ DAG.getConstant(EltIdx, PtrVT));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+ DAG.getConstant(i, PtrVT));
+ }
+ return NewV;
}
+}
- // Now let's insert elements from the other vector.
- for (unsigned i = 0; i < 8; ++i) {
- unsigned EltIdx = V1Elts[i];
- if (EltIdx < 8)
- continue;
- SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
- DAG.getConstant(EltIdx - 8, PtrVT));
- V1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V1, ExtOp,
- DAG.getConstant(i, PtrVT));
+/// RewriteAs4WideShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones if possible. This can be done when every pair / quad of shuffle mask
+/// elements point to elements in the right sequence. e.g.
+/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+static
+SDOperand RewriteAs4WideShuffle(SDOperand V1, SDOperand V2,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ unsigned NumElems = PermMask.getNumOperands();
+ unsigned Scale = NumElems / 4;
+ SmallVector MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ unsigned StartIdx = ~0U;
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDOperand Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast(Elt)->getValue();
+ if (StartIdx == ~0U)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDOperand();
+ }
+ if (StartIdx == ~0U)
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
}
- return V1;
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],4));
}
SDOperand
@@ -3544,18 +3727,31 @@
}
}
+ // If the shuffle can be rewritten as a 4 wide shuffle, then do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDOperand NewOp = RewriteAs4WideShuffle(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
- if (VT == MVT::v8i16)
- return LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (VT == MVT::v8i16) {
+ SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return NewOp;
+ }
- if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
// Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector, 8> Locs;
Locs.reserve(NumElems);
- SmallVector Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
- SmallVector Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector Mask1(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector Mask2(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
@@ -3661,6 +3857,13 @@
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+ Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
@@ -3669,7 +3872,6 @@
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
@@ -3686,12 +3888,12 @@
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
@@ -3706,6 +3908,7 @@
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll?rev=44836&r1=44835&r2=44836&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-12.ll Mon Dec 10 19:46:18 2007
@@ -1,37 +1,28 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 7
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuf | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pextrw | count 4
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pinsrw | count 6
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 2
-define void @t1(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
+define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+ ret <8 x i16> %tmp3
}
-define void @t2(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
}
-define void @t3(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
+ ret <8 x i16> %tmp
}
-define void @t4(<8 x i16>* %res, <8 x i16>* %A, <8 x i16>* %B) {
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
- store <8 x i16> %tmp3, <8 x i16>* %res
- ret void
+define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
+ ret <8 x i16> %tmp
}
Added: llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll?rev=44836&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_shuffle-13.ll Mon Dec 10 19:46:18 2007
@@ -0,0 +1,21 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movlhps | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movss | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshuflw | count 1
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufhw | count 1
+
+define <8 x i16> @t1(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
+ ret <8 x i16> %tmp
+}
+
+define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
+ ret <8 x i16> %tmp
+}
+
+define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) {
+ %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
+ ret <8 x i16> %tmp
+}
From evan.cheng at apple.com Mon Dec 10 20:08:35 2007
From: evan.cheng at apple.com (Evan Cheng)
Date: Tue, 11 Dec 2007 02:08:35 -0000
Subject: [llvm-commits] [llvm] r44837 -
/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Message-ID: <200712110208.lBB28ZQa022968@zion.cs.uiuc.edu>
Author: evancheng
Date: Mon Dec 10 20:08:35 2007
New Revision: 44837
URL: http://llvm.org/viewvc/llvm-project?rev=44837&view=rev
Log:
Pretty print shuffle mask operand.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=44837&r1=44836&r2=44837&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Dec 10 20:08:35 2007
@@ -3855,6 +3855,19 @@
cerr << ":" << RN;
}
+ if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
+ SDNode *Mask = getOperand(2).Val;
+ cerr << "<";
+ for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) {
+ if (i) cerr << ",";
+ if (Mask->getOperand(i).getOpcode() == ISD::UNDEF)
+ cerr << "u";
+ else
+ cerr << cast(Mask->getOperand(i))->getValue();
+ }
+ cerr << ">";
+ }
+
if (const ConstantSDNode *CSDN = dyn_cast(this)) {
cerr << "<" << CSDN->getValue() << ">";
} else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) {
From evan.cheng at apple.com Mon Dec 10 20:09:24 2007
From: evan.cheng at apple.com (Evan Cheng)
Date: Tue, 11 Dec 2007 02:09:24 -0000
Subject: [llvm-commits] [llvm] r44838 - in /llvm/trunk:
include/llvm/CodeGen/LiveIntervalAnalysis.h
lib/CodeGen/LiveIntervalAnalysis.cpp lib/CodeGen/RegAllocLinearScan.cpp
lib/CodeGen/SimpleRegisterCoalescing.cpp
lib/CodeGen/SimpleRegisterCoalescing.h
Message-ID: <200712110209.lBB29OCo023089@zion.cs.uiuc.edu>
Author: evancheng
Date: Mon Dec 10 20:09:15 2007
New Revision: 44838
URL: http://llvm.org/viewvc/llvm-project?rev=44838&view=rev
Log:
Switch over to MachineLoopInfo.
Modified:
llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
Modified: llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h?rev=44838&r1=44837&r2=44838&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h (original)
+++ llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h Mon Dec 10 20:09:15 2007
@@ -32,7 +32,7 @@
namespace llvm {
class LiveVariables;
- class LoopInfo;
+ class MachineLoopInfo;
class MRegisterInfo;
class SSARegMap;
class TargetInstrInfo;
@@ -231,7 +231,7 @@
/// the given interval.
std::vector
addIntervalsForSpills(const LiveInterval& i,
- const LoopInfo *loopInfo, VirtRegMap& vrm);
+ const MachineLoopInfo *loopInfo, VirtRegMap& vrm);
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable. Also returns true
@@ -321,7 +321,8 @@
bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
VirtRegMap &vrm, SSARegMap *RegMap, const TargetRegisterClass* rc,
SmallVector &ReMatIds,
- unsigned &NewVReg, bool &HasDef, bool &HasUse, const LoopInfo *loopInfo,
+ unsigned &NewVReg, bool &HasDef, bool &HasUse,
+ const MachineLoopInfo *loopInfo,
std::map &MBBVRegsMap,
std::vector &NewLIs);
void rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
@@ -329,7 +330,7 @@
MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot,
bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
VirtRegMap &vrm, SSARegMap *RegMap, const TargetRegisterClass* rc,
- SmallVector &ReMatIds, const LoopInfo *loopInfo,
+ SmallVector &ReMatIds, const MachineLoopInfo *loopInfo,
BitVector &SpillMBBs,
std::map > &SpillIdxes,
BitVector &RestoreMBBs,
Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=44838&r1=44837&r2=44838&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Dec 10 20:09:15 2007
@@ -19,10 +19,10 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "VirtRegMap.h"
#include "llvm/Value.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
@@ -765,7 +765,7 @@
const TargetRegisterClass* rc,
SmallVector &ReMatIds,
unsigned &NewVReg, bool &HasDef, bool &HasUse,
- const LoopInfo *loopInfo,
+ const MachineLoopInfo *loopInfo,
std::map &MBBVRegsMap,
std::vector &NewLIs) {
bool CanFold = false;
@@ -962,7 +962,7 @@
VirtRegMap &vrm, SSARegMap *RegMap,
const TargetRegisterClass* rc,
SmallVector &ReMatIds,
- const LoopInfo *loopInfo,
+ const MachineLoopInfo *loopInfo,
BitVector &SpillMBBs,
std::map > &SpillIdxes,
BitVector &RestoreMBBs,
@@ -1119,7 +1119,7 @@
}
// Update spill weight.
- unsigned loopDepth = loopInfo->getLoopDepth(MBB->getBasicBlock());
+ unsigned loopDepth = loopInfo->getLoopDepth(MBB);
nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
}
@@ -1158,7 +1158,7 @@
std::vector LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
- const LoopInfo *loopInfo, VirtRegMap &vrm) {
+ const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
// Since this is called after the analysis is done we don't know if
// LiveVariables is available
lv_ = getAnalysisToUpdate<LiveVariables>();
Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=44838&r1=44837&r2=44838&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Mon Dec 10 20:09:15 2007
@@ -16,9 +16,9 @@
#include "PhysRegTracker.h"
#include "VirtRegMap.h"
#include "llvm/Function.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
@@ -67,7 +67,7 @@
SSARegMap *regmap_;
BitVector allocatableRegs_;
LiveIntervals* li_;
- const LoopInfo *loopInfo;
+ const MachineLoopInfo *loopInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is uses for backtracking.
@@ -103,7 +103,7 @@
// Make sure PassManager knows which analyses to make available
// to coalescing and which analyses coalescing invalidates.
AU.addRequiredTransitive();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -254,7 +254,7 @@
regmap_ = mf_->getSSARegMap();
allocatableRegs_ = mri_->getAllocatableSet(fn);
li_ = &getAnalysis<LiveIntervals>();
- loopInfo = &getAnalysis<LoopInfo>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
// We don't run the coalescer here because we have no reason to
// interact with it. If the coalescer requires interaction, it
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=44838&r1=44837&r2=44838&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon Dec 10 20:09:15 2007
@@ -17,10 +17,10 @@
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Value.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
@@ -72,7 +72,7 @@
AU.addPreservedID(TwoAddressInstructionPassID);
AU.addRequired();
AU.addRequired();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -207,11 +207,10 @@
bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
unsigned DstReg) {
MachineBasicBlock *MBB = CopyMI->getParent();
- const BasicBlock *BB = MBB->getBasicBlock();
- const Loop *L = loopInfo->getLoopFor(BB);
+ const MachineLoop *L = loopInfo->getLoopFor(MBB);
if (!L)
return false;
- if (BB != L->getLoopLatch())
+ if (MBB != L->getLoopLatch())
return false;
DstReg = rep(DstReg);
@@ -540,8 +539,7 @@
unsigned SrcReg, DstReg;
if (CopyMI && tii_->isMoveInstr(*CopyMI, SrcReg, DstReg) &&
JoinedCopies.count(CopyMI) == 0) {
- unsigned LoopDepth =
- loopInfo->getLoopDepth(CopyMI->getParent()->getBasicBlock());
+ unsigned LoopDepth = loopInfo->getLoopDepth(CopyMI->getParent());
JoinQueue->push(CopyRec(CopyMI, SrcReg, DstReg, LoopDepth,
isBackEdgeCopy(CopyMI, DstReg)));
}
@@ -1072,7 +1070,7 @@
std::vector VirtCopies;
std::vector PhysCopies;
- unsigned LoopDepth = loopInfo->getLoopDepth(MBB->getBasicBlock());
+ unsigned LoopDepth = loopInfo->getLoopDepth(MBB);
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E;) {
MachineInstr *Inst = MII++;
@@ -1143,9 +1141,10 @@
// Join intervals in the function prolog first. We want to join physical
// registers with virtual registers before the intervals got too long.
std::vector > MBBs;
- for (MachineFunction::iterator I = mf_->begin(), E = mf_->end(); I != E;++I)
- MBBs.push_back(std::make_pair(loopInfo->
- getLoopDepth(I->getBasicBlock()), I));
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
// Sort by loop depth.
std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
@@ -1380,7 +1379,7 @@
tii_ = tm_->getInstrInfo();
li_ = &getAnalysis<LiveIntervals>();
lv_ = &getAnalysis<LiveVariables>();
- loopInfo = &getAnalysis<LoopInfo>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
DOUT << "********** SIMPLE REGISTER COALESCING **********\n"
<< "********** Function: "
@@ -1427,7 +1426,7 @@
for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
mbbi != mbbe; ++mbbi) {
MachineBasicBlock* mbb = mbbi;
- unsigned loopDepth = loopInfo->getLoopDepth(mbb->getBasicBlock());
+ unsigned loopDepth = loopInfo->getLoopDepth(mbb);
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
mii != mie; ) {
Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h?rev=44838&r1=44837&r2=44838&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h (original)
+++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.h Mon Dec 10 20:09:15 2007
@@ -28,7 +28,7 @@
class MRegisterInfo;
class TargetInstrInfo;
class VirtRegMap;
- class LoopInfo;
+ class MachineLoopInfo;
/// CopyRec - Representation for copy instructions in coalescer queue.
///
@@ -84,7 +84,7 @@
const TargetInstrInfo* tii_;
LiveIntervals *li_;
LiveVariables *lv_;
- const LoopInfo* loopInfo;
+ const MachineLoopInfo* loopInfo;
BitVector allocatableRegs_;
DenseMap allocatableRCRegs_;
From rspencer at reidspencer.com Mon Dec 10 23:27:47 2007
From: rspencer at reidspencer.com (Reid Spencer)
Date: Tue, 11 Dec 2007 05:27:47 -0000
Subject: [llvm-commits] [support] r44846 -
/support/trunk/autoconf/m4/want_level.m4
Message-ID: <200712110527.lBB5RllS004837@zion.cs.uiuc.edu>
Author: reid
Date: Mon Dec 10 23:27:47 2007
New Revision: 44846
URL: http://llvm.org/viewvc/llvm-project?rev=44846&view=rev
Log:
Allow some additional values for levels that are non-numeric.
Modified:
support/trunk/autoconf/m4/want_level.m4
Modified: support/trunk/autoconf/m4/want_level.m4
URL: http://llvm.org/viewvc/llvm-project/support/trunk/autoconf/m4/want_level.m4?rev=44846&r1=44845&r2=44846&view=diff
==============================================================================
--- support/trunk/autoconf/m4/want_level.m4 (original)
+++ support/trunk/autoconf/m4/want_level.m4 Mon Dec 10 23:27:47 2007
@@ -9,6 +9,12 @@
m4_define([allcapsname],translit($1,a-z-,A-Z_))
AC_ARG_ENABLE([$1],
AS_HELP_STRING([--enable-$1],[$2 ($3)]),,enableval="$3")
+ case "$enableval" in
+ yes) enableval="1" ;;
+ no) enableval="0" ;;
+ [0-9]*) ;;
+ *) enableval="0" ;;
+ esac
digits=`echo "$enableval" | sed 's/[^0-9]//'`
if test -z "$digits" ; then
AC_MSG_ERROR([Expected numeric value for --enable-$1.])
From rspencer at reidspencer.com Mon Dec 10 23:33:05 2007
From: rspencer at reidspencer.com (Reid Spencer)
Date: Tue, 11 Dec 2007 05:33:05 -0000
Subject: [llvm-commits] [support] r44847 - /support/trunk/include/llvm/ADT/
Message-ID: <200712110533.lBB5X5MM005131@zion.cs.uiuc.edu>
Author: reid
Date: Mon Dec 10 23:33:05 2007
New Revision: 44847
URL: http://llvm.org/viewvc/llvm-project?rev=44847&view=rev
Log:
Prepare for update by deleting everything.
Removed:
support/trunk/include/llvm/ADT/
From rspencer at reidspencer.com Mon Dec 10 23:36:40 2007
From: rspencer at reidspencer.com (Reid Spencer)
Date: Tue, 11 Dec 2007 05:36:40 -0000
Subject: [llvm-commits] [support] r44848 - /support/trunk/include/llvm/ADT/
Message-ID: <200712110536.lBB5aefd005266@zion.cs.uiuc.edu>
Author: reid
Date: Mon Dec 10 23:36:40 2007
New Revision: 44848
URL: http://llvm.org/viewvc/llvm-project?rev=44848&view=rev
Log:
Add latest ADT header from llvm module to support module.
Added:
support/trunk/include/llvm/ADT/
- copied from r44847, llvm/trunk/include/llvm/ADT/
From sabre at nondot.org Mon Dec 10 23:55:02 2007
From: sabre at nondot.org (Chris Lattner)
Date: Tue, 11 Dec 2007 05:55:02 -0000
Subject: [llvm-commits] [llvm] r44849 -
/llvm/trunk/lib/VMCore/ConstantFold.cpp
Message-ID: <200712110555.lBB5t2dT006304@zion.cs.uiuc.edu>
Author: lattner
Date: Mon Dec 10 23:55:02 2007
New Revision: 44849
URL: http://llvm.org/viewvc/llvm-project?rev=44849&view=rev
Log:
refactor some code, no functionality change.
Modified:
llvm/trunk/lib/VMCore/ConstantFold.cpp
Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=44849&r1=44848&r2=44849&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/ConstantFold.cpp (original)
+++ llvm/trunk/lib/VMCore/ConstantFold.cpp Mon Dec 10 23:55:02 2007
@@ -138,6 +138,101 @@
Type::Int64Ty);
}
+static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
+ const Type *SrcTy = V->getType();
+ if (SrcTy == DestTy)
+ return V; // no-op cast
+
+ // Check to see if we are casting a pointer to an aggregate to a pointer to
+ // the first element. If so, return the appropriate GEP instruction.
+ if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
+ if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy)) {
+ SmallVector IdxList;
+ IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ const Type *ElTy = PTy->getElementType();
+ while (ElTy != DPTy->getElementType()) {
+ if (const StructType *STy = dyn_cast<StructType>(ElTy)) {
+ if (STy->getNumElements() == 0) break;
+ ElTy = STy->getElementType(0);
+ IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(ElTy)) {
+ if (isa<PointerType>(ElTy)) break; // Can't index into pointers!
+ ElTy = STy->getElementType();
+ IdxList.push_back(IdxList[0]);
+ } else {
+ break;
+ }
+ }
+
+ if (ElTy == DPTy->getElementType())
+ return ConstantExpr::getGetElementPtr(V, &IdxList[0], IdxList.size());
+ }
+
+ // Handle casts from one vector constant to another. We know that the src
+ // and dest type have the same size (otherwise its an illegal cast).
+ if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
+ if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
+ assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
+ "Not cast between same sized vectors!");
+ // First, check for null. Undef is already handled.
+ if (isa(V))
+ return Constant::getNullValue(DestTy);
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ // This is a cast from a ConstantVector of one type to a
+ // ConstantVector of another type. Check to see if all elements of
+ // the input are simple.
+ bool AllSimpleConstants = true;
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ if (!isa(CV->getOperand(i)) &&
+ !isa(CV->getOperand(i))) {
+ AllSimpleConstants = false;
+ break;
+ }
+ }
+
+ // If all of the elements are simple constants, we can fold this.
+ if (AllSimpleConstants)
+ return CastConstantVector(const_cast(CV), DestPTy);
+ }
+ }
+ }
+
+ // Finally, implement bitcast folding now. The code below doesn't handle
+ // bitcast right.
+ if (isa(V)) // ptr->ptr cast.
+ return ConstantPointerNull::get(cast(DestTy));
+
+ // Handle integral constant input.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (DestTy->isInteger())
+ // Integral -> Integral. This is a no-op because the bit widths must
+ // be the same. Consequently, we just fold to V.
+ return V;
+
+ if (DestTy->isFloatingPoint()) {
+ assert((DestTy == Type::DoubleTy || DestTy == Type::FloatTy) &&
+ "Unknown FP type!");
+ return ConstantFP::get(DestTy, APFloat(CI->getValue()));
+ }
+ // Otherwise, can't fold this (vector?)
+ return 0;
+ }
+
+ // Handle ConstantFP input.
+ if (const ConstantFP *FP = dyn_cast<ConstantFP>(V)) {
+ // FP -> Integral.
+ if (DestTy == Type::Int32Ty) {
+ return ConstantInt::get(FP->getValueAPF().convertToAPInt());
+ } else {
+ assert(DestTy == Type::Int64Ty && "only support f32/f64 for now!");
+ return ConstantInt::get(FP->getValueAPF().convertToAPInt());
+ }
+ }
+ return 0;
+}
+
+
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
const Type *DestTy) {
const Type *SrcTy = V->getType();
@@ -268,100 +363,7 @@
}
return 0;
case Instruction::BitCast:
- if (SrcTy == DestTy)
- return (Constant*)V; // no-op cast
-
- // Check to see if we are casting a pointer to an aggregate to a pointer to
- // the first element. If so, return the appropriate GEP instruction.
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType()))
- if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy)) {
- SmallVector IdxList;
- IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
- const Type *ElTy = PTy->getElementType();
- while (ElTy != DPTy->getElementType()) {
- if (const StructType *STy = dyn_cast(ElTy)) {
- if (STy->getNumElements() == 0) break;
- ElTy = STy->getElementType(0);
- IdxList.push_back(Constant::getNullValue(Type::Int32Ty));
- } else if (const SequentialType *STy =
- dyn_cast(ElTy)) {
- if (isa(ElTy)) break; // Can't index into pointers!
- ElTy = STy->getElementType();
- IdxList.push_back(IdxList[0]);
- } else {
- break;
- }
- }
-
- if (ElTy == DPTy->getElementType())
- return ConstantExpr::getGetElementPtr(
- const_cast(V), &IdxList[0], IdxList.size());
- }
-
- // Handle casts from one vector constant to another. We know that the src
- // and dest type have the same size (otherwise its an illegal cast).
- if (const VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
- if (const VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
- assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
- "Not cast between same sized vectors!");
- // First, check for null and undef
- if (isa(V))
- return Constant::getNullValue(DestTy);
- if (isa(V))
- return UndefValue::get(DestTy);
-
- if (const ConstantVector *CV = dyn_cast(V)) {
- // This is a cast from a ConstantVector of one type to a
- // ConstantVector of another type. Check to see if all elements of
- // the input are simple.
- bool AllSimpleConstants = true;
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
- if (!isa(CV->getOperand(i)) &&
- !isa(CV->getOperand(i))) {
- AllSimpleConstants = false;
- break;
- }
- }
-
- // If all of the elements are simple constants, we can fold this.
- if (AllSimpleConstants)
- return CastConstantVector(const_cast(CV), DestPTy);
- }
- }
- }
-
- // Finally, implement bitcast folding now. The code below doesn't handle
- // bitcast right.
- if (isa(V)) // ptr->ptr cast.
- return ConstantPointerNull::get(cast(DestTy));
-
- // Handle integral constant input.
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- if (DestTy->isInteger())
- // Integral -> Integral. This is a no-op because the bit widths must
- // be the same. Consequently, we just fold to V.
- return const_cast(V);
-
- if (DestTy->isFloatingPoint()) {
- assert((DestTy == Type::DoubleTy || DestTy == Type::FloatTy) &&
- "Unknown FP type!");
- return ConstantFP::get(DestTy, APFloat(CI->getValue()));
- }
- // Otherwise, can't fold this (vector?)
- return 0;
- }
-
- // Handle ConstantFP input.
- if (const ConstantFP *FP = dyn_cast<ConstantFP>(V)) {
- // FP -> Integral.
- if (DestTy == Type::Int32Ty) {
- return ConstantInt::get(FP->getValueAPF().convertToAPInt());
- } else {
- assert(DestTy == Type::Int64Ty && "only support f32/f64 for now!");
- return ConstantInt::get(FP->getValueAPF().convertToAPInt());
- }
- }
- return 0;
+ return FoldBitCast(const_cast<Constant*>(V), DestTy);
default:
assert(!"Invalid CE CastInst opcode");
break;
From sabre at nondot.org Tue Dec 11 00:07:40 2007
From: sabre at nondot.org (Chris Lattner)
Date: Tue, 11 Dec 2007 06:07:40 -0000
Subject: [llvm-commits] [llvm] r44850 -
/llvm/trunk/lib/VMCore/ConstantFold.cpp
Message-ID: <200712110607.lBB67ew1006983@zion.cs.uiuc.edu>
Author: lattner
Date: Tue Dec 11 00:07:39 2007
New Revision: 44850
URL: http://llvm.org/viewvc/llvm-project?rev=44850&view=rev
Log:
significantly simplify some code, no functionality change.
Modified:
llvm/trunk/lib/VMCore/ConstantFold.cpp
Modified: llvm/trunk/lib/VMCore/ConstantFold.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ConstantFold.cpp?rev=44850&r1=44849&r2=44850&view=diff
==============================================================================
--- llvm/trunk/lib/VMCore/ConstantFold.cpp (original)
+++ llvm/trunk/lib/VMCore/ConstantFold.cpp Tue Dec 11 00:07:39 2007
@@ -36,81 +36,31 @@
// ConstantFold*Instruction Implementations
//===----------------------------------------------------------------------===//
-/// CastConstantVector - Convert the specified ConstantVector node to the
+/// BitCastConstantVector - Convert the specified ConstantVector node to the
/// specified vector type. At this point, we know that the elements of the
/// input vector constant are all simple integer or FP values.
-static Constant *CastConstantVector(ConstantVector *CV,
- const VectorType *DstTy) {
- unsigned SrcNumElts = CV->getType()->getNumElements();
- unsigned DstNumElts = DstTy->getNumElements();
- const Type *SrcEltTy = CV->getType()->getElementType();
- const Type *DstEltTy = DstTy->getElementType();
+static Constant *BitCastConstantVector(ConstantVector *CV,
+ const VectorType *DstTy) {
+ // If this cast changes element count then we can't handle it here:
+ // doing so requires endianness information. This should be handled by
+ // Analysis/ConstantFolding.cpp
+ unsigned NumElts = DstTy->getNumElements();
+ if (NumElts != CV->getNumOperands())
+ return 0;
- // If both vectors have the same number of elements (thus, the elements
- // are the same size), perform the conversion now.
- if (SrcNumElts == DstNumElts) {
- std::vector Result;
-
- // If the src and dest elements are both integers, or both floats, we can
- // just BitCast each element because the elements are the same size.
- if ((SrcEltTy->isInteger() && DstEltTy->isInteger()) ||
- (SrcEltTy->isFloatingPoint() && DstEltTy->isFloatingPoint())) {
- for (unsigned i = 0; i != SrcNumElts; ++i)
- Result.push_back(
- ConstantExpr::getBitCast(CV->getOperand(i), DstEltTy));
- return ConstantVector::get(Result);
- }
-
- // If this is an int-to-fp cast ..
- if (SrcEltTy->isInteger()) {
- // Ensure that it is int-to-fp cast
- assert(DstEltTy->isFloatingPoint());
- if (DstEltTy->getTypeID() == Type::DoubleTyID) {
- for (unsigned i = 0; i != SrcNumElts; ++i) {
- ConstantInt *CI = cast(CV->getOperand(i));
- double V = CI->getValue().bitsToDouble();
- Result.push_back(ConstantFP::get(Type::DoubleTy, APFloat(V)));
- }
- return ConstantVector::get(Result);
- }
- assert(DstEltTy == Type::FloatTy && "Unknown fp type!");
- for (unsigned i = 0; i != SrcNumElts; ++i) {
- ConstantInt *CI = cast(CV->getOperand(i));
- float V = CI->getValue().bitsToFloat();
- Result.push_back(ConstantFP::get(Type::FloatTy, APFloat(V)));
- }
- return ConstantVector::get(Result);
- }
-
- // Otherwise, this is an fp-to-int cast.
- assert(SrcEltTy->isFloatingPoint() && DstEltTy->isInteger());
-
- if (SrcEltTy->getTypeID() == Type::DoubleTyID) {
- for (unsigned i = 0; i != SrcNumElts; ++i) {
- uint64_t V = cast(CV->getOperand(i))->
- getValueAPF().convertToAPInt().getZExtValue();
- Constant *C = ConstantInt::get(Type::Int64Ty, V);
- Result.push_back(ConstantExpr::getBitCast(C, DstEltTy ));
- }
- return ConstantVector::get(Result);
- }
-
- assert(SrcEltTy->getTypeID() == Type::FloatTyID);
- for (unsigned i = 0; i != SrcNumElts; ++i) {
- uint32_t V = (uint32_t)cast(CV->getOperand(i))->
- getValueAPF().convertToAPInt().getZExtValue();
- Constant *C = ConstantInt::get(Type::Int32Ty, V);
- Result.push_back(ConstantExpr::getBitCast(C, DstEltTy));
- }
- return ConstantVector::get(Result);
+ // Check to verify that all elements of the input are simple.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!isa(CV->getOperand(i)) &&
+ !isa(CV->getOperand(i)))
+ return 0;
}
-
- // Otherwise, this is a cast that changes element count and size. Handle
- // casts which shrink the elements here.
-
- // FIXME: We need to know endianness to do this!
-
- return 0;
+
+ // Bitcast each element now.
+ std::vector Result;
+ const Type *DstEltTy = DstTy->getElementType();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(Result);
}
/// This function determines which opcode to use to fold two constant cast
@@ -178,23 +128,8 @@
if (isa(V))
return Constant::getNullValue(DestTy);
- if (const ConstantVector *CV = dyn_cast(V)) {
- // This is a cast from a ConstantVector of one type to a
- // ConstantVector of another type. Check to see if all elements of
- // the input are simple.
- bool AllSimpleConstants = true;
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
- if (!isa(CV->getOperand(i)) &&
- !isa(CV->getOperand(i))) {
- AllSimpleConstants = false;
- break;
- }
- }
-
- // If all of the elements are simple constants, we can fold this.
- if (AllSimpleConstants)
- return CastConstantVector(const_cast(CV), DestPTy);
- }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ return BitCastConstantVector(CV, DestPTy);
}
}
From asl at math.spbu.ru Tue Dec 11 00:53:44 2007
From: asl at math.spbu.ru (Anton Korobeynikov)
Date: Tue, 11 Dec 2007 06:53:44 -0000
Subject: [llvm-commits] [llvm] r44851 - /llvm/trunk/include/llvm/ADT/Trie.h
Message-ID: <200712110653.lBB6ri8c010340@zion.cs.uiuc.edu>
Author: asl
Date: Tue Dec 11 00:53:44 2007
New Revision: 44851
URL: http://llvm.org/viewvc/llvm-project?rev=44851&view=rev
Log:
Add first and really dirty version of generic Trie structure
Added:
llvm/trunk/include/llvm/ADT/Trie.h
Added: llvm/trunk/include/llvm/ADT/Trie.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/Trie.h?rev=44851&view=auto
==============================================================================
--- llvm/trunk/include/llvm/ADT/Trie.h (added)
+++ llvm/trunk/include/llvm/ADT/Trie.h Tue Dec 11 00:53:44 2007
@@ -0,0 +1,223 @@
+//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Anton Korobeynikov and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class defines a generic trie structure. The trie structure
+// is immutable after creation, but the payload contained within it is not.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TRIE_H
+#define LLVM_ADT_TRIE_H
+
+#include