From craig.topper at gmail.com Mon Apr 9 00:16:56 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Apr 2012 05:16:56 -0000 Subject: [llvm-commits] [llvm] r154305 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120409051656.8D6AA2A6C065@llvm.org> Author: ctopper Date: Mon Apr 9 00:16:56 2012 New Revision: 154305 URL: http://llvm.org/viewvc/llvm-project?rev=154305&view=rev Log: Replace some explicit checks with asserts for conditions that should never happen. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154305&r1=154304&r2=154305&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 9 00:16:56 2012 @@ -7698,8 +7698,7 @@ SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) @@ -7804,27 +7803,21 @@ if (N0.getOperand(1).getOpcode() != ISD::UNDEF) return SDValue(); - // The incoming shuffle must be of the same type as the result of the current - // shuffle. - if (OtherSV->getOperand(0).getValueType() != VT) - return SDValue(); - - EVT InVT = N0.getValueType(); - int InNumElts = InVT.getVectorNumElements(); + // The incoming shuffle must be of the same type as the result of the + // current shuffle. 
+ assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); - // If we access the second (undef) operand then this index can be - // canonicalized to undef as well. - if (Idx >= InNumElts) - Idx = -1; + assert(Idx < (int)NumElts && "Index references undef operand"); // Next, this index comes from the first value, which is the incoming // shuffle. Adopt the incoming index. if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); // The combined shuffle must map each index to itself. - if ((unsigned)Idx != i && Idx != -1) + if (Idx >= 0 && (unsigned)Idx != i) return SDValue(); } From isanbard at gmail.com Mon Apr 9 00:26:48 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 09 Apr 2012 05:26:48 -0000 Subject: [llvm-commits] [llvm] r154306 - in /llvm/trunk: include/llvm-c/lto.h tools/lto/LTOCodeGenerator.cpp tools/lto/LTOCodeGenerator.h tools/lto/lto.cpp tools/lto/lto.exports Message-ID: <20120409052648.A1C9C2A6C065@llvm.org> Author: void Date: Mon Apr 9 00:26:48 2012 New Revision: 154306 URL: http://llvm.org/viewvc/llvm-project?rev=154306&view=rev Log: Add a hook to turn on the internalize pass through the LTO interface. Modified: llvm/trunk/include/llvm-c/lto.h llvm/trunk/tools/lto/LTOCodeGenerator.cpp llvm/trunk/tools/lto/LTOCodeGenerator.h llvm/trunk/tools/lto/lto.cpp llvm/trunk/tools/lto/lto.exports Modified: llvm/trunk/include/llvm-c/lto.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/lto.h?rev=154306&r1=154305&r2=154306&view=diff ============================================================================== --- llvm/trunk/include/llvm-c/lto.h (original) +++ llvm/trunk/include/llvm-c/lto.h Mon Apr 9 00:26:48 2012 @@ -251,6 +251,12 @@ int nargs); /** + * Enables the internalize pass during LTO optimizations. 
+ */ +extern void +lto_codegen_whole_program_optimization(lto_code_gen_t cg); + +/** * Adds to a list of all global symbols that must exist in the final * generated code. If a function is not listed, it might be * inlined into every usage and optimized away. @@ -258,7 +264,6 @@ extern void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol); - /** * Writes a new object file at the specified path that contains the * merged contents of all modules added so far. @@ -267,7 +272,6 @@ extern bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path); - /** * Generates code for all added modules into one native object file. * On success returns a pointer to a generated mach-o/ELF buffer and Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOCodeGenerator.cpp?rev=154306&r1=154305&r2=154306&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOCodeGenerator.cpp (original) +++ llvm/trunk/tools/lto/LTOCodeGenerator.cpp Mon Apr 9 00:26:48 2012 @@ -67,7 +67,7 @@ : _context(getGlobalContext()), _linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL), _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false), - _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), + _runInternalizePass(false), _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), _nativeObjectFile(NULL) { InitializeAllTargets(); InitializeAllTargetMCs(); @@ -366,7 +366,8 @@ // Add an appropriate TargetData instance for this module... 
passes.add(new TargetData(*_target->getTargetData())); - PassManagerBuilder().populateLTOPassManager(passes, /*Internalize=*/ false, + PassManagerBuilder().populateLTOPassManager(passes, + _runInternalizePass, !DisableInline, DisableGVNLoadPRE); Modified: llvm/trunk/tools/lto/LTOCodeGenerator.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOCodeGenerator.h?rev=154306&r1=154305&r2=154306&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOCodeGenerator.h (original) +++ llvm/trunk/tools/lto/LTOCodeGenerator.h Mon Apr 9 00:26:48 2012 @@ -54,6 +54,8 @@ const void *compile(size_t *length, std::string &errMsg); void setCodeGenDebugOptions(const char *opts); + void enableInternalizePass() { _runInternalizePass = true; } + private: bool generateObjectFile(llvm::raw_ostream &out, std::string &errMsg); void applyScopeRestrictions(); @@ -70,6 +72,7 @@ llvm::TargetMachine* _target; bool _emitDwarfDebugInfo; bool _scopeRestrictionsDone; + bool _runInternalizePass; lto_codegen_model _codeModel; StringSet _mustPreserveSymbols; StringSet _asmUndefinedRefs; Modified: llvm/trunk/tools/lto/lto.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/lto.cpp?rev=154306&r1=154305&r2=154306&view=diff ============================================================================== --- llvm/trunk/tools/lto/lto.cpp (original) +++ llvm/trunk/tools/lto/lto.cpp Mon Apr 9 00:26:48 2012 @@ -183,6 +183,12 @@ cg->addMustPreserveSymbol(symbol); } +/// lto_codegen_whole_program_optimization - Enable the internalize pass during +/// LTO optimizations. +void lto_codegen_whole_program_optimization(lto_code_gen_t cg) { + cg->enableInternalizePass(); +} + /// lto_codegen_write_merged_modules - Writes a new file at the specified path /// that contains the merged contents of all modules added so far. Returns true /// on error (check lto_get_error_message() for details). 
Modified: llvm/trunk/tools/lto/lto.exports URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/lto.exports?rev=154306&r1=154305&r2=154306&view=diff ============================================================================== --- llvm/trunk/tools/lto/lto.exports (original) +++ llvm/trunk/tools/lto/lto.exports Mon Apr 9 00:26:48 2012 @@ -27,6 +27,7 @@ lto_codegen_set_assembler_path lto_codegen_set_cpu lto_codegen_compile_to_file +lto_codegen_whole_program_optimization LLVMCreateDisasm LLVMDisasmDispose LLVMDisasmInstruction From craig.topper at gmail.com Mon Apr 9 00:55:33 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Apr 2012 05:55:33 -0000 Subject: [llvm-commits] [llvm] r154307 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120409055533.689E72A6C065@llvm.org> Author: ctopper Date: Mon Apr 9 00:55:33 2012 New Revision: 154307 URL: http://llvm.org/viewvc/llvm-project?rev=154307&view=rev Log: Optimize code slightly. No functionality change. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154307&r1=154306&r2=154307&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 9 00:55:33 2012 @@ -7723,12 +7723,13 @@ SmallVector NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); - if (Idx < 0) - NewMask.push_back(Idx); - else if (Idx < (int)NumElts) - NewMask.push_back(Idx + NumElts); - else - NewMask.push_back(Idx - NumElts); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); } return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), &NewMask[0]); From craig.topper at gmail.com Mon Apr 9 00:59:53 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Apr 2012 05:59:53 -0000 Subject: [llvm-commits] [llvm] r154308 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120409055953.43E012A6C065@llvm.org> Author: ctopper Date: Mon Apr 9 00:59:53 2012 New Revision: 154308 URL: http://llvm.org/viewvc/llvm-project?rev=154308&view=rev Log: Remove unnecessary 'else' on an 'if' that always returns Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154308&r1=154307&r2=154308&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 9 00:59:53 2012 @@ -7900,7 +7900,8 @@ SDValue Elt = RHS.getOperand(i); if (!isa(Elt)) return 
SDValue(); - else if (cast(Elt)->isAllOnesValue()) + + if (cast(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast(Elt)->isNullValue()) Indices.push_back(NumElts); From craig.topper at gmail.com Mon Apr 9 02:19:09 2012 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 09 Apr 2012 07:19:09 -0000 Subject: [llvm-commits] [llvm] r154309 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120409071909.D59872A6C065@llvm.org> Author: ctopper Date: Mon Apr 9 02:19:09 2012 New Revision: 154309 URL: http://llvm.org/viewvc/llvm-project?rev=154309&view=rev Log: Remove unnecessary type check when combining and/or/xor of swizzles. Move some checks to allow better early out. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154309&r1=154308&r2=154309&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 9 02:19:09 2012 @@ -2361,18 +2361,16 @@ // The type-legalizer generates this pattern when loading illegal // vector types from memory. In many cases this allows additional shuffle // optimizations. 
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *SVN0 = cast(N0); ShuffleVectorSDNode *SVN1 = cast(N1); - SDValue In0 = SVN0->getOperand(0); - SDValue In1 = SVN1->getOperand(0); - EVT In0Ty = In0.getValueType(); - EVT In1Ty = In1.getValueType(); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); unsigned NumElts = VT.getVectorNumElements(); - // Check that both shuffles are swizzles. - bool SingleVecShuff = (N0.getOperand(1).getOpcode() == ISD::UNDEF && - N1.getOperand(1).getOpcode() == ISD::UNDEF); // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. @@ -2386,14 +2384,15 @@ } } - if (SameMask && SingleVecShuff && In0Ty == In1Ty) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, In0, In1); - SDValue Shuff = DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, - DAG.getUNDEF(VT), &SVN0->getMask()[0]); + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); AddToWorkList(Op.getNode()); - return Shuff; + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); } } + return SDValue(); } From nadav.rotem at intel.com Mon Apr 9 02:45:59 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Mon, 09 Apr 2012 07:45:59 -0000 Subject: [llvm-commits] [llvm] r154310 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/avx2-vbroadcast.ll Message-ID: <20120409074559.79FFF2A6C066@llvm.org> Author: nadav Date: Mon Apr 9 02:45:58 2012 New Revision: 154310 URL: http://llvm.org/viewvc/llvm-project?rev=154310&view=rev Log: Fix a bug in the 
lowering of broadcasts: ConstantPools need to use the target pointer type. Move NormalizeVectorShuffle and LowerVectorBroadcast into X86TargetLowering. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154310&r1=154309&r2=154310&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 9 02:45:58 2012 @@ -4859,12 +4859,13 @@ /// a scalar load, or a constant. /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue LowerVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget, - DebugLoc &dl, SelectionDAG &DAG) { +SDValue +X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { if (!Subtarget->hasAVX()) return SDValue(); EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); SDValue Ld; bool ConstSplatVal; @@ -4905,7 +4906,7 @@ Ld = Sc.getOperand(0); ConstSplatVal = (Ld.getOpcode() == ISD::Constant || - Ld.getOpcode() == ISD::ConstantFP); + Ld.getOpcode() == ISD::ConstantFP); // The scalar_to_vector node and the suspected // load node must have exactly one user. @@ -4930,11 +4931,6 @@ if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) || (Is128 && (ScalarSize == 32))) { - // This is the type of the load operation for the constant that we save - // in the constant pool. We can't load float values from the constant pool - // because the DAG has to be legal at this stage. - MVT LdTy = (ScalarSize == 32 ? 
MVT::i32 : MVT::i64); - const Constant *C = 0; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); @@ -4943,14 +4939,12 @@ assert(C && "Invalid constant type"); - SDValue CP = DAG.getConstantPool(C, LdTy); + SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CP)->getAlignment(); - Ld = DAG.getLoad(LdTy, dl, DAG.getEntryNode(), CP, + Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - // Bitcast the loaded constant back to the requested type. - Ld = DAG.getNode(ISD::BITCAST, dl, CVT, Ld); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } } @@ -5017,7 +5011,7 @@ return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl); } - SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); if (Broadcast.getNode()) return Broadcast; @@ -6226,10 +6220,8 @@ getShuffleSHUFImmediate(SVOp), DAG); } -static -SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI, - const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast(Op); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -6245,7 +6237,7 @@ int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load - SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, dl, DAG); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); if (Broadcast.getNode()) return Broadcast; @@ -6332,7 +6324,7 @@ // Normalize the input vectors. Here splats, zeroed vectors, profitable // narrowing and commutation of operands should be handled. The actual code // doesn't include all of those, work in progress... 
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget); + SDValue NewOp = NormalizeVectorShuffle(Op, DAG); if (NewOp.getNode()) return NewOp; Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154310&r1=154309&r2=154310&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Apr 9 02:45:58 2012 @@ -780,6 +780,8 @@ // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; + SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=154310&r1=154309&r2=154310&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Mon Apr 9 02:45:58 2012 @@ -171,3 +171,17 @@ ret <4 x float> %vecinit6.i } +; CHECK: _e4 +; CHECK-NOT: broadcast +; CHECK: ret +define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp { + %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0 + %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1 + %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2 + %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3 + %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3 + %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3 + %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3 + %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3 + ret <8 x i8> %vecinit7.i +} From 
anton at korobeynikov.info Mon Apr 9 03:02:54 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Mon, 9 Apr 2012 12:02:54 +0400 Subject: [llvm-commits] [llvm] r154304 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp test/CodeGen/X86/tls-pie.ll test/CodeGen/X86/tls.ll In-Reply-To: <20120409021306.4E2B22A6C065@llvm.org> References: <20120409021306.4E2B22A6C065@llvm.org> Message-ID: > 5) 64-bit small PIC code which is *not* using RIP-relative addressing. > This is the only case changed by the patch, and the primary place you > see it is in TLS, either the win64 section offset TLS or Linux > local-exec TLS model in a PIC compilation. Here the ABI again ensures > that the immediates fit because we are in small mode, Just to clarify: 1. In small code model all the data and code reside in positive 31-bit space. So, we can easily check whether everything fits. 2. In small + pic code model all the data + code + got/plt fit into 31-bit space "around" %rip. So, we can again fold stuff here. Same applies to TLS, but with %rip substituted by thread pointer :) Patch looks good to me, but I'd prefer additional PIC-aware testing, e.g. building some huge Qt app, or KDE, so something which uses shared libs here and there :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From baldrick at free.fr Mon Apr 9 03:12:38 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Apr 2012 08:12:38 -0000 Subject: [llvm-commits] [zorg] r154311 - /zorg/trunk/buildbot/osuosl/master/config/builders.py Message-ID: <20120409081238.8A8152A6C065@llvm.org> Author: baldrick Date: Mon Apr 9 03:12:38 2012 New Revision: 154311 URL: http://llvm.org/viewvc/llvm-project?rev=154311&view=rev Log: Increase the timeout for the nightly test builder. 
Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/builders.py?rev=154311&r1=154310&r2=154311&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/builders.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/builders.py Mon Apr 9 03:12:38 2012 @@ -362,7 +362,7 @@ {'name' : 'dragonegg-x86_64-linux-gcc-4.6-fnt', 'slavenames' : ['gcc12'], 'builddir' : 'dragonegg-x86_64-linux-gcc-4.6-fnt', - 'factory' : DragonEggBuilder.getDragonEggNightlyTestBuildFactory(llvm_configure_args=['--enable-optimized', '--enable-assertions', '--with-externals=/home/baldrick/externals']), + 'factory' : DragonEggBuilder.getDragonEggNightlyTestBuildFactory(llvm_configure_args=['--enable-optimized', '--enable-assertions', '--with-externals=/home/baldrick/externals'], timeout=40), 'category' : 'dragonegg'}, {'name' : 'dragonegg-x86_64-linux-gcc-4.6-test', From isanbard at gmail.com Mon Apr 9 03:32:21 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 09 Apr 2012 08:32:21 -0000 Subject: [llvm-commits] [llvm] r154312 - in /llvm/trunk: include/llvm-c/lto.h tools/lto/lto.cpp tools/lto/lto.exports Message-ID: <20120409083221.6B0BA2A6C065@llvm.org> Author: void Date: Mon Apr 9 03:32:21 2012 New Revision: 154312 URL: http://llvm.org/viewvc/llvm-project?rev=154312&view=rev Log: s/lto_codegen_whole_program_optimization/lto_codegen_set_whole_program_optimization/ Modified: llvm/trunk/include/llvm-c/lto.h llvm/trunk/tools/lto/lto.cpp llvm/trunk/tools/lto/lto.exports Modified: llvm/trunk/include/llvm-c/lto.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/lto.h?rev=154312&r1=154311&r2=154312&view=diff ============================================================================== --- llvm/trunk/include/llvm-c/lto.h (original) +++ 
llvm/trunk/include/llvm-c/lto.h Mon Apr 9 03:32:21 2012 @@ -254,7 +254,7 @@ * Enables the internalize pass during LTO optimizations. */ extern void -lto_codegen_whole_program_optimization(lto_code_gen_t cg); +lto_codegen_set_whole_program_optimization(lto_code_gen_t cg); /** * Adds to a list of all global symbols that must exist in the final Modified: llvm/trunk/tools/lto/lto.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/lto.cpp?rev=154312&r1=154311&r2=154312&view=diff ============================================================================== --- llvm/trunk/tools/lto/lto.cpp (original) +++ llvm/trunk/tools/lto/lto.cpp Mon Apr 9 03:32:21 2012 @@ -183,9 +183,9 @@ cg->addMustPreserveSymbol(symbol); } -/// lto_codegen_whole_program_optimization - Enable the internalize pass during -/// LTO optimizations. -void lto_codegen_whole_program_optimization(lto_code_gen_t cg) { +/// lto_codegen_set_whole_program_optimization - Enable the internalize pass +/// during LTO optimizations. 
+void lto_codegen_set_whole_program_optimization(lto_code_gen_t cg) { cg->enableInternalizePass(); } Modified: llvm/trunk/tools/lto/lto.exports URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/lto.exports?rev=154312&r1=154311&r2=154312&view=diff ============================================================================== --- llvm/trunk/tools/lto/lto.exports (original) +++ llvm/trunk/tools/lto/lto.exports Mon Apr 9 03:32:21 2012 @@ -27,7 +27,7 @@ lto_codegen_set_assembler_path lto_codegen_set_cpu lto_codegen_compile_to_file -lto_codegen_whole_program_optimization +lto_codegen_set_whole_program_optimization LLVMCreateDisasm LLVMDisasmDispose LLVMDisasmInstruction From nadav.rotem at intel.com Mon Apr 9 03:33:21 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Mon, 09 Apr 2012 08:33:21 -0000 Subject: [llvm-commits] [llvm] r154313 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-shuffle.ll Message-ID: <20120409083321.7F7382A6C065@llvm.org> Author: nadav Date: Mon Apr 9 03:33:21 2012 New Revision: 154313 URL: http://llvm.org/viewvc/llvm-project?rev=154313&view=rev Log: Lower some x86 shuffle sequences to the vblend family of instructions. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154313&r1=154312&r2=154313&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 9 03:33:21 2012 @@ -5377,6 +5377,69 @@ return LowerAVXCONCAT_VECTORS(Op, DAG); } +// Try to lower a shuffle node into a simple blend instruction. 
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG &DAG, EVT PtrTy) { + ShuffleVectorSDNode *SVOp = cast(Op); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + LLVMContext *Context = DAG.getContext(); + EVT VT = Op.getValueType(); + EVT InVT = V1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getSizeInBits(); + int MaskSize = VT.getVectorNumElements(); + int InSize = InVT.getVectorNumElements(); + + // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. + if (!Subtarget->hasAVX()) + return SDValue(); + + if (MaskSize != InSize) + return SDValue(); + + SmallVector MaskVals; + ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); + ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); + + for (int i = 0; i < MaskSize; ++i) { + int EltIdx = SVOp->getMaskElt(i); + if (EltIdx == i || EltIdx == -1) + MaskVals.push_back(NegOne); + else if (EltIdx == (i + MaskSize)) + MaskVals.push_back(Zero); + else return SDValue(); + } + + Constant *MaskC = ConstantVector::get(MaskVals); + EVT MaskTy = EVT::getEVT(MaskC->getType()); + assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); + SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); + unsigned Alignment = cast(MaskIdx)->getAlignment(); + SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + + if (Subtarget->hasAVX()) { + switch (MaskTy.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v16i8: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v8i32: + case MVT::v4i64: + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + } + } + + return SDValue(); +} + // v8i16 shuffles - Prefer 
shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -6539,6 +6602,10 @@ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy()); + if (BlendOp.getNode()) + return BlendOp; + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154313&r1=154312&r2=154313&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Mon Apr 9 03:33:21 2012 @@ -162,3 +162,43 @@ 62> ret <32 x i8> %0 } + +; CHECK: blend1 +; CHECK: vblendvps +; CHECK: ret +define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { + %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %t +} + +; CHECK: blend2 +; CHECK: vblendvps +; CHECK: ret +define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { + %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %t +} + +; CHECK: blend2a +; CHECK: vblendvps +; CHECK: ret +define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { + %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %t +} + +; CHECK: blend3 +; CHECK-NOT: vblendvps +; CHECK: ret +define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { + %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %t +} + +; CHECK: blend4 +; CHECK: vblendvpd +; CHECK: ret +define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind 
alwaysinline { + %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %t +} From stpworld at narod.ru Mon Apr 9 05:47:43 2012 From: stpworld at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Apr 2012 14:47:43 +0400 Subject: [llvm-commits] [PATCH] PR12156, SmallPtrMap In-Reply-To: <4F7ED410.1020903@narod.ru> References: <52361331642856@web107.yandex.ru> <8DC94219-253E-47EF-AB0A-6E8C7D751FA0@2pi.dk> <1040211332749957@web88.yandex.ru> <123241332954375@web74.yandex.ru> <567031333092844@web127.yandex.ru> <9FCF1FC0-4AFD-4883-B284-CA76AAEC3D15@apple.com> <4F7ED410.1020903@narod.ru> Message-ID: <4F82BE4F.7000108@narod.ru> Ping Stepan Dyatkovskiy wrote: > Hi Chris. I fixed all you ask. > > About MultiMapIterator. > For FlatArrayMap + DenseMap couple it is very reasonable to use > DenseMapIterator. > I defined MultiImplMapIteratorsFactory template that presents iterator > type itself and interface for iterators construction. > For FlatArrayMap + DenseMap it has specialization that allows to use > DenseMapIterator directly keeping genericity of default MultiImplMap > implementation. > > Just for clarity... > Code below: > typedef MultiImplMapIterator iterator; > is replaced with these ones: > typedef MultiImplIteratorsFactory ItFactory; > typedef typename ItFactory::iterator iterator; > Iterator construction that was used in previous patch: > return const_iterator(SmallMap.find(K)) > is replaced with: > return ItFactory::const_it(SmallMap, SmallMap.find(K)); > and so on. > This approach allows us to optimize iterators for some special cases and > to keep default behaviour for all others. > > So, please, find reworked patch in attachment for review. > > -Stepan. > > Chris Lattner wrote: >> >> On Mar 30, 2012, at 12:34 AM, Stepan Dyatkovskiy wrote: >> >>> Hi all. I reworked my patch again. >>> In MultiImplMap class I added "bool DenseMapCompatible" template >>> parameter that makes it DenseMap compatible. 
For DenseMapCompatible = >>> true user should present map implementations with DenseMap specific >>> methods, though. >>> SmallMap became special case of MultiImplMap that uses FlatArrayMap >>> for small mode, DenseMap for big mode and that set DenseMapCompatible >>> = true. >> >> Thank you for working on this Stepan! This looks like great progress, >> though I have a strong concern about the design (stemming from the >> iterator, see the end). Here are some random comments: >> >> >> Please include a patch that updates the data structures section of >> docs/ProgrammersManual.html. >> >> >> +//===- llvm/ADT/FlatArrayMap.h - 'Normally small' pointer set ----*- >> C++ -*-==// >> ... >> +// After maximum number of elements is reached, map declines any >> farther attempts >> >> "further attempts". Also, please fit to 80 columns. >> >> >> >> Please add a doxygen /// comment to the FlatArrayMap class definition. >> >> >> Out of curiosity, why use: >> + template >> ... >> + enum { ArraySize = N }; >> >> instead of just renaming 'N' to 'ArraySize'? >> >> ... >> + const_iterator begin() const { >> + return const_iterator(Array); } >> >> Please put } on the next line. >> >> >> + void clear() { >> + // Call destructor for each item and forget them. >> + for (unsigned i = 0; i< NumElements; ++i) { >> >> This loop will reload NumElements every time through the loop if the >> dtors are non-trivial, please cache it in a local variable. >> >> +//===- llvm/ADT/SmallMap.h - 'Normally small' pointer set ----*- C++ >> -*-===// >> >> Please fill this out to exactly 80 columns. >> >> +++ include/llvm/ADT/MultiImplMap.h (revision 0) >> >> + void moveAllToBigMap() { >> + BigMap.insert(SmallMap.begin(), SmallMap.end()); >> + SmallMap.clear(); >> + } >> >> This should just be inlined into its single caller. If you really want >> it out of line, please indent by 2, not 4. 
>> >> >> + MultiImplMap(const self& other) { >> + UseSmall = other.UseSmall; >> + if (UseSmall) >> + SmallMap = other.SmallMap; >> + else >> + BigMap = other.BigMap; >> + } >> >> Shouldn't this check the size of "other" and use SmallMap if it can? >> If "other" started grew to "large" size, then had most of its elements >> removed (so that it could fit in "small") then was copied, it would >> make sense to start the copy out small. >> >> >> + void clear() { >> + if (UseSmall) >> + SmallMap.clear(); >> + else >> + BigMap.clear(); >> + } >> >> In contrast to the copy ctor, this should stay the way it is. Clearing >> a densemap doesn't free its buckets. Please add a comment to this >> effect so that it is clearly intentional. >> >> >> + void swap(MultiImplMap& rhs) { >> +} >> >> Why not just define this as: >> >> SmallMap.swap(rhs.SmallMap); >> BigMap.swap(rhs.BitMap); >> >> with no cases in it? >> >> >> + self& operator=(const self& other) { >> >> Similar to the 'clear' method, switching back to small doesn't help >> you. If 'this' is already big, stay big. >> >> >> >> + template >> + class MultiImplMapIterator { >> >> I'm not a big fan of this class: it is going to be really inefficient >> to copy around and deal with, particularly because so many of the >> MultiImplMap methods are defined in terms of it. Can't we do something >> trickier if we drop the genericity of the underlying Array/Map >> classes? A densemap is just an array of buckets, exactly like the >> small case. If SmallMap took advantage of that, the iterator would be >> *much much* more efficient. >> >> -Chris > From stpworld at narod.ru Mon Apr 9 05:49:31 2012 From: stpworld at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Apr 2012 14:49:31 +0400 Subject: [llvm-commits] New classes for PR1255: Should enhance LLVM switch instruction to take case ranges. 
In-Reply-To: <216051332771978@web158.yandex.ru> References: <318431328953921@web153.yandex.ru> <728541329064187@web155.yandex.ru> <4F37ED0D.5080709@free.fr> <503781329070448@web148.yandex.ru> <804331329200372@web140.yandex.ru> <2AFD05C6-756F-4405-BD48-E8E461C190CF@apple.com> <42501329219557@web128.yandex.ru> <92721329753525@web32.yandex.ru> <93481331456407@web131.yandex.ru> <375DBBC4-6F46-4A32-A0EC-EEB765AAE171@apple.com> <4F6C6F28.6090009@narod.ru> <216051332771978@web158.yandex.ru> Message-ID: <4F82BEBB.2070607@narod.ru> Hi Chris. Proceed discussion? ;-) -Stepan. Stepan Dyatkovskiy wrote: > ping > > 23.03.2012, 16:40, "Stepan Dyatkovskiy": >> Hi Chris. >> >> // Some range class it may be ConstantRange as Duncan proposed. >> class Range { >> APInt Low; >> APInt High; >> >> public: >> >> // Some interface here (getLow, getHigh,<,>, isInRange and so on). >> } >> >> class ConstantRangesSet { >> std::vector Ranges; >> >> /// Checks is the given constant satisfies this case. >> bool isSatisfies(const APInt& N); >> >> /// Returns number of ranges. >> unsigned getNumItems() const; >> >> /// Returns set item with given index. >> const Range& getItem(unsigned idx); >> } >> >> SingleNumber is APInt instead. >> >> -Stepan. >> >> Chris Lattner wrote: >> >>> On Mar 11, 2012, at 1:00 AM, Stepan Dyatkovskiy wrote: >>>> Hi Chris. >>>> >>>> Summary of previous discussion: >>>> 1. Finally ConstantInt should be replaced with APInt. >>>> 2. How to store case ranges. I remind your last proposal: >>>> >>>> [quote] >>>> >>>> The mapping of the range to entries in CaseValues could look like this: >>>> >>>> [N, N+1] -> Single element in CaseValues indicates the case value for the destination. >>>> [MAXINT, MININT] -> The entire case is "default" >>>> Otherwise, the pair indicates a series of ranges (inclusive). If one of the ranges is MAXINT/MININT, then it is the default case. For example [0, 4, 6,6] would handle 0,1,2,3,4,6. [4,6, MAXINT,MININT] would handle 4,5,6 and default. 
>>>> >>>> [/quote] >>>> >>>> My proposal was to create ConstantRangesSet that will store all numbers and ranges for complex case. I still propose to use this schema. >>>> You can ask what about most probable case, when for most of successors will be the only case value? >>>> OK. We can create collection for "classic" cases vector > too. Then finally on the top level we have 2 collections: >>>> class SwitchInst { >>>> .. >>>> vector > ClassicCases; >>>> vector > ComplexCases; >>>> .. >>>> } >>>> >>>> So, what do you think about this? >>> This approach seems sensible to me. Can you sketch out what SingleNumber and ConstantRangesSet look like? >>> >>> -Chris >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From timurrrr at google.com Mon Apr 9 06:50:28 2012 From: timurrrr at google.com (Timur Iskhodzhanov) Date: Mon, 09 Apr 2012 11:50:28 -0000 Subject: [llvm-commits] [compiler-rt] r154314 - /compiler-rt/trunk/lib/asan/tests/asan_test.cc Message-ID: <20120409115028.28CAB2A6C065@llvm.org> Author: timurrrr Date: Mon Apr 9 06:50:27 2012 New Revision: 154314 URL: http://llvm.org/viewvc/llvm-project?rev=154314&view=rev Log: Use 'typename' instead of 'class' in template<> definitions supporting POD types Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/asan_test.cc?rev=154314&r1=154313&r2=154314&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/asan_test.cc (original) +++ compiler-rt/trunk/lib/asan/tests/asan_test.cc Mon Apr 9 06:50:27 2012 @@ -66,7 +66,7 @@ const size_t kLargeMalloc = 1 << 24; -template +template NOINLINE void asan_write(T *a) { *a = 0; } @@ -112,7 +112,7 @@ NOINLINE void free_bbb(void *p) { free_ccc(p); break_optimization(0);} 
NOINLINE void free_aaa(void *p) { free_bbb(p); break_optimization(0);} -template +template NOINLINE void oob_test(int size, int off) { char *p = (char*)malloc_aaa(size); // fprintf(stderr, "writing %d byte(s) into [%p,%p) with offset %d\n", @@ -122,7 +122,7 @@ } -template +template NOINLINE void uaf_test(int size, int off) { char *p = (char *)malloc_aaa(size); free_aaa(p); @@ -282,7 +282,7 @@ pthread_key_delete(test_key); } -template +template void OOBTest() { char expected_str[100]; for (int size = sizeof(T); size < 20; size += 5) { @@ -795,7 +795,7 @@ return string(expected_str); } -template +template void MemSetOOBTestTemplate(size_t length) { if (length == 0) return; size_t size = Ident(sizeof(T) * length); @@ -852,7 +852,7 @@ } // Same test for memcpy and memmove functions -template +template void MemTransferOOBTestTemplate(size_t length) { if (length == 0) return; size_t size = Ident(sizeof(T) * length); From baldrick at free.fr Mon Apr 9 08:30:29 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Apr 2012 13:30:29 -0000 Subject: [llvm-commits] [zorg] r154316 - /zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Message-ID: <20120409133029.0E62F2A6C065@llvm.org> Author: baldrick Date: Mon Apr 9 08:30:28 2012 New Revision: 154316 URL: http://llvm.org/viewvc/llvm-project?rev=154316&view=rev Log: Pass the time limit through to the testsuite. Make it possible to pass additional configure args to the testsuite. 
Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Modified: zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py?rev=154316&r1=154315&r2=154316&view=diff ============================================================================== --- zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py (original) +++ zorg/trunk/zorg/buildbot/builders/DragonEggBuilder.py Mon Apr 9 08:30:28 2012 @@ -227,6 +227,7 @@ def getDragonEggNightlyTestBuildFactory(gcc='gcc', gxx='g++', llvm_configure_args=[], + testsuite_configure_args=[], xfails=[], clean=True, env={}, jobs='%(jobs)s', timeout=20): f = buildbot.process.factory.BuildFactory() @@ -344,7 +345,8 @@ WithProperties('--with-llvmsrc=%(builddir)s/' + llvm_src_dir), WithProperties('--with-llvmobj=%(builddir)s/' + llvm_obj_dir), WithProperties('--with-llvmgccdir=%(builddir)s/'), - '--with-llvmcc=llvm-gcc', 'CC=' + gcc, 'CXX=' + gxx], + '--with-llvmcc=llvm-gcc', 'CC=' + gcc, 'CXX=' + gxx] + + testsuite_configure_args, description='configuring nightly test-suite', descriptionDone='configure nightly test-suite', haltOnFailure=True, workdir=testsuite_obj_dir, env=env)) @@ -360,6 +362,7 @@ command=['make', WithProperties('-j%s' % jobs), 'ENABLE_PARALLEL_REPORT=1', 'DISABLE_CBE=1', 'DISABLE_JIT=1', + 'RUNTIMELIMIT=%s' % (timeout*60), 'TEST=nightly', 'report'], logfiles={'report' : 'report.nightly.txt'}, description='running nightly test-suite', From baldrick at free.fr Mon Apr 9 08:32:51 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Apr 2012 13:32:51 -0000 Subject: [llvm-commits] [zorg] r154317 - /zorg/trunk/buildbot/osuosl/master/config/builders.py Message-ID: <20120409133251.71C8F2A6C065@llvm.org> Author: baldrick Date: Mon Apr 9 08:32:51 2012 New Revision: 154317 URL: http://llvm.org/viewvc/llvm-project?rev=154317&view=rev Log: The --with-externals argument needs to be passed directly to the testsuite. 
Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/builders.py?rev=154317&r1=154316&r2=154317&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/builders.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/builders.py Mon Apr 9 08:32:51 2012 @@ -362,7 +362,7 @@ {'name' : 'dragonegg-x86_64-linux-gcc-4.6-fnt', 'slavenames' : ['gcc12'], 'builddir' : 'dragonegg-x86_64-linux-gcc-4.6-fnt', - 'factory' : DragonEggBuilder.getDragonEggNightlyTestBuildFactory(llvm_configure_args=['--enable-optimized', '--enable-assertions', '--with-externals=/home/baldrick/externals'], timeout=40), + 'factory' : DragonEggBuilder.getDragonEggNightlyTestBuildFactory(llvm_configure_args=['--enable-optimized', '--enable-assertions'], testsuite_configure_args=['--with-externals=/home/baldrick/externals'], timeout=40), 'category' : 'dragonegg'}, {'name' : 'dragonegg-x86_64-linux-gcc-4.6-test', From baldrick at free.fr Mon Apr 9 09:08:00 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Apr 2012 14:08:00 -0000 Subject: [llvm-commits] [llvm] r154319 - /llvm/trunk/docs/LangRef.html Message-ID: <20120409140801.042EA2A6C065@llvm.org> Author: baldrick Date: Mon Apr 9 09:08:00 2012 New Revision: 154319 URL: http://llvm.org/viewvc/llvm-project?rev=154319&view=rev Log: Clarify that fpaccuracy metadata is giving the compiler permission to use a less accurate method. Modified: llvm/trunk/docs/LangRef.html Modified: llvm/trunk/docs/LangRef.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.html?rev=154319&r1=154318&r2=154319&view=diff ============================================================================== --- llvm/trunk/docs/LangRef.html (original) +++ llvm/trunk/docs/LangRef.html Mon Apr 9 09:08:00 2012 @@ -3006,8 +3006,10 @@

fpaccuracy metadata may be attached to any instruction of floating - point type. It expresses the maximum relative error of the result of - that instruction, in ULPs. ULP is defined as follows:

+ point type. It expresses the maximum relative error allowed in the result + of that instruction, in ULPs, thus potentially allowing the compiler to use + a more efficient but less accurate method of computing it. + ULP is defined as follows:

From rafael.espindola at gmail.com Mon Apr 9 09:57:32 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 10:57:32 -0400 Subject: [llvm-commits] [PATCH][Review request] Add new MachineJumpTableInfo entry type (GPRel64BlockAddress) In-Reply-To: <95DD8BA8AA50B14BBFB86A1D541FA38001145F9CC7@exchdb03.mips.com> References: <95DD8BA8AA50B14BBFB86A1D541FA380F3088174@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA380F3089209@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA38001145F9CC7@exchdb03.mips.com> Message-ID: 2012/4/5 Hatanaka, Akira : > Hi Rafael, > > Sorry to bother you again. NP :-) > Could you take a look at a patch I submitted a while back? I pinged the list, but nobody has reviewed it. > http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120319/139547.html > > This was supposed to be a part of 149668, which was reviewed by you, but somehow I didn't notice I hadn't included it. It just adds code that checks whether the type of Jump table encoding is EK_GPRel64BlockAddress. The patch is OK. For small patches like this pinging on IRC is probably the most efficient thing to do. Cheers, Rafael From preston.gurd at intel.com Mon Apr 9 10:32:22 2012 From: preston.gurd at intel.com (Preston Gurd) Date: Mon, 09 Apr 2012 15:32:22 -0000 Subject: [llvm-commits] [llvm] r154320 - /llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Message-ID: <20120409153222.D1FD42A6C065@llvm.org> Author: pgurd Date: Mon Apr 9 10:32:22 2012 New Revision: 154320 URL: http://llvm.org/viewvc/llvm-project?rev=154320&view=rev Log: This patch adds X86 instruction itineraries, which were missed by the original patch to add itineraries, to X86InstrArithmetic.td. 
Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=154320&r1=154319&r2=154320&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Mon Apr 9 10:32:22 2012 @@ -53,7 +53,7 @@ // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), @@ -97,31 +97,32 @@ let neverHasSideEffects = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; - // AL,AH = AL*GR8 +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], + IIC_IMUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], + IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; - // EAX,EDX = EAX*GR32 +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], + IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; - // RAX,RDX = RAX*GR64 +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], + IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, 
(outs), (ins i8mem :$src), - "imul{b}\t$src", []>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; + // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] } } // neverHasSideEffects @@ -639,10 +640,11 @@ // BinOpRR - Instructions like "add reg, reg, reg". class BinOpRR opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list pattern, Format f = MRMDestReg> + dag outlist, list pattern, InstrItinClass itin, + Format f = MRMDestReg> : ITy; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -650,7 +652,8 @@ SDNode opnode> : BinOpRR; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has // just a EFLAGS as a result. @@ -659,7 +662,7 @@ : BinOpRR; + IIC_BIN_NONMEM, f>; // BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result. @@ -667,7 +670,8 @@ SDNode opnode> : BinOpRR; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result, and has EFLAGS as input. 
@@ -676,14 +680,14 @@ : BinOpRR; + EFLAGS))], IIC_BIN_NONMEM>; // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; } @@ -692,7 +696,7 @@ class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; } @@ -702,7 +706,7 @@ dag outlist, list pattern> : ITy; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R opcode, string mnemonic, X86TypeInfo typeinfo, @@ -738,7 +742,7 @@ Format f, dag outlist, list pattern> : ITy { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { let ImmT = typeinfo.ImmEncoding; } @@ -762,7 +766,6 @@ : BinOpRI; - // BinOpRI_RFF - Instructions like "adc reg, reg, imm". class BinOpRI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> @@ -776,7 +779,7 @@ Format f, dag outlist, list pattern> : ITy { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -853,7 +856,6 @@ [(store (opnode (typeinfo.VT (load addr:$dst)), typeinfo.ImmOperator:$src), addr:$dst), (implicit EFLAGS)]>; - // BinOpMI_RMW_FF - Instructions like "adc [mem], imm". 
class BinOpMI_RMW_FF @@ -1219,12 +1221,12 @@ let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V; } } From rafael.espindola at gmail.com Mon Apr 9 10:46:37 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 11:46:37 -0400 Subject: [llvm-commits] [patch] Use the range metadata in CodeGen In-Reply-To: References: Message-ID: > A rebased version is attached. And a rebase without the helper function is attached. Cheers, Rafael -------------- next part -------------- A non-text attachment was scrubbed... Name: t.patch Type: application/octet-stream Size: 3836 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120409/6f1632f9/attachment.obj From chandlerc at google.com Mon Apr 9 10:51:38 2012 From: chandlerc at google.com (Chandler Carruth) Date: Mon, 9 Apr 2012 17:51:38 +0200 Subject: [llvm-commits] [patch] Use the range metadata in CodeGen In-Reply-To: References: Message-ID: On Mon, Apr 9, 2012 at 5:46 PM, Rafael Espíndola wrote: > > A rebased version is attached. > > And a rebase without the helper function is attached. > Discussed on IRC, but in summary: I think the much simpler code is the right approach here. I had hoped for some generalization to emerge, but it's just not there. We really want to detect "trunc to i1" when it is phrased as an icmp. The code does that now. I've asked for making the ISD::TRUNCATE case a single early-exit block, and comments documenting what all is going on here. With that, LGTM. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120409/1158689d/attachment.html From rafael.espindola at gmail.com Mon Apr 9 11:06:03 2012 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Mon, 09 Apr 2012 16:06:03 -0000 Subject: [llvm-commits] [llvm] r154322 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/pr12360.ll Message-ID: <20120409160603.724872A6C065@llvm.org> Author: rafael Date: Mon Apr 9 11:06:03 2012 New Revision: 154322 URL: http://llvm.org/viewvc/llvm-project?rev=154322&view=rev Log: Pattern match a setcc of boolean value with 0 as a truncate. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/test/CodeGen/X86/pr12360.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154322&r1=154321&r2=154322&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 9 11:06:03 2012 @@ -4408,6 +4408,44 @@ return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. 
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast(Op0); + ConstantSDNode *COp1 = dyn_cast(Op1); + if (COp0 && COp0->getZExtValue() == 0) + Op = Op1; + else if (COp1 && COp1->getZExtValue() == 0) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4425,15 +4463,16 @@ // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. - if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { - SDValue Op = N0.getOperand(0); - APInt TruncatedBits - = APInt::getBitsSet(Op.getValueSizeInBits(), - N0.getValueSizeInBits(), - std::min(Op.getValueSizeInBits(), - VT.getSizeInBits())); - APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 
+ APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); Modified: llvm/trunk/test/CodeGen/X86/pr12360.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr12360.ll?rev=154322&r1=154321&r2=154322&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/pr12360.ll (original) +++ llvm/trunk/test/CodeGen/X86/pr12360.ll Mon Apr 9 11:06:03 2012 @@ -1,14 +1,46 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s define zeroext i1 @f1(i8* %x) { +; CHECK: f1: +; CHECK: movb (%rdi), %al +; CHECK-NEXT: ret + entry: %0 = load i8* %x, align 1, !range !0 %tobool = trunc i8 %0 to i1 ret i1 %tobool } -; CHECK: f1: +define zeroext i1 @f2(i8* %x) { +; CHECK: f2: ; CHECK: movb (%rdi), %al ; CHECK-NEXT: ret +entry: + %0 = load i8* %x, align 1, !range !0 + %tobool = icmp ne i8 %0, 0 + ret i1 %tobool +} + !0 = metadata !{i8 0, i8 2} + + +; check that we don't build a "trunc" from i1 to i1, which would assert. 
+define zeroext i1 @f3(i1 %x) { +; CHECK: f3: + +entry: + %tobool = icmp ne i1 %x, 0 + ret i1 %tobool +} + +; check that we don't build a trunc when other bits are needed +define zeroext i1 @f4(i32 %x) { +; CHECK: f4: +; CHECK: and + +entry: + %y = and i32 %x, 32768 + %z = icmp ne i32 %y, 0 + ret i1 %z +} From dblaikie at gmail.com Mon Apr 9 11:29:36 2012 From: dblaikie at gmail.com (David Blaikie) Date: Mon, 09 Apr 2012 16:29:36 -0000 Subject: [llvm-commits] [llvm] r154324 - in /llvm/trunk: include/llvm/Instructions.h lib/Target/MBlaze/MBlazeELFWriterInfo.cpp Message-ID: <20120409162936.9259D2A6C065@llvm.org> Author: dblaikie Date: Mon Apr 9 11:29:35 2012 New Revision: 154324 URL: http://llvm.org/viewvc/llvm-project?rev=154324&view=rev Log: Fix accidentally constant conditions found by uncommitted improvements to -Wconstant-conversion. A couple of cases where we were accidentally creating constant conditions by something like "x == a || b" instead of "x == a || x == b". In one case a conditional & then unreachable was used - I transformed this into a direct assert instead. Modified: llvm/trunk/include/llvm/Instructions.h llvm/trunk/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp Modified: llvm/trunk/include/llvm/Instructions.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Instructions.h?rev=154324&r1=154323&r2=154324&view=diff ============================================================================== --- llvm/trunk/include/llvm/Instructions.h (original) +++ llvm/trunk/include/llvm/Instructions.h Mon Apr 9 11:29:35 2012 @@ -2507,7 +2507,8 @@ /// Resolves successor for current case. 
BasicBlockTy *getCaseSuccessor() { - assert((Index < SI->getNumCases() || DefaultPseudoIndex) && + assert((Index < SI->getNumCases() || + Index == DefaultPseudoIndex) && "Index out the number of cases."); return SI->getSuccessor(getSuccessorIndex()); } Modified: llvm/trunk/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp?rev=154324&r1=154323&r2=154324&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp Mon Apr 9 11:29:35 2012 @@ -100,8 +100,8 @@ long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); + assert((RelTy == ELF::R_MICROBLAZE_32_PCREL || + RelTy == ELF::R_MICROBLAZE_64_PCREL) && + "computeRelocation unknown for this relocation type"); + return SymOffset - (RelOffset + 4); } From samsonov at google.com Mon Apr 9 11:45:18 2012 From: samsonov at google.com (Alexey Samsonov) Date: Mon, 09 Apr 2012 16:45:18 -0000 Subject: [llvm-commits] [compiler-rt] r154326 - /compiler-rt/trunk/lib/asan/asan_mac.cc Message-ID: <20120409164518.8199F2A6C065@llvm.org> Author: samsonov Date: Mon Apr 9 11:45:18 2012 New Revision: 154326 URL: http://llvm.org/viewvc/llvm-project?rev=154326&view=rev Log: [ASan] remove dispatch.h header once again - it's not present on Leopard, and we don't want to break the build of compiler-rt there. 
See https://trac.macports.org/ticket/33362 Modified: compiler-rt/trunk/lib/asan/asan_mac.cc Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=154326&r1=154325&r2=154326&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Apr 9 11:45:18 2012 @@ -23,7 +23,6 @@ #include "asan_thread_registry.h" #include // for _NSGetEnviron -#include #include #include #include @@ -439,6 +438,11 @@ typedef void* pthread_workqueue_t; typedef void* pthread_workitem_handle_t; + +typedef void* dispatch_group_t; +typedef void* dispatch_queue_t; +typedef uint64_t dispatch_time_t; +typedef void (*dispatch_function_t)(void *block); typedef void* (*worker_t)(void *block); // A wrapper for the ObjC blocks used to support libdispatch. @@ -448,11 +452,22 @@ int parent_tid; } asan_block_context_t; +// We use extern declarations of libdispatch functions here instead +// of including . This header is not present on +// Mac OS X Leopard and eariler, and although we don't expect ASan to +// work on legacy systems, it's bad to break the build of +// LLVM compiler-rt there. extern "C" { -// dispatch_barrier_async_f() is not declared in . +void dispatch_async_f(dispatch_queue_t dq, void *ctxt, + dispatch_function_t func); +void dispatch_sync_f(dispatch_queue_t dq, void *ctxt, + dispatch_function_t func); +void dispatch_after_f(dispatch_time_t when, dispatch_queue_t dq, void *ctxt, + dispatch_function_t func); void dispatch_barrier_async_f(dispatch_queue_t dq, void *ctxt, dispatch_function_t func); -// Neither is pthread_workqueue_additem_np(). 
+void dispatch_group_async_f(dispatch_group_t group, dispatch_queue_t dq, + void *ctxt, dispatch_function_t func); int pthread_workqueue_additem_np(pthread_workqueue_t workq, void *(*workitem_func)(void *), void * workitem_arg, pthread_workitem_handle_t * itemhandlep, unsigned int *gencountp); From clattner at apple.com Mon Apr 9 12:03:22 2012 From: clattner at apple.com (Chris Lattner) Date: Mon, 09 Apr 2012 10:03:22 -0700 Subject: [llvm-commits] [llvm] r154303 - in /llvm/trunk/test/CodeGen/X86: tls.ll tls1.ll tls10.ll tls11.ll tls12.ll tls13.ll tls14.ll tls15.ll tls2.ll tls3.ll tls4.ll tls5.ll tls6.ll tls7.ll tls8.ll tls9.ll In-Reply-To: <20120409014317.970302A6C065@llvm.org> References: <20120409014317.970302A6C065@llvm.org> Message-ID: On Apr 8, 2012, at 6:43 PM, Chandler Carruth wrote: > Author: chandlerc > Date: Sun Apr 8 20:43:17 2012 > New Revision: 154303 > > URL: http://llvm.org/viewvc/llvm-project?rev=154303&view=rev > Log: > Fold 15 tiny test cases into a single file that implements the > comprehensive testing of TLS codegen for x86. Convert all of the ones > that were still using grep to use FileCheck. Remove some redundancies > between them. Thank you! -Chris From clattner at apple.com Mon Apr 9 12:11:49 2012 From: clattner at apple.com (Chris Lattner) Date: Mon, 09 Apr 2012 10:11:49 -0700 Subject: [llvm-commits] [llvm] r154285 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp test/Transforms/InstCombine/alloca.ll In-Reply-To: <20120408143657.09F8C2A6C065@llvm.org> References: <20120408143657.09F8C2A6C065@llvm.org> Message-ID: <0A7C694B-23B0-496D-8EBC-45E569E1CD6B@apple.com> On Apr 8, 2012, at 7:36 AM, Chandler Carruth wrote: > Author: chandlerc > Date: Sun Apr 8 09:36:56 2012 > New Revision: 154285 > > URL: http://llvm.org/viewvc/llvm-project?rev=154285&view=rev > Log: > Teach InstCombine to nuke a common alloca pattern -- an alloca which has > GEPs, bit casts, and stores reaching it but no other instructions. 
These > often show up during the iterative processing of the inliner, SROA, and > DCE. Once we hit this point, we can completely remove the alloca. These > were actually showing up in the final, fully optimized code in a bunch > of inliner tests I've been working on, and notably they show up after > LLVM finishes optimizing away all function calls involved in > hash_combine(a, b). This looks good to me. One typo: "poniter". Also you might want to handle memcpy to the alloca, which Clang generates for common C++ types, and memset for completeness. -Chris From clattner at apple.com Mon Apr 9 12:20:27 2012 From: clattner at apple.com (Chris Lattner) Date: Mon, 09 Apr 2012 10:20:27 -0700 Subject: [llvm-commits] [llvm] r153848 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/ARM/reg_sequence.ll test/CodeGen/CellSPU/rotate_ops.ll test/CodeGen/X86/2011-10-27-tstore.ll test/CodeGen/X86 In-Reply-To: <7DE70FDACDE4CD4887C4278C12A2E3050D1E36@HASMSX104.ger.corp.intel.com> References: <7DE70FDACDE4CD4887C4278C12A2E3050CE193@HASMSX104.ger.corp.intel.com> <7DE70FDACDE4CD4887C4278C12A2E3050D1E36@HASMSX104.ger.corp.intel.com> Message-ID: <537BDAE1-17A6-471F-8672-04DD5E9DB6BE@apple.com> On Apr 7, 2012, at 2:43 PM, Rotem, Nadav wrote: > Hi Eli and Chris, > > I removed the part of 153848 that generated new shuffles and added a more restrictive optimization that only removes shuffles. Thanks! The real problem here is that the optimizer has no way to reason about the cost of a shuffle. The perfect shuffle tables (used by ARM and PPC) have accurate cost info for the shuffles it supports, but we don't expose it, and don't expose any other cost information. This is something that we'll have to tackle when the target vectorization API comes up. Once it exists, instcombine (and dag combine) can smoosh two shuffles together and ask if it is at least as cheap as the two original shuffles. 
-Chris From daniel.malea at intel.com Mon Apr 9 12:23:07 2012 From: daniel.malea at intel.com (Malea, Daniel) Date: Mon, 9 Apr 2012 17:23:07 +0000 Subject: [llvm-commits] [PATCH] Fix incorrect llvm-config output by adding support for OptionalLibrary to llvm-build Message-ID: <0909233DB0CA134EB05251FCC8B1F17508D5AD@FMSMSX107.amr.corp.intel.com> Hi all, Please find the attached patch that addresses concerns identified by Alberto Magni in the llvm-dev thread: http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-March/048521.html The patch adds a new "type" of library (in llvm-build terms) named OptionalLibrary which must be explicitly enabled (with command-line flags to llvm-build) by the build system in order to be included in the auto-generated library dependency table used to build the llvm-config tool. For end-users, this means the command "llvm-config -libs all" does not list libraries that are not built by the current configuration. The proposed commit message is: Make IntelJITEvents and OProfileJIT optional libraries and add optional library support to the llvm-build tool: - Add new command line parameter to llvm-build: "--enable-optional-libraries" - Add handling of new llvm-build library type "OptionalLibrary" - Update CMake and automake build systems to pass correct flags to llvm-build based on configuration Thanks, Dan Daniel Malea daniel.malea at intel.com Intel Embedded Computing, Debuggers & Libraries Intel Waterloo -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120409/fb8d06e9/attachment.html -------------- next part -------------- A non-text attachment was scrubbed...
Name: 001_add_optional_library_support_to_llvm_build.patch Type: application/octet-stream Size: 7707 bytes Desc: 001_add_optional_library_support_to_llvm_build.patch Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120409/fb8d06e9/attachment.obj From echristo at apple.com Mon Apr 9 12:54:34 2012 From: echristo at apple.com (Eric Christopher) Date: Mon, 09 Apr 2012 17:54:34 -0000 Subject: [llvm-commits] [llvm] r154329 - /llvm/trunk/include/llvm/CodeGen/LexicalScopes.h Message-ID: <20120409175434.876DA2A6C065@llvm.org> Author: echristo Date: Mon Apr 9 12:54:34 2012 New Revision: 154329 URL: http://llvm.org/viewvc/llvm-project?rev=154329&view=rev Log: Typo. Modified: llvm/trunk/include/llvm/CodeGen/LexicalScopes.h Modified: llvm/trunk/include/llvm/CodeGen/LexicalScopes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/LexicalScopes.h?rev=154329&r1=154328&r2=154329&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/LexicalScopes.h (original) +++ llvm/trunk/include/llvm/CodeGen/LexicalScopes.h Mon Apr 9 12:54:34 2012 @@ -209,7 +209,7 @@ Parent->closeInsnRange(NewScope); } - /// dominates - Return true if current scope dominsates given lexical scope. + /// dominates - Return true if current scope dominates given lexical scope. 
bool dominates(const LexicalScope *S) const { if (S == this) return true; From rafael.espindola at gmail.com Mon Apr 9 13:54:22 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 14:54:22 -0400 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: <4F7E9C78.2070402@free.fr> References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> Message-ID: > This looks like a bug: the linker should observe that bundle uses the symbol > foo > and that main provides the symbol foo, and pass "foo" to the internalize > pass in > the list of external symbols. I think this is how the gold plugin does it. > It's > not a reason to turn off internalize altogether. Hopefully Rafael can > comment. Sorry I got so much email lag. This does look like a bug in the llvm/apple ld interface. With this patch reverted and an extended test: void foo(void) { } __attribute__((visibility("hidden"))) void zed(void) { } int main(int argc, char *argv[]) { foo(); zed(); return 0; } I can run [espindola at desktop llvm]$ ./build/bin/clang -o main main.c -O4 [espindola at desktop llvm]$ readelf --dyn-syms main | grep foo [espindola at desktop llvm]$ ./build/bin/clang -o main main.c -O4 -Wl,-export-dynamic [espindola at desktop llvm]$ readelf --dyn-syms main | grep foo 5: 00000000004006c0 1 FUNC GLOBAL DEFAULT 13 foo Note how in the first run we correctly internalize both foo and zed. On the second one with -export-dynamic only zed is internalized because it is hidden. So the problem is not that internalize is run, it is that it is being told that foo is not needed. (and I assume the -export-dynamic behavior is the default on macho). Can this patch be reverted and the problem fixed in ld? If not, we at least need a way to enable this in the gold plugin, as we really want it to run internalize there.
Cheers, Rafael From mcrosier at apple.com Mon Apr 9 14:38:15 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Apr 2012 19:38:15 -0000 Subject: [llvm-commits] [llvm] r154336 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <20120409193815.D7BE02A6C065@llvm.org> Author: mcrosier Date: Mon Apr 9 14:38:15 2012 New Revision: 154336 URL: http://llvm.org/viewvc/llvm-project?rev=154336&view=rev Log: Update comments and remove unnecessary isVolatile() check. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=154336&r1=154335&r2=154336&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Apr 9 14:38:15 2012 @@ -7338,15 +7338,15 @@ /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast(N); SDValue StVal = St->getValue(); if (!ISD::isNormalStore(St) || St->isVolatile()) return SDValue(); + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse() && !St->isVolatile()) { + StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; DebugLoc DL = St->getDebugLoc(); SDValue BasePtr = St->getBasePtr(); @@ -7367,6 +7367,8 @@ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. 
SelectionDAG &DAG = DCI.DAG; DebugLoc dl = StVal.getDebugLoc(); SDValue IntVec = StVal.getOperand(0); From grosser at fim.uni-passau.de Mon Apr 9 14:46:05 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Mon, 09 Apr 2012 19:46:05 -0000 Subject: [llvm-commits] [polly] r154337 - in /polly/trunk/lib: Analysis/ScopInfo.cpp ScheduleOptimizer.cpp Support/SCEVValidator.cpp Message-ID: <20120409194605.5D1732A6C065@llvm.org> Author: grosser Date: Mon Apr 9 14:46:05 2012 New Revision: 154337 URL: http://llvm.org/viewvc/llvm-project?rev=154337&view=rev Log: Fix typos. Pointed out by: Sebastian Pop Modified: polly/trunk/lib/Analysis/ScopInfo.cpp polly/trunk/lib/ScheduleOptimizer.cpp polly/trunk/lib/Support/SCEVValidator.cpp Modified: polly/trunk/lib/Analysis/ScopInfo.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=154337&r1=154336&r2=154337&view=diff ============================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp (original) +++ polly/trunk/lib/Analysis/ScopInfo.cpp Mon Apr 9 14:46:05 2012 @@ -324,12 +324,12 @@ setBaseName(); - // Devide the access function by the size of the elements in the array. + // Divide the access function by the size of the elements in the array. // // A stride one array access in C expressed as A[i] is expressed in LLVM-IR // as something like A[i * elementsize]. This hides the fact that two // subsequent values of 'i' index two values that are stored next to each - // other in memory. By this devision we make this characteristic obvious + // other in memory. By this division we make this characteristic obvious // again. 
isl_int v; isl_int_init(v); Modified: polly/trunk/lib/ScheduleOptimizer.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/ScheduleOptimizer.cpp?rev=154337&r1=154336&r2=154337&view=diff ============================================================================== --- polly/trunk/lib/ScheduleOptimizer.cpp (original) +++ polly/trunk/lib/ScheduleOptimizer.cpp Mon Apr 9 14:46:05 2012 @@ -291,7 +291,7 @@ // // This transformation creates a loop at the innermost level. The loop has a // constant number of iterations, if the number of loop iterations at -// DimToVectorize can be devided by VectorWidth. The default VectorWidth is +// DimToVectorize can be divided by VectorWidth. The default VectorWidth is // currently constant and not yet target specific. This function does not reason // about parallelism. static isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize, Modified: polly/trunk/lib/Support/SCEVValidator.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/SCEVValidator.cpp?rev=154337&r1=154336&r2=154337&view=diff ============================================================================== --- polly/trunk/lib/Support/SCEVValidator.cpp (original) +++ polly/trunk/lib/Support/SCEVValidator.cpp Mon Apr 9 14:46:05 2012 @@ -213,7 +213,7 @@ ValidatorResult LHS = visit(Expr->getLHS()); ValidatorResult RHS = visit(Expr->getRHS()); - // We currently do not represent an unsigned devision as an affine + // We currently do not represent an unsigned division as an affine // expression. If the division is constant during Scop execution we treat it // as a parameter, otherwise we bail out. 
if (LHS.isConstant() && RHS.isConstant()) From andrew.kaylor at intel.com Mon Apr 9 15:06:45 2012 From: andrew.kaylor at intel.com (Kaylor, Andrew) Date: Mon, 9 Apr 2012 20:06:45 +0000 Subject: [llvm-commits] ObjectFile / DataRefImpl constructor In-Reply-To: <6AE1604EE3EC5F4296C096518C6B77EE1AA570D029@mail.accesssoftek.com> References: <6AE1604EE3EC5F4296C096518C6B77EE1AA570D029@mail.accesssoftek.com> Message-ID: <0983E6C011D2DC4188F8761B533492DE0C9785@ORSMSX105.amr.corp.intel.com> Looks good. From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Danil Malyshev Sent: Thursday, April 05, 2012 5:22 PM To: llvm-commits at cs.uiuc.edu Subject: [llvm-commits] ObjectFile / DataRefImpl constructor Hello everyone, The DataRefImpl is usually initialized with 0 after creation. If it is left uninitialized somewhere, that is a potential problem, for example: DataRefImpl a, b; a.p = 0; b.p = 0; if (a == b) // can be false for 32 bit platforms and always true for 64 bit. Please review the attached patch; it adds a constructor for DataRefImpl and removes the excess initialization. Regards, Danil -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120409/a600b018/attachment.html From rafael.espindola at gmail.com Mon Apr 9 15:10:07 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 16:10:07 -0400 Subject: [llvm-commits] [llvm] r154280 - /llvm/trunk/lib/VMCore/Metadata.cpp In-Reply-To: <20120408102049.772772A6C066@llvm.org> References: <20120408102049.772772A6C066@llvm.org> Message-ID: > This saves a *lot* of space during LTO with -O0 -g flags. Awesome!
Thanks, Rafael From lhames at gmail.com Mon Apr 9 15:17:30 2012 From: lhames at gmail.com (Lang Hames) Date: Mon, 09 Apr 2012 20:17:30 -0000 Subject: [llvm-commits] [llvm] r154338 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20120409201730.4DECB2A6C065@llvm.org> Author: lhames Date: Mon Apr 9 15:17:30 2012 New Revision: 154338 URL: http://llvm.org/viewvc/llvm-project?rev=154338&view=rev Log: Patch r153892 for PR11861 apparently broke an external project (see PR12493). This patch restores TwoAddressInstructionPass's pre-r153892 behaviour when rescheduling instructions in TryInstructionTransform. Hopefully this will fix PR12493. To refix PR11861, lowering of INSERT_SUBREGS is deferred until after the copy that unties the operands is emitted (this seems to be a more appropriate fix for that issue anyway). Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=154338&r1=154337&r2=154338&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon Apr 9 15:17:30 2012 @@ -1183,8 +1183,9 @@ /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if the tied operands -/// are eliminated altogether. +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). 
bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1248,7 +1249,7 @@ // re-schedule this MI below it. if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { ++NumReSchedDowns; - return false; + return true; } if (TargetRegisterInfo::isVirtualRegister(regA)) @@ -1270,7 +1271,7 @@ // re-schedule it before this MI if it's legal. if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { ++NumReSchedUps; - return false; + return true; } // If this is an instruction with a load folded into it, try unfolding @@ -1594,19 +1595,19 @@ MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); - } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } } // Clear TiedOperands here instead of at the top of the loop From mcrosier at apple.com Mon Apr 9 15:32:03 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Apr 2012 20:32:03 -0000 Subject: [llvm-commits] [llvm] r154340 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll test/CodeGen/ARM/opt-shuff-tstore.ll test/CodeGen/ARM/vrev.ll Message-ID: <20120409203203.4A6562A6C065@llvm.org> Author: mcrosier Date: Mon Apr 9 15:32:02 2012 New Revision: 154340 URL: http://llvm.org/viewvc/llvm-project?rev=154340&view=rev Log: When performing a truncating store, it's possible to rearrange the data in-register, such that we can use a single vector store rather than a series of scalar stores. For func_4_8 the generated code vldr d16, LCPI0_0 vmov d17, r0, r1 vadd.i16 d16, d17, d16 vmov.u16 r0, d16[3] strb r0, [r2, #3] vmov.u16 r0, d16[2] strb r0, [r2, #2] vmov.u16 r0, d16[1] strb r0, [r2, #1] vmov.u16 r0, d16[0] strb r0, [r2] bx lr becomes vldr d16, LCPI0_0 vmov d17, r0, r1 vadd.i16 d16, d17, d16 vuzp.8 d16, d17 vst1.32 {d16[0]}, [r2, :32] bx lr I'm not fond of how this combine pessimizes 2012-03-13-DAGCombineBug.ll, but I couldn't think of a way to judiciously apply this combine.
This ldrh r0, [r0, #4] strh r0, [r1] becomes vldr d16, [r0] vmov.u16 r0, d16[2] vmov.32 d16[0], r0 vuzp.16 d16, d17 vst1.32 {d16[0]}, [r1, :32] PR11158 rdar://10703339 Added: llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll llvm/trunk/test/CodeGen/ARM/vrev.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=154340&r1=154339&r2=154340&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Apr 9 15:32:02 2012 @@ -7339,8 +7339,92 @@ static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { StoreSDNode *St = cast(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile()) + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. 
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + DebugLoc DL = St->getDebugLoc(); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. + + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. 
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], + Chains.size()); + } + + if (!ISD::isNormalStore(St)) return SDValue(); // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and Modified: llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll?rev=154340&r1=154339&r2=154340&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll Mon Apr 9 15:32:02 2012 @@ -6,8 +6,7 @@ ; (i32 extload $addr+c*sizeof(i16) define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind { entry: -; CHECK: ldrh [[REG:r[0-9]+]] -; CHECK: strh [[REG]] +; CHECK: vst1.32 %0 = load <3 x i16> * %srcA, align 8 %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> store <2 x i16> %1, <2 x i16> * %dst, align 4 Added: llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll?rev=154340&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll (added) +++ llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll Mon Apr 9 15:32:02 2012 @@ -0,0 +1,19 @@ +; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | 
FileCheck %s + +; CHECK: func_4_8 +; CHECK: vst1.32 +; CHECK-NEXT: bx lr +define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) { + %r = add <4 x i8> %param, + store <4 x i8> %r, <4 x i8>* %p + ret void +} + +; CHECK: func_2_16 +; CHECK: vst1.32 +; CHECK-NEXT: bx lr +define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) { + %r = add <2 x i16> %param, + store <2 x i16> %r, <2 x i16>* %p + ret void +} Modified: llvm/trunk/test/CodeGen/ARM/vrev.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrev.ll?rev=154340&r1=154339&r2=154340&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/vrev.ll (original) +++ llvm/trunk/test/CodeGen/ARM/vrev.ll Mon Apr 9 15:32:02 2012 @@ -149,12 +149,10 @@ } ; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored -; to <2 x i16> when stored to memory. Currently ARM scalarizes these stores. -; See PR 11158 +; to <2 x i16> when stored to memory. define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { ; CHECK: test_vrev64: -; CHECK: vst1.16 -; CHECK: vst1.16 +; CHECK: vst1.32 entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* %tmp2 = load <8 x i16>* %0, align 4 From ahatanaka at mips.com Mon Apr 9 15:32:12 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Mon, 09 Apr 2012 20:32:12 -0000 Subject: [llvm-commits] [llvm] r154341 - in /llvm/trunk: lib/CodeGen/SelectionDAG/TargetLowering.cpp test/CodeGen/Mips/2010-07-20-Switch.ll Message-ID: <20120409203212.3C0912A6C065@llvm.org> Author: ahatanak Date: Mon Apr 9 15:32:12 2012 New Revision: 154341 URL: http://llvm.org/viewvc/llvm-project?rev=154341&view=rev Log: Have TargetLowering::getPICJumpTableRelocBase return a node that points to the GOT if jump table uses 64-bit gp-relative relocation. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=154341&r1=154340&r2=154341&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Mon Apr 9 15:32:12 2012 @@ -1079,8 +1079,12 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } Modified: llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll?rev=154341&r1=154340&r2=154341&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll (original) +++ llvm/trunk/test/CodeGen/Mips/2010-07-20-Switch.ll Mon Apr 9 15:32:12 2012 @@ -13,9 +13,13 @@ ; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0) ; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0) ; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2 +; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp +; PIC-O32: jr $[[R1]] ; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0) ; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0) ; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3 +; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp +; PIC-N64: jr $[[R1]] switch i32 %0, label %bb4 [ i32 0, label %bb5 i32 1, 
label %bb1 From ahatanaka at mips.com Mon Apr 9 15:34:13 2012 From: ahatanaka at mips.com (Hatanaka, Akira) Date: Mon, 9 Apr 2012 20:34:13 +0000 Subject: [llvm-commits] [PATCH][Review request] Add new MachineJumpTableInfo entry type (GPRel64BlockAddress) In-Reply-To: References: <95DD8BA8AA50B14BBFB86A1D541FA380F3088174@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA380F3089209@exchdb03.mips.com> <95DD8BA8AA50B14BBFB86A1D541FA38001145F9CC7@exchdb03.mips.com>, Message-ID: <95DD8BA8AA50B14BBFB86A1D541FA38001145F9DE8@exchdb03.mips.com> Thanks, I will ping IRC in the future. The patch was committed in r154341. ________________________________________ From: Rafael Espíndola [rafael.espindola at gmail.com] Sent: Monday, April 09, 2012 7:57 AM To: Hatanaka, Akira Cc: llvm-commits Subject: Re: [llvm-commits] [PATCH][Review request] Add new MachineJumpTableInfo entry type (GPRel64BlockAddress) 2012/4/5 Hatanaka, Akira : > Hi Rafael, > > Sorry to bother you again. NP :-) > Could you take a look at a patch I submitted a while back? I pinged the list, but nobody has reviewed it. > http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120319/139547.html > > This was supposed to be a part of 149668, which was reviewed by you, but somehow I didn't notice I hadn't included it. It just adds code that checks whether the type of Jump table encoding is EK_GPRel64BlockAddress. The patch is OK. For small patches like this pinging on IRC is probably the most efficient thing to do. Cheers, Rafael From eli.friedman at gmail.com Mon Apr 9 16:10:46 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 9 Apr 2012 14:10:46 -0700 Subject: [llvm-commits] [PATCH] llvm-stress fixes In-Reply-To: References: Message-ID: On Fri, Apr 6, 2012 at 8:31 AM, nobled wrote: > Attached are some fixes for assertion failures running llvm-stress. > However, there's a third problem that's uncovered once these get > applied.
> > Now, with certain combinations of options, I get this assertion: > bin/llvm-stress -seed=1 -size=500 -generate-fp128 -generate-half-float > -generate-ppc-fp128 -generate-x86-fp80 -generate-x86-mmx -o /dev/null > llvm-stress: /llvm/lib/VMCore/Instructions.cpp:2719: > llvm::FPExtInst::FPExtInst(llvm::Value*, llvm::Type*, const > llvm::Twine&, llvm::Instruction*): Assertion `castIsValid(getOpcode(), > S, Ty) && "Illegal FPExt"' failed. > > ...because it's trying to use an fpext to cast from a ppc_fp128 type > to an IEEE-standard fp128 type. > > Which raises the question, just how *are* you supposed to cast between > those types? fptrunc asserts for the same reason; they're both 128 > bits. But they don't look like they can just be bitcast to each other > either. There isn't any straightforward way in LLVM IR to convert between the two types, although you can trunc to double and re-extend. I don't even know how you would actually implement the conversion, though, so it's probably okay to just avoid generating it. For the random-float patch, it would be nice to make it generate infinity and NaNs more frequently; otherwise, the patches seem okay. -Eli From hfinkel at anl.gov Mon Apr 9 16:23:00 2012 From: hfinkel at anl.gov (Hal Finkel) Date: Mon, 9 Apr 2012 16:23:00 -0500 Subject: [llvm-commits] [PATCH] llvm-stress fixes In-Reply-To: References: Message-ID: <20120409162300.30c71019@sapling2> On Mon, 9 Apr 2012 14:10:46 -0700 Eli Friedman wrote: > On Fri, Apr 6, 2012 at 8:31 AM, nobled wrote: > > Attached are some fixes for assertion failures running llvm-stress. > > However, there's a third problem that's uncovered once these get > > applied.
> > > > Now, with certain combinations of options, I get this assertion: > > bin/llvm-stress -seed=1 -size=500 -generate-fp128 > > -generate-half-float -generate-ppc-fp128 -generate-x86-fp80 > > -generate-x86-mmx -o /dev/null > > llvm-stress: /llvm/lib/VMCore/Instructions.cpp:2719: > > llvm::FPExtInst::FPExtInst(llvm::Value*, llvm::Type*, const > > llvm::Twine&, llvm::Instruction*): Assertion > > `castIsValid(getOpcode(), S, Ty) && "Illegal FPExt"' failed. > > > > ...because it's trying to use an fpext to cast from a ppc_fp128 type > > to an IEEE-standard fp128 type. > > > > Which raises the question, just how *are* you supposed to cast > > between those types? fptrunc asserts for the same reason; they're > > both 128 bits. But they don't look like they can just be bitcast to > > each other either. > > There isn't any straightforward way in LLVM IR to convert between the > two types, although you can trunc to double and re-extend. I don't > even know how you would actually implement the conversion, though, so > it's probably okay to just avoid generating it. I agree with Eli, just avoid generating it. Regarding conversion, given that ppc_fp128 is actually the sum of two IEEE (64-bit) doubles, you would need to convert both doubles to fp128 and then add the two resulting fp128 values. -Hal > > For the random-float patch, it would be nice to make it generate > infinity and NaN's more frequently; otherwise, the patches seem okay.
> > -Eli > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From grosbach at apple.com Mon Apr 9 16:33:02 2012 From: grosbach at apple.com (Jim Grosbach) Date: Mon, 09 Apr 2012 14:33:02 -0700 Subject: [llvm-commits] [llvm] r154340 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll test/CodeGen/ARM/opt-shuff-tstore.ll test/CodeGen/ARM/vrev.ll In-Reply-To: <20120409203203.4A6562A6C065@llvm.org> References: <20120409203203.4A6562A6C065@llvm.org> Message-ID: On Apr 9, 2012, at 1:32 PM, Chad Rosier wrote: > Author: mcrosier > Date: Mon Apr 9 15:32:02 2012 > New Revision: 154340 > > URL: http://llvm.org/viewvc/llvm-project?rev=154340&view=rev > Log: > When performing a truncating store, it's possible to rearrange the data > in-register, such that we can use a single vector store rather then a > series of scalar stores. > > For func_4_8 the generated code > > vldr d16, LCPI0_0 > vmov d17, r0, r1 > vadd.i16 d16, d17, d16 > vmov.u16 r0, d16[3] > strb r0, [r2, #3] > vmov.u16 r0, d16[2] > strb r0, [r2, #2] > vmov.u16 r0, d16[1] > strb r0, [r2, #1] > vmov.u16 r0, d16[0] > strb r0, [r2] > bx lr > > becomes > > vldr d16, LCPI0_0 > vmov d17, r0, r1 > vadd.i16 d16, d17, d16 > vuzp.8 d16, d17 > vst1.32 {d16[0]}, [r2, :32] > bx lr > > I'm not fond of how this combine pessimizes 2012-03-13-DAGCombineBug.ll, > but I couldn't think of a way to judiciously apply this combine. > > This > > ldrh r0, [r0, #4] > strh r0, [r1] > > becomes > > vldr d16, [r0] > vmov.u16 r0, d16[2] > vmov.32 d16[0], r0 > vuzp.16 d16, d17 > vst1.32 {d16[0]}, [r1, :32] This worries me. We're now touching more memory than we were before. Can we perhaps use that sort of information to fine-tune when to fire the combine? 
> > PR11158 > rdar://10703339 > > Added: > llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll > Modified: > llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll > llvm/trunk/test/CodeGen/ARM/vrev.ll > > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=154340&r1=154339&r2=154340&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Apr 9 15:32:02 2012 > @@ -7339,8 +7339,92 @@ > static SDValue PerformSTORECombine(SDNode *N, > TargetLowering::DAGCombinerInfo &DCI) { > StoreSDNode *St = cast(N); > + if (St->isVolatile()) > + return SDValue(); > + > + // Optimize trunc store (of multiple scalars) to shuffle and store. First, > + // pack all of the elements in one place. Next, store to memory in fewer > + // chunks. > SDValue StVal = St->getValue(); > - if (!ISD::isNormalStore(St) || St->isVolatile()) > + EVT VT = StVal.getValueType(); > + if (St->isTruncatingStore() && VT.isVector()) { > + SelectionDAG &DAG = DCI.DAG; > + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); > + EVT StVT = St->getMemoryVT(); > + unsigned NumElems = VT.getVectorNumElements(); > + assert(StVT != VT && "Cannot truncate to the same type"); > + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); > + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); > + > + // From, To sizes and ElemCount must be pow of two > + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); > + > + // We are going to use the original vector elt for storing. > + // Accumulated smaller vector elements must be a multiple of the store size. 
> + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); > + > + unsigned SizeRatio = FromEltSz / ToEltSz; > + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); > + > + // Create a type on which we perform the shuffle. > + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), > + NumElems*SizeRatio); > + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); > + > + DebugLoc DL = St->getDebugLoc(); > + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); > + SmallVector ShuffleVec(NumElems * SizeRatio, -1); > + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; > + > + // Can't shuffle using an illegal type. > + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); > + > + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, > + DAG.getUNDEF(WideVec.getValueType()), > + ShuffleVec.data()); > + // At this point all of the data is stored at the bottom of the > + // register. We now need to save it to mem. > + > + // Find the largest store unit > + MVT StoreType = MVT::i8; > + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; > + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { > + MVT Tp = (MVT::SimpleValueType)tp; > + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) > + StoreType = Tp; > + } > + // Didn't find a legal store type. > + if (!TLI.isTypeLegal(StoreType)) > + return SDValue(); > + > + // Bitcast the original vector into a vector of store-size units > + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), > + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); > + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); > + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); > + SmallVector Chains; > + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, > + TLI.getPointerTy()); > + SDValue BasePtr = St->getBasePtr(); > + > + // Perform one or more big stores into memory. 
> + unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); > + for (unsigned I = 0; I < E; I++) { > + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, > + StoreType, ShuffWide, > + DAG.getIntPtrConstant(I)); > + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, > + St->getPointerInfo(), St->isVolatile(), > + St->isNonTemporal(), St->getAlignment()); > + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, > + Increment); > + Chains.push_back(Ch); > + } > + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], > + Chains.size()); > + } > + > + if (!ISD::isNormalStore(St)) > return SDValue(); > > // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and > > Modified: llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll?rev=154340&r1=154339&r2=154340&view=diff > ============================================================================== > --- llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll (original) > +++ llvm/trunk/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll Mon Apr 9 15:32:02 2012 > @@ -6,8 +6,7 @@ > ; (i32 extload $addr+c*sizeof(i16) > define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind { > entry: > -; CHECK: ldrh [[REG:r[0-9]+]] > -; CHECK: strh [[REG]] > +; CHECK: vst1.32 > %0 = load <3 x i16> * %srcA, align 8 > %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> > store <2 x i16> %1, <2 x i16> * %dst, align 4 > > Added: llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll?rev=154340&view=auto > ============================================================================== > --- llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll (added) > +++ llvm/trunk/test/CodeGen/ARM/opt-shuff-tstore.ll Mon Apr 9 15:32:02 2012 > @@ -0,0 +1,19 @@ > +; RUN: llc 
-mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s > + > +; CHECK: func_4_8 > +; CHECK: vst1.32 > +; CHECK-NEXT: bx lr > +define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) { > + %r = add <4 x i8> %param, > + store <4 x i8> %r, <4 x i8>* %p > + ret void > +} > + > +; CHECK: func_2_16 > +; CHECK: vst1.32 > +; CHECK-NEXT: bx lr > +define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) { > + %r = add <2 x i16> %param, > + store <2 x i16> %r, <2 x i16>* %p > + ret void > +} > > Modified: llvm/trunk/test/CodeGen/ARM/vrev.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vrev.ll?rev=154340&r1=154339&r2=154340&view=diff > ============================================================================== > --- llvm/trunk/test/CodeGen/ARM/vrev.ll (original) > +++ llvm/trunk/test/CodeGen/ARM/vrev.ll Mon Apr 9 15:32:02 2012 > @@ -149,12 +149,10 @@ > } > > ; The type <2 x i16> is legalized to <2 x i32> and need to be trunc-stored > -; to <2 x i16> when stored to memory. Currently ARM scalarizes these stores. > -; See PR 11158 > +; to <2 x i16> when stored to memory. > define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { > ; CHECK: test_vrev64: > -; CHECK: vst1.16 > -; CHECK: vst1.16 > +; CHECK: vst1.32 > entry: > %0 = bitcast <4 x i16>* %source to <8 x i16>* > %tmp2 = load <8 x i16>* %0, align 4 > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From erhardt at cs.fau.de Mon Apr 9 16:52:06 2012 From: erhardt at cs.fau.de (Christoph Erhardt) Date: Mon, 09 Apr 2012 23:52:06 +0200 Subject: [llvm-commits] PATCH: Fix for PR6679 Message-ID: <4F835A06.6060000@cs.fau.de> Hi, here's a patch to fix bug 6679 ("FUCOMIP and related instructions generated even on -mcpu=pentium", http://llvm.org/bugs/show_bug.cgi?id=6679). 
The patch does the following: * Model FPSW (the FPU status word) as a register. * Add ISel patterns for the FUCOM*, FNSTSW and SAHF instructions. * During Legalize/Lowering, build a node sequence to transfer the comparison result from FPSW into EFLAGS. If you're wondering about the right-shift: That's an implicit sub-register extraction (%ax -> %ah) which is handled later on by the instruction selector. * Rename FNSTSW8r to FNSTSW16r because its destination operand is actually a 16-bit register. Best regards, Christoph -------------- next part -------------- Index: lib/Target/X86/X86RegisterInfo.td =================================================================== --- lib/Target/X86/X86RegisterInfo.td (revision 154255) +++ lib/Target/X86/X86RegisterInfo.td (working copy) @@ -223,6 +223,9 @@ def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>; def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>; + // Floating-point status word + def FPSW : Register<"fpsw">; + // Status flags register def EFLAGS : Register<"flags">; @@ -472,3 +475,7 @@ let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } +def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp (revision 154315) +++ lib/Target/X86/X86ISelLowering.cpp (working copy) @@ -8158,6 +8158,30 @@ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } +/// Convert a comparison if required by the subtarget. +SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, + SelectionDAG &DAG) const { + // If the subtarget does not support the FUCOMI instruction, floating-point + // comparisons have to be converted. 
+ if (Subtarget->hasCMov() || + Cmp.getOpcode() != X86ISD::CMP || + !Cmp.getOperand(0).getValueType().isFloatingPoint() || + !Cmp.getOperand(1).getValueType().isFloatingPoint()) + return Cmp; + + // The instruction selector will select an FUCOM instruction instead of + // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence + // build an SDNode sequence that transfers the result from FPSW into EFLAGS: + // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8)))) + DebugLoc dl = Cmp.getDebugLoc(); + SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); + SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); + SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, + DAG.getConstant(8, MVT::i8)); + SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); +} + /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, @@ -8279,6 +8303,7 @@ return SDValue(); SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), EFLAGS); } @@ -8448,7 +8473,8 @@ // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); - if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) + if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || + Opc == X86ISD::SAHF) return true; if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || @@ -8512,6 +8538,7 @@ SDValue CmpOp0 = Cmp.getOperand(0); Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDValue Res = // Res = 0 or -1. 
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), @@ -8618,6 +8645,7 @@ // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry if (Cond.getOpcode() == X86ISD::CMP) { + Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && @@ -8918,6 +8946,7 @@ CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } + Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); } @@ -11057,6 +11086,7 @@ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; + case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; @@ -11123,6 +11153,7 @@ case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; + case X86ISD::SAHF: return "X86ISD::SAHF"; } } Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h (revision 154315) +++ lib/Target/X86/X86ISelLowering.h (working copy) @@ -308,6 +308,12 @@ SFENCE, LFENCE, + // FNSTSW16r - Store FP status word into i16 register. + FNSTSW16r, + + // SAHF - Store contents of %ah into %eflags. + SAHF, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -902,6 +908,9 @@ /// equivalent, for use with the given x86 condition code. SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SelectionDAG &DAG) const; + + /// Convert a comparison if required by the subtarget. 
+ SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; }; namespace X86 { Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td (revision 154255) +++ lib/Target/X86/X86InstrInfo.td (working copy) @@ -63,6 +63,8 @@ [SDTCisInt<0>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; +def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>; + def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -131,6 +133,8 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; +def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; + def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -1016,8 +1020,9 @@ // Condition code ops, incl. set if equal/not equal/... -let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in -def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH +let Defs = [EFLAGS], Uses = [AH] in +def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", + [(set EFLAGS, (X86sahf AH))]>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags @@ -1774,9 +1779,9 @@ def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>; // We accept "fnstsw %eax" even though it only writes %ax. -def : InstAlias<"fnstsw %eax", (FNSTSW8r)>; -def : InstAlias<"fnstsw %al" , (FNSTSW8r)>; -def : InstAlias<"fnstsw" , (FNSTSW8r)>; +def : InstAlias<"fnstsw %eax", (FNSTSW16r)>; +def : InstAlias<"fnstsw %al" , (FNSTSW16r)>; +def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does. 
Index: lib/Target/X86/X86InstrFPStack.td =================================================================== --- lib/Target/X86/X86InstrFPStack.td (revision 154255) +++ lib/Target/X86/X86InstrFPStack.td (working copy) @@ -27,6 +27,7 @@ SDTCisVT<2, OtherVT>]>; def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; +def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -41,6 +42,7 @@ def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, SDNPMemOperand]>; +def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem, @@ -203,6 +205,7 @@ } } +let Defs = [FPSW] in { defm ADD : FPBinary_rr; defm SUB : FPBinary_rr; defm MUL : FPBinary_rr; @@ -213,6 +216,7 @@ defm MUL : FPBinary; defm DIV : FPBinary; defm DIVR: FPBinary; +} class FPST0rInst o, string asm> : FPI, D8; @@ -257,6 +261,7 @@ def _F : FPI, D9; } +let Defs = [FPSW] in { defm CHS : FPUnary; defm ABS : FPUnary; defm SQRT: FPUnary; @@ -269,6 +274,7 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; } def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; +} // Defs = [FPSW] // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. @@ -316,6 +322,7 @@ Requires<[HasCMov]>; } +let Defs = [FPSW] in { let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov; defm CMOVBE : FPCMov; @@ -492,15 +499,16 @@ // Floating point compares. 
-let Defs = [EFLAGS] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) + [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) + [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>; def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) - + [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>; +} // Defs = [FPSW] + // CC = ST(0) cmp ST(i) +let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, @@ -509,7 +517,7 @@ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; } -let Defs = [EFLAGS], Uses = [ST0] in { +let Defs = [FPSW], Uses = [ST0] in { def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) (outs), (ins RST:$reg), "fucom\t$reg">, DD; @@ -519,7 +527,9 @@ def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop (outs), (ins), "fucompp">, DA; +} +let Defs = [EFLAGS, FPSW], Uses = [ST0] in { def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) (outs), (ins RST:$reg), "fucomi\t$reg">, DB; @@ -528,15 +538,18 @@ "fucompi\t$reg">, DF; } +let Defs = [EFLAGS, FPSW] in { def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), "fcomi\t$reg">, DB; def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), "fcompi\t$reg">, DF; +} // Floating point flag ops. 
-let Defs = [AX] in -def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags - (outs), (ins), "fnstsw %ax", []>, DF; +let Defs = [AX], Uses = [FPSW] in +def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags + (outs), (ins), "fnstsw %ax", + [(set AX, (X86fp_stsw FPSW))]>, DF; def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world (outs), (ins i16mem:$dst), "fnstcw\t$dst", @@ -547,12 +560,14 @@ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>; // FPU control instructions +let Defs = [FPSW] in def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB; def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), "ffree\t$reg">, DD; // Clear exceptions +let Defs = [FPSW] in def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB; // Operandless floating-point instructions for the disassembler. Index: test/CodeGen/X86/fp-stack-compare.ll =================================================================== --- test/CodeGen/X86/fp-stack-compare.ll (revision 154255) +++ test/CodeGen/X86/fp-stack-compare.ll (working copy) @@ -1,8 +1,11 @@ ; RUN: llc < %s -march=x86 -mcpu=i386 | FileCheck %s -; PR1012 +; PR6679 define float @foo(float* %col.2.0) { -; CHECK: fucompi +; CHECK: fucomp +; CHECK-NOT: fucompi +; CHECK: j +; CHECK-NOT: fcmov %tmp = load float* %col.2.0 %tmp16 = fcmp olt float %tmp, 0.000000e+00 %tmp20 = fsub float -0.000000e+00, %tmp Index: test/CodeGen/X86/fp-stack-compare-cmov.ll =================================================================== --- test/CodeGen/X86/fp-stack-compare-cmov.ll (revision 0) +++ test/CodeGen/X86/fp-stack-compare-cmov.ll (working copy) @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s +; PR1012 + +define float @foo(float* %col.2.0) { +; CHECK: fucompi +; CHECK: fcmov + %tmp = load float* %col.2.0 + %tmp16 = fcmp olt float %tmp, 0.000000e+00 + %tmp20 = fsub float -0.000000e+00, %tmp + %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp + ret float %iftmp.2.0 +} From rafael.espindola at gmail.com Mon Apr 9 
17:09:57 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 18:09:57 -0400 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: <20120405212644.F1CE92A6C069@llvm.org> References: <20120405212644.F1CE92A6C069@llvm.org> Message-ID: > Consider the following program: > > $ cat main.c > void foo(void) { } > > int main(int argc, char *argv[]) { > foo(); > return 0; > } > $ cat bundle.c > extern void foo(void); > > void bar(void) { > foo(); > } > $ clang -o main main.c > $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main > $ nm -m bundle.so > 0000000000000f40 (__TEXT,__text) external _bar > (undefined) external _foo (from executable) > (undefined) external dyld_stub_binder (from libSystem) > $ clang -o main main.c -O4 > $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main > Undefined symbols for architecture x86_64: > "_foo", referenced from: > _bar in bundle-elQN6d.o > ld: symbol(s) not found for architecture x86_64 > clang: error: linker command failed with exit code 1 (use -v to see invocation) Note that I get exactly the same error with: $ clang -o main main.c -Wl,-dead_strip -O3 $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main Undefined symbols for architecture x86_64: "_foo", referenced from: _bar in bundle-LRWElp.o ld: symbol(s) not found for architecture x86_64 clang-3: error: linker command failed with exit code 1 (use -v to see invocation) Which shows that the problem is *not* with us running internalize, it is with the linker telling us that foo is not needed. I am not too familiar with Mach-O, but if your example with -O4 is supposed to work, it looks like the linker should tell us that all symbols are needed if -dead_strip is not used.
Cheers, Rafael From isanbard at gmail.com Mon Apr 9 17:18:02 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 09 Apr 2012 22:18:02 -0000 Subject: [llvm-commits] [llvm] r154348 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp Message-ID: <20120409221802.2D0E22A6C065@llvm.org> Author: void Date: Mon Apr 9 17:18:01 2012 New Revision: 154348 URL: http://llvm.org/viewvc/llvm-project?rev=154348&view=rev Log: Apply the scope restrictions after parsing the command line options. There may be some which are used in that function. Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOCodeGenerator.cpp?rev=154348&r1=154347&r2=154348&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOCodeGenerator.cpp (original) +++ llvm/trunk/tools/lto/LTOCodeGenerator.cpp Mon Apr 9 17:18:01 2012 @@ -347,9 +347,6 @@ if ( this->determineTarget(errMsg) ) return true; - // mark which symbols can not be internalized - this->applyScopeRestrictions(); - Module* mergedModule = _linker.getModule(); // if options were requested, set them @@ -357,6 +354,9 @@ cl::ParseCommandLineOptions(_codegenOptions.size(), const_cast(&_codegenOptions[0])); + // mark which symbols can not be internalized + this->applyScopeRestrictions(); + // Instantiate the pass manager to organize the passes. 
PassManager passes; From wendling at apple.com Mon Apr 9 17:29:20 2012 From: wendling at apple.com (Bill Wendling) Date: Mon, 09 Apr 2012 15:29:20 -0700 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> Message-ID: On Apr 6, 2012, at 10:22 AM, Chris Lattner wrote: > That said, if clang is running internalize at "clang -O4" or "clang -flto" then we should talk about whether that really is the right default. I'm not convinced it is. GCC has a -fwhole-program flag (or something like that) which might be appropriate. Otherwise, the standard linker visibility stuff should be used. > Okay. Let's discuss this. What are the benefits of running internalize by default as opposed to running it via a command-line option? If we go the `-fwhole-program' option, it's more in line with what people might expect from LTO (those coming from gcc-land especially). There could be other, subtle reasons why a programmer may not want to run the internalize pass. Keeping around functions to call when debugging code springs to mind. They may not want to mess with export lists for these functions (so no internalize), but would like them to go away when shipping the product (so running internalize). There may be more reasons. -bw From rafael.espindola at gmail.com Mon Apr 9 17:38:21 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 18:38:21 -0400 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> Message-ID: > Okay. Let's discuss this. What are the benefits of running internalize by default as opposed to running it via a command-line option? 
The example I posted before: void foo(void) { } __attribute__((visibility("hidden"))) void zed(void) { } int main(int argc, char *argv[]) { foo(); zed(); return 0; } The function zed should be internalized. The function foo should or should not be, depending on -export_dynamic on ELF. Not sure about the semantics on Mach-O, but the linker should know it. As Nick pointed out, changing this is also breaking API compatibility with old versions of libLTO. > If we go the `-fwhole-program' option, it's more in line with what people might expect from LTO (those coming from gcc-land especially). There could be other, subtle reasons why a programmer may not want to run the internalize pass. Keeping around functions to call when debugging code springs to mind. They may not want to mess with export lists for these functions (so no internalize), but would like them to go away when shipping the product (so running internalize). LTO and internalize are beneficial when not in whole-program mode. The main use case I have in mind is building firefox which is a big shared library. -fwhole-program is completely redundant with a good plugin interface like the one we have. > There may be more reasons. What are they? Do note that your example breaks at -O3 -Wl,-dead_strip, so this is not specific to running internalize. > -bw > > Cheers, Rafael From wendling at apple.com Mon Apr 9 17:47:56 2012 From: wendling at apple.com (Bill Wendling) Date: Mon, 09 Apr 2012 15:47:56 -0700 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> Message-ID: On Apr 9, 2012, at 3:38 PM, Rafael Espíndola wrote: >> Okay. Let's discuss this. What are the benefits of running internalize by default as opposed to running it via a command-line option?
> > The example I posted before: > > void foo(void) { } > __attribute__((visibility("hidden"))) void zed(void) { } > int main(int argc, char *argv[]) { > foo(); > zed(); > return 0; > } > > The function zed should be internalized. The function foo should or > should not be depending on -export_dynamic on ELF. Not sure about the > semantics on Mach-O, but the linker should know it. > And what if the user would like zed to stick around for debugging purposes? What recourse do they have? > As Nick pointed out, changing this is also breaking API compatibility > with old versions of libLTO. > LTO hasn't really worked well in the past. I'm not convinced that this is a show-stopper. >> If we go the `-fwhole-program' option, it's more in line with what people might expect from LTO (those coming from gcc-land especially). There could be other, subtle reasons why a programmer may not want to run the internalize pass. Keeping around functions to call when debugging code springs to mind. They may not want to mess with export lists for these functions (so no internalize), but would like them to go away when shipping the product (so running internalize). > > LTO and internalize are beneficial when not in whole-program mode. The > main use case I have in mind is building firefox which is a big shared > library. -fwhole-program is completely redundant with a good plugin > interface like the one we have. > It's only redundant when internalize is the default. If not, then it's necessary and useful. >> There may be more reasons. > > What are they? I supplied two. Please address them first before demanding more examples. > Do note that you example breaks at -O3 -Wl,-dead_strip, > so this in not specific to running internalize. > Wait...it breaks non-LTO builds?! How? 
-bw From isanbard at gmail.com Mon Apr 9 17:52:43 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 09 Apr 2012 15:52:43 -0700 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: References: <20120405212644.F1CE92A6C069@llvm.org> Message-ID: On Apr 9, 2012, at 3:09 PM, Rafael Espíndola wrote: >> Consider the following program: >> >> $ cat main.c >> void foo(void) { } >> >> int main(int argc, char *argv[]) { >> foo(); >> return 0; >> } >> $ cat bundle.c >> extern void foo(void); >> >> void bar(void) { >> foo(); >> } >> $ clang -o main main.c >> $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main >> $ nm -m bundle.so >> 0000000000000f40 (__TEXT,__text) external _bar >> (undefined) external _foo (from executable) >> (undefined) external dyld_stub_binder (from libSystem) >> $ clang -o main main.c -O4 >> $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main >> Undefined symbols for architecture x86_64: >> "_foo", referenced from: >> _bar in bundle-elQN6d.o >> ld: symbol(s) not found for architecture x86_64 >> clang: error: linker command failed with exit code 1 (use -v to see invocation) > > Note that I get exactly the same error with: > > $ clang -o main main.c -Wl,-dead_strip -O3 > $ clang -o bundle.so bundle.c -bundle -bundle_loader ./main > Undefined symbols for architecture x86_64: > "_foo", referenced from: > _bar in bundle-LRWElp.o > ld: symbol(s) not found for architecture x86_64 > clang-3: error: linker command failed with exit code 1 (use -v to see > invocation) > > Which shows that the problem is *not* with us running internalize, it > is with the linker telling us the foo is not needed.
> This has nothing to do with my changes: [bwendling:llvm] clang -O3 -Wl,-dead_strip a.c [bwendling:llvm] nm -m a.out 0000000100000000 (__TEXT,__text) [referenced dynamically] external __mh_execute_header 0000000100000f70 (__TEXT,__text) external _main (undefined) external dyld_stub_binder (from libSystem) -bw From rafael.espindola at gmail.com Mon Apr 9 17:57:48 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 18:57:48 -0400 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> Message-ID: >> The function zed should be internalized. The function foo should or >> should not be depending on -export_dynamic on ELF. Not sure about the >> semantics on Mach-O, but the linker should know it. >> > And what if the user would like zed to stick around for debugging purposes? What recourse do they have? I am fine with adding a debug option. >> As Nick pointed out, changing this is also breaking API compatibility >> with old versions of libLTO. >> > LTO hasn't really worked well in the past. I'm not convinced that this is a show-stopper. It was not perfect , but I was able to bootstrap clang and build firefox with it. Do we really have to break backwards compatibility to add a debug option? >> LTO and internalize are beneficial when not in whole-program mode. The >> main use case I have in mind is building firefox which is a big shared >> library. -fwhole-program is completely redundant with a good plugin >> interface like the one we have. >> > It's only redundant when internalize is the default. If not, then it's necessary and useful. Sorry, can you expand this? -fwhole-program lets the user tell the compiler that there will be no external references to this compilation unit. The linker and the object file semantics know the same. 
What is the gain in disabling the internalize pass and then adding -fwhole-program? >>> There may be more reasons. >> >> What are they? > > I supplied two. Please address them first before demanding more examples. This change broke a working system. Seems like a high bar for a regression. >> Do note that you example breaks at -O3 -Wl,-dead_strip, >> so this in not specific to running internalize. >> > > Wait...it breaks non-LTO builds?! How? The same way as with LTO almost. Instead of the linker telling us that we can remove the symbol, it removes the symbol by itself. > -bw > Cheers, Rafael From isanbard at gmail.com Mon Apr 9 18:16:51 2012 From: isanbard at gmail.com (Bill Wendling) Date: Mon, 09 Apr 2012 23:16:51 -0000 Subject: [llvm-commits] [llvm] r154356 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp Message-ID: <20120409231651.3B0292A6C065@llvm.org> Author: void Date: Mon Apr 9 18:16:51 2012 New Revision: 154356 URL: http://llvm.org/viewvc/llvm-project?rev=154356&view=rev Log: Revert the 'EnableInitializing' flag. There is debate on whether we should run that pass by default in LTO. Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOCodeGenerator.cpp?rev=154356&r1=154355&r2=154356&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOCodeGenerator.cpp (original) +++ llvm/trunk/tools/lto/LTOCodeGenerator.cpp Mon Apr 9 18:16:51 2012 @@ -46,9 +46,6 @@ #include "llvm/ADT/StringExtras.h" using namespace llvm; -static cl::opt EnableInternalizing("enable-internalizing", cl::init(false), - cl::desc("Internalize functions during LTO")); - static cl::opt DisableInline("disable-inlining", cl::init(false), cl::desc("Do not run the inliner pass")); @@ -278,14 +275,6 @@ } void LTOCodeGenerator::applyScopeRestrictions() { - // Internalize only if specifically asked for. 
Otherwise, global symbols which - // exist in the final image, but which are used outside of that image - // (e.g. bundling) may be removed. This also happens when a function is used - // only in inline asm. LLVM doesn't recognize that as a "use", so it could be - // stripped. - if (!EnableInternalizing) - return; - if (_scopeRestrictionsDone) return; Module *mergedModule = _linker.getModule(); From rafael.espindola at gmail.com Mon Apr 9 18:22:18 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 9 Apr 2012 19:22:18 -0400 Subject: [llvm-commits] [llvm] r154356 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: <20120409231651.3B0292A6C065@llvm.org> References: <20120409231651.3B0292A6C065@llvm.org> Message-ID: > Revert the 'EnableInitializing' flag. There is debate on whether we should run that pass by default in LTO. Awesome, this fixes $ ./build/bin/clang -o main main.c -O4 $ nm main | grep foo $ ./build/bin/clang -o main main.c -O4 -Wl,-export-dynamic $ nm main | grep foo T foo Thanks, Rafael From echristo at apple.com Mon Apr 9 18:31:27 2012 From: echristo at apple.com (Eric Christopher) Date: Mon, 09 Apr 2012 16:31:27 -0700 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> Message-ID: <7BAE99FD-EB05-4732-8120-18A544FA8745@apple.com> On Apr 9, 2012, at 3:57 PM, Rafael Espíndola wrote: >> And what if the user would like zed to stick around for debugging purposes? What recourse do they have? > > I am fine with adding a debug option. __attribute__((used)) is the standard mechanism for this sort of thing.
-eric From lhames at gmail.com Mon Apr 9 18:58:59 2012 From: lhames at gmail.com (Lang Hames) Date: Mon, 09 Apr 2012 23:58:59 -0000 Subject: [llvm-commits] [llvm] r154359 - /llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll Message-ID: <20120409235859.E52482A6C065@llvm.org> Author: lhames Date: Mon Apr 9 18:58:59 2012 New Revision: 154359 URL: http://llvm.org/viewvc/llvm-project?rev=154359&view=rev Log: Test case for PR12495. Added: llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll Added: llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll?rev=154359&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll (added) +++ llvm/trunk/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll Mon Apr 9 18:58:59 2012 @@ -0,0 +1,34 @@ +; RUN: llc -O1 -verify-coalescing < %s +; PR12495 +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +define void @func(i8* nocapture) nounwind uwtable ssp align 2 { + br i1 undef, label %4, label %2 + +;
-

The maximum relative error may be any rational number. The metadata node - shall consist of a pair of unsigned integers respectively representing - the numerator and denominator. For example, 2.5 ULP:

+

The metadata node shall consist of a single non-negative floating + point number representing the maximum relative error. For example, + 2.5 ULP:

-!0 = metadata !{ i32 5, i32 2 }
+!0 = metadata !{ float 2.5 }
 
Modified: llvm/trunk/include/llvm/ADT/APFloat.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/APFloat.h?rev=154387&r1=154386&r2=154387&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/APFloat.h (original) +++ llvm/trunk/include/llvm/ADT/APFloat.h Tue Apr 10 03:22:43 2012 @@ -320,6 +320,7 @@ const fltSemantics &getSemantics() const { return *semantics; } bool isZero() const { return category == fcZero; } bool isNonZero() const { return category != fcZero; } + bool isNormal() const { return category == fcNormal; } bool isNaN() const { return category == fcNaN; } bool isInfinity() const { return category == fcInfinity; } bool isNegative() const { return sign; } Modified: llvm/trunk/lib/VMCore/Verifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Verifier.cpp?rev=154387&r1=154386&r2=154387&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Verifier.cpp (original) +++ llvm/trunk/lib/VMCore/Verifier.cpp Tue Apr 10 03:22:43 2012 @@ -1653,6 +1653,18 @@ } } + if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpaccuracy)) { + Assert1(I.getType()->isFPOrFPVectorTy(), + "fpaccuracy requires a floating point result!", &I); + Assert1(MD->getNumOperands() == 1, "fpaccuracy takes one operand!", &I); + ConstantFP *Op = dyn_cast_or_null(MD->getOperand(0)); + Assert1(Op, "fpaccuracy ULPs not a floating point number!", &I); + APFloat ULPs = Op->getValueAPF(); + Assert1(ULPs.isNormal() || ULPs.isZero(), + "fpaccuracy ULPs not a normal number!", &I); + Assert1(!ULPs.isNegative(), "fpaccuracy ULPs is negative!", &I); + } + MDNode *MD = I.getMetadata(LLVMContext::MD_range); Assert1(!MD || isa(I), "Ranges are only for loads!", &I); Added: llvm/trunk/test/Verifier/fpaccuracy.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Verifier/fpaccuracy.ll?rev=154387&view=auto 
============================================================================== --- llvm/trunk/test/Verifier/fpaccuracy.ll (added) +++ llvm/trunk/test/Verifier/fpaccuracy.ll Tue Apr 10 03:22:43 2012 @@ -0,0 +1,31 @@ +; RUN: not llvm-as < %s |& FileCheck %s + +define void @foo(i32 %i, float %f, <2 x float> %g) { + %s = add i32 %i, %i, !fpaccuracy !0 +; CHECK: fpaccuracy requires a floating point result! + %t = fadd float %f, %f, !fpaccuracy !1 +; CHECK: fpaccuracy takes one operand! + %u = fadd float %f, %f, !fpaccuracy !2 +; CHECK: fpaccuracy takes one operand! + %v = fadd float %f, %f, !fpaccuracy !3 +; CHECK: fpaccuracy ULPs not a floating point number! + %w = fadd float %f, %f, !fpaccuracy !0 +; Above line is correct. + %w2 = fadd <2 x float> %g, %g, !fpaccuracy !0 +; Above line is correct. + %x = fadd float %f, %f, !fpaccuracy !4 +; CHECK: fpaccuracy ULPs is negative! + %y = fadd float %f, %f, !fpaccuracy !5 +; CHECK: fpaccuracy ULPs is negative! + %z = fadd float %f, %f, !fpaccuracy !6 +; CHECK: fpaccuracy ULPs not a normal number! + ret void +} + +!0 = metadata !{ float 1.0 } +!1 = metadata !{ } +!2 = metadata !{ float 1.0, float 1.0 } +!3 = metadata !{ i32 1 } +!4 = metadata !{ float -1.0 } +!5 = metadata !{ float -0.0 } +!6 = metadata !{ float 0x7FFFFFFF00000000 } From Tim.Northover at arm.com Tue Apr 10 04:45:37 2012 From: Tim.Northover at arm.com (Tim Northover) Date: Tue, 10 Apr 2012 10:45:37 +0100 Subject: [llvm-commits] [cfe-commits] [LLVMdev] [Patch?] Fix handling of ARM homogenous aggregates In-Reply-To: <3E94D039A2B82544B3E7D48F924B0B25E18AB88AED@base.imrc.kist.re.kr> References: <3E94D039A2B82544B3E7D48F924B0B25E18AB88AED@base.imrc.kist.re.kr> Message-ID: <201204101045.38068.Tim.Northover@arm.com> > I think that ABI of LLVM IR level is different from ABI on real architecture > such as ARM or x86. ABI of LLVM IR level doesn't consider about register > usage. It just describes parameters and padding information related to > alignment of parameters. 
I'm not sure what you mean here. LLVM's IR certainly doesn't care about registers and so on, but the LLVM backends have to, and front-ends have to know to a greater or lesser degree how the backends actually do it so that they can create ABI compliant code. My view (possibly biased by the ARM ABI) is that LLVM's primary goal should be to make writing an ABI-compliant front-end as easy as possible. After that it should aim to have a sane ABI for hand-written LLVM code, and finally it should try to follow the ABI itself where possible (the last two are possibly interchangeable, but the first is primary). The current situation with HFAs is that, without changes to make the backend aware of the concept, the front-end needs to know the entire sequence of previous arguments and how LLVM lowers them to work out how to pass an HFA correctly. The goal I'd like to see reached is that a front-end should be able to map one of its types to an LLVM type and know that if it uses that LLVM type then LLVM will do the right thing. As far as I'm aware, this is what happens for other targets already (we *are* a bit weird with the HFAs). I think this is achievable for the ARM ABI too: LLVM's type system is certainly rich enough to capture the distinctions necessary. >From Anton: > I think here stuff should be thought of from different points. While > providing source type for argument might be beneficial, it might cause > moving the code from frontend to backend. That could certainly go too far, but conceptually it's not necessarily a massive problem: if multiple front-ends implement the same ABI calling conventions, then perhaps the shared backend is the right place to put that common code. And conversely, I think that if a front-end is worrying about the allocation of register numbers then something is a little awry. But I suppose there will be a substantial cost to implementing this, wherever we put it. > Consider e.g. passing struct > by value including crazy padding inside. 
The ABI might specify that > padding should be removed and struct is passed field-by-field. To me that would still be a prime candidate for the front-end doing the work: it still seems to have an essentially context-free representation as a (sequence of) LLVM types. > Also, note that in many cases the ABI rules are worded in terms of > source language which might now be preserved during IR generation, > so... I'm not sure I follow this point. Is preserving the source language a bad thing for some reason I'm missing? Certainly, if it affects optimisation it would be. Tim. From glider at google.com Tue Apr 10 06:00:26 2012 From: glider at google.com (Alexander Potapenko) Date: Tue, 10 Apr 2012 11:00:26 -0000 Subject: [llvm-commits] [compiler-rt] r154390 - in /compiler-rt/trunk/lib/asan: asan_interceptors.cc tests/asan_test.cc Message-ID: <20120410110026.5A6D42A6C065@llvm.org> Author: glider Date: Tue Apr 10 06:00:26 2012 New Revision: 154390 URL: http://llvm.org/viewvc/llvm-project?rev=154390&view=rev Log: Change the way ASan interacts with custom signal handlers. >From now on we allow the clients to override signal handlers set by ASan, but print a warning in such a case. Remove the tests for signal() and sigaction(), because they made little sense even without this change. 
Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/tests/asan_test.cc Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=154390&r1=154389&r2=154390&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Tue Apr 10 06:00:26 2012 @@ -339,19 +339,21 @@ #endif // !_WIN32 #if ASAN_INTERCEPT_SIGNAL_AND_SIGACTION +const char kOverrideSighandlerWarning[] = + "Warning: client program overrides the handler for signal %d.\n"; INTERCEPTOR(void*, signal, int signum, void *handler) { - if (!AsanInterceptsSignal(signum)) { - return REAL(signal)(signum, handler); + if (AsanInterceptsSignal(signum)) { + Report(kOverrideSighandlerWarning, signum); } - return NULL; + return REAL(signal)(signum, handler); } INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act, struct sigaction *oldact) { - if (!AsanInterceptsSignal(signum)) { - return REAL(sigaction)(signum, act, oldact); + if (AsanInterceptsSignal(signum)) { + Report(kOverrideSighandlerWarning, signum); } - return 0; + return REAL(sigaction)(signum, act, oldact); } #elif ASAN_POSIX // We need to have defined REAL(sigaction) on posix systems. Modified: compiler-rt/trunk/lib/asan/tests/asan_test.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/tests/asan_test.cc?rev=154390&r1=154389&r2=154390&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/tests/asan_test.cc (original) +++ compiler-rt/trunk/lib/asan/tests/asan_test.cc Tue Apr 10 06:00:26 2012 @@ -210,48 +210,6 @@ } #endif // __APPLE__ -void NoOpSignalHandler(int unused) { - fprintf(stderr, "NoOpSignalHandler (should not happen). 
Aborting\n"); - abort(); -} - -void NoOpSigaction(int, siginfo_t *siginfo, void *context) { - fprintf(stderr, "NoOpSigaction (should not happen). Aborting\n"); - abort(); -} - -TEST(AddressSanitizer, SignalTest) { - signal(SIGSEGV, NoOpSignalHandler); - signal(SIGILL, NoOpSignalHandler); - // If asan did not intercept sigaction NoOpSigaction will fire. - char *x = Ident((char*)malloc(5)); - EXPECT_DEATH(x[6]++, "is located 1 bytes to the right"); - free(Ident(x)); -} - -TEST(AddressSanitizer, SigactionTest) { - { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = NoOpSigaction;; - sigact.sa_flags = SA_SIGINFO; - sigaction(SIGSEGV, &sigact, 0); - } - - { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = NoOpSigaction;; - sigact.sa_flags = SA_SIGINFO; - sigaction(SIGILL, &sigact, 0); - } - - // If asan did not intercept sigaction NoOpSigaction will fire. - char *x = Ident((char*)malloc(5)); - EXPECT_DEATH(x[6]++, "is located 1 bytes to the right"); - free(Ident(x)); -} - void *TSDWorker(void *test_key) { if (test_key) { pthread_setspecific(*(pthread_key_t*)test_key, (void*)0xfeedface); From elena.demikhovsky at intel.com Tue Apr 10 06:22:49 2012 From: elena.demikhovsky at intel.com (Demikhovsky, Elena) Date: Tue, 10 Apr 2012 11:22:49 +0000 Subject: [llvm-commits] Review Request: VPERM optimization for AVX2 Message-ID: I added VPERMQ/VPERMD/VPERMPD/VPERMPS patterns. Please review. - Elena --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: vperm.diff Type: application/octet-stream Size: 9091 bytes Desc: vperm.diff Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/b69468ea/attachment.obj From csdavec at swan.ac.uk Tue Apr 10 06:44:33 2012 From: csdavec at swan.ac.uk (David Chisnall) Date: Tue, 10 Apr 2012 11:44:33 -0000 Subject: [llvm-commits] [llvm] r154391 - in /llvm/trunk: include/llvm/MC/MCObjectFileInfo.h lib/MC/MCObjectFileInfo.cpp Message-ID: <20120410114433.43FD12A6C065@llvm.org> Author: theraven Date: Tue Apr 10 06:44:33 2012 New Revision: 154391 URL: http://llvm.org/viewvc/llvm-project?rev=154391&view=rev Log: Use the correct section types on Solaris for unwind data on both x86 and x86-64. Patch by Dmitri Shubin! Modified: llvm/trunk/include/llvm/MC/MCObjectFileInfo.h llvm/trunk/lib/MC/MCObjectFileInfo.cpp Modified: llvm/trunk/include/llvm/MC/MCObjectFileInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCObjectFileInfo.h?rev=154391&r1=154390&r2=154391&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCObjectFileInfo.h (original) +++ llvm/trunk/include/llvm/MC/MCObjectFileInfo.h Tue Apr 10 06:44:33 2012 @@ -47,6 +47,7 @@ unsigned FDECFIEncoding; unsigned TTypeEncoding; // Section flags for eh_frame + unsigned EHSectionType; unsigned EHSectionFlags; /// TextSection - Section directive for standard text. Modified: llvm/trunk/lib/MC/MCObjectFileInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCObjectFileInfo.cpp?rev=154391&r1=154390&r2=154391&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCObjectFileInfo.cpp (original) +++ llvm/trunk/lib/MC/MCObjectFileInfo.cpp Tue Apr 10 06:44:33 2012 @@ -260,9 +260,14 @@ // Solaris requires different flags for .eh_frame to seemingly every other // platform. 
+ EHSectionType = ELF::SHT_PROGBITS; EHSectionFlags = ELF::SHF_ALLOC; - if (T.getOS() == Triple::Solaris) - EHSectionFlags |= ELF::SHF_WRITE; + if (T.getOS() == Triple::Solaris) { + if (T.getArch() == Triple::x86_64) + EHSectionType = ELF::SHT_X86_64_UNWIND; + else + EHSectionFlags |= ELF::SHF_WRITE; + } // ELF @@ -575,7 +580,7 @@ SectionKind::getReadOnly()); else if (Env == IsELF) EHFrameSection = - Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS, + Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags, SectionKind::getDataRel()); else From rafael.espindola at gmail.com Tue Apr 10 07:30:52 2012 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Tue, 10 Apr 2012 08:30:52 -0400 Subject: [llvm-commits] [llvm] r154124 - /llvm/trunk/tools/lto/LTOCodeGenerator.cpp In-Reply-To: <4F83D073.7020005@mxc.ca> References: <20120405212644.F1CE92A6C069@llvm.org> <4F7E9C78.2070402@free.fr> <2A962B06-19AD-4016-96FF-D50315B66C00@apple.com> <4F83D073.7020005@mxc.ca> Message-ID: > Since my name has been evoked ... :) > > I have a serious concern with this API in the libLTO public interface. I > don't think we should talk about 'internalize' in the API, it should be > worded in terms of what we're trying to achieve, such as how the resulting > binary will be used (ie., must export symbols for a plugin, will be linked > into a standalone executable, to produce a .so file, performing .o -> .o > relinking, etc.) -- assuming that's what this feature is even implementing. > > I have to admit I don't understand the motivation and without having > focussed on this thread, it feels like a bug that internalize wouldn't work > as-is. The only time we shouldn't run it is if it wasn't going to have any > effect anyhow, or to work around other bugs (note: I'm counting things like > untracked symbol usage due to inline asm or the backend lowering to libcalls > as a bug, which when fixed would render this new switch obsolete). 
Those are good points and I agree with them. Assuming there is a use for internalizing nothing (even if for debugging), I suggest that we get it by * Keeping the current default. Changing it breaks existing code and it is a reasonable default to have. * Renaming lto_codegen_set_whole_program_optimization to lto_codegen_must_preserve_all_symbols. As an optimization, lto_codegen_must_preserve_all_symbols can be implemented by disabling the internalize pass. > Nick Cheers, Rafael From asl at math.spbu.ru Tue Apr 10 08:22:50 2012 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Tue, 10 Apr 2012 13:22:50 -0000 Subject: [llvm-commits] [llvm] r154394 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/ARM/2012-04-10-DAGCombine.ll Message-ID: <20120410132250.4728A2A6C065@llvm.org> Author: asl Date: Tue Apr 10 08:22:49 2012 New Revision: 154394 URL: http://llvm.org/viewvc/llvm-project?rev=154394&view=rev Log: Transform div to mul with reciprocal only when fp imm is legal. This fixes PR12516 and uncovers one weird problem in legalize (workarounded) Added: llvm/trunk/test/CodeGen/ARM/2012-04-10-DAGCombine.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154394&r1=154393&r2=154394&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 08:22:49 2012 @@ -5769,8 +5769,15 @@ APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is not too horrible (eg not NaN). 
- if (st == APFloat::opOK || st == APFloat::opInexact) + // Only do the transform if the reciprocal is not too horrible (eg not NaN) + // and the reciprocal is a legal fp imm. + if ((st == APFloat::opOK || st == APFloat::opInexact) && + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, DAG.getConstantFP(Recip, VT)); } Added: llvm/trunk/test/CodeGen/ARM/2012-04-10-DAGCombine.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2012-04-10-DAGCombine.ll?rev=154394&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2012-04-10-DAGCombine.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2012-04-10-DAGCombine.ll Tue Apr 10 08:22:49 2012 @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math +;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +;target triple = "armv7-none-linux-gnueabi" + +define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 { +bb4: + %tmp = extractelement <2 x float> undef, i32 0 + br i1 undef, label %bb18, label %bb5 + +bb5: ; preds = %bb4 + %tmp6 = fadd float %tmp, -1.500000e+01 + %tmp7 = fdiv float %tmp6, 2.000000e+01 + %tmp8 = fadd float %tmp7, 1.000000e+00 + %tmp9 = fdiv float 1.000000e+00, %tmp8 + %tmp10 = fsub float 1.000000e+00, %tmp9 + %tmp11 = fmul float %tmp10, 1.000000e+01 + %tmp12 = fadd float %tmp11, 1.500000e+01 + %tmp13 = fdiv float %tmp12, %tmp + %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0 + %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer + %tmp16 = fmul <4 x float> 
zeroinitializer, %tmp15 + %tmp17 = fadd <4 x float> %tmp16, %arg + store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0 + br label %bb18 + +bb18: ; preds = %bb5, %bb4 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} From chandlerc at gmail.com Tue Apr 10 08:35:58 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 10 Apr 2012 13:35:58 -0000 Subject: [llvm-commits] [llvm] r154395 - /llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Message-ID: <20120410133558.1577A2A6C065@llvm.org> Author: chandlerc Date: Tue Apr 10 08:35:57 2012 New Revision: 154395 URL: http://llvm.org/viewvc/llvm-project?rev=154395&view=rev Log: Make a somewhat subtle change in the logic of block placement. Sometimes the loop header has a non-loop predecessor which has been pre-fused into its chain due to unanalyzable branches. In this case, rotating the header into the body of the loop in order to place a loop exit at the bottom of the loop is a Very Bad Idea as it makes the loop non-contiguous. I'm working on a good test case for this, but it's a bit annoying to craft. I should get one shortly, but I'm submitting this now so I can begin the (lengthy) performance analysis process. An initial run of LNT looks really, really good, but there is too much noise there for me to trust it much.
Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp?rev=154395&r1=154394&r2=154395&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Tue Apr 10 08:35:57 2012 @@ -547,6 +547,18 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. 
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + BlockFrequency BestExitEdgeFreq; MachineBasicBlock *ExitingBB = 0; MachineBasicBlock *LoopingBB = 0; From baldrick at free.fr Tue Apr 10 08:46:25 2012 From: baldrick at free.fr (Duncan Sands) Date: Tue, 10 Apr 2012 15:46:25 +0200 Subject: [llvm-commits] [llvm] r154395 - /llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp In-Reply-To: <20120410133558.1577A2A6C065@llvm.org> References: <20120410133558.1577A2A6C065@llvm.org> Message-ID: <4F8439B1.30303@free.fr> Hi Chandler, > --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Tue Apr 10 08:35:57 2012 > @@ -547,6 +547,18 @@ > MachineBlockPlacement::findBestLoopTop(MachineFunction&F, > MachineLoop&L, > const BlockFilterSet&LoopBlockSet) { > + // We don't want to layout the loop linearly in all cases. If the loop header > + // is just a normal basic block in the loop, we want to look for what block > + // within the loop is the best one to layout at the top. However, if the loop > + // header has be pre-merged into a chain due to predecessors not having has be -> has been Ciao, Duncan. From nadav.rotem at intel.com Tue Apr 10 09:33:13 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Tue, 10 Apr 2012 14:33:13 -0000 Subject: [llvm-commits] [llvm] r154396 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/vec_shuffle-20.ll Message-ID: <20120410143313.C06CA2A6C065@llvm.org> Author: nadav Date: Tue Apr 10 09:33:13 2012 New Revision: 154396 URL: http://llvm.org/viewvc/llvm-project?rev=154396&view=rev Log: Modify the code that lowers shuffles to blends from using blendvXX to vblendXX. blendv uses a register for the selection while vblend uses an immediate. 
On sandybridge they still have the same latency and execute on the same execution ports. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/X86/avx-shuffle.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 09:33:13 2012 @@ -5391,59 +5391,76 @@ SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - LLVMContext *Context = DAG.getContext(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); EVT EltVT = VT.getVectorElementType(); - unsigned EltSize = EltVT.getSizeInBits(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); - // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. 
- if (!Subtarget->hasAVX()) + if (!Subtarget->hasSSE41()) return SDValue(); if (MaskSize != InSize) return SDValue(); - SmallVector MaskVals; - ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); - ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid Op Number"); + + unsigned MaskVals = 0; for (int i = 0; i < MaskSize; ++i) { int EltIdx = SVOp->getMaskElt(i); if (EltIdx == i || EltIdx == -1) - MaskVals.push_back(NegOne); + MaskVals |= (1<getType()); - assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); - SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); - unsigned Alignment = cast(MaskIdx)->getAlignment(); - SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - - if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - - if (Subtarget->hasAVX()) { - switch (MaskTy.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v16i8: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v8i32: - case MVT::v4i64: - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - 
} - } - - return SDValue(); + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -11050,6 +11067,9 @@ case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Apr 10 09:33:13 2012 @@ -175,9 +175,14 @@ /// PSIGN - Copy integer sign. PSIGN, - /// BLEND family of opcodes + /// BLENDV - Blend where the selector is an XMM. BLENDV, + /// BLENDxx - Blend where the selector is an immediate. + BLENDPW, + BLENDPS, + BLENDPD, + /// HADD - Integer horizontal add. 
HADD, Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Apr 10 09:33:13 2012 @@ -126,6 +126,8 @@ SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -158,6 +160,10 @@ def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 10 09:33:13 2012 @@ -6735,12 +6735,22 @@ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + + def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), + (imm:$mask))), + (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; + def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), + (imm:$mask))), + (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; } let Predicates 
= [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), + (imm:$mask))), + (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6789,6 +6799,17 @@ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; + + def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), + (imm:$mask))), + (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (imm:$mask))), + (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (imm:$mask))), + (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; + } let Predicates = [HasAVX] in Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Tue Apr 10 09:33:13 2012 @@ -164,7 +164,7 @@ } ; CHECK: blend1 -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -172,7 +172,7 @@ } ; CHECK: blend2 -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -180,7 +180,7 @@ } ; CHECK: blend2a -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { %t = 
shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -188,7 +188,7 @@ } ; CHECK: blend3 -; CHECK-NOT: vblendvps +; CHECK-NOT: vblendps ; CHECK: ret define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -196,7 +196,7 @@ } ; CHECK: blend4 -; CHECK: vblendvpd +; CHECK: vblendpd ; CHECK: ret define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline { %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll?rev=154396&r1=154395&r2=154396&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Tue Apr 10 09:33:13 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: From nadav.rotem at intel.com Tue Apr 10 09:58:31 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Tue, 10 Apr 2012 14:58:31 -0000 Subject: [llvm-commits] [llvm] r154397 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120410145831.79CEA2A6C065@llvm.org> Author: nadav Date: Tue Apr 10 09:58:31 2012 New Revision: 154397 URL: http://llvm.org/viewvc/llvm-project?rev=154397&view=rev Log: Fix a dagcombine optimization which assumes that the vsetcc result type is always of the same size as the compared values. This is true for SSE/AVX/NEON but not for all targets. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154397&r1=154396&r2=154397&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 09:58:31 2012 @@ -4354,12 +4354,17 @@ // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); + // On some architectures (such as SSE/NEON/etc)the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
- if (VT.getSizeInBits() == N0VT.getSizeInBits()) + if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); @@ -4373,11 +4378,13 @@ EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } From baldrick at free.fr Tue Apr 10 10:03:07 2012 From: baldrick at free.fr (Duncan Sands) Date: Tue, 10 Apr 2012 17:03:07 +0200 Subject: [llvm-commits] [llvm] r154397 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp In-Reply-To: <20120410145831.79CEA2A6C065@llvm.org> References: <20120410145831.79CEA2A6C065@llvm.org> Message-ID: <4F844BAB.90100@free.fr> Hi Nadav, some cosmetic remarks: > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 09:58:31 2012 > @@ -4354,12 +4354,17 @@ > // Only do this before legalize for now. > if (VT.isVector()&& !LegalOperations) { > EVT N0VT = N0.getOperand(0).getValueType(); > + // On some architectures (such as SSE/NEON/etc)the SETCC result type is missing space between ) and "the". > + // of the same size as the compared operands. Only optimize sext(setcc()) > + // if this is the case. > + EVT SVT = TLI.getSetCCResultType(N0VT); > + > // We know that the # elements of the results is the same as the > // # elements of the compare (and the # elements of the compare result > // for that matter). Check to see that they are the same size. 
If so, > // we know that the element size of the sext'd result matches the > // element size of the compare operands. This comment ^ is indented too much. > - if (VT.getSizeInBits() == N0VT.getSizeInBits()) > + if (VT.getSizeInBits() == SVT.getSizeInBits()) > return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), > N0.getOperand(1), > cast(N0.getOperand(2))->get()); Ciao, Duncan. From dblaikie at gmail.com Tue Apr 10 10:23:13 2012 From: dblaikie at gmail.com (David Blaikie) Date: Tue, 10 Apr 2012 15:23:13 -0000 Subject: [llvm-commits] [llvm] r154398 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120410152313.EA5972A6C065@llvm.org> Author: dblaikie Date: Tue Apr 10 10:23:13 2012 New Revision: 154398 URL: http://llvm.org/viewvc/llvm-project?rev=154398&view=rev Log: Remove unused variable. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154398&r1=154397&r2=154398&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 10:23:13 2012 @@ -5393,7 +5393,6 @@ DebugLoc dl = SVOp->getDebugLoc(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); - EVT EltVT = VT.getVectorElementType(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); From dblaikie at gmail.com Tue Apr 10 10:24:34 2012 From: dblaikie at gmail.com (David Blaikie) Date: Tue, 10 Apr 2012 08:24:34 -0700 Subject: [llvm-commits] [llvm] r154396 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/vec_shuffle Message-ID: On Tue, Apr 10, 2012 at 7:33 AM, Nadav Rotem wrote: > Author: nadav > Date: Tue 
Apr 10 09:33:13 2012 > New Revision: 154396 > > URL: http://llvm.org/viewvc/llvm-project?rev=154396&view=rev > Log: > Modify the code that lowers shuffles to blends from using blendvXX to vblendXX. > blendv uses a register for the selection while vblend uses an immediate. > On sandybridge they still have the same latency and execute on the same execution ports. > > > Modified: > ? ?llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > ? ?llvm/trunk/lib/Target/X86/X86ISelLowering.h > ? ?llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td > ? ?llvm/trunk/lib/Target/X86/X86InstrSSE.td > ? ?llvm/trunk/test/CodeGen/X86/avx-shuffle.ll > ? ?llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 09:33:13 2012 > @@ -5391,59 +5391,76 @@ > ? SDValue V1 = SVOp->getOperand(0); > ? SDValue V2 = SVOp->getOperand(1); > ? DebugLoc dl = SVOp->getDebugLoc(); > - ?LLVMContext *Context = DAG.getContext(); > ? EVT VT = Op.getValueType(); > ? EVT InVT = V1.getValueType(); > ? EVT EltVT = VT.getVectorElementType(); This variable became unused with your change, causing Clang selfhost to warn. I removed the variable to fix this in r154398. - David > - ?unsigned EltSize = EltVT.getSizeInBits(); > ? int MaskSize = VT.getVectorNumElements(); > ? int InSize = InVT.getVectorNumElements(); > > - ?// TODO: At the moment we only use AVX blends. We could also use SSE4 blends. > - ?if (!Subtarget->hasAVX()) > + ?if (!Subtarget->hasSSE41()) > ? ? return SDValue(); > > ? if (MaskSize != InSize) > ? ? 
return SDValue(); > > - ?SmallVector MaskVals; > - ?ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); > - ?ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); > + ?int ISDNo = 0; > + ?MVT OpTy; > + > + ?switch (VT.getSimpleVT().SimpleTy) { > + ?default: return SDValue(); > + ?case MVT::v8i16: > + ? ? ? ? ? ISDNo = X86ISD::BLENDPW; > + ? ? ? ? ? OpTy = MVT::v8i16; > + ? ? ? ? ? break; > + ?case MVT::v4i32: > + ?case MVT::v4f32: > + ? ? ? ? ? ISDNo = X86ISD::BLENDPS; > + ? ? ? ? ? OpTy = MVT::v4f32; > + ? ? ? ? ? break; > + ?case MVT::v2i64: > + ?case MVT::v2f64: > + ? ? ? ? ? ISDNo = X86ISD::BLENDPD; > + ? ? ? ? ? OpTy = MVT::v2f64; > + ? ? ? ? ? break; > + ?case MVT::v8i32: > + ?case MVT::v8f32: > + ? ? ? ? ? if (!Subtarget->hasAVX()) > + ? ? ? ? ? ? return SDValue(); > + ? ? ? ? ? ISDNo = X86ISD::BLENDPS; > + ? ? ? ? ? OpTy = MVT::v8f32; > + ? ? ? ? ? break; > + ?case MVT::v4i64: > + ?case MVT::v4f64: > + ? ? ? ? ? if (!Subtarget->hasAVX()) > + ? ? ? ? ? ? return SDValue(); > + ? ? ? ? ? ISDNo = X86ISD::BLENDPD; > + ? ? ? ? ? OpTy = MVT::v4f64; > + ? ? ? ? ? break; > + ?case MVT::v16i16: > + ? ? ? ? ? if (!Subtarget->hasAVX2()) > + ? ? ? ? ? ? return SDValue(); > + ? ? ? ? ? ISDNo = X86ISD::BLENDPW; > + ? ? ? ? ? OpTy = MVT::v16i16; > + ? ? ? ? ? break; > + ?} > + ?assert(ISDNo && "Invalid Op Number"); > + > + ?unsigned MaskVals = 0; > > ? for (int i = 0; i < MaskSize; ++i) { > ? ? int EltIdx = SVOp->getMaskElt(i); > ? ? if (EltIdx == i || EltIdx == -1) > - ? ? ?MaskVals.push_back(NegOne); > + ? ? ?MaskVals |= (1< ? ? else if (EltIdx == (i + MaskSize)) > - ? ? ?MaskVals.push_back(Zero); > + ? ? ?continue; // Bit is set to zero; > ? ? else return SDValue(); > ? 
} > > - ?Constant *MaskC = ConstantVector::get(MaskVals); > - ?EVT MaskTy = EVT::getEVT(MaskC->getType()); > - ?assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); > - ?SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); > - ?unsigned Alignment = cast(MaskIdx)->getAlignment(); > - ?SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? MachinePointerInfo::getConstantPool(), > - ? ? ? ? ? ? ? ? ? ? ? ? ? ? false, false, false, Alignment); > - > - ?if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) > - ? ?return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); > - > - ?if (Subtarget->hasAVX()) { > - ? ?switch (MaskTy.getSimpleVT().SimpleTy) { > - ? ?default: return SDValue(); > - ? ?case MVT::v16i8: > - ? ?case MVT::v4i32: > - ? ?case MVT::v2i64: > - ? ?case MVT::v8i32: > - ? ?case MVT::v4i64: > - ? ? ? ? ? ? return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); > - ? ?} > - ?} > - > - ?return SDValue(); > + ?V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); > + ?V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); > + ?SDValue Ret = ?DAG.getNode(ISDNo, dl, OpTy, V1, V2, > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? DAG.getConstant(MaskVals, MVT::i32)); > + ?return DAG.getNode(ISD::BITCAST, dl, VT, Ret); > ?} > > ?// v8i16 shuffles - Prefer shuffles in the following order: > @@ -11050,6 +11067,9 @@ > ? case X86ISD::ANDNP: ? ? ? ? ? ? ?return "X86ISD::ANDNP"; > ? case X86ISD::PSIGN: ? ? ? ? ? ? ?return "X86ISD::PSIGN"; > ? case X86ISD::BLENDV: ? ? ? ? ? ? return "X86ISD::BLENDV"; > + ?case X86ISD::BLENDPW: ? ? ? ? ? ?return "X86ISD::BLENDPW"; > + ?case X86ISD::BLENDPS: ? ? ? ? ? ?return "X86ISD::BLENDPS"; > + ?case X86ISD::BLENDPD: ? ? ? ? ? ?return "X86ISD::BLENDPD"; > ? case X86ISD::HADD: ? ? ? ? ? ? ? return "X86ISD::HADD"; > ? case X86ISD::HSUB: ? ? ? ? ? ? ? return "X86ISD::HSUB"; > ? case X86ISD::FHADD: ? ? ? ? ? ? 
?return "X86ISD::FHADD"; > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Apr 10 09:33:13 2012 > @@ -175,9 +175,14 @@ > ? ? ? /// PSIGN - Copy integer sign. > ? ? ? PSIGN, > > - ? ? ?/// BLEND family of opcodes > + ? ? ?/// BLENDV - Blend where the selector is an XMM. > ? ? ? BLENDV, > > + ? ? ?/// BLENDxx - Blend where the selector is an immediate. > + ? ? ?BLENDPW, > + ? ? ?BLENDPS, > + ? ? ?BLENDPD, > + > ? ? ? /// HADD - Integer horizontal add. > ? ? ? HADD, > > > Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) > +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Apr 10 09:33:13 2012 > @@ -126,6 +126,8 @@ > ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 
?SDTCisSameAs<0,2>, SDTCisInt<3>]>; > > ?def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; > +def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, > +SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; > > ?def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; > > @@ -158,6 +160,10 @@ > > ?def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; > > +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; > +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; > +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; > + > ?//===----------------------------------------------------------------------===// > ?// SSE Complex Patterns > ?//===----------------------------------------------------------------------===// > > Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) > +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 10 09:33:13 2012 > @@ -6735,12 +6735,22 @@ > ? def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? (v4f64 VR256:$src2))), > ? ? ? ? ? ? (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; > + > + ?def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; > + ?def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; > ?} > > ?let Predicates = [HasAVX2] in { > ? def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? (v32i8 VR256:$src2))), > ? ? ? ? ? ? 
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; > + ?def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; > ?} > > ?/// SS41I_ternary_int - SSE 4.1 ternary operator > @@ -6789,6 +6799,17 @@ > ? def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), > ? ? ? ? ? ? ? ? ? ? ? ? ? ? (v2f64 VR128:$src2))), > ? ? ? ? ? ? (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; > + > + ?def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; > + ?def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; > + ?def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), > + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? (imm:$mask))), > + ? ? ? ? ? ?(VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; > + > ?} > > ?let Predicates = [HasAVX] in > > Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original) > +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Tue Apr 10 09:33:13 2012 > @@ -164,7 +164,7 @@ > ?} > > ?; CHECK: blend1 > -; CHECK: vblendvps > +; CHECK: vblendps > ?; CHECK: ret > ?define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { > ? %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> > @@ -172,7 +172,7 @@ > ?} > > ?; CHECK: blend2 > -; CHECK: vblendvps > +; CHECK: vblendps > ?; CHECK: ret > ?define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { > ? 
%t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> > @@ -180,7 +180,7 @@ > ?} > > ?; CHECK: blend2a > -; CHECK: vblendvps > +; CHECK: vblendps > ?; CHECK: ret > ?define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { > ? %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> > @@ -188,7 +188,7 @@ > ?} > > ?; CHECK: blend3 > -; CHECK-NOT: vblendvps > +; CHECK-NOT: vblendps > ?; CHECK: ret > ?define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { > ? %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> > @@ -196,7 +196,7 @@ > ?} > > ?; CHECK: blend4 > -; CHECK: vblendvpd > +; CHECK: vblendpd > ?; CHECK: ret > ?define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline { > ? %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> > > Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll?rev=154396&r1=154395&r2=154396&view=diff > ============================================================================== > --- llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll (original) > +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Tue Apr 10 09:33:13 2012 > @@ -1,4 +1,4 @@ > -; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 > +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 > > ?define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind ?{ > ?entry: > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Tue Apr 10 11:04:33 2012 From: clattner at apple.com (Chris Lattner) Date: Tue, 10 Apr 2012 09:04:33 -0700 Subject: [llvm-commits] [llvm] r154387 - in /llvm/trunk: docs/LangRef.html 
include/llvm/ADT/APFloat.h lib/VMCore/Verifier.cpp test/Verifier/fpaccuracy.ll In-Reply-To: <20120410082244.387742A6C065@llvm.org> References: <20120410082244.387742A6C065@llvm.org> Message-ID: <2EC50087-8E9E-4843-81E8-9C01FDF43564@apple.com> On Apr 10, 2012, at 1:22 AM, Duncan Sands wrote: > Author: baldrick > Date: Tue Apr 10 03:22:43 2012 > New Revision: 154387 > > URL: http://llvm.org/viewvc/llvm-project?rev=154387&view=rev > Log: > Express the number of ULPs in fpaccuracy metadata as a real rather than a > rational number, eg as 2.5 rather than 5, 2. OK'd by Peter Collingbourne. Did LLVM 3.0 support this metadata? If so, we'll need auto-upgrade logic. -Chris From clattner at apple.com Tue Apr 10 11:05:38 2012 From: clattner at apple.com (Chris Lattner) Date: Tue, 10 Apr 2012 09:05:38 -0700 Subject: [llvm-commits] [PATCH] llvm-stress fixes In-Reply-To: <4F83E972.1000707@free.fr> References: <20120409162300.30c71019@sapling2> <4F83E972.1000707@free.fr> Message-ID: On Apr 10, 2012, at 1:04 AM, Duncan Sands wrote: > Hi Hal, > >> Regarding conversion, given that ppc128 is actually the sum of two IEEE >> (64-bit) doubles, you would need to convert both doubles to fp128 and >> then add the two resulting fp128 values. > > I'm tempted to say that ppc128 should be removed from LLVM and instead > front-ends should just explicitly use the pair of doubles it really is, > and generate in the IR the sequence of operations for adding such a pair > etc that codegen currently takes care of. That makes a lot of sense to me too. The only issue here is that someone needs to do it. There are PPC users of clang out there (Hal?) that may care about this not regressing. 
-Chris From chandlerc at google.com Tue Apr 10 11:08:00 2012 From: chandlerc at google.com (Chandler Carruth) Date: Tue, 10 Apr 2012 18:08:00 +0200 Subject: [llvm-commits] [llvm] r154387 - in /llvm/trunk: docs/LangRef.html include/llvm/ADT/APFloat.h lib/VMCore/Verifier.cpp test/Verifier/fpaccuracy.ll In-Reply-To: <2EC50087-8E9E-4843-81E8-9C01FDF43564@apple.com> References: <20120410082244.387742A6C065@llvm.org> <2EC50087-8E9E-4843-81E8-9C01FDF43564@apple.com> Message-ID: On Tue, Apr 10, 2012 at 6:04 PM, Chris Lattner wrote: > Did LLVM 3.0 support this metadata? If so, we'll need auto-upgrade logic. > Also, release notes. But that can wait until you finish w/ the fpaccuracy stuff we've been discussing. =] -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/64719624/attachment.html From daniel at zuster.org Tue Apr 10 11:11:51 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 09:11:51 -0700 Subject: [llvm-commits] [lld] r154275 - in /lld/trunk/docs: _templates/index.html conf.py contents.rst index.rst intro.rst In-Reply-To: <20120408020604.AD8142A6C065@llvm.org> References: <20120408020604.AD8142A6C065@llvm.org> Message-ID: Cool. FWIW, there was an intention behind the old split, in that it makes the docs work out more nicely when one also wants to generate a PDF of the documentation (where the "index" page doesn't make sense). We don't currently produce that, nor is it particularly interesting, just wanted to explain the reasoning behind the old structure. There might be better ways to effect the split, anyway (maybe the PDF docs can get their own entry point or something). - Daniel On Sat, Apr 7, 2012 at 7:06 PM, Michael J. Spencer wrote: > Author: mspencer > Date: Sat Apr ?7 21:06:04 2012 > New Revision: 154275 > > URL: http://llvm.org/viewvc/llvm-project?rev=154275&view=rev > Log: > [docs] Make the index page ReST based instead of html based. 
> > Added: >    lld/trunk/docs/index.rst > Removed: >    lld/trunk/docs/_templates/index.html >    lld/trunk/docs/contents.rst >    lld/trunk/docs/intro.rst > Modified: >    lld/trunk/docs/conf.py > > Removed: lld/trunk/docs/_templates/index.html > URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/_templates/index.html?rev=154274&view=auto > ============================================================================== > --- lld/trunk/docs/_templates/index.html (original) > +++ lld/trunk/docs/_templates/index.html (removed) > @@ -1,37 +0,0 @@ > -{% extends "layout.html" %} > -{% set title = 'lld' %} > -{% block body %} > -

lld: a linker for LLVM

> - > -For more information, see the introduction > -and design available as part of the lld > -documentation below. > - > -

Documentation

> - > - ? > - ? ? > -
> - ? ? ? > - ? ? ? > - ? ? ? > - ?
> - > -

Source

> -

lld is available in the LLVM SVN repository: > - ?svn co > - ? ? > - ? ? ?http://llvm.org/svn/llvm-project/lld/trunk.

> - > -

lld is also available via the read-only git mirror: > - ?git clone > - ? ? > - ? ? ?http://llvm.org/git/lld.git.

> - > -{% endblock %} > > Modified: lld/trunk/docs/conf.py > URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/conf.py?rev=154275&r1=154274&r2=154275&view=diff > ============================================================================== > --- lld/trunk/docs/conf.py (original) > +++ lld/trunk/docs/conf.py Sat Apr ?7 21:06:04 2012 > @@ -36,7 +36,7 @@ > ?#source_encoding = 'utf-8-sig' > > ?# The master toctree document. > -master_doc = 'contents' > +master_doc = 'index' > > ?# General information about the project. > ?project = u'lld' > @@ -134,7 +134,7 @@ > > ?# Additional templates that should be rendered to pages, maps page names to > ?# template names. > -html_additional_pages = {'index': 'index.html'} > +# html_additional_pages = {'index': 'index.html'} > > ?# If false, no module index is generated. > ?#html_domain_indices = True > > Removed: lld/trunk/docs/contents.rst > URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/contents.rst?rev=154274&view=auto > ============================================================================== > --- lld/trunk/docs/contents.rst (original) > +++ lld/trunk/docs/contents.rst (removed) > @@ -1,19 +0,0 @@ > -.. _contents: > - > -Contents > -======== > - > -.. toctree:: > - ? :maxdepth: 2 > - > - ? intro > - ? design > - ? development > - ? open_projects > - ? sphinx_intro > - > -Indices and tables > -================== > - > -* :ref:`genindex` > -* :ref:`search` > > Added: lld/trunk/docs/index.rst > URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/index.rst?rev=154275&view=auto > ============================================================================== > --- lld/trunk/docs/index.rst (added) > +++ lld/trunk/docs/index.rst Sat Apr ?7 21:06:04 2012 > @@ -0,0 +1,79 @@ > +.. _index: > + > +lld - The LLVM Linker > +===================== > + > +lld is a new set of modular code for creating linker tools. 
> + > +* End-User Features: > + > + ?* Compatible with existing linker options > + ?* Reads standard Object Files (e.g. ELF, Mach-O, PE/COFF) > + ?* Writes standard Executable Files (e.g. ELF, Mach-O, PE) > + ?* Fast link times > + ?* Minimal memory use > + ?* Remove clang's reliance on "the system linker" > + ?* Uses the LLVM `"UIUC" BSD-Style license`__. > + > +* Applications: > + > + ?* Modular design > + ?* Support cross linking > + ?* Easy to add new CPU support > + ?* Can be built as static tool or library > + > +* Design and Implementation: > + > + ?* Extensive unit tests > + ?* Internal linker model can be dumped/read to textual format > + ?* Internal linker model can be dumped/read to a new native format > + ?* Native format designed to be fast to read and write > + ?* Additional linking features can be plugged in as "passes" > + ?* OS specific and CPU specific code factored out > + > +Why a new linker? > +----------------- > + > +The fact that clang relies on whatever linker tool you happen to have installed > +means that clang has been very conservative adopting features which require a > +recent linker. > + > +In the same way that the MC layer of LLVM has removed clang's reliance on the > +system assembler tool, the lld project will remove clang's reliance on the > +system linker tool. > + > + > +Current Status > +-------------- > + > +lld is in its very early stages of development. > + > +Source > +------ > + > +lld is available in the LLVM SVN repository:: > + > + ?svn co http://llvm.org/svn/llvm-project/lld/trunk > + > +lld is also available via the read-only git mirror:: > + > + ?git clone http://llvm.org/git/lld.git > + > +Contents > +-------- > + > +.. toctree:: > + ? :maxdepth: 2 > + > + ? design > + ? development > + ? open_projects > + ? 
sphinx_intro > + > +Indices and tables > +------------------ > + > +* :ref:`genindex` > +* :ref:`search` > + > +__ http://llvm.org/docs/DeveloperPolicy.html#license > > Removed: lld/trunk/docs/intro.rst > URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/intro.rst?rev=154274&view=auto > ============================================================================== > --- lld/trunk/docs/intro.rst (original) > +++ lld/trunk/docs/intro.rst (removed) > @@ -1,66 +0,0 @@ > -.. _intro: > - > -Introduction > -============ > - > -lld is a new set of modular code for creating linker tools. > - > - * End-User Features: > - > - ? * Compatible with existing linker options > - > - ? * Reads standard Object Files (e.g. ELF, Mach-O, PE/COFF) > - > - ? * Writes standard Executable Files (e.g. ELF, Mach-O, PE) > - > - ? * Fast link times > - > - ? * Minimal memory use > - > - ? * Remove clang's reliance on "the system linker" > - > - ? * Uses the LLVM `"UIUC" BSD-Style license`__. > - > - * Applications: > - > - ? * Modular design > - > - ? * Support cross linking > - > - ? * Easy to add new CPU support > - > - ? * Can be built as static tool or library > - > - * Design and Implementation: > - > - ? * Extensive unit tests > - > - ? * Internal linker model can be dumped/read to textual format > - > - ? * Internal linker model can be dumped/read to a new native format > - > - ? * Native format designed to be fast to read and write > - > - ? * Additional linking features can be plugged in as "passes" > - > - ? * OS specific and CPU specific code factored out > - > - > -Why a new linker? > ------------------ > - > -The fact that clang relies on whatever linker tool you happen to have installed > -means that clang has been very conservative adopting features which require a > -recent linker. > - > -In the same way that the MC layer of LLVM has removed clang's reliance on the > -system assembler tool, the lld project will remove clang's reliance on the > -system linker tool. 
> - > - > -Current Status > --------------- > - > -lld is in its very early stages of development. > - > -__ http://llvm.org/docs/DeveloperPolicy.html#license > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From daniel at zuster.org Tue Apr 10 11:18:17 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:18:17 -0000 Subject: [llvm-commits] [lld] r154400 - /lld/trunk/docs/_templates/layout.html Message-ID: <20120410161817.19BF02A6C065@llvm.org> Author: ddunbar Date: Tue Apr 10 11:18:16 2012 New Revision: 154400 URL: http://llvm.org/viewvc/llvm-project?rev=154400&view=rev Log: [docs] Remove a dead link. Modified: lld/trunk/docs/_templates/layout.html Modified: lld/trunk/docs/_templates/layout.html URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/_templates/layout.html?rev=154400&r1=154399&r2=154400&view=diff ============================================================================== --- lld/trunk/docs/_templates/layout.html (original) +++ lld/trunk/docs/_templates/layout.html Tue Apr 10 11:18:16 2012 @@ -9,5 +9,4 @@ {% block rootrellink %}
  • lld Home | 
  • -
  • Documentation»
  • {% endblock %} From baldrick at free.fr Tue Apr 10 11:37:07 2012 From: baldrick at free.fr (Duncan Sands) Date: Tue, 10 Apr 2012 18:37:07 +0200 Subject: [llvm-commits] [llvm] r154387 - in /llvm/trunk: docs/LangRef.html include/llvm/ADT/APFloat.h lib/VMCore/Verifier.cpp test/Verifier/fpaccuracy.ll In-Reply-To: <2EC50087-8E9E-4843-81E8-9C01FDF43564@apple.com> References: <20120410082244.387742A6C065@llvm.org> <2EC50087-8E9E-4843-81E8-9C01FDF43564@apple.com> Message-ID: <4F8461B3.1090307@free.fr> Hi Chris, >> Express the number of ULPs in fpaccuracy metadata as a real rather than a >> rational number, eg as 2.5 rather than 5, 2. OK'd by Peter Collingbourne. > > Did LLVM 3.0 support this metadata? If so, we'll need auto-upgrade logic. no, it was added after 3.0 branched. Ciao, Duncan. From daniel at zuster.org Tue Apr 10 11:39:11 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:39:11 -0000 Subject: [llvm-commits] [LNT] r154401 - in /lnt/trunk: tests/.coveragerc tests/lit.cfg utils/check-coverage Message-ID: <20120410163911.CB3EE2A6C065@llvm.org> Author: ddunbar Date: Tue Apr 10 11:39:11 2012 New Revision: 154401 URL: http://llvm.org/viewvc/llvm-project?rev=154401&view=rev Log: [tests] Add easy support for running tests w/ code coverage. 
Added: lnt/trunk/tests/.coveragerc lnt/trunk/utils/check-coverage (with props) Modified: lnt/trunk/tests/lit.cfg Added: lnt/trunk/tests/.coveragerc URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/.coveragerc?rev=154401&view=auto ============================================================================== --- lnt/trunk/tests/.coveragerc (added) +++ lnt/trunk/tests/.coveragerc Tue Apr 10 11:39:11 2012 @@ -0,0 +1,8 @@ +# .coveragerc to control coverage.py +[run] +branch = True +parallel = True +source = lnt + +[html] +directory = coverage_html_report Modified: lnt/trunk/tests/lit.cfg URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/lit.cfg?rev=154401&r1=154400&r2=154401&view=diff ============================================================================== --- lnt/trunk/tests/lit.cfg (original) +++ lnt/trunk/tests/lit.cfg Tue Apr 10 11:39:11 2012 @@ -28,3 +28,9 @@ config.environment['PYTHONPATH'] = src_root config.substitutions.append(('%src_root', src_root)) + +# Enable coverage.py reporting, assuming sitecustomize.py in the virtualenv has +# been modified appropriately. +if lit.params.get('check-coverage', None): + config.environment['COVERAGE_PROCESS_START'] = os.path.join( + os.path.dirname(__file__), ".coveragerc") Added: lnt/trunk/utils/check-coverage URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/utils/check-coverage?rev=154401&view=auto ============================================================================== --- lnt/trunk/utils/check-coverage (added) +++ lnt/trunk/utils/check-coverage Tue Apr 10 11:39:11 2012 @@ -0,0 +1,36 @@ +#!/bin/sh + +prog=$(basename $0) + +# Expect to be run from the parent LNT directory. +if [ ! -f README.txt ] || [ ! -d lnt ]; then + printf 1>&2 "%s: expected to be run from base LNT directory\n" "$prog" + exit 1 +fi + +# Check arguments. +if [ $# == "0" ]; then + printf 1>&2 "usage: %s {lit-arguments}*\n" "$prog" + exit 1 +fi + +# First, remove any existing coverage data files. 
+rm -f tests/.coverage +find tests -name .coverage.\* -exec rm {} \; + +# Next, run the tests. +lit -sv --param check-coverage=1 "$@" + +# Next, move all the data files from subdirectories up. +find tests -name .coverage.\* -exec mv {} tests \; + +# Combine all the data files. +(cd tests && python -m coverage combine) + +# Finally, generate the report. +(cd tests && python -m coverage report) + +# Generate the HTML report, if requested. +if [ ! -z "$GENERATE_HTML" ]; then + (cd tests && python -m coverage html) +fi Propchange: lnt/trunk/utils/check-coverage ------------------------------------------------------------------------------ svn:executable = * From daniel at zuster.org Tue Apr 10 11:39:17 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:39:17 -0000 Subject: [llvm-commits] [LNT] r154402 - in /lnt/trunk/tests: SharedInputs/ SharedInputs/sample-a-small.plist SharedInputs/sample-b-small.plist lit.cfg server/db/ImportV4TestSuiteInstance.py server/db/Inputs/sample-a-small.plist server/db/Inputs/sample-b-small.plist Message-ID: <20120410163917.28AE02A6C065@llvm.org> Author: ddunbar Date: Tue Apr 10 11:39:16 2012 New Revision: 154402 URL: http://llvm.org/viewvc/llvm-project?rev=154402&view=rev Log: [tests] Create a SharedInputs dir (and substitution). 
Added: lnt/trunk/tests/SharedInputs/ lnt/trunk/tests/SharedInputs/sample-a-small.plist - copied, changed from r154401, lnt/trunk/tests/server/db/Inputs/sample-a-small.plist lnt/trunk/tests/SharedInputs/sample-b-small.plist - copied, changed from r154401, lnt/trunk/tests/server/db/Inputs/sample-b-small.plist Removed: lnt/trunk/tests/server/db/Inputs/sample-a-small.plist lnt/trunk/tests/server/db/Inputs/sample-b-small.plist Modified: lnt/trunk/tests/lit.cfg lnt/trunk/tests/server/db/ImportV4TestSuiteInstance.py Copied: lnt/trunk/tests/SharedInputs/sample-a-small.plist (from r154401, lnt/trunk/tests/server/db/Inputs/sample-a-small.plist) URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/SharedInputs/sample-a-small.plist?p2=lnt/trunk/tests/SharedInputs/sample-a-small.plist&p1=lnt/trunk/tests/server/db/Inputs/sample-a-small.plist&r1=154401&r2=154402&rev=154402&view=diff ============================================================================== (empty) Copied: lnt/trunk/tests/SharedInputs/sample-b-small.plist (from r154401, lnt/trunk/tests/server/db/Inputs/sample-b-small.plist) URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/SharedInputs/sample-b-small.plist?p2=lnt/trunk/tests/SharedInputs/sample-b-small.plist&p1=lnt/trunk/tests/server/db/Inputs/sample-b-small.plist&r1=154401&r2=154402&rev=154402&view=diff ============================================================================== (empty) Modified: lnt/trunk/tests/lit.cfg URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/lit.cfg?rev=154402&r1=154401&r2=154402&view=diff ============================================================================== --- lnt/trunk/tests/lit.cfg (original) +++ lnt/trunk/tests/lit.cfg Tue Apr 10 11:39:16 2012 @@ -28,6 +28,8 @@ config.environment['PYTHONPATH'] = src_root config.substitutions.append(('%src_root', src_root)) +config.substitutions.append(('%{shared_inputs}', os.path.join( + src_root, 'tests', 'SharedInputs'))) # Enable coverage.py reporting, assuming 
sitecustomize.py in the virtualenv has # been modified appropriately. Modified: lnt/trunk/tests/server/db/ImportV4TestSuiteInstance.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/server/db/ImportV4TestSuiteInstance.py?rev=154402&r1=154401&r2=154402&view=diff ============================================================================== --- lnt/trunk/tests/server/db/ImportV4TestSuiteInstance.py (original) +++ lnt/trunk/tests/server/db/ImportV4TestSuiteInstance.py Tue Apr 10 11:39:16 2012 @@ -5,7 +5,7 @@ # RUN: lnt create %t.install # Import the first test set. -# RUN: lnt import %t.install %S/Inputs/sample-a-small.plist \ +# RUN: lnt import %t.install %{shared_inputs}/sample-a-small.plist \ # RUN: --commit=1 --show-sample-count > %t1.log # RUN: FileCheck -check-prefix=IMPORT-A-1 %s < %t1.log # @@ -15,7 +15,7 @@ # IMPORT-A-1: Added Samples : 2 # Import the second test set. -# RUN: lnt import %t.install %S/Inputs/sample-b-small.plist \ +# RUN: lnt import %t.install %{shared_inputs}/sample-b-small.plist \ # RUN: --commit=1 --show-sample-count --show-sql > %t2.log # RUN: FileCheck -check-prefix=IMPORT-B %s < %t2.log # @@ -23,7 +23,7 @@ # IMPORT-B: Added Samples : 1 # Check that reimporting the first test set properly reports as a duplicate. 
-# RUN: lnt import %t.install %S/Inputs/sample-a-small.plist \ +# RUN: lnt import %t.install %{shared_inputs}/sample-a-small.plist \ # RUN: --commit=1 --show-sample-count > %t3.log # RUN: FileCheck -check-prefix=IMPORT-A-2 %s < %t3.log # Removed: lnt/trunk/tests/server/db/Inputs/sample-a-small.plist URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/server/db/Inputs/sample-a-small.plist?rev=154401&view=auto ============================================================================== --- lnt/trunk/tests/server/db/Inputs/sample-a-small.plist (original) +++ lnt/trunk/tests/server/db/Inputs/sample-a-small.plist (removed) @@ -1,104 +0,0 @@ - - - - - Machine - - Info - - extrakey - extravalue - os - SAMPLE OS - hardware - x86_64 - - Name - LNT SAMPLE MACHINE - - Run - - End Time - 2009-11-17 03:44:48 - Info - - tag - nts - run_order - 1 - - Start Time - 2009-11-17 02:12:25 - - Tests - - - Data - - 0 - - Info - - - Name - nts.sampletest.compile.status - - - Data - - 0.019 - - Info - - - Name - nts.sampletest.compile - - - Data - - 0 - - Info - - - Name - nts.sampletest.exec.status - - - Data - - 0.3 - - Info - - - Name - nts.sampletest.exec - - - - Data - - 0.0189 - - Info - - - Name - nts.sampletest.compile - - - Data - - 0.29 - - Info - - - Name - nts.sampletest.exec - - - - Removed: lnt/trunk/tests/server/db/Inputs/sample-b-small.plist URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/server/db/Inputs/sample-b-small.plist?rev=154401&view=auto ============================================================================== --- lnt/trunk/tests/server/db/Inputs/sample-b-small.plist (original) +++ lnt/trunk/tests/server/db/Inputs/sample-b-small.plist (removed) @@ -1,81 +0,0 @@ - - - - - Machine - - Info - - extrakey - extravalue - os - SAMPLE OS - hardware - x86_64 - - Name - LNT SAMPLE MACHINE - - Run - - End Time - 2009-11-19 03:00:12 - Info - - tag - nts - run_order - 2 - - Start Time - 2009-11-19 01:27:49 - - Tests - - - Data - - 0 - - Info - - - Name - 
nts.sampletest.compile.status - - - Data - - 0.022 - - Info - - - Name - nts.sampletest.compile - - - Data - - 0 - - Info - - - Name - nts.sampletest.exec.status - - - Data - - 0.32 - - Info - - - Name - nts.sampletest.exec - - - - From daniel at zuster.org Tue Apr 10 11:39:20 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:39:20 -0000 Subject: [llvm-commits] [LNT] r154403 - in /lnt/trunk/tests/lnttool: ./ UpdateDB.py Message-ID: <20120410163920.0ACFD2A6C065@llvm.org> Author: ddunbar Date: Tue Apr 10 11:39:19 2012 New Revision: 154403 URL: http://llvm.org/viewvc/llvm-project?rev=154403&view=rev Log: [tests] Add a test for 'lnt updatedb'. Added: lnt/trunk/tests/lnttool/ lnt/trunk/tests/lnttool/UpdateDB.py Added: lnt/trunk/tests/lnttool/UpdateDB.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/lnttool/UpdateDB.py?rev=154403&view=auto ============================================================================== --- lnt/trunk/tests/lnttool/UpdateDB.py (added) +++ lnt/trunk/tests/lnttool/UpdateDB.py Tue Apr 10 11:39:19 2012 @@ -0,0 +1,33 @@ +# RUN: rm -rf %t.install +# RUN: lnt create %t.install + +# Import a test set. +# RUN: lnt import %t.install %{shared_inputs}/sample-a-small.plist \ +# RUN: --commit=1 --show-sample-count + +# Check that we remove both the sample and the run, and that we don't commit by +# default. +# +# RUN: lnt updatedb %t.install --testsuite nts \ +# RUN: --delete-run 1 --show-sql > %t.out +# RUN: FileCheck --check-prefix CHECK-RUNRM %s < %t.out + +# CHECK-RUNRM: DELETE FROM "NT_Sample" WHERE "NT_Sample"."RunID" IN (?) +# CHECK-RUNRM-NEXT: (1,) +# CHECK-RUNRM: DELETE FROM "NT_Run" WHERE "NT_Run"."ID" IN (?) +# CHECK-RUNRM-NEXT: (1,) +# CHECK-RUNRM: ROLLBACK + +# Check that we remove runs when we remove a machine. 
+# +# RUN: lnt updatedb %t.install --testsuite nts \ +# RUN: --delete-machine "LNT SAMPLE MACHINE" --commit=1 --show-sql > %t.out +# RUN: FileCheck --check-prefix CHECK-MACHINERM %s < %t.out + +# CHECK-MACHINERM: DELETE FROM "NT_Sample" WHERE "NT_Sample"."RunID" IN (?) +# CHECK-MACHINERM-NEXT: (1,) +# CHECK-MACHINERM: DELETE FROM "NT_Run" WHERE "NT_Run"."ID" IN (?) +# CHECK-MACHINERM-NEXT: (1,) +# CHECK-MACHINERM: DELETE FROM "NT_Machine" WHERE "NT_Machine"."Name" = ? +# CHECK-MACHINERM-NEXT: ('LNT SAMPLE MACHINE',) +# CHECK-MACHINERM: COMMIT From daniel at zuster.org Tue Apr 10 11:39:25 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:39:25 -0000 Subject: [llvm-commits] [LNT] r154404 - in /lnt/trunk: docs/tools.rst lnt/lnttool/import_data.py lnt/lnttool/main.py lnt/lnttool/updatedb.py lnt/server/config.py lnt/server/instance.py lnt/server/ui/app.py lnt/util/ServerUtil.py Message-ID: <20120410163925.9480E2A6C065@llvm.org> Author: ddunbar Date: Tue Apr 10 11:39:25 2012 New Revision: 154404 URL: http://llvm.org/viewvc/llvm-project?rev=154404&view=rev Log: Add lnt.server.instance.Instance class and unify all the different places that were loading instance config files to load an Instance. Added: lnt/trunk/lnt/server/instance.py Modified: lnt/trunk/docs/tools.rst lnt/trunk/lnt/lnttool/import_data.py lnt/trunk/lnt/lnttool/main.py lnt/trunk/lnt/lnttool/updatedb.py lnt/trunk/lnt/server/config.py lnt/trunk/lnt/server/ui/app.py lnt/trunk/lnt/util/ServerUtil.py Modified: lnt/trunk/docs/tools.rst URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/docs/tools.rst?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/docs/tools.rst (original) +++ lnt/trunk/docs/tools.rst Tue Apr 10 11:39:25 2012 @@ -56,19 +56,26 @@ ``lnt createdb `` Creates a new LNT sqlite3 database at the specified path. - ``lnt import +`` + ``lnt import +`` Import an LNT data file into a database. 
You can use ``--database`` to select the database to write to. Note that by default this will also generate report emails if enabled in the configuration, you can use ``--no-email`` to disable this. - ``lnt runserver `` + ``lnt runserver `` Start the LNT server using a development WSGI server. Additional options can be used to control the server host and port, as well as useful development features such as automatic reloading. - The command has built-in support for running the server on an instance which - has been packed into a (compressed) tarball. The tarball will be - automatically unpacked into a temporary directory and removed on exit. This - is useful for passing database instances back and forth, when others only - need to be able to view the results. + ``lnt updatedb --database --testsuite `` + Modify the given database and testsuite. + + Currently the only supported commands are ``--delete-machine`` and + ``--delete-run``. + +All commands which take an instance path support passing in either the path to +the ``lnt.cfg`` file, the path to the instance directory, or the path to a +(compressed) tarball. The tarball will be automatically unpacked into a +temporary directory and removed on exit. This is useful for testing and for +passing database instances back and forth, for example when others only need to +be able to view the results. 
Modified: lnt/trunk/lnt/lnttool/import_data.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/lnttool/import_data.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/lnttool/import_data.py (original) +++ lnt/trunk/lnt/lnttool/import_data.py Tue Apr 10 11:39:25 2012 @@ -5,13 +5,14 @@ import lnt.server.config import lnt.server.db.v4db import lnt.util.ImportData +import lnt.server.instance def action_import(name, args): """import test data into a database""" from optparse import OptionParser, OptionGroup - parser = OptionParser("%%prog %s [options] +"%name) + parser = OptionParser("%%prog %s [options] +"%name) parser.add_option("", "--database", dest="database", default="default", help="database to write to [%default]") parser.add_option("", "--format", dest="format", @@ -39,8 +40,9 @@ path = args.pop(0) - # Load the LNT configuration. - config = lnt.server.config.get_config_from_path(path) + # Load the LNT instance. + instance = lnt.server.instance.Instance.frompath(path) + config = instance.config # Get the database. db = config.get_database(opts.database, echo=opts.show_sql) Modified: lnt/trunk/lnt/lnttool/main.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/lnttool/main.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/lnttool/main.py (original) +++ lnt/trunk/lnt/lnttool/main.py Tue Apr 10 11:39:25 2012 @@ -1,9 +1,7 @@ """Implement the command line 'lnt' tool.""" import os -import shutil import sys -import tarfile import tempfile from optparse import OptionParser, OptionGroup @@ -18,7 +16,7 @@ """start a new development server""" parser = OptionParser("""\ -%%prog %s [options] [] +%%prog %s [options] Start the LNT server using a development WSGI server. 
Additional options can be used to control the server host and port, as well as useful development features @@ -51,51 +49,15 @@ input_path, = args - # Accept paths to config files, or to directories containing 'lnt.cfg'. - tmpdir = None - if os.path.isdir(input_path): - config_path = os.path.join(input_path, 'lnt.cfg') - elif tarfile.is_tarfile(input_path): - # Accept paths to tar/tgz etc. files, which we automatically unpack into - # a temporary directory. - tmpdir = tempfile.mkdtemp(suffix='lnt') - - note("extracting input tarfile %r to %r" % (input_path, tmpdir)) - tf = tarfile.open(input_path) - tf.extractall(tmpdir) - - # Find the LNT instance inside the tar file. Support tarballs that - # either contain the instance directly, or contain a single subdirectory - # which is the instance. - if os.path.exists(os.path.join(tmpdir, "lnt.cfg")): - config_path = os.path.join(tmpdir, "lnt.cfg") - else: - filenames = os.listdir(tmpdir) - if len(filenames) != 1: - fatal("unable to find LNT instance inside tarfile") - config_path = os.path.join(tmpdir, filenames[0], "lnt.cfg") - else: - config_path = input_path - - if not config_path or not os.path.exists(config_path): - raise SystemExit,"error: invalid config: %r" % config_path - import lnt.server.ui.app - instance = lnt.server.ui.app.App.create_standalone( - config_path = config_path) + app = lnt.server.ui.app.App.create_standalone(input_path,) if opts.debugger: - instance.debug = True - try: - instance.run(opts.hostname, opts.port, - use_reloader = opts.reloader, - use_debugger = opts.debugger, - threaded = opts.threaded, - processes = opts.processes) - finally: - # Clean up the tmpdir if we automatically unpacked a tarfile. 
- if tmpdir is not None: - print tmpdir - shutil.rmtree(tmpdir) + app.debug = True + app.run(opts.hostname, opts.port, + use_reloader = opts.reloader, + use_debugger = opts.debugger, + threaded = opts.threaded, + processes = opts.processes) from create import action_create from convert import action_convert Modified: lnt/trunk/lnt/lnttool/updatedb.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/lnttool/updatedb.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/lnttool/updatedb.py (original) +++ lnt/trunk/lnt/lnttool/updatedb.py Tue Apr 10 11:39:25 2012 @@ -1,7 +1,7 @@ import os from optparse import OptionParser, OptionGroup -import lnt.server.config +import lnt.server.instance from lnt.testing.util.commands import note, warning, error, fatal def action_updatedb(name, args): @@ -9,7 +9,7 @@ from optparse import OptionParser, OptionGroup - parser = OptionParser("%%prog %s [options] +"%name) + parser = OptionParser("%%prog %s [options] +"%name) parser.add_option("", "--database", dest="database", default="default", help="database to modify [%default]") parser.add_option("", "--testsuite", dest="testsuite", @@ -30,21 +30,13 @@ if opts.testsuite is None: parser.error("--testsuite is required") - config, = args + path, = args - # Accept paths to config files, or to directories containing 'lnt.cfg'. - if os.path.isdir(config): - tmp = os.path.join(config, 'lnt.cfg') - if os.path.exists(tmp): - config = tmp - - # Load the config file. - config_data = {} - exec open(config) in config_data - config = lnt.server.config.Config.fromData(config, config_data) + # Load the instance. + instance = lnt.server.instance.Instance.frompath(path) # Get the database and test suite. - db = config.get_database(opts.database, echo=opts.show_sql) + db = instance.get_database(opts.database, echo=opts.show_sql) ts = db.testsuite[opts.testsuite] # Compute a list of all the runs to delete. 
Modified: lnt/trunk/lnt/server/config.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/config.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/server/config.py (original) +++ lnt/trunk/lnt/server/config.py Tue Apr 10 11:39:25 2012 @@ -125,16 +125,3 @@ raise NotImplementedError,"unable to import to version %r database" % ( db_entry.db_version,) - -def get_config_from_path(path): - # Accept paths to config files or to directories containing 'lnt.cfg'. - if os.path.isdir(path): - config_path = os.path.join(path, 'lnt.cfg') - else: - config_path = path - - # Load the config file. - config_data = {} - exec open(config_path) in config_data - return lnt.server.config.Config.fromData(config_path, config_data) - Added: lnt/trunk/lnt/server/instance.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/instance.py?rev=154404&view=auto ============================================================================== --- lnt/trunk/lnt/server/instance.py (added) +++ lnt/trunk/lnt/server/instance.py Tue Apr 10 11:39:25 2012 @@ -0,0 +1,73 @@ +import os +import shutil +import tarfile +import tempfile + +import lnt.server.config + +from lnt.testing.util.commands import note, warning, error, fatal + +class Instance(object): + """ + Wrapper object for representing an LNT instance. + """ + + @staticmethod + def frompath(path): + """ + frompath(path) -> Insance + + Load an LNT instance from the given instance specifier. The instance + path can be one of: + * The directory containing the instance. + * The instance config file. + * A tarball containing an instance. + """ + + # Accept paths to config files, or to directories containing 'lnt.cfg'. + tmpdir = None + if os.path.isdir(path): + config_path = os.path.join(path, 'lnt.cfg') + elif tarfile.is_tarfile(path): + # Accept paths to tar/tgz etc. files, which we automatically unpack + # into a temporary directory. 
+ tmpdir = tempfile.mkdtemp(suffix='lnt') + + note("extracting input tarfile %r to %r" % (path, tmpdir)) + tf = tarfile.open(path) + tf.extractall(tmpdir) + + # Find the LNT instance inside the tar file. Support tarballs that + # either contain the instance directly, or contain a single + # subdirectory which is the instance. + if os.path.exists(os.path.join(tmpdir, "lnt.cfg")): + config_path = os.path.join(tmpdir, "lnt.cfg") + else: + filenames = os.listdir(tmpdir) + if len(filenames) != 1: + fatal("unable to find LNT instance inside tarfile") + config_path = os.path.join(tmpdir, filenames[0], "lnt.cfg") + else: + config_path = path + + if not config_path or not os.path.exists(config_path): + fatal("invalid config: %r" % config_path) + + config_data = {} + exec open(config_path) in config_data + config = lnt.server.config.Config.fromData(config_path, config_data) + + return Instance(config_path, config, tmpdir) + + def __init__(self, config_path, config, tmpdir=None): + self.config_path = config_path + self.config = config + self.tmpdir = tmpdir + + def __del__(self): + # If we have a temporary dir, clean it up now. 
+ if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) + + def get_database(self, *args, **kwargs): + return self.config.get_database(*args, **kwargs) Modified: lnt/trunk/lnt/server/ui/app.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/app.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/server/ui/app.py (original) +++ lnt/trunk/lnt/server/ui/app.py Tue Apr 10 11:39:25 2012 @@ -10,11 +10,11 @@ from flask import url_for import lnt -import lnt.server.config +import lnt.server.db.v4db +import lnt.server.instance import lnt.server.ui.filters import lnt.server.ui.globals import lnt.server.ui.views -import lnt.server.db.v4db from lnt.db import perfdbsummary from lnt.db import perfdb @@ -109,11 +109,8 @@ self.wsgi_app = RootSlashPatchMiddleware(self.wsgi_app) def load_config(self, config_path): - config_data = {} - exec open(config_path) in config_data - - self.old_config = lnt.server.config.Config.fromData( - config_path, config_data) + self.instance = lnt.server.instance.Instance.frompath(config_path) + self.old_config = self.instance.config self.jinja_env.globals.update( app=current_app, Modified: lnt/trunk/lnt/util/ServerUtil.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/util/ServerUtil.py?rev=154404&r1=154403&r2=154404&view=diff ============================================================================== --- lnt/trunk/lnt/util/ServerUtil.py (original) +++ lnt/trunk/lnt/util/ServerUtil.py Tue Apr 10 11:39:25 2012 @@ -7,7 +7,7 @@ import urllib import urllib2 -import lnt.server.config +import lnt.server.instance from lnt.util import json from lnt.util import ImportData @@ -42,7 +42,8 @@ def submitFileToInstance(path, file, commit): # Otherwise, assume it is a local url and submit to the default database # in the instance. 
- config = lnt.server.config.get_config_from_path(path) + instance = lnt.server.instance.Instance(path) + config = instance.config db_name = 'default' db = config.get_database(db_name) if db is None: From daniel at zuster.org Tue Apr 10 11:39:29 2012 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 10 Apr 2012 16:39:29 -0000 Subject: [llvm-commits] [LNT] r154405 - in /lnt/trunk: lnt/formats/AppleOpenSSLReader.py lnt/formats/NightlytestReader.py lnt/formats/__init__.py tests/Formats/nightlytest.py Message-ID: <20120410163929.64CDA2A6C066@llvm.org> Author: ddunbar Date: Tue Apr 10 11:39:29 2012 New Revision: 154405 URL: http://llvm.org/viewvc/llvm-project?rev=154405&view=rev Log: Remove two old and unused formats. Removed: lnt/trunk/lnt/formats/AppleOpenSSLReader.py lnt/trunk/lnt/formats/NightlytestReader.py lnt/trunk/tests/Formats/nightlytest.py Modified: lnt/trunk/lnt/formats/__init__.py Removed: lnt/trunk/lnt/formats/AppleOpenSSLReader.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/formats/AppleOpenSSLReader.py?rev=154404&view=auto ============================================================================== --- lnt/trunk/lnt/formats/AppleOpenSSLReader.py (original) +++ lnt/trunk/lnt/formats/AppleOpenSSLReader.py (removed) @@ -1,111 +0,0 @@ -""" -Converter for a custom format with the output of OpenSSL test runs. 
-""" - -import os - -def parseOpenSSLFile(path): - data = open(path).read() - lines = list(open(path)) - lnfields = [ln.strip().split(':') for ln in lines] - assert(lnfields[0][0] == '+H') - header = lnfields[0] - blockSizes = map(int, header[1:]) - - # Cipher -> [(Block Size,Value)*] - data = {} - for fields in lnfields[1:]: - # Ignore other fields - if fields[0] != '+F': - continue - - name = fields[2] - countsPerBlock = fields[3:] - assert len(countsPerBlock) == len(blockSizes) - data[name] = [(b,float(c)) - for b,c in zip(blockSizes,countsPerBlock)] - - return data - -def _matches_format(path_or_file): - # If this is a file, we definitely can't load it. - if not isinstance(path_or_file,str): - return False - - # Assume an input matches this format if any of the key files exists. - return (os.path.exists(os.path.join(path_or_file, 'svn-revision')) or - os.path.exists(os.path.join(path_or_file, 'start.timestamp')) or - os.path.exists(os.path.join(path_or_file, 'finished.timestamp'))) - -def _load_data(path): - # Look for svn-revision and timestamps. 
- - llvmRevision = '' - startTime = endTime = '' - - f = os.path.join(path, 'svn-revision') - if os.path.exists(f): - svnRevisionData = open(f).read() - assert(svnRevisionData[0] == 'r') - llvmRevision = int(svnRevisionData[1:]) - - f = os.path.join(path, 'start.timestamp') - if os.path.exists(f): - startTime = open(f).read().strip() - - f = os.path.join(path, 'finished.timestamp') - if os.path.exists(f): - endTime = open(f).read().strip() - - # Look for sub directories - openSSLData = [] - for file in os.listdir(path): - p = os.path.join(path, file) - if os.path.isdir(p): - # Look for Tests/Apple.OpenSSL.64/speed.txt - p = os.path.join(p, 'Tests/Apple.OpenSSL.64/speed.txt') - if os.path.exists(p): - openSSLData.append((file, parseOpenSSLFile(p))) - - basename = 'apple_openssl' - - machine = { 'Name' : 'dgohman.apple.com', - 'Info' : { } } - - run = { 'Start Time' : startTime, - 'End Time' : endTime, - 'Info' : { 'llvm-revision' : llvmRevision, - 'tag' : 'apple_openssl' } } - - tests = [] - groupInfo = [] - - for dirName,dirData in openSSLData: - # Demangle compiler & flags - if dirName.startswith('gcc'): - compiler = 'gcc' - elif dirName.startswith('llvm-gcc'): - compiler = 'llvm-gcc' - else: - raise ValueError,compiler - assert dirName[len(compiler)] == '-' - flags = dirName[len(compiler)+1:] - - for cipher,values in dirData.items(): - testName = basename + '.' 
+ cipher + '.ips' - for block,value in values: - parameters = { 'blockSize' : block, - 'compiler' : compiler, - 'compiler_flags' : flags } - tests.append( { 'Name' : testName, - 'Info' : parameters, - 'Data' : [value] } ) - - return { 'Machine' : machine, - 'Run' : run, - 'Tests' : tests, - 'Group Info' : groupInfo } - -format = { 'name' : 'apple_openssl', - 'predicate' : _matches_format, - 'read' : _load_data } Removed: lnt/trunk/lnt/formats/NightlytestReader.py URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/formats/NightlytestReader.py?rev=154404&view=auto ============================================================================== --- lnt/trunk/lnt/formats/NightlytestReader.py (original) +++ lnt/trunk/lnt/formats/NightlytestReader.py (removed) @@ -1,233 +0,0 @@ -""" -Data converter from the llvm/utils/NewNightlyTest.pl report file format -(*-sentdata.txt) to the LNT plist format. -""" - -import re - -kDataKeyStart = re.compile('(.*) =>(.*)') - -def _matches_format(path_or_file): - if isinstance(path_or_file, str): - path_or_file = open(path_or_file) - - # Assume this is in nightlytes format if the first line matches the - # key-value format. - for ln in path_or_file: - m = kDataKeyStart.match(ln) - if m: - return True - return False - - -def _load_data(path_or_file): - def parseDGResults(text): - results = {} - if 'Dejagnu skipped by user choice' in text: - return results - for ln in text.strip().split('\n'): - result,value = ln.split(':',1) - results[result] = results.get(result,[]) - results[result].append(value) - return results - - if isinstance(path_or_file, str): - path_or_file = open(path_or_file) - - basename = 'nightlytest' - - # Guess the format (server side or client side) based on the first - # character. 
- f = path_or_file - isServerSide = (f.read(1) == '\'') - f.seek(0) - - data = {} - - current = None - inData = False - for ln in f: - if inData: - if ln == 'EOD\n': - inData = False - else: - data[current] += ln - continue - - m = kDataKeyStart.match(ln) - if m: - current,value = m.groups() - if isServerSide: - assert current[0] == current[-1] == "'" - current = current[1:-1] - assert value[0] == value[1] == ' ' - value = value[2:] - if value == '< References: <20120409162300.30c71019@sapling2> <4F83E972.1000707@free.fr> Message-ID: <20120410114130.44e79e30@sapling2> On Tue, 10 Apr 2012 09:05:38 -0700 Chris Lattner wrote: > > On Apr 10, 2012, at 1:04 AM, Duncan Sands wrote: > > > Hi Hal, > > > >> Regarding conversion, given that ppc128 is actually the sum of two > >> IEEE (64-bit) doubles, you would need to convert both doubles to > >> fp128 and then add the two resulting fp128 values. > > > > I'm tempted to say that ppc128 should be removed from LLVM and > > instead front-ends should just explicitly use the pair of doubles > > it really is, and generate in the IR the sequence of operations for > > adding such a pair etc that codegen currently takes care of. > > That makes a lot of sense to me too. The only issue here is that > someone needs to do it. There are PPC users of clang out there > (Hal?) that may care about this not regressing. The current situation is not great, support for ppc128 in APFloat is currently broken, and while fixing it is on my TODO list, I have not really worked on it yet. If you edit APFloat.cpp to disable the relevant asserts then you can compile codes with long doubles in them (which is important for C++ codes that #include , for example), but any code that actually uses long doubles will most likely output the wrong answer. Therefore, so long as some skeleton support exists, then there is no regression in practice. 
I would be happy to move the relevant code into the frontend, but while I imagine that in some ways this is very similar to how _Complex double is handled, any assistance would be appreciated. Thanks again, Hal > > -Chris -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From spop at codeaurora.org Tue Apr 10 12:05:09 2012 From: spop at codeaurora.org (Sebastian Pop) Date: Tue, 10 Apr 2012 12:05:09 -0500 Subject: [llvm-commits] [polly] r153739 - /polly/trunk/test/Makefile In-Reply-To: References: <20120330092717.037AF2A6C066@llvm.org> Message-ID: On Thu, Apr 5, 2012 at 10:58 PM, Hongbin Zheng wrote: > Hi Sebastian, > > Fixed in r154162, sorry for this. > > best regards > ether > On Fri, Apr 6, 2012 at 5:22 AM, Sebastian Pop wrote: >> On Fri, Mar 30, 2012 at 4:27 AM, Hongbin Zheng wrote: >>> Author: ether >>> Date: Fri Mar 30 04:27:16 2012 >>> New Revision: 153739 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=153739&view=rev >>> Log: >>> Make the "all" target depend on polly-test, so that users can run regression >>> ?tests by simply typing "make -C tools/polly/test", like llvm's regression >>> ?tests. >>> >>> Modified: >>> ? ?polly/trunk/test/Makefile >>> >>> Modified: polly/trunk/test/Makefile >>> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Makefile?rev=153739&r1=153738&r2=153739&view=diff >>> ============================================================================== >>> --- polly/trunk/test/Makefile (original) >>> +++ polly/trunk/test/Makefile Fri Mar 30 04:27:16 2012 >>> @@ -31,6 +31,7 @@ >>> ? LIT_ARGS += "--vg" >>> ?endif >>> >>> +all:: polly-test >> >> Several of my automatic tests have failed because of this change: when >> make has finished building the polly/lib, it automatically goes to >> make all in the test dir, and because of this change, it starts >> testing polly, even though the rest of llvm and clang have not >> finished to be built. 
>> >> Please fix this by not including the test dir in the polly top level >> Makefile DIRS variable: > Or do you want to revert this commit? Yes, let's revert this commit. >> >> diff --git a/Makefile b/Makefile >> index 2ad5b36..665c13a 100644 >> --- a/Makefile >> +++ b/Makefile >> @@ -8,7 +8,7 @@ >> ?# Indicates our relative path to the top of the project's root directory. >> ?# >> ?LEVEL = . >> -DIRS = lib test tools >> +DIRS = lib tools >> ?EXTRA_DIST = include This change removed the test dir from the build dir, making it impossible to test Polly with a make polly-test -C tools/polly/test/ What about reverting both this and your previous change? Thanks, Sebastian -- Qualcomm Innovation Center, Inc is a member of Code Aurora Forum From andrew.kaylor at intel.com Tue Apr 10 12:23:46 2012 From: andrew.kaylor at intel.com (Kaylor, Andrew) Date: Tue, 10 Apr 2012 17:23:46 +0000 Subject: [llvm-commits] [llvm] [Patch] MCJIT, fix ARM ELF stub relocations In-Reply-To: <6AE1604EE3EC5F4296C096518C6B77EE1AA570D02A@mail.accesssoftek.com> References: <6AE1604EE3EC5F4296C096518C6B77EE1AA570D02A@mail.accesssoftek.com> Message-ID: <0983E6C011D2DC4188F8761B533492DE0C9F42@ORSMSX105.amr.corp.intel.com> It appears to me that this will result in the FinalAddress parameter being calculated from Section.Address. Shouldn't it be calculated from Section.LoadAddress? That is, rather than passing 'Target' should you be passing 'Section.LoadAddress + Rel.Offset'? It is my understanding that in the case of local execution Section.Address and Section.LoadAddress will be the same (and so existing tests would pass with the patch you submitted), but that the distinction exists to handle the case of remote JITing. I think that we may have some clean-up to do to make this work correctly, but it looks like the foundation is more or less in place. Let me know if I've misunderstood this in some way. 
-Andy From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Danil Malyshev Sent: Friday, April 06, 2012 5:49 PM To: llvm-commits at cs.uiuc.edu Subject: [llvm-commits] [llvm] [Patch] MCJIT, fix ARM ELF stub relocations Hello everyone, Attached the patch fixed ARM stub relocations in RuntimeDyldELF. Regards, Danil -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/93065ff2/attachment.html From grosbach at apple.com Tue Apr 10 12:31:55 2012 From: grosbach at apple.com (Jim Grosbach) Date: Tue, 10 Apr 2012 17:31:55 -0000 Subject: [llvm-commits] [llvm] r154411 - in /llvm/trunk: lib/Target/ARM/ARMInstrThumb2.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/basic-thumb2-instructions.s Message-ID: <20120410173155.8EF3B2A6C065@llvm.org> Author: grosbach Date: Tue Apr 10 12:31:55 2012 New Revision: 154411 URL: http://llvm.org/viewvc/llvm-project?rev=154411&view=rev Log: ARM fix cc_out operand handling for t2SUBrr instructions. We were incorrectly conflating some add variants which don't have a cc_out operand with the mirroring sub encodings, which do. Part of the awesome non-orthogonality legacy of thumb1. Similarly, handling of add/sub of an immediate was sometimes incorrectly removing the cc_out operand for add/sub register variants. 
rdar://11216577 Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/basic-thumb2-instructions.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=154411&r1=154410&r2=154411&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Apr 10 12:31:55 2012 @@ -3984,13 +3984,14 @@ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; - // Alias for compares without the ".w" optional width specifier. 
def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=154411&r1=154410&r2=154411&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Apr 10 12:31:55 2012 @@ -4770,7 +4770,7 @@ static_cast(Operands[4])->isReg() && static_cast(Operands[4])->getReg() == ARM::SP && static_cast(Operands[1])->getReg() == 0 && - (static_cast(Operands[5])->isReg() || + ((Mnemonic == "add" &&static_cast(Operands[5])->isReg()) || static_cast(Operands[5])->isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the @@ -4854,7 +4854,10 @@ (Operands.size() == 5 || Operands.size() == 6) && static_cast(Operands[3])->isReg() && static_cast(Operands[3])->getReg() == ARM::SP && - static_cast(Operands[1])->getReg() == 0) + static_cast(Operands[1])->getReg() == 0 && + (static_cast(Operands[4])->isImm() || + (Operands.size() == 6 && + static_cast(Operands[5])->isImm()))) return true; return false; Modified: llvm/trunk/test/MC/ARM/basic-thumb2-instructions.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/basic-thumb2-instructions.s?rev=154411&r1=154410&r2=154411&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/basic-thumb2-instructions.s (original) +++ llvm/trunk/test/MC/ARM/basic-thumb2-instructions.s Tue Apr 10 12:31:55 2012 @@ -2686,6 +2686,12 @@ sub r4, r5, r6, asr #5 sub r4, r5, r6, ror #5 sub.w r5, r2, r12, rrx + sub r2, sp, ip + sub sp, sp, ip + sub sp, ip + sub.w r2, sp, ip + sub.w sp, sp, ip + sub.w sp, ip @ CHECK: sub.w r4, r5, r6 @ encoding: [0xa5,0xeb,0x06,0x04] @ CHECK: sub.w r4, r5, r6, lsl #5 @ encoding: 
[0xa5,0xeb,0x46,0x14] @@ -2694,6 +2700,12 @@ @ CHECK: sub.w r4, r5, r6, asr #5 @ encoding: [0xa5,0xeb,0x66,0x14] @ CHECK: sub.w r4, r5, r6, ror #5 @ encoding: [0xa5,0xeb,0x76,0x14] @ CHECK: sub.w r5, r2, r12, rrx @ encoding: [0xa2,0xeb,0x3c,0x05] +@ CHECK: sub.w r2, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x02] +@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d] +@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d] +@ CHECK: sub.w r2, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x02] +@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d] +@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d] @------------------------------------------------------------------------------ From resistor at mac.com Tue Apr 10 12:54:04 2012 From: resistor at mac.com (Owen Anderson) Date: Tue, 10 Apr 2012 10:54:04 -0700 Subject: [llvm-commits] [llvm] r154397 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp In-Reply-To: <20120410145831.79CEA2A6C065@llvm.org> References: <20120410145831.79CEA2A6C065@llvm.org> Message-ID: Nadav, This is causing CodeGen/X86/vec_shuffle-20.ll to fail on Darwin. I'm going to revert it to unblock the buildbots. --Owen On Apr 10, 2012, at 7:58 AM, Nadav Rotem wrote: > Author: nadav > Date: Tue Apr 10 09:58:31 2012 > New Revision: 154397 > > URL: http://llvm.org/viewvc/llvm-project?rev=154397&view=rev > Log: > Fix a dagcombine optimization which assumes that the vsetcc result type is always > of the same size as the compared values. This is true for SSE/AVX/NEON but not > for all targets. 
> > > Modified: > llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > > Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154397&r1=154396&r2=154397&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 09:58:31 2012 > @@ -4354,12 +4354,17 @@ > // Only do this before legalize for now. > if (VT.isVector() && !LegalOperations) { > EVT N0VT = N0.getOperand(0).getValueType(); > + // On some architectures (such as SSE/NEON/etc)the SETCC result type is > + // of the same size as the compared operands. Only optimize sext(setcc()) > + // if this is the case. > + EVT SVT = TLI.getSetCCResultType(N0VT); > + > // We know that the # elements of the results is the same as the > // # elements of the compare (and the # elements of the compare result > // for that matter). Check to see that they are the same size. If so, > // we know that the element size of the sext'd result matches the > // element size of the compare operands. 
> - if (VT.getSizeInBits() == N0VT.getSizeInBits()) > + if (VT.getSizeInBits() == SVT.getSizeInBits()) > return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), > N0.getOperand(1), > cast(N0.getOperand(2))->get()); > @@ -4373,11 +4378,13 @@ > EVT MatchingVectorType = > EVT::getVectorVT(*DAG.getContext(), MatchingElementType, > N0VT.getVectorNumElements()); > - SDValue VsetCC = > - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), > - N0.getOperand(1), > - cast(N0.getOperand(2))->get()); > - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); > + > + if (SVT == MatchingVectorType) { > + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, > + N0.getOperand(0), N0.getOperand(1), > + cast(N0.getOperand(2))->get()); > + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); > + } > } > } > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From resistor at mac.com Tue Apr 10 13:02:13 2012 From: resistor at mac.com (Owen Anderson) Date: Tue, 10 Apr 2012 18:02:13 -0000 Subject: [llvm-commits] [llvm] r154414 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120410180213.2AFC22A6C065@llvm.org> Author: resistor Date: Tue Apr 10 13:02:12 2012 New Revision: 154414 URL: http://llvm.org/viewvc/llvm-project?rev=154414&view=rev Log: Revert r154397, which was causing make check failures on the buildbots. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154414&r1=154413&r2=154414&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 13:02:12 2012 @@ -4354,17 +4354,12 @@ // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // On some architectures (such as SSE/NEON/etc)the SETCC result type is - // of the same size as the compared operands. Only optimize sext(setcc()) - // if this is the case. - EVT SVT = TLI.getSetCCResultType(N0VT); - // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
- if (VT.getSizeInBits() == SVT.getSizeInBits()) + if (VT.getSizeInBits() == N0VT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); @@ -4378,13 +4373,11 @@ EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); - } + SDValue VsetCC = + DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } From grosser at fim.uni-passau.de Tue Apr 10 13:12:19 2012 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 10 Apr 2012 18:12:19 -0000 Subject: [llvm-commits] [polly] r154415 - /polly/trunk/lib/Analysis/ScopDetection.cpp Message-ID: <20120410181219.AF01B2A6C065@llvm.org> Author: grosser Date: Tue Apr 10 13:12:19 2012 New Revision: 154415 URL: http://llvm.org/viewvc/llvm-project?rev=154415&view=rev Log: Fix typo Suggested by: Sebastian Pop Modified: polly/trunk/lib/Analysis/ScopDetection.cpp Modified: polly/trunk/lib/Analysis/ScopDetection.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopDetection.cpp?rev=154415&r1=154414&r2=154415&view=diff ============================================================================== --- polly/trunk/lib/Analysis/ScopDetection.cpp (original) +++ polly/trunk/lib/Analysis/ScopDetection.cpp Tue Apr 10 13:12:19 2012 @@ -513,7 +513,7 @@ return false; } - // SCoP can not contains the entry block of the function, because we need + // SCoP cannot contain the entry block of the function, because we need // to insert alloca instruction there when translate scalar to array. 
if (R.getEntry() == &(R.getEntry()->getParent()->getEntryBlock())) INVALID(Other, "Region containing entry block of function is invalid!"); From echristo at apple.com Tue Apr 10 13:18:11 2012 From: echristo at apple.com (Eric Christopher) Date: Tue, 10 Apr 2012 18:18:11 -0000 Subject: [llvm-commits] [llvm] r154417 - in /llvm/trunk: lib/CodeGen/SelectionDAG/FastISel.cpp test/CodeGen/ARM/fast-isel-br-const.ll Message-ID: <20120410181811.328C12A6C065@llvm.org> Author: echristo Date: Tue Apr 10 13:18:10 2012 New Revision: 154417 URL: http://llvm.org/viewvc/llvm-project?rev=154417&view=rev Log: To ensure that we have more accurate line information for a block don't elide the branch instruction if it's the only one in the block, otherwise it's ok. PR9796 and rdar://11215207 Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp llvm/trunk/test/CodeGen/ARM/fast-isel-br-const.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=154417&r1=154416&r2=154417&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Tue Apr 10 13:18:10 2012 @@ -821,8 +821,11 @@ /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. 
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, Modified: llvm/trunk/test/CodeGen/ARM/fast-isel-br-const.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fast-isel-br-const.ll?rev=154417&r1=154416&r2=154417&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fast-isel-br-const.ll (original) +++ llvm/trunk/test/CodeGen/ARM/fast-isel-br-const.ll Tue Apr 10 13:18:10 2012 @@ -5,7 +5,7 @@ entry: ; THUMB: t1: ; ARM: t1: - + %x = add i32 %a, %b br i1 1, label %if.then, label %if.else ; THUMB-NOT: b LBB0_1 ; ARM-NOT: b LBB0_1 @@ -24,6 +24,7 @@ br label %if.end6 if.else3: ; preds = %if.else + %y = sub i32 %a, %b br i1 1, label %if.then5, label %if.end ; THUMB-NOT: b LBB0_5 ; ARM-NOT: b LBB0_5 From kcc at google.com Tue Apr 10 13:18:56 2012 From: kcc at google.com (Kostya Serebryany) Date: Tue, 10 Apr 2012 18:18:56 -0000 Subject: [llvm-commits] [llvm] r154418 - in /llvm/trunk: lib/Transforms/Instrumentation/ThreadSanitizer.cpp test/Instrumentation/ThreadSanitizer/read_before_write.ll Message-ID: <20120410181856.E4D502A6C065@llvm.org> Author: kcc Date: Tue Apr 10 13:18:56 2012 New Revision: 154418 URL: http://llvm.org/viewvc/llvm-project?rev=154418&view=rev Log: [tsan] compile-time instrumentation: do not instrument a read if a write to the same temp follows in the same BB. Also add stats printing. 
On Spec CPU2006 this optimization saves roughly 4% of instrumented reads (which is 3% of all instrumented accesses): Writes : 161216 Reads : 446458 Reads-before-write: 18295 Added: llvm/trunk/test/Instrumentation/ThreadSanitizer/read_before_write.ll Modified: llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp Modified: llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp?rev=154418&r1=154417&r2=154418&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp (original) +++ llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp Tue Apr 10 13:18:56 2012 @@ -22,6 +22,7 @@ #define DEBUG_TYPE "tsan" #include "FunctionBlackList.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -45,16 +46,33 @@ static cl::opt ClBlackListFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); +static cl::opt ClPrintStats("tsan-print-stats", + cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); + namespace { + +// Stats counters for ThreadSanitizer instrumentation. +struct ThreadSanitizerStats { + size_t NumInstrumentedReads; + size_t NumInstrumentedWrites; + size_t NumOmittedReadsBeforeWrite; + size_t NumAccessesWithBadSize; + size_t NumInstrumentedVtableWrites; +}; + /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { ThreadSanitizer(); bool runOnFunction(Function &F); bool doInitialization(Module &M); + bool doFinalization(Module &M); bool instrumentLoadOrStore(Instruction *I); static char ID; // Pass identification, replacement for typeid. 
private: + void choseInstructionsToInstrument(SmallVectorImpl &Local, + SmallVectorImpl &All); + TargetData *TD; OwningPtr BL; // Callbacks to run-time library are computed in doInitialization. @@ -65,6 +83,9 @@ Value *TsanRead[kNumberOfAccessSizes]; Value *TsanWrite[kNumberOfAccessSizes]; Value *TsanVptrUpdate; + + // Stats are modified w/o synchronization. + ThreadSanitizerStats stats; }; } // namespace @@ -87,6 +108,7 @@ if (!TD) return false; BL.reset(new FunctionBlackList(ClBlackListFile)); + memset(&stats, 0, sizeof(stats)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -115,11 +137,59 @@ return true; } +bool ThreadSanitizer::doFinalization(Module &M) { + if (ClPrintStats) { + errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() + << ": wr " << stats.NumInstrumentedWrites + << "; rd " << stats.NumInstrumentedReads + << "; vt " << stats.NumInstrumentedVtableWrites + << "; bs " << stats.NumAccessesWithBadSize + << "; rbw " << stats.NumOmittedReadsBeforeWrite + << "\n"; + } + return true; +} + +// Instrumenting some of the accesses may be proven redundant. +// Currently handled: +// - read-before-write (within same BB, no calls between) +// +// We do not handle some of the patterns that should not survive +// after the classic compiler optimizations. +// E.g. two reads from the same temp should be eliminated by CSE, +// two writes should be eliminated by DSE, etc. +// +// 'Local' is a vector of insns within the same BB (no calls between). +// 'All' is a vector of insns that will be instrumented. +void ThreadSanitizer::choseInstructionsToInstrument( + SmallVectorImpl &Local, + SmallVectorImpl &All) { + SmallSet WriteTargets; + // Iterate from the end. 
+ for (SmallVectorImpl::reverse_iterator It = Local.rbegin(), + E = Local.rend(); It != E; ++It) { + Instruction *I = *It; + if (StoreInst *Store = dyn_cast(I)) { + WriteTargets.insert(Store->getPointerOperand()); + } else { + LoadInst *Load = cast(I); + if (WriteTargets.count(Load->getPointerOperand())) { + // We will write to this temp, so no reason to analyze the read. + stats.NumOmittedReadsBeforeWrite++; + continue; + } + } + All.push_back(I); + } + Local.clear(); +} + bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; if (BL->isIn(F)) return false; SmallVector RetVec; - SmallVector LoadsAndStores; + SmallVector AllLoadsAndStores; + SmallVector LocalLoadsAndStores; bool Res = false; bool HasCalls = false; @@ -130,12 +200,15 @@ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE; ++BI) { if (isa(BI) || isa(BI)) - LoadsAndStores.push_back(BI); + LocalLoadsAndStores.push_back(BI); else if (isa(BI)) RetVec.push_back(BI); - else if (isa(BI) || isa(BI)) + else if (isa(BI) || isa(BI)) { HasCalls = true; + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + } } + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } // We have collected all loads and stores. @@ -143,8 +216,8 @@ // (e.g. variables that do not escape, etc). // Instrument memory accesses. - for (size_t i = 0, n = LoadsAndStores.size(); i < n; ++i) { - Res |= instrumentLoadOrStore(LoadsAndStores[i]); + for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { + Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); } // Instrument function entry/exit points if there were instrumented accesses. @@ -185,6 +258,7 @@ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { + stats.NumAccessesWithBadSize++; // Ignore all unusual sizes. 
return false; } @@ -193,11 +267,14 @@ IRB.CreateCall2(TsanVptrUpdate, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); + stats.NumInstrumentedVtableWrites++; return true; } size_t Idx = CountTrailingZeros_32(TypeSize / 8); assert(Idx < kNumberOfAccessSizes); Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + if (IsWrite) stats.NumInstrumentedWrites++; + else stats.NumInstrumentedReads++; return true; } Added: llvm/trunk/test/Instrumentation/ThreadSanitizer/read_before_write.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/ThreadSanitizer/read_before_write.ll?rev=154418&view=auto ============================================================================== --- llvm/trunk/test/Instrumentation/ThreadSanitizer/read_before_write.ll (added) +++ llvm/trunk/test/Instrumentation/ThreadSanitizer/read_before_write.ll Tue Apr 10 13:18:56 2012 @@ -0,0 +1,32 @@ +; RUN: opt < %s -tsan -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable { +entry: + %0 = load i32* %ptr, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %ptr, align 4 + ret void +} +; CHECK: define void @IncrementMe +; CHECK-NOT: __tsan_read +; CHECK: __tsan_write +; CHECK: ret void + +define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable { +entry: + %0 = load i32* %ptr, align 4 + %inc = add nsw i32 %0, 1 + call void @foo() + store i32 %inc, i32* %ptr, align 4 + ret void +} + +; CHECK: define void @IncrementMeWithCallInBetween +; CHECK: __tsan_read +; CHECK: __tsan_write +; CHECK: ret void + +declare void @foo() + From echristo at apple.com Tue Apr 10 14:33:16 2012 From: echristo at apple.com (Eric 
Christopher) Date: Tue, 10 Apr 2012 19:33:16 -0000 Subject: [llvm-commits] [llvm] r154425 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/vec_shuffle-20.ll Message-ID: <20120410193316.EEC172A6C065@llvm.org> Author: echristo Date: Tue Apr 10 14:33:16 2012 New Revision: 154425 URL: http://llvm.org/viewvc/llvm-project?rev=154425&view=rev Log: Temporarily revert this patch to see if it brings the buildbots back. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/X86/avx-shuffle.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 14:33:16 2012 @@ -5391,75 +5391,59 @@ SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); + LLVMContext *Context = DAG.getContext(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getSizeInBits(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); - if (!Subtarget->hasSSE41()) + // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. 
+ if (!Subtarget->hasAVX()) return SDValue(); if (MaskSize != InSize) return SDValue(); - int ISDNo = 0; - MVT OpTy; - - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v8i16: - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v8i16; - break; - case MVT::v4i32: - case MVT::v4f32: - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v4f32; - break; - case MVT::v2i64: - case MVT::v2f64: - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v2f64; - break; - case MVT::v8i32: - case MVT::v8f32: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v8f32; - break; - case MVT::v4i64: - case MVT::v4f64: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v4f64; - break; - case MVT::v16i16: - if (!Subtarget->hasAVX2()) - return SDValue(); - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v16i16; - break; - } - assert(ISDNo && "Invalid Op Number"); - - unsigned MaskVals = 0; + SmallVector MaskVals; + ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); + ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); for (int i = 0; i < MaskSize; ++i) { int EltIdx = SVOp->getMaskElt(i); if (EltIdx == i || EltIdx == -1) - MaskVals |= (1<getType()); + assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); + SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); + unsigned Alignment = cast(MaskIdx)->getAlignment(); + SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + + if (Subtarget->hasAVX()) { + switch (MaskTy.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v16i8: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v8i32: + case MVT::v4i64: + return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); + } + } + + return SDValue(); } // v8i16 shuffles - Prefer 
shuffles in the following order: @@ -11066,9 +11050,6 @@ case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; - case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; - case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; - case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Apr 10 14:33:16 2012 @@ -175,14 +175,9 @@ /// PSIGN - Copy integer sign. PSIGN, - /// BLENDV - Blend where the selector is an XMM. + /// BLEND family of opcodes BLENDV, - /// BLENDxx - Blend where the selector is an immediate. - BLENDPW, - BLENDPS, - BLENDPD, - /// HADD - Integer horizontal add. 
HADD, Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Apr 10 14:33:16 2012 @@ -126,8 +126,6 @@ SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, -SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -160,10 +158,6 @@ def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; - //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 10 14:33:16 2012 @@ -6735,22 +6735,12 @@ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - - def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), - (imm:$mask))), - (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; - def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), - (imm:$mask))), - (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; } let Predicates 
= [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; - def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), - (imm:$mask))), - (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6799,17 +6789,6 @@ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; - - def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), - (imm:$mask))), - (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), - (imm:$mask))), - (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; - def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), - (imm:$mask))), - (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; - } let Predicates = [HasAVX] in Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Tue Apr 10 14:33:16 2012 @@ -164,7 +164,7 @@ } ; CHECK: blend1 -; CHECK: vblendps +; CHECK: vblendvps ; CHECK: ret define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -172,7 +172,7 @@ } ; CHECK: blend2 -; CHECK: vblendps +; CHECK: vblendvps ; CHECK: ret define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -180,7 +180,7 @@ } ; CHECK: blend2a -; CHECK: vblendps +; CHECK: vblendvps ; CHECK: ret define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { %t = 
shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -188,7 +188,7 @@ } ; CHECK: blend3 -; CHECK-NOT: vblendps +; CHECK-NOT: vblendvps ; CHECK: ret define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -196,7 +196,7 @@ } ; CHECK: blend4 -; CHECK: vblendpd +; CHECK: vblendvpd ; CHECK: ret define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline { %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll?rev=154425&r1=154424&r2=154425&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Tue Apr 10 14:33:16 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 +; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: From mcrosier at apple.com Tue Apr 10 14:39:18 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 10 Apr 2012 19:39:18 -0000 Subject: [llvm-commits] [llvm] r154426 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120410193918.ABE0F2A6C065@llvm.org> Author: mcrosier Date: Tue Apr 10 14:39:18 2012 New Revision: 154426 URL: http://llvm.org/viewvc/llvm-project?rev=154426&view=rev Log: Revert r154396, which looks to be the real culprit behind the bot failures. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154426&r1=154425&r2=154426&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 14:39:18 2012 @@ -5396,6 +5396,7 @@ EVT InVT = V1.getValueType(); EVT EltVT = VT.getVectorElementType(); unsigned EltSize = EltVT.getSizeInBits(); + int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); From mcrosier at apple.com Tue Apr 10 14:41:32 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 10 Apr 2012 12:41:32 -0700 Subject: [llvm-commits] [llvm] r154426 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp In-Reply-To: <20120410193918.ABE0F2A6C065@llvm.org> References: <20120410193918.ABE0F2A6C065@llvm.org> Message-ID: Disregard, looks like Eric and I had the same idea... but he beat me to it.. On Apr 10, 2012, at 12:39 PM, Chad Rosier wrote: > Author: mcrosier > Date: Tue Apr 10 14:39:18 2012 > New Revision: 154426 > > URL: http://llvm.org/viewvc/llvm-project?rev=154426&view=rev > Log: > Revert r154396, which looks to be the real culprit behind the bot failures.
> > Modified: > llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154426&r1=154425&r2=154426&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 14:39:18 2012 > @@ -5396,6 +5396,7 @@ > EVT InVT = V1.getValueType(); > EVT EltVT = VT.getVectorElementType(); > unsigned EltSize = EltVT.getSizeInBits(); > + > int MaskSize = VT.getVectorNumElements(); > int InSize = InVT.getVectorNumElements(); > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From mcrosier at apple.com Tue Apr 10 14:42:08 2012 From: mcrosier at apple.com (Chad Rosier) Date: Tue, 10 Apr 2012 19:42:08 -0000 Subject: [llvm-commits] [llvm] r154427 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120410194208.14DBE2A6C065@llvm.org> Author: mcrosier Date: Tue Apr 10 14:42:07 2012 New Revision: 154427 URL: http://llvm.org/viewvc/llvm-project?rev=154427&view=rev Log: Whitespace. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154427&r1=154426&r2=154427&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 14:42:07 2012 @@ -5396,7 +5396,6 @@ EVT InVT = V1.getValueType(); EVT EltVT = VT.getVectorElementType(); unsigned EltSize = EltVT.getSizeInBits(); - int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); From gkistanova at gmail.com Tue Apr 10 14:43:18 2012 From: gkistanova at gmail.com (Galina Kistanova) Date: Tue, 10 Apr 2012 12:43:18 -0700 Subject: [llvm-commits] Buildmaster will be restarted today after 6 PM Pacific Message-ID: Hello everyone, Buildmaster will be restarted today after 6 PM Pacific to pick up a small fix for wrong blame lists. As Duncan noticed, buildbot sometimes generates a wrong blame list for build failures. Thanks Galina -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/351543ea/attachment.html From nadav.rotem at intel.com Tue Apr 10 14:47:09 2012 From: nadav.rotem at intel.com (Rotem, Nadav) Date: Tue, 10 Apr 2012 19:47:09 +0000 Subject: [llvm-commits] [llvm] r154426 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp In-Reply-To: References: <20120410193918.ABE0F2A6C065@llvm.org> Message-ID: <7DE70FDACDE4CD4887C4278C12A2E3050D3688@HASMSX104.ger.corp.intel.com> It looks like the problem is in the test ../llvm/test/CodeGen/X86/vec_shuffle-20.ll which does not specify the '-mcpu' flag. If it's okay with you, I will fix the test and re-apply.
-----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Chad Rosier Sent: Tuesday, April 10, 2012 22:42 To: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r154426 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Disregard, looks like Eric and I had the same idea... but he beat me to it.. On Apr 10, 2012, at 12:39 PM, Chad Rosier wrote: > Author: mcrosier > Date: Tue Apr 10 14:39:18 2012 > New Revision: 154426 > > URL: http://llvm.org/viewvc/llvm-project?rev=154426&view=rev > Log: > Revert r154396, which looks to be the real culprit behind the bot failures. > > Modified: > llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > > Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154426&r1=154425&r2=154426&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Apr 10 14:39:18 2012 > @@ -5396,6 +5396,7 @@ > EVT InVT = V1.getValueType(); > EVT EltVT = VT.getVectorElementType(); > unsigned EltSize = EltVT.getSizeInBits(); > + > int MaskSize = VT.getVectorNumElements(); > int InSize = InVT.getVectorNumElements(); > > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits --------------------------------------------------------------------- Intel Israel (74) Limited This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. 
If you are not the intended recipient, please contact the sender and delete all copies. From clattner at apple.com Tue Apr 10 14:54:24 2012 From: clattner at apple.com (Chris Lattner) Date: Tue, 10 Apr 2012 12:54:24 -0700 Subject: [llvm-commits] [PATCH] llvm-stress fixes In-Reply-To: <20120410114130.44e79e30@sapling2> References: <20120409162300.30c71019@sapling2> <4F83E972.1000707@free.fr> <20120410114130.44e79e30@sapling2> Message-ID: On Apr 10, 2012, at 9:41 AM, Hal Finkel wrote: >> >> That makes a lot of sense to me too. The only issue here is that >> someone needs to do it. There are PPC users of clang out there >> (Hal?) that may care about this not regressing. > > The current situation is not great, support for ppc128 in APFloat is > currently broken, and while fixing it is on my TODO list, I have not > really worked on it yet. Makes sense. One major advantage to moving ppc long double to the frontend is that APFloat won't need to support it anymore. -Chris From isanbard at gmail.com Tue Apr 10 15:12:16 2012 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 10 Apr 2012 20:12:16 -0000 Subject: [llvm-commits] [llvm] r154429 - in /llvm/trunk: include/llvm/Metadata.h include/llvm/Value.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Metadata.cpp lib/VMCore/Value.cpp Message-ID: <20120410201216.9CFD32A6C065@llvm.org> Author: void Date: Tue Apr 10 15:12:16 2012 New Revision: 154429 URL: http://llvm.org/viewvc/llvm-project?rev=154429&view=rev Log: The MDString class stored a StringRef to the string which was already in a StringMap. This was redundant and unnecessarily bloated the MDString class. Because the MDString class is a "Value" and will never have a "name", and because the Name field in the Value class is a pointer to a StringMap entry, we repurpose the Name field for an MDString. It stores the StringMap entry in the Name field, and uses the normal methods to get the string (name) back. 
PR12474 Modified: llvm/trunk/include/llvm/Metadata.h llvm/trunk/include/llvm/Value.h llvm/trunk/lib/VMCore/LLVMContextImpl.h llvm/trunk/lib/VMCore/Metadata.cpp llvm/trunk/lib/VMCore/Value.cpp Modified: llvm/trunk/include/llvm/Metadata.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=154429&r1=154428&r2=154429&view=diff ============================================================================== --- llvm/trunk/include/llvm/Metadata.h (original) +++ llvm/trunk/include/llvm/Metadata.h Tue Apr 10 15:12:16 2012 @@ -39,28 +39,24 @@ virtual void anchor(); MDString(const MDString &); // DO NOT IMPLEMENT - StringRef Str; - explicit MDString(LLVMContext &C, StringRef S); - + explicit MDString(LLVMContext &C); public: static MDString *get(LLVMContext &Context, StringRef Str); static MDString *get(LLVMContext &Context, const char *Str) { return get(Context, Str ? StringRef(Str) : StringRef()); } - StringRef getString() const { return Str; } + StringRef getString() const { return getName(); } - unsigned getLength() const { return (unsigned)Str.size(); } + unsigned getLength() const { return (unsigned)getName().size(); } typedef StringRef::iterator iterator; /// begin() - Pointer to the first byte of the string. - /// - iterator begin() const { return Str.begin(); } + iterator begin() const { return getName().begin(); } /// end() - Pointer to one byte past the end of the string. 
- /// - iterator end() const { return Str.end(); } + iterator end() const { return getName().end(); } /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MDString *) { return true; } Modified: llvm/trunk/include/llvm/Value.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Value.h?rev=154429&r1=154428&r2=154429&view=diff ============================================================================== --- llvm/trunk/include/llvm/Value.h (original) +++ llvm/trunk/include/llvm/Value.h Tue Apr 10 15:12:16 2012 @@ -107,9 +107,10 @@ /// All values hold a context through their type. LLVMContext &getContext() const; - // All values can potentially be named... - bool hasName() const { return Name != 0; } + // All values can potentially be named. + bool hasName() const { return Name != 0 && SubclassID != MDStringVal; } ValueName *getValueName() const { return Name; } + void setValueName(ValueName *VN) { Name = VN; } /// getName() - Return a constant reference to the value's name. This is cheap /// and guaranteed to return the same reference as long as the value is not Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=154429&r1=154428&r2=154429&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Tue Apr 10 15:12:16 2012 @@ -234,7 +234,7 @@ DenseMapAPFloatKeyInfo> FPMapTy; FPMapTy FPConstants; - StringMap MDStringCache; + StringMap MDStringCache; FoldingSet MDNodeSet; // MDNodes may be uniqued or not uniqued. 
When they're not uniqued, they Modified: llvm/trunk/lib/VMCore/Metadata.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=154429&r1=154428&r2=154429&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Metadata.cpp (original) +++ llvm/trunk/lib/VMCore/Metadata.cpp Tue Apr 10 15:12:16 2012 @@ -31,16 +31,17 @@ void MDString::anchor() { } -MDString::MDString(LLVMContext &C, StringRef S) - : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {} +MDString::MDString(LLVMContext &C) + : Value(Type::getMetadataTy(C), Value::MDStringVal) {} MDString *MDString::get(LLVMContext &Context, StringRef Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry &Entry = + StringMapEntry &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); - MDString *&S = Entry.getValue(); - if (!S) S = new MDString(Context, Entry.getKey()); - return S; + Value *&S = Entry.getValue(); + if (!S) S = new MDString(Context); + S->setValueName(&Entry); + return cast(S); } //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/VMCore/Value.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Value.cpp?rev=154429&r1=154428&r2=154429&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Value.cpp (original) +++ llvm/trunk/lib/VMCore/Value.cpp Tue Apr 10 15:12:16 2012 @@ -76,7 +76,7 @@ // If this value is named, destroy the name. This should not be in a symtab // at this point. - if (Name) + if (Name && SubclassID != MDStringVal) Name->Destroy(); // There should be no uses of this object anymore, remove it. @@ -170,6 +170,9 @@ } void Value::setName(const Twine &NewName) { + assert(SubclassID != MDStringVal && + "Cannot set the name of MDString with this method!"); + // Fast path for common IRBuilder case of setName("") when there is no name. 
if (NewName.isTriviallyEmpty() && !hasName()) return; @@ -228,6 +231,8 @@ /// takeName - transfer the name from V to this value, setting V's name to /// empty. It is an error to call V->takeName(V). void Value::takeName(Value *V) { + assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!"); + ValueSymbolTable *ST = 0; // If this value has a name, drop it. if (hasName()) { From baldrick at free.fr Tue Apr 10 15:35:27 2012 From: baldrick at free.fr (Duncan Sands) Date: Tue, 10 Apr 2012 20:35:27 -0000 Subject: [llvm-commits] [llvm] r154431 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/fdiv.ll Message-ID: <20120410203527.85EF42A6C065@llvm.org> Author: baldrick Date: Tue Apr 10 15:35:27 2012 New Revision: 154431 URL: http://llvm.org/viewvc/llvm-project?rev=154431&view=rev Log: Add a comment noting that the fdiv -> fmul conversion won't generate multiplication by a denormal, and some tests checking that. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/trunk/test/CodeGen/X86/fdiv.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154431&r1=154430&r2=154431&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Apr 10 15:35:27 2012 @@ -5769,9 +5769,9 @@ APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is not too horrible (eg not NaN) - // and the reciprocal is a legal fp imm. - if ((st == APFloat::opOK || st == APFloat::opInexact) && + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). 
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty (!LegalOperations || // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM // backend)... we should handle this gracefully after Legalize. Modified: llvm/trunk/test/CodeGen/X86/fdiv.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fdiv.ll?rev=154431&r1=154430&r2=154431&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/fdiv.ll (original) +++ llvm/trunk/test/CodeGen/X86/fdiv.ll Tue Apr 10 15:35:27 2012 @@ -23,3 +23,19 @@ %div = fdiv double %x, 0.0 ret double %div } + +define double @denormal1(double %x) { +; Don't generate multiplication by a denormal. +; CHECK: @denormal1 +; CHECK: divsd + %div = fdiv double %x, 0x7FD0000000000001 + ret double %div +} + +define double @denormal2(double %x) { +; Don't generate multiplication by a denormal. +; CHECK: @denormal +; CHECK: divsd + %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF + ret double %div +} From anton at korobeynikov.info Tue Apr 10 15:35:55 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 11 Apr 2012 00:35:55 +0400 Subject: [llvm-commits] [cfe-commits] [LLVMdev] [Patch?] Fix handling of ARM homogenous aggregates In-Reply-To: <201204101045.38068.Tim.Northover@arm.com> References: <3E94D039A2B82544B3E7D48F924B0B25E18AB88AED@base.imrc.kist.re.kr> <201204101045.38068.Tim.Northover@arm.com> Message-ID: Hi Tim > I'm not sure I follow this point. Is preserving the source language a bad > thing for some reason I'm missing? Certainly, if it affects optimisation it > would be. Let's consider one example: union { float foo[4]; int bar[3]; }; This is definitely not a HFA. However, such a union can be represented via several different things in LLVM IR: [4 x float], [4 x i32], [32 x i8] (all involving bitcasts to access one of the fields of a union). 
And here we have a problem: 4 x float can be thought of as an HFA at the IR level; however, it's certainly not one, since the HFA rules are worded using C-level constructs and not IR-level ones. So, my point is that IR is not expressive enough to capture all source information necessary to model the ABI properly. Do you have a good solution for this problem? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From david_dean at apple.com Tue Apr 10 15:50:35 2012 From: david_dean at apple.com (David Dean) Date: Tue, 10 Apr 2012 13:50:35 -0700 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: <20120331124242.93B692A6C065@llvm.org> References: <20120331124242.93B692A6C065@llvm.org> Message-ID: Chandler, we're seeing a 9.92% compile time regression in MultiSource/Applications/sqlite3/sqlite3 on ARMv7 -mthumb -O3. Can you please take a look? On 31 Mar 2012, at 5:42 AM, Chandler Carruth wrote: > Author: chandlerc > Date: Sat Mar 31 07:42:41 2012 > New Revision: 153812 > > URL: http://llvm.org/viewvc/llvm-project?rev=153812&view=rev > Log: > Initial commit for the rewrite of the inline cost analysis to operate > on a per-callsite walk of the called function's instructions, in > breadth-first order over the potentially reachable set of basic blocks. > > This is a major shift in how inline cost analysis works to improve the > accuracy and rationality of inlining decisions. A brief outline of the > algorithm this moves to: > > - Build a simplification mapping based on the callsite arguments to the > function arguments. > - Push the entry block onto a worklist of potentially-live basic blocks. > - Pop the first block off of the *front* of the worklist (for > breadth-first ordering) and walk its instructions using a custom > InstVisitor.
> - For each instruction's operands, re-map them based on the > simplification mappings available for the given callsite. > - Compute any simplification possible of the instruction after > re-mapping, and store that back into the simplification mapping. > - Compute any bonuses, costs, or other impacts of the instruction on the > cost metric. > - When the terminator is reached, replace any conditional value in the > terminator with any simplifications from the mapping we have, and add > any successors which are not proven to be dead from these > simplifications to the worklist. > - Pop the next block off of the front of the worklist, and repeat. > - As soon as the cost of inlining exceeds the threshold for the > callsite, stop analyzing the function in order to bound cost. > > The primary goal of this algorithm is to perfectly handle dead code > paths. We do not want any code in trivially dead code paths to impact > inlining decisions. The previous metric was *extremely* flawed here, and > would always subtract the average cost of two successors of > a conditional branch when it was proven to become an unconditional > branch at the callsite. There was no handling of wildly different costs > between the two successors, which would cause inlining when the path > actually taken was too large, and no inlining when the path actually > taken was trivially simple. There was also no handling of the code > *path*, only the immediate successors. These problems vanish completely > now. See the added regression tests for the shiny new features -- we > skip recursive function calls, SROA-killing instructions, and high cost > complex CFG structures when dead at the callsite being analyzed. > > Switching to this algorithm required refactoring the inline cost > interface to accept the actual threshold rather than simply returning > a single cost. The resulting interface is pretty bad, and I'm planning > to do lots of interface cleanup after this patch.
> > Several other refactorings fell out of this, but I've tried to minimize > them for this patch. =/ There is still more cleanup that can be done > here. Please point out anything that you see in review. > > I've worked really hard to try to mirror at least the spirit of all of > the previous heuristics in the new model. It's not clear that they are > all correct any more, but I wanted to minimize the change in this single > patch, it's already a bit ridiculous. One heuristic that is *not* yet > mirrored is to allow inlining of functions with a dynamic alloca *if* > the caller has a dynamic alloca. I will add this back, but I think the > most reasonable way requires changes to the inliner itself rather than > just the cost metric, and so I've deferred this for a subsequent patch. > The test case is XFAIL-ed until then. > > As mentioned in the review mail, this seems to make Clang run about 1% > to 2% faster in -O0, but makes its binary size grow by just under 4%. > I've looked into the 4% growth, and it can be fixed, but requires > changes to other parts of the inliner. 
> > Modified: > llvm/trunk/include/llvm/Analysis/CodeMetrics.h > llvm/trunk/include/llvm/Analysis/InlineCost.h > llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h > llvm/trunk/lib/Analysis/CodeMetrics.cpp > llvm/trunk/lib/Analysis/InlineCost.cpp > llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp > llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp > llvm/trunk/lib/Transforms/IPO/Inliner.cpp > llvm/trunk/test/Transforms/Inline/alloca-bonus.ll > llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll > llvm/trunk/test/Transforms/Inline/inline_constprop.ll > llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll > llvm/trunk/test/Transforms/Inline/ptr-diff.ll > > Modified: llvm/trunk/include/llvm/Analysis/CodeMetrics.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/CodeMetrics.h?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Analysis/CodeMetrics.h (original) > +++ llvm/trunk/include/llvm/Analysis/CodeMetrics.h Sat Mar 31 07:42:41 2012 > @@ -20,9 +20,13 @@ > namespace llvm { > class BasicBlock; > class Function; > + class Instruction; > class TargetData; > class Value; > > + /// \brief Check whether an instruction is likely to be "free" when lowered. > + bool isInstructionFree(const Instruction *I, const TargetData *TD = 0); > + > /// \brief Check whether a call will lower to something small. 
> /// > /// This tests checks whether calls to this function will lower to something > > Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Analysis/InlineCost.h (original) > +++ llvm/trunk/include/llvm/Analysis/InlineCost.h Sat Mar 31 07:42:41 2012 > @@ -16,6 +16,7 @@ > > #include "llvm/Function.h" > #include "llvm/ADT/DenseMap.h" > +#include "llvm/ADT/SmallPtrSet.h" > #include "llvm/ADT/ValueMap.h" > #include "llvm/Analysis/CodeMetrics.h" > #include > @@ -25,162 +26,105 @@ > namespace llvm { > > class CallSite; > - template > - class SmallPtrSet; > class TargetData; > > namespace InlineConstants { > // Various magic constants used to adjust heuristics. > const int InstrCost = 5; > - const int IndirectCallBonus = -100; > + const int IndirectCallThreshold = 100; > const int CallPenalty = 25; > const int LastCallToStaticBonus = -15000; > const int ColdccPenalty = 2000; > const int NoreturnPenalty = 10000; > } > > - /// InlineCost - Represent the cost of inlining a function. This > - /// supports special values for functions which should "always" or > - /// "never" be inlined. Otherwise, the cost represents a unitless > - /// amount; smaller values increase the likelihood of the function > - /// being inlined. > + /// \brief Represents the cost of inlining a function. > + /// > + /// This supports special values for functions which should "always" or > + /// "never" be inlined. Otherwise, the cost represents a unitless amount; > + /// smaller values increase the likelihood of the function being inlined. > + /// > + /// Objects of this type also provide the adjusted threshold for inlining > + /// based on the information available for a particular callsite. 
They can be > + /// directly tested to determine if inlining should occur given the cost and > + /// threshold for this cost metric. > class InlineCost { > - enum Kind { > - Value, > - Always, > - Never > + enum CostKind { > + CK_Variable, > + CK_Always, > + CK_Never > }; > > - // This is a do-it-yourself implementation of > - // int Cost : 30; > - // unsigned Type : 2; > - // We used to use bitfields, but they were sometimes miscompiled (PR3822). > - enum { TYPE_BITS = 2 }; > - enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS }; > - unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS; > + const int Cost : 30; // The inlining cost if neither always nor never. > + const unsigned Kind : 2; // The type of cost, one of CostKind above. > > - Kind getType() const { > - return Kind(TypedCost >> COST_BITS); > - } > + /// \brief The adjusted threshold against which this cost should be tested. > + const int Threshold; > > - int getCost() const { > - // Sign-extend the bottom COST_BITS bits. > - return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS; > + // Trivial constructor, interesting logic in the factory functions below. > + InlineCost(int Cost, CostKind Kind, int Threshold) > + : Cost(Cost), Kind(Kind), Threshold(Threshold) {} > + > + public: > + static InlineCost get(int Cost, int Threshold) { > + InlineCost Result(Cost, CK_Variable, Threshold); > + assert(Result.Cost == Cost && "Cost exceeds InlineCost precision"); > + return Result; > + } > + static InlineCost getAlways() { > + return InlineCost(0, CK_Always, 0); > + } > + static InlineCost getNever() { > + return InlineCost(0, CK_Never, 0); > } > > - InlineCost(int C, int T) { > - TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS); > - assert(getCost() == C && "Cost exceeds InlineCost precision"); > + /// \brief Test whether the inline cost is low enough for inlining. 
> + operator bool() const { > + if (isAlways()) return true; > + if (isNever()) return false; > + return Cost < Threshold; > } > - public: > - static InlineCost get(int Cost) { return InlineCost(Cost, Value); } > - static InlineCost getAlways() { return InlineCost(0, Always); } > - static InlineCost getNever() { return InlineCost(0, Never); } > - > - bool isVariable() const { return getType() == Value; } > - bool isAlways() const { return getType() == Always; } > - bool isNever() const { return getType() == Never; } > > - /// getValue() - Return a "variable" inline cost's amount. It is > + bool isVariable() const { return Kind == CK_Variable; } > + bool isAlways() const { return Kind == CK_Always; } > + bool isNever() const { return Kind == CK_Never; } > + > + /// getCost() - Return a "variable" inline cost's amount. It is > /// an error to call this on an "always" or "never" InlineCost. > - int getValue() const { > - assert(getType() == Value && "Invalid access of InlineCost"); > - return getCost(); > + int getCost() const { > + assert(Kind == CK_Variable && "Invalid access of InlineCost"); > + return Cost; > + } > + > + /// \brief Get the cost delta from the threshold for inlining. > + /// Only valid if the cost is of the variable kind. Returns a negative > + /// value if the cost is too high to inline. > + int getCostDelta() const { > + return Threshold - getCost(); > } > }; > > /// InlineCostAnalyzer - Cost analyzer used by inliner. > class InlineCostAnalyzer { > - struct ArgInfo { > - public: > - unsigned ConstantWeight; > - unsigned AllocaWeight; > - > - ArgInfo(unsigned CWeight, unsigned AWeight) > - : ConstantWeight(CWeight), AllocaWeight(AWeight) > - {} > - }; > - > - struct FunctionInfo { > - CodeMetrics Metrics; > - > - /// ArgumentWeights - Each formal argument of the function is inspected to > - /// see if it is used in any contexts where making it a constant or alloca > - /// would reduce the code size. 
If so, we add some value to the argument > - /// entry here. > - std::vector ArgumentWeights; > - > - /// PointerArgPairWeights - Weights to use when giving an inline bonus to > - /// a call site due to correlated pairs of pointers. > - DenseMap, unsigned> PointerArgPairWeights; > - > - /// countCodeReductionForConstant - Figure out an approximation for how > - /// many instructions will be constant folded if the specified value is > - /// constant. > - unsigned countCodeReductionForConstant(const CodeMetrics &Metrics, > - Value *V); > - > - /// countCodeReductionForAlloca - Figure out an approximation of how much > - /// smaller the function will be if it is inlined into a context where an > - /// argument becomes an alloca. > - unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics, > - Value *V); > - > - /// countCodeReductionForPointerPair - Count the bonus to apply to an > - /// inline call site where a pair of arguments are pointers and one > - /// argument is a constant offset from the other. The idea is to > - /// recognize a common C++ idiom where a begin and end iterator are > - /// actually pointers, and many operations on the pair of them will be > - /// constants if the function is called with arguments that have > - /// a constant offset. > - void countCodeReductionForPointerPair( > - const CodeMetrics &Metrics, > - DenseMap &PointerArgs, > - Value *V, unsigned ArgIdx); > - > - /// analyzeFunction - Add information about the specified function > - /// to the current structure. > - void analyzeFunction(Function *F, const TargetData *TD); > - > - /// NeverInline - Returns true if the function should never be > - /// inlined into any caller. > - bool NeverInline(); > - }; > - > - // The Function* for a function can be changed (by ArgumentPromotion); > - // the ValueMap will update itself when this happens. > - ValueMap CachedFunctionInfo; > - > // TargetData if available, or null. 
> const TargetData *TD; > > - int CountBonusForConstant(Value *V, Constant *C = NULL); > - int ConstantFunctionBonus(CallSite CS, Constant *C); > - int getInlineSize(CallSite CS, Function *Callee); > - int getInlineBonuses(CallSite CS, Function *Callee); > public: > InlineCostAnalyzer(): TD(0) {} > > void setTargetData(const TargetData *TData) { TD = TData; } > > - /// getInlineCost - The heuristic used to determine if we should inline the > - /// function call or not. > + /// \brief Get an InlineCost object representing the cost of inlining this > + /// callsite. > /// > - InlineCost getInlineCost(CallSite CS); > - /// getCalledFunction - The heuristic used to determine if we should inline > - /// the function call or not. The callee is explicitly specified, to allow > - /// you to calculate the cost of inlining a function via a pointer. The > - /// result assumes that the inlined version will always be used. You should > - /// weight it yourself in cases where this callee will not always be called. > - InlineCost getInlineCost(CallSite CS, Function *Callee); > - > - /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a > - /// higher threshold to determine if the function call should be inlined. > - float getInlineFudgeFactor(CallSite CS); > + /// Note that threshold is passed into this function. Only costs below the > + /// threshold are computed with any accuracy. The threshold can be used to > + /// bound the computation necessary to determine whether the cost is > + /// sufficiently low to warrant inlining. > + InlineCost getInlineCost(CallSite CS, int Threshold); > > /// resetCachedFunctionInfo - erase any cached cost info for this function. 
> void resetCachedCostInfo(Function* Caller) { > - CachedFunctionInfo[Caller] = FunctionInfo(); > } > > /// growCachedCostInfo - update the cached cost info for Caller after Callee > > Modified: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h (original) > +++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h Sat Mar 31 07:42:41 2012 > @@ -65,11 +65,6 @@ > /// > virtual InlineCost getInlineCost(CallSite CS) = 0; > > - // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a > - // higher threshold to determine if the function call should be inlined. > - /// > - virtual float getInlineFudgeFactor(CallSite CS) = 0; > - > /// resetCachedCostInfo - erase any cached cost data from the derived class. > /// If the derived class has no such data this can be empty. > /// > > Modified: llvm/trunk/lib/Analysis/CodeMetrics.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CodeMetrics.cpp?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/lib/Analysis/CodeMetrics.cpp (original) > +++ llvm/trunk/lib/Analysis/CodeMetrics.cpp Sat Mar 31 07:42:41 2012 > @@ -50,6 +50,52 @@ > return false; > } > > +bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { > + if (isa(I)) > + return true; > + > + // If a GEP has all constant indices, it will probably be folded with > + // a load/store. 
> + if (const GetElementPtrInst *GEP = dyn_cast(I)) > + return GEP->hasAllConstantIndices(); > + > + if (const IntrinsicInst *II = dyn_cast(I)) { > + switch (II->getIntrinsicID()) { > + default: > + return false; > + case Intrinsic::dbg_declare: > + case Intrinsic::dbg_value: > + case Intrinsic::invariant_start: > + case Intrinsic::invariant_end: > + case Intrinsic::lifetime_start: > + case Intrinsic::lifetime_end: > + case Intrinsic::objectsize: > + case Intrinsic::ptr_annotation: > + case Intrinsic::var_annotation: > + // These intrinsics don't count as size. > + return true; > + } > + } > + > + if (const CastInst *CI = dyn_cast(I)) { > + // Noop casts, including ptr <-> int, don't count. > + if (CI->isLosslessCast() || isa(CI) || isa(CI)) > + return true; > + // trunc to a native type is free (assuming the target has compare and > + // shift-right of the same width). > + if (TD && isa(CI) && > + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) > + return true; > + // Result of a cmp instruction is often extended (to be used by other > + // cmp instructions, logical or return instructions). These are usually > + // nop on most sane targets. > + if (isa(CI->getOperand(0))) > + return true; > + } > + > + return false; > +} > + > /// analyzeBasicBlock - Fill in the current structure with information gleaned > /// from the specified block. > void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, > @@ -58,27 +104,11 @@ > unsigned NumInstsBeforeThisBB = NumInsts; > for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); > II != E; ++II) { > - if (isa(II)) continue; // PHI nodes don't count. > + if (isInstructionFree(II, TD)) > + continue; > > // Special handling for calls. 
> if (isa(II) || isa(II)) { > - if (const IntrinsicInst *IntrinsicI = dyn_cast(II)) { > - switch (IntrinsicI->getIntrinsicID()) { > - default: break; > - case Intrinsic::dbg_declare: > - case Intrinsic::dbg_value: > - case Intrinsic::invariant_start: > - case Intrinsic::invariant_end: > - case Intrinsic::lifetime_start: > - case Intrinsic::lifetime_end: > - case Intrinsic::objectsize: > - case Intrinsic::ptr_annotation: > - case Intrinsic::var_annotation: > - // These intrinsics don't count as size. > - continue; > - } > - } > - > ImmutableCallSite CS(cast(II)); > > if (const Function *F = CS.getCalledFunction()) { > @@ -115,28 +145,6 @@ > if (isa(II) || II->getType()->isVectorTy()) > ++NumVectorInsts; > > - if (const CastInst *CI = dyn_cast(II)) { > - // Noop casts, including ptr <-> int, don't count. > - if (CI->isLosslessCast() || isa(CI) || > - isa(CI)) > - continue; > - // trunc to a native type is free (assuming the target has compare and > - // shift-right of the same width). > - if (isa(CI) && TD && > - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) > - continue; > - // Result of a cmp instruction is often extended (to be used by other > - // cmp instructions, logical or return instructions). These are usually > - // nop on most sane targets. > - if (isa(CI->getOperand(0))) > - continue; > - } else if (const GetElementPtrInst *GEPI = dyn_cast(II)){ > - // If a GEP has all constant indices, it will probably be folded with > - // a load/store. 
> - if (GEPI->hasAllConstantIndices()) > - continue; > - } > - > ++NumInsts; > } > > > Modified: llvm/trunk/lib/Analysis/InlineCost.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) > +++ llvm/trunk/lib/Analysis/InlineCost.cpp Sat Mar 31 07:42:41 2012 > @@ -11,659 +11,1014 @@ > // > //===----------------------------------------------------------------------===// > > +#define DEBUG_TYPE "inline-cost" > #include "llvm/Analysis/InlineCost.h" > +#include "llvm/Analysis/ConstantFolding.h" > +#include "llvm/Analysis/InstructionSimplify.h" > #include "llvm/Support/CallSite.h" > +#include "llvm/Support/Debug.h" > +#include "llvm/Support/InstVisitor.h" > +#include "llvm/Support/GetElementPtrTypeIterator.h" > +#include "llvm/Support/raw_ostream.h" > #include "llvm/CallingConv.h" > #include "llvm/IntrinsicInst.h" > +#include "llvm/Operator.h" > +#include "llvm/GlobalAlias.h" > #include "llvm/Target/TargetData.h" > +#include "llvm/ADT/STLExtras.h" > +#include "llvm/ADT/SetVector.h" > +#include "llvm/ADT/SmallVector.h" > #include "llvm/ADT/SmallPtrSet.h" > > using namespace llvm; > > -unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant( > - const CodeMetrics &Metrics, Value *V) { > - unsigned Reduction = 0; > - SmallVector Worklist; > - Worklist.push_back(V); > - do { > - Value *V = Worklist.pop_back_val(); > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ > - User *U = *UI; > - if (isa(U) || isa(U)) { > - // We will be able to eliminate all but one of the successors. 
> - const TerminatorInst &TI = cast(*U); > - const unsigned NumSucc = TI.getNumSuccessors(); > - unsigned Instrs = 0; > - for (unsigned I = 0; I != NumSucc; ++I) > - Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I)); > - // We don't know which blocks will be eliminated, so use the average size. > - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; > - continue; > +namespace { > + > +class CallAnalyzer : public InstVisitor { > + typedef InstVisitor Base; > + friend class InstVisitor; > + > + // TargetData if available, or null. > + const TargetData *const TD; > + > + // The called function. > + Function &F; > + > + int Threshold; > + int Cost; > + const bool AlwaysInline; > + > + bool IsRecursive; > + bool ExposesReturnsTwice; > + bool HasDynamicAlloca; > + unsigned NumInstructions, NumVectorInstructions; > + int FiftyPercentVectorBonus, TenPercentVectorBonus; > + int VectorBonus; > + > + // While we walk the potentially-inlined instructions, we build up and > + // maintain a mapping of simplified values specific to this callsite. The > + // idea is to propagate any special information we have about arguments to > + // this call through the inlinable section of the function, and account for > + // likely simplifications post-inlining. The most important aspect we track > + // is CFG altering simplifications -- when we prove a basic block dead, that > + // can cause dramatic shifts in the cost of inlining a function. > + DenseMap SimplifiedValues; > + > + // Keep track of the values which map back (through function arguments) to > + // allocas on the caller stack which could be simplified through SROA. > + DenseMap SROAArgValues; > + > + // The mapping of caller Alloca values to their accumulated cost savings. If > + // we have to disable SROA for one of the allocas, this tells us how much > + // cost must be added. > + DenseMap SROAArgCosts; > + > + // Keep track of values which map to a pointer base and constant offset. 
> + DenseMap > ConstantOffsetPtrs; > + > + // Custom simplification helper routines. > + bool isAllocaDerivedArg(Value *V); > + bool lookupSROAArgAndCost(Value *V, Value *&Arg, > + DenseMap::iterator &CostIt); > + void disableSROA(DenseMap::iterator CostIt); > + void disableSROA(Value *V); > + void accumulateSROACost(DenseMap::iterator CostIt, > + int InstructionCost); > + bool handleSROACandidate(bool IsSROAValid, > + DenseMap::iterator CostIt, > + int InstructionCost); > + bool isGEPOffsetConstant(GetElementPtrInst &GEP); > + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); > + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); > + > + // Custom analysis routines. > + bool analyzeBlock(BasicBlock *BB); > + > + // Disable several entry points to the visitor so we don't accidentally use > + // them by declaring but not defining them here. > + void visit(Module *); void visit(Module &); > + void visit(Function *); void visit(Function &); > + void visit(BasicBlock *); void visit(BasicBlock &); > + > + // Provide base case for our instruction visit. > + bool visitInstruction(Instruction &I); > + > + // Our visit overrides. 
> + bool visitAlloca(AllocaInst &I); > + bool visitPHI(PHINode &I); > + bool visitGetElementPtr(GetElementPtrInst &I); > + bool visitBitCast(BitCastInst &I); > + bool visitPtrToInt(PtrToIntInst &I); > + bool visitIntToPtr(IntToPtrInst &I); > + bool visitCastInst(CastInst &I); > + bool visitUnaryInstruction(UnaryInstruction &I); > + bool visitICmp(ICmpInst &I); > + bool visitSub(BinaryOperator &I); > + bool visitBinaryOperator(BinaryOperator &I); > + bool visitLoad(LoadInst &I); > + bool visitStore(StoreInst &I); > + bool visitCallSite(CallSite CS); > + > +public: > + CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) > + : TD(TD), F(Callee), Threshold(Threshold), Cost(0), > + AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), > + IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), > + NumInstructions(0), NumVectorInstructions(0), > + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), > + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), > + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), > + NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { > + } > + > + bool analyzeCall(CallSite CS); > + > + int getThreshold() { return Threshold; } > + int getCost() { return Cost; } > + > + // Keep a bunch of stats about the cost savings found so we can print them > + // out when debugging. > + unsigned NumConstantArgs; > + unsigned NumConstantOffsetPtrArgs; > + unsigned NumAllocaArgs; > + unsigned NumConstantPtrCmps; > + unsigned NumConstantPtrDiffs; > + unsigned NumInstructionsSimplified; > + unsigned SROACostSavings; > + unsigned SROACostSavingsLost; > + > + void dump(); > +}; > + > +} // namespace > + > +/// \brief Test whether the given value is an Alloca-derived function argument. > +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { > + return SROAArgValues.count(V); > +} > + > +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. 
> +/// Returns false if V does not map to a SROA-candidate. > +bool CallAnalyzer::lookupSROAArgAndCost( > + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { > + if (SROAArgValues.empty() || SROAArgCosts.empty()) > + return false; > + > + DenseMap::iterator ArgIt = SROAArgValues.find(V); > + if (ArgIt == SROAArgValues.end()) > + return false; > + > + Arg = ArgIt->second; > + CostIt = SROAArgCosts.find(Arg); > + return CostIt != SROAArgCosts.end(); > +} > + > +/// \brief Disable SROA for the candidate marked by this cost iterator. > +/// > +/// This marks the candidate as no longer viable for SROA, and adds the cost > +/// savings associated with it back into the inline cost measurement. > +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { > + // If we're no longer able to perform SROA we need to undo its cost savings > + // and prevent subsequent analysis. > + Cost += CostIt->second; > + SROACostSavings -= CostIt->second; > + SROACostSavingsLost += CostIt->second; > + SROAArgCosts.erase(CostIt); > +} > + > +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. > +void CallAnalyzer::disableSROA(Value *V) { > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) > + disableSROA(CostIt); > +} > + > +/// \brief Accumulate the given cost for a particular SROA candidate. > +void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, > + int InstructionCost) { > + CostIt->second += InstructionCost; > + SROACostSavings += InstructionCost; > +} > + > +/// \brief Helper for the common pattern of handling a SROA candidate. > +/// Either accumulates the cost savings if the SROA remains valid, or disables > +/// SROA for the candidate. 
> +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, > + DenseMap::iterator CostIt, > + int InstructionCost) { > + if (IsSROAValid) { > + accumulateSROACost(CostIt, InstructionCost); > + return true; > + } > + > + disableSROA(CostIt); > + return false; > +} > + > +/// \brief Check whether a GEP's indices are all constant. > +/// > +/// Respects any simplified values known during the analysis of this callsite. > +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { > + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) > + if (!isa(*I) && !SimplifiedValues.lookup(*I)) > + return false; > + > + return true; > +} > + > +/// \brief Accumulate a constant GEP offset into an APInt if possible. > +/// > +/// Returns false if unable to compute the offset for any reason. Respects any > +/// simplified values known during the analysis of this callsite. > +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { > + if (!TD) > + return false; > + > + unsigned IntPtrWidth = TD->getPointerSizeInBits(); > + assert(IntPtrWidth == Offset.getBitWidth()); > + > + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); > + GTI != GTE; ++GTI) { > + ConstantInt *OpC = dyn_cast(GTI.getOperand()); > + if (!OpC) > + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) > + OpC = dyn_cast(SimpleOp); > + if (!OpC) > + return false; > + if (OpC->isZero()) continue; > + > + // Handle a struct index, which adds its field offset to the pointer. 
> + if (StructType *STy = dyn_cast(*GTI)) { > + unsigned ElementIdx = OpC->getZExtValue(); > + const StructLayout *SL = TD->getStructLayout(STy); > + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); > + continue; > + } > + > + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); > + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; > + } > + return true; > +} > + > +bool CallAnalyzer::visitAlloca(AllocaInst &I) { > + // FIXME: Check whether inlining will turn a dynamic alloca into a static > + // alloca, and handle that case. > + > + // We will happily inline static alloca instructions or dynamic alloca > + // instructions in always-inline situations. > + if (AlwaysInline || I.isStaticAlloca()) > + return Base::visitAlloca(I); > + > + // FIXME: This is overly conservative. Dynamic allocas are inefficient for > + // a variety of reasons, and so we would like to not inline them into > + // functions which don't currently have a dynamic alloca. This simply > + // disables inlining altogether in the presence of a dynamic alloca. > + HasDynamicAlloca = true; > + return false; > +} > + > +bool CallAnalyzer::visitPHI(PHINode &I) { > + // FIXME: We should potentially be tracking values through phi nodes, > + // especially when they collapse to a single value due to deleted CFG edges > + // during inlining. > + > + // FIXME: We need to propagate SROA *disabling* through phi nodes, even > + // though we don't want to propagate its bonuses. The idea is to disable > + // SROA if it *might* be used in an inappropriate manner. > + > + // Phi nodes are always zero-cost. > + return true; > +} > + > +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { > + Value *SROAArg; > + DenseMap::iterator CostIt; > + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), > + SROAArg, CostIt); > + > + // Try to fold GEPs of constant-offset call site argument pointers. This > + // requires target data and inbounds GEPs. 
> + if (TD && I.isInBounds()) { > + // Check if we have a base + offset for the pointer. > + Value *Ptr = I.getPointerOperand(); > + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); > + if (BaseAndOffset.first) { > + // Check if the offset of this GEP is constant, and if so accumulate it > + // into Offset. > + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { > + // Non-constant GEPs aren't folded, and disable SROA. > + if (SROACandidate) > + disableSROA(CostIt); > + return false; > } > > - // Figure out if this instruction will be removed due to simple constant > - // propagation. > - Instruction &Inst = cast(*U); > - > - // We can't constant propagate instructions which have effects or > - // read memory. > - // > - // FIXME: It would be nice to capture the fact that a load from a > - // pointer-to-constant-global is actually a *really* good thing to zap. > - // Unfortunately, we don't know the pointer that may get propagated here, > - // so we can't make this decision. > - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || > - isa(Inst)) > - continue; > + // Add the result as a new mapping to Base + Offset. > + ConstantOffsetPtrs[&I] = BaseAndOffset; > > - bool AllOperandsConstant = true; > - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) > - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { > - AllOperandsConstant = false; > - break; > - } > - if (!AllOperandsConstant) > - continue; > + // Also handle SROA candidates here, we already know that the GEP is > + // all-constant indexed. > + if (SROACandidate) > + SROAArgValues[&I] = SROAArg; > > - // We will get to remove this instruction... > - Reduction += InlineConstants::InstrCost; > + return true; > + } > + } > + > + if (isGEPOffsetConstant(I)) { > + if (SROACandidate) > + SROAArgValues[&I] = SROAArg; > + > + // Constant GEPs are modeled as free. > + return true; > + } > + > + // Variable GEPs will require math and will disable SROA. 
> + if (SROACandidate) > + disableSROA(CostIt); > + return false; > +} > > - // And any other instructions that use it which become constants > - // themselves. > - Worklist.push_back(&Inst); > +bool CallAnalyzer::visitBitCast(BitCastInst &I) { > + // Propagate constants through bitcasts. > + if (Constant *COp = dyn_cast(I.getOperand(0))) > + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { > + SimplifiedValues[&I] = C; > + return true; > + } > + > + // Track base/offsets through casts > + std::pair BaseAndOffset > + = ConstantOffsetPtrs.lookup(I.getOperand(0)); > + // Casts don't change the offset, just wrap it up. > + if (BaseAndOffset.first) > + ConstantOffsetPtrs[&I] = BaseAndOffset; > + > + // Also look for SROA candidates here. > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) > + SROAArgValues[&I] = SROAArg; > + > + // Bitcasts are always zero cost. > + return true; > +} > + > +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { > + // Propagate constants through ptrtoint. > + if (Constant *COp = dyn_cast(I.getOperand(0))) > + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { > + SimplifiedValues[&I] = C; > + return true; > } > - } while (!Worklist.empty()); > - return Reduction; > + > + // Track base/offset pairs when converted to a plain integer provided the > + // integer is large enough to represent the pointer. > + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); > + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { > + std::pair BaseAndOffset > + = ConstantOffsetPtrs.lookup(I.getOperand(0)); > + if (BaseAndOffset.first) > + ConstantOffsetPtrs[&I] = BaseAndOffset; > + } > + > + // This is really weird. Technically, ptrtoint will disable SROA. However, > + // unless that ptrtoint is *used* somewhere in the live basic blocks after > + // inlining, it will be nuked, and SROA should proceed. 
All of the uses which > + // would block SROA would also block SROA if applied directly to a pointer, > + // and so we can just add the integer in here. The only places where SROA is > + // preserved either cannot fire on an integer, or won't in-and-of themselves > + // disable SROA (ext) w/o some later use that we would see and disable. > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) > + SROAArgValues[&I] = SROAArg; > + > + // A ptrtoint cast is free so long as the result is large enough to store the > + // pointer, and a legal integer type. > + return TD && TD->isLegalInteger(IntegerSize) && > + IntegerSize >= TD->getPointerSizeInBits(); > +} > + > +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { > + // Propagate constants through inttoptr. > + if (Constant *COp = dyn_cast(I.getOperand(0))) > + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { > + SimplifiedValues[&I] = C; > + return true; > + } > + > + // Track base/offset pairs when round-tripped through a pointer without > + // modifications provided the integer is not too large. > + Value *Op = I.getOperand(0); > + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); > + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { > + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); > + if (BaseAndOffset.first) > + ConstantOffsetPtrs[&I] = BaseAndOffset; > + } > + > + // "Propagate" SROA here in the same manner as we do for ptrtoint above. > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) > + SROAArgValues[&I] = SROAArg; > + > + // An inttoptr cast is free so long as the input is a legal integer type > + // which doesn't contain values outside the range of a pointer.
> + return TD && TD->isLegalInteger(IntegerSize) && > + IntegerSize <= TD->getPointerSizeInBits(); > +} > + > +bool CallAnalyzer::visitCastInst(CastInst &I) { > + // Propagate constants through casts. > + if (Constant *COp = dyn_cast(I.getOperand(0))) > + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { > + SimplifiedValues[&I] = C; > + return true; > + } > + > + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. > + disableSROA(I.getOperand(0)); > + > + // No-op casts don't have any cost. > + if (I.isLosslessCast()) > + return true; > + > + // trunc to a native type is free (assuming the target has compare and > + // shift-right of the same width). > + if (TD && isa(I) && > + TD->isLegalInteger(TD->getTypeSizeInBits(I.getType()))) > + return true; > + > + // Result of a cmp instruction is often extended (to be used by other > + // cmp instructions, logical or return instructions). These are usually > + // no-ops on most sane targets. > + if (isa(I.getOperand(0))) > + return true; > + > + // Assume the rest of the casts require work. > + return false; > } > > -static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics, > - ICmpInst *ICI) { > - unsigned Reduction = 0; > +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { > + Value *Operand = I.getOperand(0); > + Constant *Ops[1] = { dyn_cast(Operand) }; > + if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) > + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), > + Ops, TD)) { > + SimplifiedValues[&I] = C; > + return true; > + } > > - // Bail if this is comparing against a non-constant; there is nothing we can > - // do there. > - if (!isa(ICI->getOperand(1))) > - return Reduction; > + // Disable any SROA on the argument to arbitrary unary operators. > + disableSROA(Operand); > > - // An icmp pred (alloca, C) becomes true if the predicate is true when > - // equal and false otherwise.
> - bool Result = ICI->isTrueWhenEqual(); > + return false; > +} > > - SmallVector Worklist; > - Worklist.push_back(ICI); > - do { > - Instruction *U = Worklist.pop_back_val(); > - Reduction += InlineConstants::InstrCost; > - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); > - UI != UE; ++UI) { > - Instruction *I = dyn_cast(*UI); > - if (!I || I->mayHaveSideEffects()) continue; > - if (I->getNumOperands() == 1) > - Worklist.push_back(I); > - if (BinaryOperator *BO = dyn_cast(I)) { > - // If BO produces the same value as U, then the other operand is > - // irrelevant and we can put it into the Worklist to continue > - // deleting dead instructions. If BO produces the same value as the > - // other operand, we can delete BO but that's it. > - if (Result == true) { > - if (BO->getOpcode() == Instruction::Or) > - Worklist.push_back(I); > - if (BO->getOpcode() == Instruction::And) > - Reduction += InlineConstants::InstrCost; > - } else { > - if (BO->getOpcode() == Instruction::Or || > - BO->getOpcode() == Instruction::Xor) > - Reduction += InlineConstants::InstrCost; > - if (BO->getOpcode() == Instruction::And) > - Worklist.push_back(I); > - } > +bool CallAnalyzer::visitICmp(ICmpInst &I) { > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > + // First try to handle simplified comparisons. > + if (!isa(LHS)) > + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) > + LHS = SimpleLHS; > + if (!isa(RHS)) > + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) > + RHS = SimpleRHS; > + if (Constant *CLHS = dyn_cast(LHS)) > + if (Constant *CRHS = dyn_cast(RHS)) > + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { > + SimplifiedValues[&I] = C; > + return true; > } > - if (BranchInst *BI = dyn_cast(I)) { > - BasicBlock *BB = BI->getSuccessor(Result ? 
0 : 1); > - if (BB->getSinglePredecessor()) > - Reduction > - += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB); > + > + // Otherwise look for a comparison between constant offset pointers with > + // a common base. > + Value *LHSBase, *RHSBase; > + APInt LHSOffset, RHSOffset; > + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); > + if (LHSBase) { > + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); > + if (RHSBase && LHSBase == RHSBase) { > + // We have common bases, fold the icmp to a constant based on the > + // offsets. > + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); > + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); > + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { > + SimplifiedValues[&I] = C; > + ++NumConstantPtrCmps; > + return true; > } > } > - } while (!Worklist.empty()); > + } > > - return Reduction; > -} > + // If the comparison is an equality comparison with null, we can simplify it > + // for any alloca-derived argument. > + if (I.isEquality() && isa(I.getOperand(1))) > + if (isAllocaDerivedArg(I.getOperand(0))) { > + // We can actually predict the result of comparisons between an > + // alloca-derived value and null. Note that this fires regardless of > + // SROA firing. > + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; > + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) > + : ConstantInt::getFalse(I.getType()); > + return true; > + } > > -/// \brief Compute the reduction possible for a given instruction if we are able > -/// to SROA an alloca. > -/// > -/// The reduction for this instruction is added to the SROAReduction output > -/// parameter. Returns false if this instruction is expected to defeat SROA in > -/// general. 
> -static bool countCodeReductionForSROAInst(Instruction *I, > - SmallVectorImpl &Worklist, > - unsigned &SROAReduction) { > - if (LoadInst *LI = dyn_cast(I)) { > - if (!LI->isSimple()) > - return false; > - SROAReduction += InlineConstants::InstrCost; > - return true; > + // Finally check for SROA candidates in comparisons. > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > + if (isa(I.getOperand(1))) { > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > + return true; > + } > + > + disableSROA(CostIt); > } > > - if (StoreInst *SI = dyn_cast(I)) { > - if (!SI->isSimple()) > - return false; > - SROAReduction += InlineConstants::InstrCost; > - return true; > + return false; > +} > + > +bool CallAnalyzer::visitSub(BinaryOperator &I) { > + // Try to handle a special case: we can fold computing the difference of two > + // constant-related pointers. > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > + Value *LHSBase, *RHSBase; > + APInt LHSOffset, RHSOffset; > + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); > + if (LHSBase) { > + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); > + if (RHSBase && LHSBase == RHSBase) { > + // We have common bases, fold the subtract to a constant based on the > + // offsets. > + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); > + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); > + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { > + SimplifiedValues[&I] = C; > + ++NumConstantPtrDiffs; > + return true; > + } > + } > } > > - if (GetElementPtrInst *GEP = dyn_cast(I)) { > - // If the GEP has variable indices, we won't be able to do much with it. > - if (!GEP->hasAllConstantIndices()) > - return false; > - // A non-zero GEP will likely become a mask operation after SROA. 
> - if (GEP->hasAllZeroIndices()) > - SROAReduction += InlineConstants::InstrCost; > - Worklist.push_back(GEP); > + // Otherwise, fall back to the generic logic for simplifying and handling > + // instructions. > + return Base::visitSub(I); > +} > + > +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > + if (!isa(LHS)) > + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) > + LHS = SimpleLHS; > + if (!isa(RHS)) > + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) > + RHS = SimpleRHS; > + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); > + if (Constant *C = dyn_cast_or_null(SimpleV)) { > + SimplifiedValues[&I] = C; > return true; > } > > - if (BitCastInst *BCI = dyn_cast(I)) { > - // Track pointer through bitcasts. > - Worklist.push_back(BCI); > - SROAReduction += InlineConstants::InstrCost; > - return true; > + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. > + disableSROA(LHS); > + disableSROA(RHS); > + > + return false; > +} > + > +bool CallAnalyzer::visitLoad(LoadInst &I) { > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > + if (I.isSimple()) { > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > + return true; > + } > + > + disableSROA(CostIt); > } > > - // We just look for non-constant operands to ICmp instructions as those will > - // defeat SROA. The actual reduction for these happens even without SROA. > - if (ICmpInst *ICI = dyn_cast(I)) > - return isa(ICI->getOperand(1)); > - > - if (SelectInst *SI = dyn_cast(I)) { > - // SROA can handle a select of alloca iff all uses of the alloca are > - // loads, and dereferenceable. We assume it's dereferenceable since > - // we're told the input is an alloca. 
> - for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); > - UI != UE; ++UI) { > - LoadInst *LI = dyn_cast(*UI); > - if (LI == 0 || !LI->isSimple()) > - return false; > + return false; > +} > + > +bool CallAnalyzer::visitStore(StoreInst &I) { > + Value *SROAArg; > + DenseMap::iterator CostIt; > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > + if (I.isSimple()) { > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > + return true; > } > - // We don't know whether we'll be deleting the rest of the chain of > - // instructions from the SelectInst on, because we don't know whether > - // the other side of the select is also an alloca or not. > - return true; > + > + disableSROA(CostIt); > + } > + > + return false; > +} > + > +bool CallAnalyzer::visitCallSite(CallSite CS) { > + if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && > + !F.hasFnAttr(Attribute::ReturnsTwice)) { > + // This aborts the entire analysis. > + ExposesReturnsTwice = true; > + return false; > } > > - if (IntrinsicInst *II = dyn_cast(I)) { > + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { > switch (II->getIntrinsicID()) { > default: > - return false; > + return Base::visitCallSite(CS); > + > + case Intrinsic::dbg_declare: > + case Intrinsic::dbg_value: > + case Intrinsic::invariant_start: > + case Intrinsic::invariant_end: > + case Intrinsic::lifetime_start: > + case Intrinsic::lifetime_end: > case Intrinsic::memset: > case Intrinsic::memcpy: > case Intrinsic::memmove: > - case Intrinsic::lifetime_start: > - case Intrinsic::lifetime_end: > - // SROA can usually chew through these intrinsics. > - SROAReduction += InlineConstants::InstrCost; > + case Intrinsic::objectsize: > + case Intrinsic::ptr_annotation: > + case Intrinsic::var_annotation: > + // SROA can usually chew through these intrinsics and they have no cost > + // so don't pay the price of analyzing them in detail. 
> return true; > } > } > > - // If there is some other strange instruction, we're not going to be > - // able to do much if we inline this. > + if (Function *F = CS.getCalledFunction()) { > + if (F == CS.getInstruction()->getParent()->getParent()) { > + // This flag will fully abort the analysis, so don't bother with anything > + // else. > + IsRecursive = true; > + return false; > + } > + > + if (!callIsSmall(F)) { > + // We account for the average 1 instruction per call argument setup > + // here. > + Cost += CS.arg_size() * InlineConstants::InstrCost; > + > + // Everything other than inline ASM will also have a significant cost > + // merely from making the call. > + if (!isa(CS.getCalledValue())) > + Cost += InlineConstants::CallPenalty; > + } > + > + return Base::visitCallSite(CS); > + } > + > + // Otherwise we're in a very special case -- an indirect function call. See > + // if we can be particularly clever about this. > + Value *Callee = CS.getCalledValue(); > + > + // First, pay the price of the argument setup. We account for the average > + // 1 instruction per call argument setup here. > + Cost += CS.arg_size() * InlineConstants::InstrCost; > + > + // Next, check if this happens to be an indirect function call to a known > + // function in this inline context. If not, we've done all we can. > + Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); > + if (!F) > + return Base::visitCallSite(CS); > + > + // If we have a constant that we are calling as a function, we can peer > + // through it and see the function target. This happens not infrequently > + // during devirtualization and so we want to give it a hefty bonus for > + // inlining, but cap that bonus in the event that inlining wouldn't pan > + // out. Pretend to inline the function, with a custom threshold. > + CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); > + if (CA.analyzeCall(CS)) { > + // We were able to inline the indirect call! 
Subtract the cost from the > + // bonus we want to apply, but don't go below zero. > + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); > + } > + > + return Base::visitCallSite(CS); > +} > + > +bool CallAnalyzer::visitInstruction(Instruction &I) { > + // We found something we don't understand or can't handle. Mark any SROA-able > + // values in the operand list as no longer viable. > + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) > + disableSROA(*OI); > + > return false; > } > > -unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca( > - const CodeMetrics &Metrics, Value *V) { > - if (!V->getType()->isPointerTy()) return 0; // Not a pointer > - unsigned Reduction = 0; > - unsigned SROAReduction = 0; > - bool CanSROAAlloca = true; > > - SmallVector Worklist; > - Worklist.push_back(V); > - do { > - Value *V = Worklist.pop_back_val(); > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > - UI != E; ++UI){ > - Instruction *I = cast(*UI); > +/// \brief Analyze a basic block for its contribution to the inline cost. > +/// > +/// This method walks the analyzer over every instruction in the given basic > +/// block and accounts for their cost during inlining at this callsite. It > +/// aborts early if the threshold has been exceeded or an impossible to inline > +/// construct has been detected. It returns false if inlining is no longer > +/// viable, and true if inlining remains viable. > +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { > + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); > + I != E; ++I) { > + ++NumInstructions; > + if (isa(I) || I->getType()->isVectorTy()) > + ++NumVectorInstructions; > + > + // If the instruction simplified to a constant, there is no cost to this > + // instruction. Visit the instructions using our InstVisitor to account for > + // all of the per-instruction logic. 
The visit tree returns true if we > + // consumed the instruction in any way, and false if the instruction's base > + // cost should count against inlining. > + if (Base::visit(I)) > + ++NumInstructionsSimplified; > + else > + Cost += InlineConstants::InstrCost; > > - if (ICmpInst *ICI = dyn_cast(I)) > - Reduction += countCodeReductionForAllocaICmp(Metrics, ICI); > + // If visiting this instruction detected an uninlinable pattern, abort. > + if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) > + return false; > > - if (CanSROAAlloca) > - CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist, > - SROAReduction); > - } > - } while (!Worklist.empty()); > + if (NumVectorInstructions > NumInstructions/2) > + VectorBonus = FiftyPercentVectorBonus; > + else if (NumVectorInstructions > NumInstructions/10) > + VectorBonus = TenPercentVectorBonus; > + else > + VectorBonus = 0; > + > + // Check if we've passed the threshold so we don't spin in huge basic > + // blocks that will never inline. > + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) > + return false; > + } > > - return Reduction + (CanSROAAlloca ? SROAReduction : 0); > + return true; > } > > -void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair( > - const CodeMetrics &Metrics, DenseMap &PointerArgs, > - Value *V, unsigned ArgIdx) { > - SmallVector Worklist; > - Worklist.push_back(V); > +/// \brief Compute the base pointer and cumulative constant offsets for V. > +/// > +/// This strips all constant offsets off of V, leaving it the base pointer, and > +/// accumulates the total constant offset applied in the returned constant. It > +/// returns 0 if V is not a pointer, and returns the constant '0' if there are > +/// no constant offsets applied.
> +ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { > + if (!TD || !V->getType()->isPointerTy()) > + return 0; > + > + unsigned IntPtrWidth = TD->getPointerSizeInBits(); > + APInt Offset = APInt::getNullValue(IntPtrWidth); > + > + // Even though we don't look through PHI nodes, we could be called on an > + // instruction in an unreachable block, which may be on a cycle. > + SmallPtrSet Visited; > + Visited.insert(V); > do { > - Value *V = Worklist.pop_back_val(); > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > - UI != E; ++UI){ > - Instruction *I = cast(*UI); > - > - if (GetElementPtrInst *GEP = dyn_cast(I)) { > - // If the GEP has variable indices, we won't be able to do much with it. > - if (!GEP->hasAllConstantIndices()) > - continue; > - // Unless the GEP is in-bounds, some comparisons will be non-constant. > - // Fortunately, the real-world cases where this occurs uses in-bounds > - // GEPs, and so we restrict the optimization to them here. > - if (!GEP->isInBounds()) > - continue; > + if (GEPOperator *GEP = dyn_cast(V)) { > + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) > + return 0; > + V = GEP->getPointerOperand(); > + } else if (Operator::getOpcode(V) == Instruction::BitCast) { > + V = cast(V)->getOperand(0); > + } else if (GlobalAlias *GA = dyn_cast(V)) { > + if (GA->mayBeOverridden()) > + break; > + V = GA->getAliasee(); > + } else { > + break; > + } > + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); > + } while (Visited.insert(V)); > > - // Constant indices just change the constant offset. Add the resulting > - // value both to our worklist for this argument, and to the set of > - // viable paired values with future arguments. > - PointerArgs[GEP] = ArgIdx; > - Worklist.push_back(GEP); > - continue; > - } > + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); > + return cast(ConstantInt::get(IntPtrTy, Offset)); > +} > > - // Track pointer through casts. 
Even when the result is not a pointer, it > - // remains a constant relative to constants derived from other constant > - // pointers. > - if (CastInst *CI = dyn_cast(I)) { > - PointerArgs[CI] = ArgIdx; > - Worklist.push_back(CI); > - continue; > - } > +/// \brief Analyze a call site for potential inlining. > +/// > +/// Returns true if inlining this call is viable, and false if it is not > +/// viable. It computes the cost and adjusts the threshold based on numerous > +/// factors and heuristics. If this method returns false but the computed cost > +/// is below the computed threshold, then inlining was forcibly disabled by > +/// some artifact of the routine. > +bool CallAnalyzer::analyzeCall(CallSite CS) { > + // Track whether the post-inlining function would have more than one basic > + // block. A single basic block is often intended for inlining. Balloon the > + // threshold by 50% until we pass the single-BB phase. > + bool SingleBB = true; > + int SingleBBBonus = Threshold / 2; > + Threshold += SingleBBBonus; > + > + // Unless we are always-inlining, perform some tweaks to the cost and > + // threshold based on the direct callsite information. > + if (!AlwaysInline) { > + // We want to more aggressively inline vector-dense kernels, so up the > + // threshold, and we'll lower it if the % of vector instructions gets too > + // low. > + assert(NumInstructions == 0); > + assert(NumVectorInstructions == 0); > + FiftyPercentVectorBonus = Threshold; > + TenPercentVectorBonus = Threshold / 2; > + > + // Subtract off one instruction per call argument as those will be free after > + // inlining. > + Cost -= CS.arg_size() * InlineConstants::InstrCost; > + > + // If there is only one call of the function, and it has internal linkage, > + // the cost of inlining it drops dramatically.
> + if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) > + Cost += InlineConstants::LastCallToStaticBonus; > + > + // If the instruction after the call, or if the normal destination of the > + // invoke is an unreachable instruction, the function is noreturn. As such, > + // there is little point in inlining this unless there is literally zero cost. > + if (InvokeInst *II = dyn_cast(CS.getInstruction())) { > + if (isa(II->getNormalDest()->begin())) > + Threshold = 1; > + } else if (isa(++BasicBlock::iterator(CS.getInstruction()))) > + Threshold = 1; > + > + // If this function uses the coldcc calling convention, prefer not to inline > + // it. > + if (F.getCallingConv() == CallingConv::Cold) > + Cost += InlineConstants::ColdccPenalty; > > - // There are two instructions which produce a strict constant value when > - // applied to two related pointer values. Ignore everything else. > - if (!isa(I) && I->getOpcode() != Instruction::Sub) > - continue; > - assert(I->getNumOperands() == 2); > + // Check if we're done. This can happen due to bonuses and penalties. > + if (Cost > Threshold) > + return false; > + } > > - // Ensure that the two operands are in our set of potentially paired > - // pointers (or are derived from them). > - Value *OtherArg = I->getOperand(0); > - if (OtherArg == V) > - OtherArg = I->getOperand(1); > - DenseMap::const_iterator ArgIt > - = PointerArgs.find(OtherArg); > - if (ArgIt == PointerArgs.end()) > - continue; > - std::pair ArgPair(ArgIt->second, ArgIdx); > - if (ArgPair.first > ArgPair.second) > - std::swap(ArgPair.first, ArgPair.second); > - > - PointerArgPairWeights[ArgPair] > - += countCodeReductionForConstant(Metrics, I); > - } > - } while (!Worklist.empty()); > -} > - > -/// analyzeFunction - Fill in the current structure with information gleaned > -/// from the specified function. 
> -void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, > - const TargetData *TD) { > - Metrics.analyzeFunction(F, TD); > - > - // A function with exactly one return has it removed during the inlining > - // process (see InlineFunction), so don't count it. > - // FIXME: This knowledge should really be encoded outside of FunctionInfo. > - if (Metrics.NumRets==1) > - --Metrics.NumInsts; > - > - ArgumentWeights.reserve(F->arg_size()); > - DenseMap PointerArgs; > - unsigned ArgIdx = 0; > - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; > - ++I, ++ArgIdx) { > - // Count how much code can be eliminated if one of the arguments is > - // a constant or an alloca. > - ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I), > - countCodeReductionForAlloca(Metrics, I))); > - > - // If the argument is a pointer, also check for pairs of pointers where > - // knowing a fixed offset between them allows simplification. This pattern > - // arises mostly due to STL algorithm patterns where pointers are used as > - // random access iterators. > - if (!I->getType()->isPointerTy()) > - continue; > - PointerArgs[I] = ArgIdx; > - countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx); > + if (F.empty()) > + return true; > + > + // Track whether we've seen a return instruction. The first return > + // instruction is free, as at least one will usually disappear in inlining. > + bool HasReturn = false; > + > + // Populate our simplified values by mapping from function arguments to call > + // arguments with known important simplifications. 
> + CallSite::arg_iterator CAI = CS.arg_begin(); > + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); > + FAI != FAE; ++FAI, ++CAI) { > + assert(CAI != CS.arg_end()); > + if (Constant *C = dyn_cast(CAI)) > + SimplifiedValues[FAI] = C; > + > + Value *PtrArg = *CAI; > + if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { > + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); > + > + // We can SROA any pointer arguments derived from alloca instructions. > + if (isa(PtrArg)) { > + SROAArgValues[FAI] = PtrArg; > + SROAArgCosts[PtrArg] = 0; > + } > + } > } > -} > + NumConstantArgs = SimplifiedValues.size(); > + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); > + NumAllocaArgs = SROAArgValues.size(); > + > + // The worklist of live basic blocks in the callee *after* inlining. We avoid > + // adding basic blocks of the callee which can be proven to be dead for this > + // particular call site in order to get more accurate cost estimates. This > + // requires a somewhat heavyweight iteration pattern: we need to walk the > + // basic blocks in a breadth-first order as we insert live successors. To > + // accomplish this, prioritizing for small iterations because we exit after > + // crossing our threshold, we use a small-size optimized SetVector. > + typedef SetVector, > + SmallPtrSet > BBSetVector; > + BBSetVector BBWorklist; > + BBWorklist.insert(&F.getEntryBlock()); > + // Note that we *must not* cache the size, this loop grows the worklist. > + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { > + // Bail out the moment we cross the threshold. This means we'll under-count > + // the cost, but only when undercounting doesn't matter. 
> + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) > + break; > > -/// NeverInline - returns true if the function should never be inlined into > -/// any caller > -bool InlineCostAnalyzer::FunctionInfo::NeverInline() { > - return (Metrics.exposesReturnsTwice || Metrics.isRecursive || > - Metrics.containsIndirectBr); > -} > - > -// ConstantFunctionBonus - Figure out how much of a bonus we can get for > -// possibly devirtualizing a function. We'll subtract the size of the function > -// we may wish to inline from the indirect call bonus providing a limit on > -// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize > -// inlining because we decide we don't want to give a bonus for > -// devirtualizing. > -int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { > - > - // This could just be NULL. > - if (!C) return 0; > - > - Function *F = dyn_cast(C); > - if (!F) return 0; > - > - int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); > - return (Bonus > 0) ? 0 : Bonus; > -} > - > -// CountBonusForConstant - Figure out an approximation for how much per-call > -// performance boost we can expect if the specified value is constant. > -int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { > - unsigned Bonus = 0; > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ > - User *U = *UI; > - if (CallInst *CI = dyn_cast(U)) { > - // Turning an indirect call into a direct call is a BIG win > - if (CI->getCalledValue() == V) > - Bonus += ConstantFunctionBonus(CallSite(CI), C); > - } else if (InvokeInst *II = dyn_cast(U)) { > - // Turning an indirect call into a direct call is a BIG win > - if (II->getCalledValue() == V) > - Bonus += ConstantFunctionBonus(CallSite(II), C); > - } > - // FIXME: Eliminating conditional branches and switches should > - // also yield a per-call performance boost. 
> - else { > - // Figure out the bonuses that wll accrue due to simple constant > - // propagation. > - Instruction &Inst = cast(*U); > - > - // We can't constant propagate instructions which have effects or > - // read memory. > - // > - // FIXME: It would be nice to capture the fact that a load from a > - // pointer-to-constant-global is actually a *really* good thing to zap. > - // Unfortunately, we don't know the pointer that may get propagated here, > - // so we can't make this decision. > - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || > - isa(Inst)) > - continue; > + BasicBlock *BB = BBWorklist[Idx]; > + if (BB->empty()) > + continue; > > - bool AllOperandsConstant = true; > - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) > - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { > - AllOperandsConstant = false; > - break; > + // Handle the terminator cost here where we can track returns and other > + // function-wide constructs. > + TerminatorInst *TI = BB->getTerminator(); > + > + // We never want to inline functions that contain an indirectbr. This is > + // incorrect because all the blockaddress's (in static global initializers > + // for example) would be referring to the original function, and this indirect > + // jump would jump from the inlined copy of the function into the original > + // function which is extremely undefined behavior. > + // FIXME: This logic isn't really right; we can safely inline functions > + // with indirectbr's as long as no other function or global references the > + // blockaddress of a block within the current function. And as a QOI issue, > + // if someone is using a blockaddress without an indirectbr, and that > + // reference somehow ends up in another function or global, we probably > + // don't want to inline this function. 
> + if (isa(TI)) > + return false; > + > + if (!HasReturn && isa(TI)) > + HasReturn = true; > + else > + Cost += InlineConstants::InstrCost; > + > + // Analyze the cost of this block. If we blow through the threshold, this > + // returns false, and we can bail on out. > + if (!analyzeBlock(BB)) { > + if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) > + return false; > + break; > + } > + > + // Add in the live successors by first checking whether we have terminator > + // that may be simplified based on the values simplified by this call. > + if (BranchInst *BI = dyn_cast(TI)) { > + if (BI->isConditional()) { > + Value *Cond = BI->getCondition(); > + if (ConstantInt *SimpleCond > + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { > + BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); > + continue; > } > + } > + } else if (SwitchInst *SI = dyn_cast(TI)) { > + Value *Cond = SI->getCondition(); > + if (ConstantInt *SimpleCond > + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { > + BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); > + continue; > + } > + } > > - if (AllOperandsConstant) > - Bonus += CountBonusForConstant(&Inst); > + // If we're unable to select a particular successor, just count all of > + // them. > + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) > + BBWorklist.insert(TI->getSuccessor(TIdx)); > + > + // If we had any successors at this point, than post-inlining is likely to > + // have them as well. Note that we assume any basic blocks which existed > + // due to branches or switches which folded above will also fold after > + // inlining. > + if (SingleBB && TI->getNumSuccessors() > 1) { > + // Take off the bonus we applied to the threshold. 
> + Threshold -= SingleBBBonus; > + SingleBB = false; > } > } > > - return Bonus; > -} > + Threshold += VectorBonus; > > -int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { > - // Get information about the callee. > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > - > - // If we haven't calculated this information yet, do so now. > - if (CalleeFI->Metrics.NumBlocks == 0) > - CalleeFI->analyzeFunction(Callee, TD); > - > - // InlineCost - This value measures how good of an inline candidate this call > - // site is to inline. A lower inline cost make is more likely for the call to > - // be inlined. This value may go negative. > - // > - int InlineCost = 0; > - > - // Compute any size reductions we can expect due to arguments being passed into > - // the function. > - // > - unsigned ArgNo = 0; > - CallSite::arg_iterator I = CS.arg_begin(); > - for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); > - FI != FE; ++I, ++FI, ++ArgNo) { > - > - // If an alloca is passed in, inlining this function is likely to allow > - // significant future optimization possibilities (like scalar promotion, and > - // scalarization), so encourage the inlining of the function. > - // > - if (isa(I)) > - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; > - > - // If this is a constant being passed into the function, use the argument > - // weights calculated for the callee to determine how much will be folded > - // away with this information. 
> - else if (isa(I)) > - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; > - } > - > - const DenseMap, unsigned> &ArgPairWeights > - = CalleeFI->PointerArgPairWeights; > - for (DenseMap, unsigned>::const_iterator I > - = ArgPairWeights.begin(), E = ArgPairWeights.end(); > - I != E; ++I) > - if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() == > - CS.getArgument(I->first.second)->stripInBoundsConstantOffsets()) > - InlineCost -= I->second; > - > - // Each argument passed in has a cost at both the caller and the callee > - // sides. Measurements show that each argument costs about the same as an > - // instruction. > - InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); > - > - // Now that we have considered all of the factors that make the call site more > - // likely to be inlined, look at factors that make us not want to inline it. > - > - // Calls usually take a long time, so they make the inlining gain smaller. > - InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; > - > - // Look at the size of the callee. Each instruction counts as 5. > - InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; > - > - return InlineCost; > -} > - > -int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { > - // Get information about the callee. > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > - > - // If we haven't calculated this information yet, do so now. > - if (CalleeFI->Metrics.NumBlocks == 0) > - CalleeFI->analyzeFunction(Callee, TD); > - > - bool isDirectCall = CS.getCalledFunction() == Callee; > - Instruction *TheCall = CS.getInstruction(); > - int Bonus = 0; > - > - // If there is only one call of the function, and it has internal linkage, > - // make it almost guaranteed to be inlined. 
> - // > - if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) > - Bonus += InlineConstants::LastCallToStaticBonus; > - > - // If the instruction after the call, or if the normal destination of the > - // invoke is an unreachable instruction, the function is noreturn. As such, > - // there is little point in inlining this. > - if (InvokeInst *II = dyn_cast(TheCall)) { > - if (isa(II->getNormalDest()->begin())) > - Bonus += InlineConstants::NoreturnPenalty; > - } else if (isa(++BasicBlock::iterator(TheCall))) > - Bonus += InlineConstants::NoreturnPenalty; > - > - // If this function uses the coldcc calling convention, prefer not to inline > - // it. > - if (Callee->getCallingConv() == CallingConv::Cold) > - Bonus += InlineConstants::ColdccPenalty; > - > - // Add to the inline quality for properties that make the call valuable to > - // inline. This includes factors that indicate that the result of inlining > - // the function will be optimizable. Currently this just looks at arguments > - // passed into the function. > - // > - CallSite::arg_iterator I = CS.arg_begin(); > - for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); > - FI != FE; ++I, ++FI) > - // Compute any constant bonus due to inlining we want to give here. > - if (isa(I)) > - Bonus += CountBonusForConstant(FI, cast(I)); > - > - return Bonus; > + return AlwaysInline || Cost < Threshold; > } > > -// getInlineCost - The heuristic used to determine if we should inline the > -// function call or not. > -// > -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) { > - return getInlineCost(CS, CS.getCalledFunction()); > +/// \brief Dump stats about this call's analysis. 
> +void CallAnalyzer::dump() { > +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" > + DEBUG_PRINT_STAT(NumConstantArgs); > + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); > + DEBUG_PRINT_STAT(NumAllocaArgs); > + DEBUG_PRINT_STAT(NumConstantPtrCmps); > + DEBUG_PRINT_STAT(NumConstantPtrDiffs); > + DEBUG_PRINT_STAT(NumInstructionsSimplified); > + DEBUG_PRINT_STAT(SROACostSavings); > + DEBUG_PRINT_STAT(SROACostSavingsLost); > +#undef DEBUG_PRINT_STAT > } > > -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee) { > - Instruction *TheCall = CS.getInstruction(); > - Function *Caller = TheCall->getParent()->getParent(); > +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { > + Function *Callee = CS.getCalledFunction(); > > // Don't inline functions which can be redefined at link-time to mean > // something else. Don't inline functions marked noinline or call sites > // marked noinline. > - if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) || > - CS.isNoInline()) > + if (!Callee || Callee->mayBeOverridden() || > + Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) > return llvm::InlineCost::getNever(); > > - // Get information about the callee. > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); > > - // If we haven't calculated this information yet, do so now. > - if (CalleeFI->Metrics.NumBlocks == 0) > - CalleeFI->analyzeFunction(Callee, TD); > + CallAnalyzer CA(TD, *Callee, Threshold); > + bool ShouldInline = CA.analyzeCall(CS); > > - // If we should never inline this, return a huge cost. > - if (CalleeFI->NeverInline()) > - return InlineCost::getNever(); > + DEBUG(CA.dump()); > > - // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we > - // could move this up and avoid computing the FunctionInfo for > - // things we are going to just return always inline for. 
This > - // requires handling setjmp somewhere else, however. > - if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) > + // Check if there was a reason to force inlining or no inlining. > + if (!ShouldInline && CA.getCost() < CA.getThreshold()) > + return InlineCost::getNever(); > + if (ShouldInline && CA.getCost() >= CA.getThreshold()) > return InlineCost::getAlways(); > > - if (CalleeFI->Metrics.usesDynamicAlloca) { > - // Get information about the caller. > - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; > - > - // If we haven't calculated this information yet, do so now. > - if (CallerFI.Metrics.NumBlocks == 0) { > - CallerFI.analyzeFunction(Caller, TD); > - > - // Recompute the CalleeFI pointer, getting Caller could have invalidated > - // it. > - CalleeFI = &CachedFunctionInfo[Callee]; > - } > - > - // Don't inline a callee with dynamic alloca into a caller without them. > - // Functions containing dynamic alloca's are inefficient in various ways; > - // don't create more inefficiency. > - if (!CallerFI.Metrics.usesDynamicAlloca) > - return InlineCost::getNever(); > - } > - > - // InlineCost - This value measures how good of an inline candidate this call > - // site is to inline. A lower inline cost make is more likely for the call to > - // be inlined. This value may go negative due to the fact that bonuses > - // are negative numbers. > - // > - int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee); > - return llvm::InlineCost::get(InlineCost); > -} > - > -// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a > -// higher threshold to determine if the function call should be inlined. > -float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { > - Function *Callee = CS.getCalledFunction(); > - > - // Get information about the callee. > - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; > - > - // If we haven't calculated this information yet, do so now. 
> - if (CalleeFI.Metrics.NumBlocks == 0) > - CalleeFI.analyzeFunction(Callee, TD); > - > - float Factor = 1.0f; > - // Single BB functions are often written to be inlined. > - if (CalleeFI.Metrics.NumBlocks == 1) > - Factor += 0.5f; > - > - // Be more aggressive if the function contains a good chunk (if it mades up > - // at least 10% of the instructions) of vector instructions. > - if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) > - Factor += 2.0f; > - else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) > - Factor += 1.5f; > - return Factor; > + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); > } > > /// growCachedCostInfo - update the cached cost info for Caller after Callee has > /// been inlined. > void > InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { > - CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; > - > - // For small functions we prefer to recalculate the cost for better accuracy. > - if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { > - resetCachedCostInfo(Caller); > - return; > - } > - > - // For large functions, we can save a lot of computation time by skipping > - // recalculations. > - if (CallerMetrics.NumCalls > 0) > - --CallerMetrics.NumCalls; > - > - if (Callee == 0) return; > - > - CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; > - > - // If we don't have metrics for the callee, don't recalculate them just to > - // update an approximation in the caller. Instead, just recalculate the > - // caller info from scratch. > - if (CalleeMetrics.NumBlocks == 0) { > - resetCachedCostInfo(Caller); > - return; > - } > - > - // Since CalleeMetrics were already calculated, we know that the CallerMetrics > - // reference isn't invalidated: both were in the DenseMap. 
> - CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; > - > - // FIXME: If any of these three are true for the callee, the callee was > - // not inlined into the caller, so I think they're redundant here. > - CallerMetrics.exposesReturnsTwice |= CalleeMetrics.exposesReturnsTwice; > - CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; > - CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; > - > - CallerMetrics.NumInsts += CalleeMetrics.NumInsts; > - CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; > - CallerMetrics.NumCalls += CalleeMetrics.NumCalls; > - CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; > - CallerMetrics.NumRets += CalleeMetrics.NumRets; > - > - // analyzeBasicBlock counts each function argument as an inst. > - if (CallerMetrics.NumInsts >= Callee->arg_size()) > - CallerMetrics.NumInsts -= Callee->arg_size(); > - else > - CallerMetrics.NumInsts = 0; > - > - // We are not updating the argument weights. We have already determined that > - // Caller is a fairly large function, so we accept the loss of precision. > } > > /// clear - empty the cache of inline costs > void InlineCostAnalyzer::clear() { > - CachedFunctionInfo.clear(); > } > > Modified: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp (original) > +++ llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp Sat Mar 31 07:42:41 2012 > @@ -59,10 +59,7 @@ > // We still have to check the inline cost in case there are reasons to > // not inline which trump the always-inline attribute such as setjmp and > // indirectbr. 
> - return CA.getInlineCost(CS); > - } > - float getInlineFudgeFactor(CallSite CS) { > - return CA.getInlineFudgeFactor(CS); > + return CA.getInlineCost(CS, getInlineThreshold(CS)); > } > void resetCachedCostInfo(Function *Caller) { > CA.resetCachedCostInfo(Caller); > > Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original) > +++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Sat Mar 31 07:42:41 2012 > @@ -40,10 +40,7 @@ > } > static char ID; // Pass identification, replacement for typeid > InlineCost getInlineCost(CallSite CS) { > - return CA.getInlineCost(CS); > - } > - float getInlineFudgeFactor(CallSite CS) { > - return CA.getInlineFudgeFactor(CS); > + return CA.getInlineCost(CS, getInlineThreshold(CS)); > } > void resetCachedCostInfo(Function *Caller) { > CA.resetCachedCostInfo(Caller); > > Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original) > +++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Sat Mar 31 07:42:41 2012 > @@ -231,14 +231,10 @@ > return false; > } > > - int Cost = IC.getValue(); > Function *Caller = CS.getCaller(); > - int CurrentThreshold = getInlineThreshold(CS); > - float FudgeFactor = getInlineFudgeFactor(CS); > - int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); > - if (Cost >= AdjThreshold) { > - DEBUG(dbgs() << " NOT Inlining: cost=" << Cost > - << ", thres=" << AdjThreshold > + if (!IC) { > + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() > + << ", thres=" << (IC.getCostDelta() + IC.getCost()) 
> << ", Call: " << *CS.getInstruction() << "\n"); > return false; > } > @@ -255,10 +251,15 @@ > // are used. Thus we will always have the opportunity to make local inlining > // decisions. Importantly the linkonce-ODR linkage covers inline functions > // and templates in C++. > + // > + // FIXME: All of this logic should be sunk into getInlineCost. It relies on > + // the internal implementation of the inline cost metrics rather than > + // treating them as truly abstract units etc. > if (Caller->hasLocalLinkage() || > Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { > int TotalSecondaryCost = 0; > - bool outerCallsFound = false; > + // The candidate cost to be imposed upon the current function. > + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); > // This bool tracks what happens if we do NOT inline C into B. > bool callerWillBeRemoved = Caller->hasLocalLinkage(); > // This bool tracks what happens if we DO inline C into B. > @@ -276,26 +277,19 @@ > } > > InlineCost IC2 = getInlineCost(CS2); > - if (IC2.isNever()) > + if (!IC2) { > callerWillBeRemoved = false; > - if (IC2.isAlways() || IC2.isNever()) > + continue; > + } > + if (IC2.isAlways()) > continue; > > - outerCallsFound = true; > - int Cost2 = IC2.getValue(); > - int CurrentThreshold2 = getInlineThreshold(CS2); > - float FudgeFactor2 = getInlineFudgeFactor(CS2); > - > - if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) > - callerWillBeRemoved = false; > - > - // See if we have this case. We subtract off the penalty > - // for the call instruction, which we would be deleting. > - if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && > - Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= > - (int)(CurrentThreshold2 * FudgeFactor2)) { > + // See if inlining or original callsite would erase the cost delta of > + // this callsite. We subtract off the penalty for the call instruction, > + // which we would be deleting. 
> + if (IC2.getCostDelta() <= CandidateCost) { > inliningPreventsSomeOuterInline = true; > - TotalSecondaryCost += Cost2; > + TotalSecondaryCost += IC2.getCost(); > } > } > // If all outer calls to Caller would get inlined, the cost for the last > @@ -305,17 +299,16 @@ > if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) > TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; > > - if (outerCallsFound && inliningPreventsSomeOuterInline && > - TotalSecondaryCost < Cost) { > - DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << > - " Cost = " << Cost << > + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { > + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << > + " Cost = " << IC.getCost() << > ", outer Cost = " << TotalSecondaryCost << '\n'); > return false; > } > } > > - DEBUG(dbgs() << " Inlining: cost=" << Cost > - << ", thres=" << AdjThreshold > + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() > + << ", thres=" << (IC.getCostDelta() + IC.getCost()) > << ", Call: " << *CS.getInstruction() << '\n'); > return true; > } > > Modified: llvm/trunk/test/Transforms/Inline/alloca-bonus.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/alloca-bonus.ll?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/Inline/alloca-bonus.ll (original) > +++ llvm/trunk/test/Transforms/Inline/alloca-bonus.ll Sat Mar 31 07:42:41 2012 > @@ -1,5 +1,7 @@ > ; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s > > +target datalayout = "p:32:32" > + > declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) > > @glbl = external global i32 > @@ -15,8 +17,8 @@ > define void @inner1(i32 *%ptr) { > %A = load i32* %ptr > store i32 0, i32* %ptr > - %C = getelementptr i32* %ptr, i32 0 > - %D = getelementptr i32* %ptr, i32 1 > + %C = getelementptr inbounds i32* %ptr, i32 0 > + 
%D = getelementptr inbounds i32* %ptr, i32 1 > %E = bitcast i32* %ptr to i8* > %F = select i1 false, i32* %ptr, i32* @glbl > call void @llvm.lifetime.start(i64 0, i8* %E) > @@ -35,8 +37,8 @@ > define void @inner2(i32 *%ptr) { > %A = load i32* %ptr > store i32 0, i32* %ptr > - %C = getelementptr i32* %ptr, i32 0 > - %D = getelementptr i32* %ptr, i32 %A > + %C = getelementptr inbounds i32* %ptr, i32 0 > + %D = getelementptr inbounds i32* %ptr, i32 %A > %E = bitcast i32* %ptr to i8* > %F = select i1 false, i32* %ptr, i32* @glbl > call void @llvm.lifetime.start(i64 0, i8* %E) > @@ -93,7 +95,7 @@ > ; %B poisons this call, scalar-repl can't handle that instruction. However, we > ; still want to detect that the icmp and branch *can* be handled. > define void @inner4(i32 *%ptr, i32 %A) { > - %B = getelementptr i32* %ptr, i32 %A > + %B = getelementptr inbounds i32* %ptr, i32 %A > %C = icmp eq i32* %ptr, null > br i1 %C, label %bb.true, label %bb.false > bb.true: > @@ -122,3 +124,32 @@ > bb.false: > ret void > } > + > +define void @outer5() { > +; CHECK: @outer5 > +; CHECK-NOT: call void @inner5 > + %ptr = alloca i32 > + call void @inner5(i1 false, i32* %ptr) > + ret void > +} > + > +; %D poisons this call, scalar-repl can't handle that instruction. However, if > +; the flag is set appropriately, the poisoning instruction is inside of dead > +; code, and so shouldn't be counted. 
> +define void @inner5(i1 %flag, i32 *%ptr) { > + %A = load i32* %ptr > + store i32 0, i32* %ptr > + %C = getelementptr inbounds i32* %ptr, i32 0 > + br i1 %flag, label %if.then, label %exit > + > +if.then: > + %D = getelementptr inbounds i32* %ptr, i32 %A > + %E = bitcast i32* %ptr to i8* > + %F = select i1 false, i32* %ptr, i32* @glbl > + call void @llvm.lifetime.start(i64 0, i8* %E) > + ret void > + > +exit: > + ret void > +} > + > > Modified: llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll (original) > +++ llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll Sat Mar 31 07:42:41 2012 > @@ -4,6 +4,11 @@ > ; already have dynamic allocas. > > ; RUN: opt < %s -inline -S | FileCheck %s > +; > +; FIXME: This test is xfailed because the inline cost rewrite disabled *all* > +; inlining of functions which contain a dynamic alloca. It should be re-enabled > +; once that functionality is restored. 
> +; XFAIL: * > > declare void @ext(i32*) > > > Modified: llvm/trunk/test/Transforms/Inline/inline_constprop.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline_constprop.ll?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/Inline/inline_constprop.ll (original) > +++ llvm/trunk/test/Transforms/Inline/inline_constprop.ll Sat Mar 31 07:42:41 2012 > @@ -1,4 +1,4 @@ > -; RUN: opt < %s -inline -S | FileCheck %s > +; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s > > define internal i32 @callee1(i32 %A, i32 %B) { > %C = sdiv i32 %A, %B > @@ -14,17 +14,18 @@ > } > > define i32 @caller2() { > +; Check that we can constant-prop through instructions after inlining callee21 > +; to get constants in the inlined callsite to callee22. > +; FIXME: Currently, the threshold is fixed at 20 because we don't perform > +; *recursive* cost analysis to realize that the nested call site will definitely > +; inline and be cheap. We should eventually do that and lower the threshold here > +; to 1. > +; > ; CHECK: @caller2 > ; CHECK-NOT: call void @callee2 > ; CHECK: ret > > -; We contrive to make this hard for *just* the inline pass to do in order to > -; simulate what can actually happen with large, complex functions getting > -; inlined. > - %a = add i32 42, 0 > - %b = add i32 48, 0 > - > - %x = call i32 @callee21(i32 %a, i32 %b) > + %x = call i32 @callee21(i32 42, i32 48) > ret i32 %x > } > > @@ -41,49 +42,71 @@ > br i1 %icmp, label %bb.true, label %bb.false > bb.true: > ; This block musn't be counted in the inline cost. 
> - %ptr = call i8* @getptr() > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > - load volatile i8* %ptr > + %x1 = add i32 %x, 1 > + %x2 = add i32 %x1, 1 > + %x3 = add i32 %x2, 1 > + %x4 = add i32 %x3, 1 > + %x5 = add i32 %x4, 1 > + %x6 = add i32 %x5, 1 > + %x7 = add i32 %x6, 1 > + %x8 = add i32 %x7, 1 > > - ret i32 %x > + ret i32 %x8 > bb.false: > ret i32 %x > } > + > +define i32 @caller3() { > +; Check that even if the expensive path is hidden behind several basic blocks, > +; it doesn't count toward the inline cost when constant-prop proves those paths > +; dead. 
> +; > +; CHECK: @caller3 > +; CHECK-NOT: call > +; CHECK: ret i32 6 > + > +entry: > + %x = call i32 @callee3(i32 42, i32 48) > + ret i32 %x > +} > + > +define i32 @callee3(i32 %x, i32 %y) { > + %sub = sub i32 %y, %x > + %icmp = icmp ugt i32 %sub, 42 > + br i1 %icmp, label %bb.true, label %bb.false > + > +bb.true: > + %icmp2 = icmp ult i32 %sub, 64 > + br i1 %icmp2, label %bb.true.true, label %bb.true.false > + > +bb.true.true: > + ; This block musn't be counted in the inline cost. > + %x1 = add i32 %x, 1 > + %x2 = add i32 %x1, 1 > + %x3 = add i32 %x2, 1 > + %x4 = add i32 %x3, 1 > + %x5 = add i32 %x4, 1 > + %x6 = add i32 %x5, 1 > + %x7 = add i32 %x6, 1 > + %x8 = add i32 %x7, 1 > + br label %bb.merge > + > +bb.true.false: > + ; This block musn't be counted in the inline cost. > + %y1 = add i32 %y, 1 > + %y2 = add i32 %y1, 1 > + %y3 = add i32 %y2, 1 > + %y4 = add i32 %y3, 1 > + %y5 = add i32 %y4, 1 > + %y6 = add i32 %y5, 1 > + %y7 = add i32 %y6, 1 > + %y8 = add i32 %y7, 1 > + br label %bb.merge > + > +bb.merge: > + %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ] > + ret i32 %result > + > +bb.false: > + ret i32 %sub > +} > > Modified: llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll (original) > +++ llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll Sat Mar 31 07:42:41 2012 > @@ -71,3 +71,40 @@ > call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp > ret void > } > + > + > +; Check that a recursive function, when called with a constant that makes the > +; recursive path dead code can actually be inlined. 
> +define i32 @fib(i32 %i) { > +entry: > + %is.zero = icmp eq i32 %i, 0 > + br i1 %is.zero, label %zero.then, label %zero.else > + > +zero.then: > + ret i32 0 > + > +zero.else: > + %is.one = icmp eq i32 %i, 1 > + br i1 %is.one, label %one.then, label %one.else > + > +one.then: > + ret i32 1 > + > +one.else: > + %i1 = sub i32 %i, 1 > + %f1 = call i32 @fib(i32 %i1) > + %i2 = sub i32 %i, 2 > + %f2 = call i32 @fib(i32 %i2) > + %f = add i32 %f1, %f2 > + ret i32 %f > +} > + > +define i32 @fib_caller() { > +; CHECK: @fib_caller > +; CHECK-NOT: call > +; CHECK: ret > + %f1 = call i32 @fib(i32 0) > + %f2 = call i32 @fib(i32 1) > + %result = add i32 %f1, %f2 > + ret i32 %result > +} > > Modified: llvm/trunk/test/Transforms/Inline/ptr-diff.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/ptr-diff.ll?rev=153812&r1=153811&r2=153812&view=diff > ============================================================================== > --- llvm/trunk/test/Transforms/Inline/ptr-diff.ll (original) > +++ llvm/trunk/test/Transforms/Inline/ptr-diff.ll Sat Mar 31 07:42:41 2012 > @@ -1,5 +1,7 @@ > ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s > > +target datalayout = "p:32:32" > + > define i32 @outer1() { > ; CHECK: @outer1 > ; CHECK-NOT: call > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -David From echristo at apple.com Tue Apr 10 15:59:31 2012 From: echristo at apple.com (Eric Christopher) Date: Tue, 10 Apr 2012 13:59:31 -0700 Subject: [llvm-commits] [llvm] r154426 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp In-Reply-To: <7DE70FDACDE4CD4887C4278C12A2E3050D3688@HASMSX104.ger.corp.intel.com> References: <20120410193918.ABE0F2A6C065@llvm.org> <7DE70FDACDE4CD4887C4278C12A2E3050D3688@HASMSX104.ger.corp.intel.com> Message-ID: On Apr 10, 2012, at 12:47 PM, "Rotem, Nadav" wrote: > It looks like the problem is in the test 
../llvm/test/CodeGen/X86/vec_shuffle-20.ll which does not specify the '-mcpu' flag. If it's okay with you, I will fix the test and re-apply. Only if you watch for bot errors this time? :) -eric From hwennborg at google.com Tue Apr 10 14:41:36 2012 From: hwennborg at google.com (Hans Wennborg) Date: Tue, 10 Apr 2012 20:41:36 +0100 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F7EF0D1.4020807@remobjects.com> References: <4F7EF0D1.4020807@remobjects.com> Message-ID: (Including the list this time, sorry for the mess.) On Fri, Apr 6, 2012 at 14:34, Carlo Kok wrote: > Attached is a fairly simple patch for llvm-c that exposes: > * Target class > * TargetMachine class > > To allow for emitting binary and assembly. > > This is the first time I try to contribute to llvm, if there's something > wrong with my patch, let me know. Hi Carlo, Thanks for this! I've been annoyed in a project of my own that I couldn't use the C bindings to emit objects or assembly. I'm not very familiar with the classes you are wrapping, so someone else should take a look too, but I have read through your patch and made some comments below. Thanks, Hans > +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ Shouldn't it be C rather than C++ at the top? > +|* This header declares the C interface to libLLVMBitReader.a, which *| > +|* implements input of the LLVM bitcode format. *| > +|* *| > +|* Many exotic languages can interoperate with C code but have a harder time *| > +|* with C++ due to name mangling. So in addition to C, this interface enables *| > +|* tools written in such languages. *| I don't think this second paragraph is necessary? > +typedef enum { > + LLVMRelocDefault = 0, > + LLVMRelocStatic = 1, > + LLVMRelocPIC = 2, > + LLVMRelocDynamicNoPic = 3 Looks like tabs have been used here instead of spaces. 
Is there a reason that you set values for the enumerators here but not in other enumerations such as LLVMCodeGenOptLevel? > +// Target Rather than this type of comment, I think proper Doxygen comments would be awesome. > +const char * LLVMGetTargetName(LLVMTargetRef T); > +const char * LLVMGetTargetDescription(LLVMTargetRef T); I think it's more common not to have a space between the * and the function name. > +// Target Machine > +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel); The * should go on the variable, not the type. This applies throughout the patch (below, and in the .cpp file as well). > +#ifdef __cplusplus Maybe put some newlines before this, to separate it from the API. > +namespace llvm { > + class TargetMachine; > + class Target; > + Trailing space on this line? > + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { > + return reinterpret_cast(P); I think it's more common to leave out the space between the type and the * here. This applies in the other (un)wrap methods below as well. > +//===-- TargetMachine.cpp ----------------------------------------------------------===// This line looks too long. > +LLVMTargetRef LLVMGetFirstTarget() > +{ The opening curly brace usually goes on the same line as the function name in LLVM. This applies throughout this file. > + const Target* target = &*TargetRegistry::begin(); > + return wrap(target); > +} I think there should be a blank line between functions. This applies below as well. > +// Target I don't think this comment adds much value. > +const char * LLVMGetTargetName(LLVMTargetRef T) There's trailing whitespace at the end of this line, and also for the function declarations below.
> +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel) > +{ > + Reloc::Model RM = (Reloc::Model)Reloc; > + CodeModel::Model CM = (CodeModel::Model)CodeModel; > + CodeGenOpt::Level OL = (CodeGenOpt::Level)Level; I think it's more common to use static_cast here rather than c-style casts. > +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) > +{ // Inspired by the D binding > + TargetMachine* TM = unwrap(T); > + Module* Mod = unwrap(M); > + > + FunctionPassManager pass(Mod); Wouldn't it be better to use a PassManager here, that you can run over the Module once, rather than iterating over each function? > + if (!td) { > + error = "No TargetData in TargetMachine"; > + *ErrorMessage = strdup(error.c_str()); > + return true; Looks like tabs on the two previous lines. > + pass.add(new TargetData(*td)); > + Trailing whitespace. > + TargetMachine::CodeGenFileType ft = (TargetMachine::CodeGenFileType)codegen; static_cast instead of c-style cast > + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); > + formatted_raw_ostream destf(dest); > + if (!error.empty()) { > + *ErrorMessage = strdup(error.c_str()); > + return true; Tab > + } > + Trailing whitespace > + if (TM->addPassesToEmitFile(pass, destf, ft)) { > + error = "No TargetData in TargetMachine"; > + *ErrorMessage = strdup(error.c_str()); > + return true; Tabs > + } > + > + pass.doInitialization(); > + > + for (llvm::Module::iterator it = Mod->begin(), end = Mod->end(); it != end; ++it){ If you had a PassManager instead of a FunctionPassManager, you could just do PM.run(*Mod); > + errs() << "Did 1 iteration"; > + if (!it->isDeclaration()) { > + errs() << "Did 1 proper iterations"; > + pass.run(*it); > + } Tabs. 
> + } > + errs() << "Done"; You probably want to remove the printouts to errs :) From evan.cheng at apple.com Tue Apr 10 16:40:28 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 10 Apr 2012 21:40:28 -0000 Subject: [llvm-commits] [llvm] r154439 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td test/CodeGen/ARM/fma.ll Message-ID: <20120410214028.7C55B2A6C065@llvm.org> Author: evancheng Date: Tue Apr 10 16:40:28 2012 New Revision: 154439 URL: http://llvm.org/viewvc/llvm-project?rev=154439&view=rev Log: Handle llvm.fma.* intrinsics. rdar://10914096 Added: llvm/trunk/test/CodeGen/ARM/fma.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=154439&r1=154438&r2=154439&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Apr 10 16:40:28 2012 @@ -769,8 +769,10 @@ setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + } // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154439&r1=154438&r2=154439&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ 
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Apr 10 16:40:28 2012 @@ -4133,6 +4133,14 @@ v4f32, fmul_su, fsub_mlx>, Requires<[HasNEON2,FPContractions]>; +// Match @llvm.fma.* intrinsics +def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasNEON, HasVFP4]>; +def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasNEON, HasVFP4]>; + // Vector Subtract Operations. // VSUB : Vector Subtract (integer and floating-point) Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=154439&r1=154438&r2=154439&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Tue Apr 10 16:40:28 2012 @@ -1080,6 +1080,14 @@ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; +// Match @llvm.fma.* intrinsics +def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)), + (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)), + (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + def VFMSD : ADbI<0b11101, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", Added: llvm/trunk/test/CodeGen/ARM/fma.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fma.ll?rev=154439&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fma.ll (added) +++ llvm/trunk/test/CodeGen/ARM/fma.ll Tue Apr 10 16:40:28 2012 @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp4 | FileCheck %s + +define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: +; CHECK: 
test_f32 +; CHECK: vfma.f32 + %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %call +} + +define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_f64 +; CHECK: vfma.f64 + %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %call +} + +define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { +entry: +; CHECK: test_v2f32 +; CHECK: vfma.f32 + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind + ret <2 x float> %0 +} + +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone From kcc at google.com Tue Apr 10 17:29:18 2012 From: kcc at google.com (Kostya Serebryany) Date: Tue, 10 Apr 2012 22:29:18 -0000 Subject: [llvm-commits] [llvm] r154444 - in /llvm/trunk: lib/Transforms/Instrumentation/ThreadSanitizer.cpp test/Instrumentation/ThreadSanitizer/read_from_global.ll Message-ID: <20120410222918.143F92A6C065@llvm.org> Author: kcc Date: Tue Apr 10 17:29:17 2012 New Revision: 154444 URL: http://llvm.org/viewvc/llvm-project?rev=154444&view=rev Log: [tsan] two more compile-time optimizations: - don't instrument reads from constant globals. Saves ~1.5% of instrumented instructions on CPU2006 (counting static instructions, not their execution). - don't instrument reads from vtable (which is a global constant too). Saves ~5%. I did not measure the run-time impact of this, but it is certainly non-negative. 
Added: llvm/trunk/test/Instrumentation/ThreadSanitizer/read_from_global.ll Modified: llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp Modified: llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp?rev=154444&r1=154443&r2=154444&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp (original) +++ llvm/trunk/lib/Transforms/Instrumentation/ThreadSanitizer.cpp Tue Apr 10 17:29:17 2012 @@ -58,6 +58,8 @@ size_t NumOmittedReadsBeforeWrite; size_t NumAccessesWithBadSize; size_t NumInstrumentedVtableWrites; + size_t NumOmittedReadsFromConstantGlobals; + size_t NumOmittedReadsFromVtable; }; /// ThreadSanitizer: instrument the code in module to find races. @@ -72,6 +74,7 @@ private: void choseInstructionsToInstrument(SmallVectorImpl &Local, SmallVectorImpl &All); + bool addrPointsToConstantData(Value *Addr); TargetData *TD; OwningPtr BL; @@ -145,11 +148,44 @@ << "; vt " << stats.NumInstrumentedVtableWrites << "; bs " << stats.NumAccessesWithBadSize << "; rbw " << stats.NumOmittedReadsBeforeWrite + << "; rcg " << stats.NumOmittedReadsFromConstantGlobals + << "; rvt " << stats.NumOmittedReadsFromVtable << "\n"; } return true; } +static bool isVtableAccess(Instruction *I) { + if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { + if (Tag->getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + } + return false; +} + +bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { + // If this is a GEP, just analyze its pointer operand. + if (GetElementPtrInst *GEP = dyn_cast(Addr)) + Addr = GEP->getPointerOperand(); + + if (GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) { + // Reads from constant globals can not race with any writes. 
+ stats.NumOmittedReadsFromConstantGlobals++; + return true; + } + } else if(LoadInst *L = dyn_cast(Addr)) { + if (isVtableAccess(L)) { + // Reads from a vtable pointer can not race with any writes. + stats.NumOmittedReadsFromVtable++; + return true; + } + } + return false; +} + // Instrumenting some of the accesses may be proven redundant. // Currently handled: // - read-before-write (within same BB, no calls between) @@ -173,11 +209,16 @@ WriteTargets.insert(Store->getPointerOperand()); } else { LoadInst *Load = cast(I); - if (WriteTargets.count(Load->getPointerOperand())) { + Value *Addr = Load->getPointerOperand(); + if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. stats.NumOmittedReadsBeforeWrite++; continue; } + if (addrPointsToConstantData(Addr)) { + // Addr points to some constant data -- it can not race with any writes. + continue; + } } All.push_back(I); } @@ -236,16 +277,6 @@ return Res; } -static bool isVtableAccess(Instruction *I) { - if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { - if (Tag->getNumOperands() < 1) return false; - if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; - } - } - return false; -} - bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { IRBuilder<> IRB(I); bool IsWrite = isa(*I); Added: llvm/trunk/test/Instrumentation/ThreadSanitizer/read_from_global.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/ThreadSanitizer/read_from_global.ll?rev=154444&view=auto ============================================================================== --- llvm/trunk/test/Instrumentation/ThreadSanitizer/read_from_global.ll (added) +++ llvm/trunk/test/Instrumentation/ThreadSanitizer/read_from_global.ll Tue Apr 10 17:29:17 2012 @@ -0,0 +1,61 @@ +; RUN: opt < %s -tsan -S | FileCheck %s +; Check that tsan does not instrument reads from constant globals. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + at const_global = external constant i32 +define i32 @read_from_const_global() nounwind uwtable readnone { +entry: + %0 = load i32* @const_global, align 4 + ret i32 %0 +} +; CHECK: define i32 @read_from_const_global +; CHECK-NOT: __tsan +; CHECK: ret i32 + + at non_const_global = global i32 0, align 4 +define i32 @read_from_non_const_global() nounwind uwtable readonly { +entry: + %0 = load i32* @non_const_global, align 4 + ret i32 %0 +} + +; CHECK: define i32 @read_from_non_const_global +; CHECK: __tsan_read +; CHECK: ret i32 + + at const_global_array = external constant [10 x i32] +define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable readnone { +entry: + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds [10 x i32]* @const_global_array, i64 0, i64 %idxprom + %0 = load i32* %arrayidx, align 4 + ret i32 %0 +} + +; CHECK: define i32 @read_from_const_global_array +; CHECK-NOT: __tsan +; CHECK: ret i32 + +%struct.Foo = type { i32 (...)** } +define void @call_virtual_func(%struct.Foo* %f) uwtable { +entry: + %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)*** + %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !3 + %1 = load void (%struct.Foo*)** %vtable, align 8 + call void %1(%struct.Foo* %f) + ret void +} + +; CHECK: define void @call_virtual_func +; CHECK: __tsan_read +; CHECK: = load +; CHECK-NOT: __tsan_read +; CHECK: = load +; CHECK: ret void + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"vtable pointer", metadata !2} + From nobled at dreamwidth.org Tue Apr 10 17:44:49 2012 From: nobled at dreamwidth.org (Dylan Noblesmith) Date: Tue, 10 Apr 2012 22:44:49 -0000 Subject: [llvm-commits] [llvm] r154445 - 
/llvm/trunk/tools/llvm-stress/llvm-stress.cpp Message-ID: <20120410224449.420B32A6C065@llvm.org> Author: nobled Date: Tue Apr 10 17:44:49 2012 New Revision: 154445 URL: http://llvm.org/viewvc/llvm-project?rev=154445&view=rev Log: llvm-stress: don't make vectors of x86_mmx type LangRef.html says: "There are no arrays, vectors or constants of this type." This was hitting assertions when passing the -generate-x86-mmx option. PR12452. Modified: llvm/trunk/tools/llvm-stress/llvm-stress.cpp Modified: llvm/trunk/tools/llvm-stress/llvm-stress.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-stress/llvm-stress.cpp?rev=154445&r1=154444&r2=154445&view=diff ============================================================================== --- llvm/trunk/tools/llvm-stress/llvm-stress.cpp (original) +++ llvm/trunk/tools/llvm-stress/llvm-stress.cpp Tue Apr 10 17:44:49 2012 @@ -202,11 +202,17 @@ /// Pick a random vector type. Type *pickVectorType(unsigned len = (unsigned)-1) { - Type *Ty = pickScalarType(); // Pick a random vector width in the range 2**0 to 2**4. // by adding two randoms we are generating a normal-like distribution // around 2**3. unsigned width = 1<<((Ran->Rand() % 3) + (Ran->Rand() % 3)); + Type *Ty; + + // Vectors of x86mmx are illegal; keep trying till we get something else. 
+ do { + Ty = pickScalarType(); + } while (Ty->isX86_MMXTy()); + if (len != (unsigned)-1) width = len; return VectorType::get(Ty, width); From nobled at dreamwidth.org Tue Apr 10 17:44:51 2012 From: nobled at dreamwidth.org (Dylan Noblesmith) Date: Tue, 10 Apr 2012 22:44:51 -0000 Subject: [llvm-commits] [llvm] r154446 - /llvm/trunk/tools/llvm-stress/llvm-stress.cpp Message-ID: <20120410224451.D63472A6C065@llvm.org> Author: nobled Date: Tue Apr 10 17:44:51 2012 New Revision: 154446 URL: http://llvm.org/viewvc/llvm-project?rev=154446&view=rev Log: llvm-stress: stop abusing ConstantFP::get() ConstantFP::get(Type*, double) is unreliably host-specific: it can't handle a type like PPC128 on an x86 host. It even has a comment to that effect: "This should only be used for simple constant values like 2.0/1.0 etc, that are known-valid both as host double and as the target format." Instead, use APFloat. While we're at it, randomize the floating point value more thoroughly; it was previously limited to the range 0 to 2**19 - 1. PR12451. Modified: llvm/trunk/tools/llvm-stress/llvm-stress.cpp Modified: llvm/trunk/tools/llvm-stress/llvm-stress.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-stress/llvm-stress.cpp?rev=154446&r1=154445&r2=154446&view=diff ============================================================================== --- llvm/trunk/tools/llvm-stress/llvm-stress.cpp (original) +++ llvm/trunk/tools/llvm-stress/llvm-stress.cpp Tue Apr 10 17:44:51 2012 @@ -60,14 +60,28 @@ public: /// C'tor Random(unsigned _seed):Seed(_seed) {} - /// Return the next random value. - unsigned Rand() { - unsigned Val = Seed + 0x000b07a1; + + /// Return a random integer, up to a + /// maximum of 2**19 - 1. + uint32_t Rand() { + uint32_t Val = Seed + 0x000b07a1; Seed = (Val * 0x3c7c0ac1); // Only lowest 19 bits are random-ish. return Seed & 0x7ffff; } + /// Return a random 32 bit integer. 
+ uint32_t Rand32() { + uint32_t Val = Rand(); + Val &= 0xffff; + return Val | (Rand() << 16); + } + + /// Return a random 64 bit integer. + uint64_t Rand64() { + uint64_t Val = Rand32(); + return Val | (uint64_t(Rand32()) << 32); + } private: unsigned Seed; }; @@ -348,10 +362,20 @@ } if (Ty->isFloatingPointTy()) { + // Generate 128 random bits, the size of the (currently) + // largest floating-point types. + uint64_t RandomBits[2]; + for (unsigned i = 0; i < 2; ++i) + RandomBits[i] = Ran->Rand64(); + + APInt RandomInt(Ty->getPrimitiveSizeInBits(), makeArrayRef(RandomBits)); + + bool isIEEE = !Ty->isX86_FP80Ty() && !Ty->isPPC_FP128Ty(); + APFloat RandomFloat(RandomInt, isIEEE); + if (Ran->Rand() & 1) return PT->push_back(ConstantFP::getNullValue(Ty)); - return PT->push_back(ConstantFP::get(Ty, - static_cast(1)/Ran->Rand())); + return PT->push_back(ConstantFP::get(Ty->getContext(), RandomFloat)); } if (Ty->isIntegerTy()) { From resistor at mac.com Tue Apr 10 17:46:54 2012 From: resistor at mac.com (Owen Anderson) Date: Tue, 10 Apr 2012 22:46:54 -0000 Subject: [llvm-commits] [llvm] r154447 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAG.cpp test/CodeGen/ARM/fcopysign.ll Message-ID: <20120410224654.41FE82A6C065@llvm.org> Author: resistor Date: Tue Apr 10 17:46:53 2012 New Revision: 154447 URL: http://llvm.org/viewvc/llvm-project?rev=154447&view=rev Log: Move the constant-folding support for FP_ROUND in SelectionDAG from the one-operand version of getNode() to the two-operand version, since it became a two-operand node at some point. Zap a testcase that this allows us to completely fold away. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/test/CodeGen/ARM/fcopysign.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=154447&r1=154446&r2=154447&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Apr 10 17:46:53 2012 @@ -2428,7 +2428,6 @@ case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2994,6 +2993,16 @@ default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. Modified: llvm/trunk/test/CodeGen/ARM/fcopysign.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fcopysign.ll?rev=154447&r1=154446&r2=154447&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fcopysign.ll (original) +++ llvm/trunk/test/CodeGen/ARM/fcopysign.ll Tue Apr 10 17:46:53 2012 @@ -40,26 +40,10 @@ ret double %1 } -; rdar://9059537 -define i32 @test4() ssp { -entry: -; SOFT: test4: -; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00 -; This S-reg must be the first sub-reg of the last D-reg on vbsl. 
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]] -; SOFT: vshr.u64 [[REG4]], [[REG4]], #32 -; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000 -; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}} - %call80 = tail call double @copysign(double 1.000000e+00, double undef) - %conv81 = fptrunc double %call80 to float - %tmp88 = bitcast float %conv81 to i32 - ret i32 %tmp88 -} - ; rdar://9287902 -define float @test5() nounwind { +define float @test4() nounwind { entry: -; SOFT: test5: +; SOFT: test4: ; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 ; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 ; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 From chandlerc at gmail.com Tue Apr 10 18:31:41 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Wed, 11 Apr 2012 01:31:41 +0200 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: References: <20120331124242.93B692A6C065@llvm.org> Message-ID: On Tue, Apr 10, 2012 at 10:50 PM, David Dean wrote: > Chandler, > we're seeing a 9.92% compile time regression in > MultiSource/Applications/sqlite3/sqlite3 on ARMv7 -mthumb -O3. Can you > please take a look? > Absolutely. One thing that would help me immensely -- can you make the bitcode you're using available somewhere? I worry that I may see different timings and results targeting non-ARM, and I don't have an ARM cross toolchain set up handy at the moment. I'll start looking for smoking guns right away though. Note that I'm traveling for the EU dev meeting, and so may be a bit unresponsive during your working hours, but I'll put this at the top of the queue. 
> On 31 Mar 2012, at 5:42 AM, Chandler Carruth wrote: > > > Author: chandlerc > > Date: Sat Mar 31 07:42:41 2012 > > New Revision: 153812 > > > > URL: http://llvm.org/viewvc/llvm-project?rev=153812&view=rev > > Log: > > Initial commit for the rewrite of the inline cost analysis to operate > > on a per-callsite walk of the called function's instructions, in > > breadth-first order over the potentially reachable set of basic blocks. > > > > This is a major shift in how inline cost analysis works to improve the > > accuracy and rationality of inlining decisions. A brief outline of the > > algorithm this moves to: > > > > - Build a simplification mapping based on the callsite arguments to the > > function arguments. > > - Push the entry block onto a worklist of potentially-live basic blocks. > > - Pop the first block off of the *front* of the worklist (for > > breadth-first ordering) and walk its instructions using a custom > > InstVisitor. > > - For each instruction's operands, re-map them based on the > > simplification mappings available for the given callsite. > > - Compute any simplification possible of the instruction after > > re-mapping, and store that back into the simplification mapping. > > - Compute any bonuses, costs, or other impacts of the instruction on the > > cost metric. > > - When the terminator is reached, replace any conditional value in the > > terminator with any simplifications from the mapping we have, and add > > any successors which are not proven to be dead from these > > simplifications to the worklist. > > - Pop the next block off of the front of the worklist, and repeat. > > - As soon as the cost of inlining exceeds the threshold for the > > callsite, stop analyzing the function in order to bound cost. > > > > The primary goal of this algorithm is to perfectly handle dead code > > paths. We do not want any code in trivially dead code paths to impact > > inlining decisions. 
The previous metric was *extremely* flawed here, and > > would always subtract the average cost of two successors of > > a conditional branch when it was proven to become an unconditional > > branch at the callsite. There was no handling of wildly different costs > > between the two successors, which would cause inlining when the path > > actually taken was too large, and no inlining when the path actually > > taken was trivially simple. There was also no handling of the code > > *path*, only the immediate successors. These problems vanish completely > > now. See the added regression tests for the shiny new features -- we > > skip recursive function calls, SROA-killing instructions, and high cost > > complex CFG structures when dead at the callsite being analyzed. > > > > Switching to this algorithm required refactoring the inline cost > > interface to accept the actual threshold rather than simply returning > > a single cost. The resulting interface is pretty bad, and I'm planning > > to do lots of interface cleanup after this patch. > > > > Several other refactorings fell out of this, but I've tried to minimize > > them for this patch. =/ There is still more cleanup that can be done > > here. Please point out anything that you see in review. > > > > I've worked really hard to try to mirror at least the spirit of all of > > the previous heuristics in the new model. It's not clear that they are > > all correct any more, but I wanted to minimize the change in this single > > patch, it's already a bit ridiculous. One heuristic that is *not* yet > > mirrored is to allow inlining of functions with a dynamic alloca *if* > > the caller has a dynamic alloca. I will add this back, but I think the > > most reasonable way requires changes to the inliner itself rather than > > just the cost metric, and so I've deferred this for a subsequent patch. > > The test case is XFAIL-ed until then. 
> > > > As mentioned in the review mail, this seems to make Clang run about 1% > > to 2% faster in -O0, but makes its binary size grow by just under 4%. > > I've looked into the 4% growth, and it can be fixed, but requires > > changes to other parts of the inliner. > > > > Modified: > > llvm/trunk/include/llvm/Analysis/CodeMetrics.h > > llvm/trunk/include/llvm/Analysis/InlineCost.h > > llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h > > llvm/trunk/lib/Analysis/CodeMetrics.cpp > > llvm/trunk/lib/Analysis/InlineCost.cpp > > llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp > > llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp > > llvm/trunk/lib/Transforms/IPO/Inliner.cpp > > llvm/trunk/test/Transforms/Inline/alloca-bonus.ll > > llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll > > llvm/trunk/test/Transforms/Inline/inline_constprop.ll > > llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll > > llvm/trunk/test/Transforms/Inline/ptr-diff.ll > > > > Modified: llvm/trunk/include/llvm/Analysis/CodeMetrics.h > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/CodeMetrics.h?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/include/llvm/Analysis/CodeMetrics.h (original) > > +++ llvm/trunk/include/llvm/Analysis/CodeMetrics.h Sat Mar 31 07:42:41 > 2012 > > @@ -20,9 +20,13 @@ > > namespace llvm { > > class BasicBlock; > > class Function; > > + class Instruction; > > class TargetData; > > class Value; > > > > + /// \brief Check whether an instruction is likely to be "free" when > lowered. > > + bool isInstructionFree(const Instruction *I, const TargetData *TD = > 0); > > + > > /// \brief Check whether a call will lower to something small. 
> > /// > > /// This tests checks whether calls to this function will lower to > something > > > > Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/include/llvm/Analysis/InlineCost.h (original) > > +++ llvm/trunk/include/llvm/Analysis/InlineCost.h Sat Mar 31 07:42:41 > 2012 > > @@ -16,6 +16,7 @@ > > > > #include "llvm/Function.h" > > #include "llvm/ADT/DenseMap.h" > > +#include "llvm/ADT/SmallPtrSet.h" > > #include "llvm/ADT/ValueMap.h" > > #include "llvm/Analysis/CodeMetrics.h" > > #include > > @@ -25,162 +26,105 @@ > > namespace llvm { > > > > class CallSite; > > - template > > - class SmallPtrSet; > > class TargetData; > > > > namespace InlineConstants { > > // Various magic constants used to adjust heuristics. > > const int InstrCost = 5; > > - const int IndirectCallBonus = -100; > > + const int IndirectCallThreshold = 100; > > const int CallPenalty = 25; > > const int LastCallToStaticBonus = -15000; > > const int ColdccPenalty = 2000; > > const int NoreturnPenalty = 10000; > > } > > > > - /// InlineCost - Represent the cost of inlining a function. This > > - /// supports special values for functions which should "always" or > > - /// "never" be inlined. Otherwise, the cost represents a unitless > > - /// amount; smaller values increase the likelihood of the function > > - /// being inlined. > > + /// \brief Represents the cost of inlining a function. > > + /// > > + /// This supports special values for functions which should "always" > or > > + /// "never" be inlined. Otherwise, the cost represents a unitless > amount; > > + /// smaller values increase the likelihood of the function being > inlined. 
> > + /// > > + /// Objects of this type also provide the adjusted threshold for > inlining > > + /// based on the information available for a particular callsite. > They can be > > + /// directly tested to determine if inlining should occur given the > cost and > > + /// threshold for this cost metric. > > class InlineCost { > > - enum Kind { > > - Value, > > - Always, > > - Never > > + enum CostKind { > > + CK_Variable, > > + CK_Always, > > + CK_Never > > }; > > > > - // This is a do-it-yourself implementation of > > - // int Cost : 30; > > - // unsigned Type : 2; > > - // We used to use bitfields, but they were sometimes miscompiled > (PR3822). > > - enum { TYPE_BITS = 2 }; > > - enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - > TYPE_BITS }; > > - unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : > TYPE_BITS; > > + const int Cost : 30; // The inlining cost if neither always > nor never. > > + const unsigned Kind : 2; // The type of cost, one of CostKind > above. > > > > - Kind getType() const { > > - return Kind(TypedCost >> COST_BITS); > > - } > > + /// \brief The adjusted threshold against which this cost should be > tested. > > + const int Threshold; > > > > - int getCost() const { > > - // Sign-extend the bottom COST_BITS bits. > > - return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS; > > + // Trivial constructor, interesting logic in the factory functions > below. 
> > + InlineCost(int Cost, CostKind Kind, int Threshold) > > + : Cost(Cost), Kind(Kind), Threshold(Threshold) {} > > + > > + public: > > + static InlineCost get(int Cost, int Threshold) { > > + InlineCost Result(Cost, CK_Variable, Threshold); > > + assert(Result.Cost == Cost && "Cost exceeds InlineCost > precision"); > > + return Result; > > + } > > + static InlineCost getAlways() { > > + return InlineCost(0, CK_Always, 0); > > + } > > + static InlineCost getNever() { > > + return InlineCost(0, CK_Never, 0); > > } > > > > - InlineCost(int C, int T) { > > - TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << > COST_BITS); > > - assert(getCost() == C && "Cost exceeds InlineCost precision"); > > + /// \brief Test whether the inline cost is low enough for inlining. > > + operator bool() const { > > + if (isAlways()) return true; > > + if (isNever()) return false; > > + return Cost < Threshold; > > } > > - public: > > - static InlineCost get(int Cost) { return InlineCost(Cost, Value); } > > - static InlineCost getAlways() { return InlineCost(0, Always); } > > - static InlineCost getNever() { return InlineCost(0, Never); } > > - > > - bool isVariable() const { return getType() == Value; } > > - bool isAlways() const { return getType() == Always; } > > - bool isNever() const { return getType() == Never; } > > > > - /// getValue() - Return a "variable" inline cost's amount. It is > > + bool isVariable() const { return Kind == CK_Variable; } > > + bool isAlways() const { return Kind == CK_Always; } > > + bool isNever() const { return Kind == CK_Never; } > > + > > + /// getCost() - Return a "variable" inline cost's amount. It is > > /// an error to call this on an "always" or "never" InlineCost. 
> > - int getValue() const { > > - assert(getType() == Value && "Invalid access of InlineCost"); > > - return getCost(); > > + int getCost() const { > > + assert(Kind == CK_Variable && "Invalid access of InlineCost"); > > + return Cost; > > + } > > + > > + /// \brief Get the cost delta from the threshold for inlining. > > + /// Only valid if the cost is of the variable kind. Returns a > negative > > + /// value if the cost is too high to inline. > > + int getCostDelta() const { > > + return Threshold - getCost(); > > } > > }; > > > > /// InlineCostAnalyzer - Cost analyzer used by inliner. > > class InlineCostAnalyzer { > > - struct ArgInfo { > > - public: > > - unsigned ConstantWeight; > > - unsigned AllocaWeight; > > - > > - ArgInfo(unsigned CWeight, unsigned AWeight) > > - : ConstantWeight(CWeight), AllocaWeight(AWeight) > > - {} > > - }; > > - > > - struct FunctionInfo { > > - CodeMetrics Metrics; > > - > > - /// ArgumentWeights - Each formal argument of the function is > inspected to > > - /// see if it is used in any contexts where making it a constant > or alloca > > - /// would reduce the code size. If so, we add some value to the > argument > > - /// entry here. > > - std::vector ArgumentWeights; > > - > > - /// PointerArgPairWeights - Weights to use when giving an inline > bonus to > > - /// a call site due to correlated pairs of pointers. > > - DenseMap, unsigned> > PointerArgPairWeights; > > - > > - /// countCodeReductionForConstant - Figure out an approximation > for how > > - /// many instructions will be constant folded if the specified > value is > > - /// constant. > > - unsigned countCodeReductionForConstant(const CodeMetrics &Metrics, > > - Value *V); > > - > > - /// countCodeReductionForAlloca - Figure out an approximation of > how much > > - /// smaller the function will be if it is inlined into a context > where an > > - /// argument becomes an alloca. 
> > - unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics, > > - Value *V); > > - > > - /// countCodeReductionForPointerPair - Count the bonus to apply > to an > > - /// inline call site where a pair of arguments are pointers and > one > > - /// argument is a constant offset from the other. The idea is to > > - /// recognize a common C++ idiom where a begin and end iterator > are > > - /// actually pointers, and many operations on the pair of them > will be > > - /// constants if the function is called with arguments that have > > - /// a constant offset. > > - void countCodeReductionForPointerPair( > > - const CodeMetrics &Metrics, > > - DenseMap &PointerArgs, > > - Value *V, unsigned ArgIdx); > > - > > - /// analyzeFunction - Add information about the specified function > > - /// to the current structure. > > - void analyzeFunction(Function *F, const TargetData *TD); > > - > > - /// NeverInline - Returns true if the function should never be > > - /// inlined into any caller. > > - bool NeverInline(); > > - }; > > - > > - // The Function* for a function can be changed (by > ArgumentPromotion); > > - // the ValueMap will update itself when this happens. > > - ValueMap CachedFunctionInfo; > > - > > // TargetData if available, or null. > > const TargetData *TD; > > > > - int CountBonusForConstant(Value *V, Constant *C = NULL); > > - int ConstantFunctionBonus(CallSite CS, Constant *C); > > - int getInlineSize(CallSite CS, Function *Callee); > > - int getInlineBonuses(CallSite CS, Function *Callee); > > public: > > InlineCostAnalyzer(): TD(0) {} > > > > void setTargetData(const TargetData *TData) { TD = TData; } > > > > - /// getInlineCost - The heuristic used to determine if we should > inline the > > - /// function call or not. > > + /// \brief Get an InlineCost object representing the cost of > inlining this > > + /// callsite. 
> > /// > > - InlineCost getInlineCost(CallSite CS); > > - /// getCalledFunction - The heuristic used to determine if we > should inline > > - /// the function call or not. The callee is explicitly specified, > to allow > > - /// you to calculate the cost of inlining a function via a pointer. > The > > - /// result assumes that the inlined version will always be used. > You should > > - /// weight it yourself in cases where this callee will not always > be called. > > - InlineCost getInlineCost(CallSite CS, Function *Callee); > > - > > - /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner > should use a > > - /// higher threshold to determine if the function call should be > inlined. > > - float getInlineFudgeFactor(CallSite CS); > > + /// Note that threshold is passed into this function. Only costs > below the > > + /// threshold are computed with any accuracy. The threshold can be > used to > > + /// bound the computation necessary to determine whether the cost is > > + /// sufficiently low to warrant inlining. > > + InlineCost getInlineCost(CallSite CS, int Threshold); > > > > /// resetCachedFunctionInfo - erase any cached cost info for this > function. 
> > void resetCachedCostInfo(Function* Caller) { > > - CachedFunctionInfo[Caller] = FunctionInfo(); > > } > > > > /// growCachedCostInfo - update the cached cost info for Caller > after Callee > > > > Modified: llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h (original) > > +++ llvm/trunk/include/llvm/Transforms/IPO/InlinerPass.h Sat Mar 31 > 07:42:41 2012 > > @@ -65,11 +65,6 @@ > > /// > > virtual InlineCost getInlineCost(CallSite CS) = 0; > > > > - // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should > use a > > - // higher threshold to determine if the function call should be > inlined. > > - /// > > - virtual float getInlineFudgeFactor(CallSite CS) = 0; > > - > > /// resetCachedCostInfo - erase any cached cost data from the derived > class. > > /// If the derived class has no such data this can be empty. > > /// > > > > Modified: llvm/trunk/lib/Analysis/CodeMetrics.cpp > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CodeMetrics.cpp?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/lib/Analysis/CodeMetrics.cpp (original) > > +++ llvm/trunk/lib/Analysis/CodeMetrics.cpp Sat Mar 31 07:42:41 2012 > > @@ -50,6 +50,52 @@ > > return false; > > } > > > > +bool llvm::isInstructionFree(const Instruction *I, const TargetData > *TD) { > > + if (isa(I)) > > + return true; > > + > > + // If a GEP has all constant indices, it will probably be folded with > > + // a load/store. 
> > + if (const GetElementPtrInst *GEP = dyn_cast(I)) > > + return GEP->hasAllConstantIndices(); > > + > > + if (const IntrinsicInst *II = dyn_cast(I)) { > > + switch (II->getIntrinsicID()) { > > + default: > > + return false; > > + case Intrinsic::dbg_declare: > > + case Intrinsic::dbg_value: > > + case Intrinsic::invariant_start: > > + case Intrinsic::invariant_end: > > + case Intrinsic::lifetime_start: > > + case Intrinsic::lifetime_end: > > + case Intrinsic::objectsize: > > + case Intrinsic::ptr_annotation: > > + case Intrinsic::var_annotation: > > + // These intrinsics don't count as size. > > + return true; > > + } > > + } > > + > > + if (const CastInst *CI = dyn_cast(I)) { > > + // Noop casts, including ptr <-> int, don't count. > > + if (CI->isLosslessCast() || isa(CI) || > isa(CI)) > > + return true; > > + // trunc to a native type is free (assuming the target has compare > and > > + // shift-right of the same width). > > + if (TD && isa(CI) && > > + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) > > + return true; > > + // Result of a cmp instruction is often extended (to be used by > other > > + // cmp instructions, logical or return instructions). These are > usually > > + // nop on most sane targets. > > + if (isa(CI->getOperand(0))) > > + return true; > > + } > > + > > + return false; > > +} > > + > > /// analyzeBasicBlock - Fill in the current structure with information > gleaned > > /// from the specified block. > > void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, > > @@ -58,27 +104,11 @@ > > unsigned NumInstsBeforeThisBB = NumInsts; > > for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); > > II != E; ++II) { > > - if (isa(II)) continue; // PHI nodes don't count. > > + if (isInstructionFree(II, TD)) > > + continue; > > > > // Special handling for calls. 
> > if (isa(II) || isa(II)) { > > - if (const IntrinsicInst *IntrinsicI = > dyn_cast(II)) { > > - switch (IntrinsicI->getIntrinsicID()) { > > - default: break; > > - case Intrinsic::dbg_declare: > > - case Intrinsic::dbg_value: > > - case Intrinsic::invariant_start: > > - case Intrinsic::invariant_end: > > - case Intrinsic::lifetime_start: > > - case Intrinsic::lifetime_end: > > - case Intrinsic::objectsize: > > - case Intrinsic::ptr_annotation: > > - case Intrinsic::var_annotation: > > - // These intrinsics don't count as size. > > - continue; > > - } > > - } > > - > > ImmutableCallSite CS(cast(II)); > > > > if (const Function *F = CS.getCalledFunction()) { > > @@ -115,28 +145,6 @@ > > if (isa(II) || II->getType()->isVectorTy()) > > ++NumVectorInsts; > > > > - if (const CastInst *CI = dyn_cast(II)) { > > - // Noop casts, including ptr <-> int, don't count. > > - if (CI->isLosslessCast() || isa(CI) || > > - isa(CI)) > > - continue; > > - // trunc to a native type is free (assuming the target has > compare and > > - // shift-right of the same width). > > - if (isa(CI) && TD && > > - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) > > - continue; > > - // Result of a cmp instruction is often extended (to be used by > other > > - // cmp instructions, logical or return instructions). These are > usually > > - // nop on most sane targets. > > - if (isa(CI->getOperand(0))) > > - continue; > > - } else if (const GetElementPtrInst *GEPI = > dyn_cast(II)){ > > - // If a GEP has all constant indices, it will probably be folded > with > > - // a load/store. 
> > - if (GEPI->hasAllConstantIndices()) > > - continue; > > - } > > - > > ++NumInsts; > > } > > > > > > Modified: llvm/trunk/lib/Analysis/InlineCost.cpp > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) > > +++ llvm/trunk/lib/Analysis/InlineCost.cpp Sat Mar 31 07:42:41 2012 > > @@ -11,659 +11,1014 @@ > > // > > > //===----------------------------------------------------------------------===// > > > > +#define DEBUG_TYPE "inline-cost" > > #include "llvm/Analysis/InlineCost.h" > > +#include "llvm/Analysis/ConstantFolding.h" > > +#include "llvm/Analysis/InstructionSimplify.h" > > #include "llvm/Support/CallSite.h" > > +#include "llvm/Support/Debug.h" > > +#include "llvm/Support/InstVisitor.h" > > +#include "llvm/Support/GetElementPtrTypeIterator.h" > > +#include "llvm/Support/raw_ostream.h" > > #include "llvm/CallingConv.h" > > #include "llvm/IntrinsicInst.h" > > +#include "llvm/Operator.h" > > +#include "llvm/GlobalAlias.h" > > #include "llvm/Target/TargetData.h" > > +#include "llvm/ADT/STLExtras.h" > > +#include "llvm/ADT/SetVector.h" > > +#include "llvm/ADT/SmallVector.h" > > #include "llvm/ADT/SmallPtrSet.h" > > > > using namespace llvm; > > > > -unsigned > InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant( > > - const CodeMetrics &Metrics, Value *V) { > > - unsigned Reduction = 0; > > - SmallVector Worklist; > > - Worklist.push_back(V); > > - do { > > - Value *V = Worklist.pop_back_val(); > > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI > != E;++UI){ > > - User *U = *UI; > > - if (isa(U) || isa(U)) { > > - // We will be able to eliminate all but one of the successors. 
> > - const TerminatorInst &TI = cast(*U); > > - const unsigned NumSucc = TI.getNumSuccessors(); > > - unsigned Instrs = 0; > > - for (unsigned I = 0; I != NumSucc; ++I) > > - Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I)); > > - // We don't know which blocks will be eliminated, so use the > average size. > > - Reduction += > InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; > > - continue; > > +namespace { > > + > > +class CallAnalyzer : public InstVisitor { > > + typedef InstVisitor Base; > > + friend class InstVisitor; > > + > > + // TargetData if available, or null. > > + const TargetData *const TD; > > + > > + // The called function. > > + Function &F; > > + > > + int Threshold; > > + int Cost; > > + const bool AlwaysInline; > > + > > + bool IsRecursive; > > + bool ExposesReturnsTwice; > > + bool HasDynamicAlloca; > > + unsigned NumInstructions, NumVectorInstructions; > > + int FiftyPercentVectorBonus, TenPercentVectorBonus; > > + int VectorBonus; > > + > > + // While we walk the potentially-inlined instructions, we build up and > > + // maintain a mapping of simplified values specific to this callsite. > The > > + // idea is to propagate any special information we have about > arguments to > > + // this call through the inlinable section of the function, and > account for > > + // likely simplifications post-inlining. The most important aspect we > track > > + // is CFG altering simplifications -- when we prove a basic block > dead, that > > + // can cause dramatic shifts in the cost of inlining a function. > > + DenseMap SimplifiedValues; > > + > > + // Keep track of the values which map back (through function > arguments) to > > + // allocas on the caller stack which could be simplified through SROA. > > + DenseMap SROAArgValues; > > + > > + // The mapping of caller Alloca values to their accumulated cost > savings. If > > + // we have to disable SROA for one of the allocas, this tells us how > much > > + // cost must be added. 
> > + DenseMap SROAArgCosts; > > + > > + // Keep track of values which map to a pointer base and constant > offset. > > + DenseMap > ConstantOffsetPtrs; > > + > > + // Custom simplification helper routines. > > + bool isAllocaDerivedArg(Value *V); > > + bool lookupSROAArgAndCost(Value *V, Value *&Arg, > > + DenseMap::iterator &CostIt); > > + void disableSROA(DenseMap::iterator CostIt); > > + void disableSROA(Value *V); > > + void accumulateSROACost(DenseMap::iterator CostIt, > > + int InstructionCost); > > + bool handleSROACandidate(bool IsSROAValid, > > + DenseMap::iterator CostIt, > > + int InstructionCost); > > + bool isGEPOffsetConstant(GetElementPtrInst &GEP); > > + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); > > + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); > > + > > + // Custom analysis routines. > > + bool analyzeBlock(BasicBlock *BB); > > + > > + // Disable several entry points to the visitor so we don't > accidentally use > > + // them by declaring but not defining them here. > > + void visit(Module *); void visit(Module &); > > + void visit(Function *); void visit(Function &); > > + void visit(BasicBlock *); void visit(BasicBlock &); > > + > > + // Provide base case for our instruction visit. > > + bool visitInstruction(Instruction &I); > > + > > + // Our visit overrides. 
> > + bool visitAlloca(AllocaInst &I); > > + bool visitPHI(PHINode &I); > > + bool visitGetElementPtr(GetElementPtrInst &I); > > + bool visitBitCast(BitCastInst &I); > > + bool visitPtrToInt(PtrToIntInst &I); > > + bool visitIntToPtr(IntToPtrInst &I); > > + bool visitCastInst(CastInst &I); > > + bool visitUnaryInstruction(UnaryInstruction &I); > > + bool visitICmp(ICmpInst &I); > > + bool visitSub(BinaryOperator &I); > > + bool visitBinaryOperator(BinaryOperator &I); > > + bool visitLoad(LoadInst &I); > > + bool visitStore(StoreInst &I); > > + bool visitCallSite(CallSite CS); > > + > > +public: > > + CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) > > + : TD(TD), F(Callee), Threshold(Threshold), Cost(0), > > + AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), > > + IsRecursive(false), ExposesReturnsTwice(false), > HasDynamicAlloca(false), > > + NumInstructions(0), NumVectorInstructions(0), > > + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), > VectorBonus(0), > > + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), > > + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), > > + NumInstructionsSimplified(0), SROACostSavings(0), > SROACostSavingsLost(0) { > > + } > > + > > + bool analyzeCall(CallSite CS); > > + > > + int getThreshold() { return Threshold; } > > + int getCost() { return Cost; } > > + > > + // Keep a bunch of stats about the cost savings found so we can print > them > > + // out when debugging. > > + unsigned NumConstantArgs; > > + unsigned NumConstantOffsetPtrArgs; > > + unsigned NumAllocaArgs; > > + unsigned NumConstantPtrCmps; > > + unsigned NumConstantPtrDiffs; > > + unsigned NumInstructionsSimplified; > > + unsigned SROACostSavings; > > + unsigned SROACostSavingsLost; > > + > > + void dump(); > > +}; > > + > > +} // namespace > > + > > +/// \brief Test whether the given value is an Alloca-derived function > argument. 
> > +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { > > + return SROAArgValues.count(V); > > +} > > + > > +/// \brief Lookup the SROA-candidate argument and cost iterator which V > maps to. > > +/// Returns false if V does not map to a SROA-candidate. > > +bool CallAnalyzer::lookupSROAArgAndCost( > > + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { > > + if (SROAArgValues.empty() || SROAArgCosts.empty()) > > + return false; > > + > > + DenseMap::iterator ArgIt = SROAArgValues.find(V); > > + if (ArgIt == SROAArgValues.end()) > > + return false; > > + > > + Arg = ArgIt->second; > > + CostIt = SROAArgCosts.find(Arg); > > + return CostIt != SROAArgCosts.end(); > > +} > > + > > +/// \brief Disable SROA for the candidate marked by this cost iterator. > > +/// > > +/// This markes the candidate as no longer viable for SROA, and adds > the cost > > +/// savings associated with it back into the inline cost measurement. > > +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) > { > > + // If we're no longer able to perform SROA we need to undo its cost > savings > > + // and prevent subsequent analysis. > > + Cost += CostIt->second; > > + SROACostSavings -= CostIt->second; > > + SROACostSavingsLost += CostIt->second; > > + SROAArgCosts.erase(CostIt); > > +} > > + > > +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. > > +void CallAnalyzer::disableSROA(Value *V) { > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) > > + disableSROA(CostIt); > > +} > > + > > +/// \brief Accumulate the given cost for a particular SROA candidate. > > +void CallAnalyzer::accumulateSROACost(DenseMap::iterator > CostIt, > > + int InstructionCost) { > > + CostIt->second += InstructionCost; > > + SROACostSavings += InstructionCost; > > +} > > + > > +/// \brief Helper for the common pattern of handling a SROA candidate. 
> > +/// Either accumulates the cost savings if the SROA remains valid, or > disables > > +/// SROA for the candidate. > > +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, > > + DenseMap::iterator > CostIt, > > + int InstructionCost) { > > + if (IsSROAValid) { > > + accumulateSROACost(CostIt, InstructionCost); > > + return true; > > + } > > + > > + disableSROA(CostIt); > > + return false; > > +} > > + > > +/// \brief Check whether a GEP's indices are all constant. > > +/// > > +/// Respects any simplified values known during the analysis of this > callsite. > > +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { > > + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != > E; ++I) > > + if (!isa(*I) && !SimplifiedValues.lookup(*I)) > > + return false; > > + > > + return true; > > +} > > + > > +/// \brief Accumulate a constant GEP offset into an APInt if possible. > > +/// > > +/// Returns false if unable to compute the offset for any reason. > Respects any > > +/// simplified values known during the analysis of this callsite. > > +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) > { > > + if (!TD) > > + return false; > > + > > + unsigned IntPtrWidth = TD->getPointerSizeInBits(); > > + assert(IntPtrWidth == Offset.getBitWidth()); > > + > > + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = > gep_type_end(GEP); > > + GTI != GTE; ++GTI) { > > + ConstantInt *OpC = dyn_cast(GTI.getOperand()); > > + if (!OpC) > > + if (Constant *SimpleOp = > SimplifiedValues.lookup(GTI.getOperand())) > > + OpC = dyn_cast(SimpleOp); > > + if (!OpC) > > + return false; > > + if (OpC->isZero()) continue; > > + > > + // Handle a struct index, which adds its field offset to the > pointer. 
> > + if (StructType *STy = dyn_cast(*GTI)) { > > + unsigned ElementIdx = OpC->getZExtValue(); > > + const StructLayout *SL = TD->getStructLayout(STy); > > + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); > > + continue; > > + } > > + > > + APInt TypeSize(IntPtrWidth, > TD->getTypeAllocSize(GTI.getIndexedType())); > > + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; > > + } > > + return true; > > +} > > + > > +bool CallAnalyzer::visitAlloca(AllocaInst &I) { > > + // FIXME: Check whether inlining will turn a dynamic alloca into a > static > > + // alloca, and handle that case. > > + > > + // We will happily inline tatic alloca instructions or dynamic alloca > > + // instructions in always-inline situations. > > + if (AlwaysInline || I.isStaticAlloca()) > > + return Base::visitAlloca(I); > > + > > + // FIXME: This is overly conservative. Dynamic allocas are > inefficient for > > + // a variety of reasons, and so we would like to not inline them into > > + // functions which don't currently have a dynamic alloca. This simply > > + // disables inlining altogether in the presence of a dynamic alloca. > > + HasDynamicAlloca = true; > > + return false; > > +} > > + > > +bool CallAnalyzer::visitPHI(PHINode &I) { > > + // FIXME: We should potentially be tracking values through phi nodes, > > + // especially when they collapse to a single value due to deleted CFG > edges > > + // during inlining. > > + > > + // FIXME: We need to propagate SROA *disabling* through phi nodes, > even > > + // though we don't want to propagate it's bonuses. The idea is to > disable > > + // SROA if it *might* be used in an inappropriate manner. > > + > > + // Phi nodes are always zero-cost. 
> > + return true; > > +} > > + > > +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), > > + SROAArg, CostIt); > > + > > + // Try to fold GEPs of constant-offset call site argument pointers. > This > > + // requires target data and inbounds GEPs. > > + if (TD && I.isInBounds()) { > > + // Check if we have a base + offset for the pointer. > > + Value *Ptr = I.getPointerOperand(); > > + std::pair BaseAndOffset = > ConstantOffsetPtrs.lookup(Ptr); > > + if (BaseAndOffset.first) { > > + // Check if the offset of this GEP is constant, and if so > accumulate it > > + // into Offset. > > + if (!accumulateGEPOffset(cast(I), > BaseAndOffset.second)) { > > + // Non-constant GEPs aren't folded, and disable SROA. > > + if (SROACandidate) > > + disableSROA(CostIt); > > + return false; > > } > > > > - // Figure out if this instruction will be removed due to simple > constant > > - // propagation. > > - Instruction &Inst = cast(*U); > > - > > - // We can't constant propagate instructions which have effects or > > - // read memory. > > - // > > - // FIXME: It would be nice to capture the fact that a load from a > > - // pointer-to-constant-global is actually a *really* good thing > to zap. > > - // Unfortunately, we don't know the pointer that may get > propagated here, > > - // so we can't make this decision. > > - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || > > - isa(Inst)) > > - continue; > > + // Add the result as a new mapping to Base + Offset. 
> > + ConstantOffsetPtrs[&I] = BaseAndOffset; > > > > - bool AllOperandsConstant = true; > > - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) > > - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != > V) { > > - AllOperandsConstant = false; > > - break; > > - } > > - if (!AllOperandsConstant) > > - continue; > > + // Also handle SROA candidates here, we already know that the GEP > is > > + // all-constant indexed. > > + if (SROACandidate) > > + SROAArgValues[&I] = SROAArg; > > > > - // We will get to remove this instruction... > > - Reduction += InlineConstants::InstrCost; > > + return true; > > + } > > + } > > + > > + if (isGEPOffsetConstant(I)) { > > + if (SROACandidate) > > + SROAArgValues[&I] = SROAArg; > > + > > + // Constant GEPs are modeled as free. > > + return true; > > + } > > + > > + // Variable GEPs will require math and will disable SROA. > > + if (SROACandidate) > > + disableSROA(CostIt); > > + return false; > > +} > > > > - // And any other instructions that use it which become constants > > - // themselves. > > - Worklist.push_back(&Inst); > > +bool CallAnalyzer::visitBitCast(BitCastInst &I) { > > + // Propagate constants through bitcasts. > > + if (Constant *COp = dyn_cast(I.getOperand(0))) > > + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { > > + SimplifiedValues[&I] = C; > > + return true; > > + } > > + > > + // Track base/offsets through casts > > + std::pair BaseAndOffset > > + = ConstantOffsetPtrs.lookup(I.getOperand(0)); > > + // Casts don't change the offset, just wrap it up. > > + if (BaseAndOffset.first) > > + ConstantOffsetPtrs[&I] = BaseAndOffset; > > + > > + // Also look for SROA candidates here. > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) > > + SROAArgValues[&I] = SROAArg; > > + > > + // Bitcasts are always zero cost. 
> > + return true; > > +} > > + > > +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { > > + // Propagate constants through ptrtoint. > > + if (Constant *COp = dyn_cast(I.getOperand(0))) > > + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { > > + SimplifiedValues[&I] = C; > > + return true; > > } > > - } while (!Worklist.empty()); > > - return Reduction; > > + > > + // Track base/offset pairs when converted to a plain integer provided > the > > + // integer is large enough to represent the pointer. > > + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); > > + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { > > + std::pair BaseAndOffset > > + = ConstantOffsetPtrs.lookup(I.getOperand(0)); > > + if (BaseAndOffset.first) > > + ConstantOffsetPtrs[&I] = BaseAndOffset; > > + } > > + > > + // This is really weird. Technically, ptrtoint will disable SROA. > However, > > + // unless that ptrtoint is *used* somewhere in the live basic blocks > after > > + // inlining, it will be nuked, and SROA should proceed. All of the > uses which > > + // would block SROA would also block SROA if applied directly to a > pointer, > > + // and so we can just add the integer in here. The only places where > SROA is > > + // preserved either cannot fire on an integer, or won't in-and-of > themselves > > + // disable SROA (ext) w/o some later use that we would see and > disable. > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) > > + SROAArgValues[&I] = SROAArg; > > + > > + // A ptrtoint cast is free so long as the result is large enough to > store the > > + // pointer, and a legal integer type. > > + return TD && TD->isLegalInteger(IntegerSize) && > > + IntegerSize >= TD->getPointerSizeInBits(); > > +} > > + > > +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { > > + // Propagate constants through ptrtoint. 
> > + if (Constant *COp = dyn_cast(I.getOperand(0))) > > + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { > > + SimplifiedValues[&I] = C; > > + return true; > > + } > > + > > + // Track base/offset pairs when round-tripped through a pointer > without > > + // modifications provided the integer is not too large. > > + Value *Op = I.getOperand(0); > > + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); > > + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { > > + std::pair BaseAndOffset = > ConstantOffsetPtrs.lookup(Op); > > + if (BaseAndOffset.first) > > + ConstantOffsetPtrs[&I] = BaseAndOffset; > > + } > > + > > + // "Propagate" SROA here in the same manner as we do for ptrtoint > above. > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) > > + SROAArgValues[&I] = SROAArg; > > + > > + // An inttoptr cast is free so long as the input is a legal integer > type > > + // which doesn't contain values outside the range of a pointer. > > + return TD && TD->isLegalInteger(IntegerSize) && > > + IntegerSize <= TD->getPointerSizeInBits(); > > +} > > + > > +bool CallAnalyzer::visitCastInst(CastInst &I) { > > + // Propagate constants through ptrtoint. > > + if (Constant *COp = dyn_cast(I.getOperand(0))) > > + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, > I.getType())) { > > + SimplifiedValues[&I] = C; > > + return true; > > + } > > + > > + // Disable SROA in the face of arbitrary casts we don't whitelist > elsewhere. > > + disableSROA(I.getOperand(0)); > > + > > + // No-op casts don't have any cost. > > + if (I.isLosslessCast()) > > + return true; > > + > > + // trunc to a native type is free (assuming the target has compare and > > + // shift-right of the same width). 
> > + if (TD && isa(I) && > > + TD->isLegalInteger(TD->getTypeSizeInBits(I.getType()))) > > + return true; > > + > > + // Result of a cmp instruction is often extended (to be used by other > > + // cmp instructions, logical or return instructions). These are > usually > > + // no-ops on most sane targets. > > + if (isa(I.getOperand(0))) > > + return true; > > + > > + // Assume the rest of the casts require work. > > + return false; > > } > > > > -static unsigned countCodeReductionForAllocaICmp(const CodeMetrics > &Metrics, > > - ICmpInst *ICI) { > > - unsigned Reduction = 0; > > +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { > > + Value *Operand = I.getOperand(0); > > + Constant *Ops[1] = { dyn_cast(Operand) }; > > + if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) > > + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), > I.getType(), > > + Ops, TD)) { > > + SimplifiedValues[&I] = C; > > + return true; > > + } > > > > - // Bail if this is comparing against a non-constant; there is nothing > we can > > - // do there. > > - if (!isa(ICI->getOperand(1))) > > - return Reduction; > > + // Disable any SROA on the argument to arbitrary unary operators. > > + disableSROA(Operand); > > > > - // An icmp pred (alloca, C) becomes true if the predicate is true when > > - // equal and false otherwise. 
> > - bool Result = ICI->isTrueWhenEqual(); > > + return false; > > +} > > > > - SmallVector Worklist; > > - Worklist.push_back(ICI); > > - do { > > - Instruction *U = Worklist.pop_back_val(); > > - Reduction += InlineConstants::InstrCost; > > - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); > > - UI != UE; ++UI) { > > - Instruction *I = dyn_cast(*UI); > > - if (!I || I->mayHaveSideEffects()) continue; > > - if (I->getNumOperands() == 1) > > - Worklist.push_back(I); > > - if (BinaryOperator *BO = dyn_cast(I)) { > > - // If BO produces the same value as U, then the other operand is > > - // irrelevant and we can put it into the Worklist to continue > > - // deleting dead instructions. If BO produces the same value as > the > > - // other operand, we can delete BO but that's it. > > - if (Result == true) { > > - if (BO->getOpcode() == Instruction::Or) > > - Worklist.push_back(I); > > - if (BO->getOpcode() == Instruction::And) > > - Reduction += InlineConstants::InstrCost; > > - } else { > > - if (BO->getOpcode() == Instruction::Or || > > - BO->getOpcode() == Instruction::Xor) > > - Reduction += InlineConstants::InstrCost; > > - if (BO->getOpcode() == Instruction::And) > > - Worklist.push_back(I); > > - } > > +bool CallAnalyzer::visitICmp(ICmpInst &I) { > > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > > + // First try to handle simplified comparisons. > > + if (!isa(LHS)) > > + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) > > + LHS = SimpleLHS; > > + if (!isa(RHS)) > > + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) > > + RHS = SimpleRHS; > > + if (Constant *CLHS = dyn_cast(LHS)) > > + if (Constant *CRHS = dyn_cast(RHS)) > > + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, > CRHS)) { > > + SimplifiedValues[&I] = C; > > + return true; > > } > > - if (BranchInst *BI = dyn_cast(I)) { > > - BasicBlock *BB = BI->getSuccessor(Result ? 
0 : 1); > > - if (BB->getSinglePredecessor()) > > - Reduction > > - += InlineConstants::InstrCost * > Metrics.NumBBInsts.lookup(BB); > > + > > + // Otherwise look for a comparison between constant offset pointers > with > > + // a common base. > > + Value *LHSBase, *RHSBase; > > + APInt LHSOffset, RHSOffset; > > + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); > > + if (LHSBase) { > > + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); > > + if (RHSBase && LHSBase == RHSBase) { > > + // We have common bases, fold the icmp to a constant based on the > > + // offsets. > > + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); > > + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); > > + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, > CRHS)) { > > + SimplifiedValues[&I] = C; > > + ++NumConstantPtrCmps; > > + return true; > > } > > } > > - } while (!Worklist.empty()); > > + } > > > > - return Reduction; > > -} > > + // If the comparison is an equality comparison with null, we can > simplify it > > + // for any alloca-derived argument. > > + if (I.isEquality() && isa(I.getOperand(1))) > > + if (isAllocaDerivedArg(I.getOperand(0))) { > > + // We can actually predict the result of comparisons between an > > + // alloca-derived value and null. Note that this fires regardless > of > > + // SROA firing. > > + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; > > + SimplifiedValues[&I] = IsNotEqual ? > ConstantInt::getTrue(I.getType()) > > + : > ConstantInt::getFalse(I.getType()); > > + return true; > > + } > > > > -/// \brief Compute the reduction possible for a given instruction if we > are able > > -/// to SROA an alloca. > > -/// > > -/// The reduction for this instruction is added to the SROAReduction > output > > -/// parameter. Returns false if this instruction is expected to defeat > SROA in > > -/// general. 
> > -static bool countCodeReductionForSROAInst(Instruction *I, > > - SmallVectorImpl > &Worklist, > > - unsigned &SROAReduction) { > > - if (LoadInst *LI = dyn_cast(I)) { > > - if (!LI->isSimple()) > > - return false; > > - SROAReduction += InlineConstants::InstrCost; > > - return true; > > + // Finally check for SROA candidates in comparisons. > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > > + if (isa(I.getOperand(1))) { > > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > > + return true; > > + } > > + > > + disableSROA(CostIt); > > } > > > > - if (StoreInst *SI = dyn_cast(I)) { > > - if (!SI->isSimple()) > > - return false; > > - SROAReduction += InlineConstants::InstrCost; > > - return true; > > + return false; > > +} > > + > > +bool CallAnalyzer::visitSub(BinaryOperator &I) { > > + // Try to handle a special case: we can fold computing the difference > of two > > + // constant-related pointers. > > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > > + Value *LHSBase, *RHSBase; > > + APInt LHSOffset, RHSOffset; > > + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); > > + if (LHSBase) { > > + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); > > + if (RHSBase && LHSBase == RHSBase) { > > + // We have common bases, fold the subtract to a constant based on > the > > + // offsets. > > + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); > > + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); > > + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { > > + SimplifiedValues[&I] = C; > > + ++NumConstantPtrDiffs; > > + return true; > > + } > > + } > > } > > > > - if (GetElementPtrInst *GEP = dyn_cast(I)) { > > - // If the GEP has variable indices, we won't be able to do much > with it. > > - if (!GEP->hasAllConstantIndices()) > > - return false; > > - // A non-zero GEP will likely become a mask operation after SROA. 
> > - if (GEP->hasAllZeroIndices()) > > - SROAReduction += InlineConstants::InstrCost; > > - Worklist.push_back(GEP); > > + // Otherwise, fall back to the generic logic for simplifying and > handling > > + // instructions. > > + return Base::visitSub(I); > > +} > > + > > +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { > > + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); > > + if (!isa(LHS)) > > + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) > > + LHS = SimpleLHS; > > + if (!isa(RHS)) > > + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) > > + RHS = SimpleRHS; > > + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); > > + if (Constant *C = dyn_cast_or_null(SimpleV)) { > > + SimplifiedValues[&I] = C; > > return true; > > } > > > > - if (BitCastInst *BCI = dyn_cast(I)) { > > - // Track pointer through bitcasts. > > - Worklist.push_back(BCI); > > - SROAReduction += InlineConstants::InstrCost; > > - return true; > > + // Disable any SROA on arguments to arbitrary, unsimplified binary > operators. > > + disableSROA(LHS); > > + disableSROA(RHS); > > + > > + return false; > > +} > > + > > +bool CallAnalyzer::visitLoad(LoadInst &I) { > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > > + if (I.isSimple()) { > > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > > + return true; > > + } > > + > > + disableSROA(CostIt); > > } > > > > - // We just look for non-constant operands to ICmp instructions as > those will > > - // defeat SROA. The actual reduction for these happens even without > SROA. > > - if (ICmpInst *ICI = dyn_cast(I)) > > - return isa(ICI->getOperand(1)); > > - > > - if (SelectInst *SI = dyn_cast(I)) { > > - // SROA can handle a select of alloca iff all uses of the alloca are > > - // loads, and dereferenceable. We assume it's dereferenceable since > > - // we're told the input is an alloca. 
> > - for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); > > - UI != UE; ++UI) { > > - LoadInst *LI = dyn_cast(*UI); > > - if (LI == 0 || !LI->isSimple()) > > - return false; > > + return false; > > +} > > + > > +bool CallAnalyzer::visitStore(StoreInst &I) { > > + Value *SROAArg; > > + DenseMap::iterator CostIt; > > + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { > > + if (I.isSimple()) { > > + accumulateSROACost(CostIt, InlineConstants::InstrCost); > > + return true; > > } > > - // We don't know whether we'll be deleting the rest of the chain of > > - // instructions from the SelectInst on, because we don't know > whether > > - // the other side of the select is also an alloca or not. > > - return true; > > + > > + disableSROA(CostIt); > > + } > > + > > + return false; > > +} > > + > > +bool CallAnalyzer::visitCallSite(CallSite CS) { > > + if (CS.isCall() && > cast(CS.getInstruction())->canReturnTwice() && > > + !F.hasFnAttr(Attribute::ReturnsTwice)) { > > + // This aborts the entire analysis. > > + ExposesReturnsTwice = true; > > + return false; > > } > > > > - if (IntrinsicInst *II = dyn_cast(I)) { > > + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) > { > > switch (II->getIntrinsicID()) { > > default: > > - return false; > > + return Base::visitCallSite(CS); > > + > > + case Intrinsic::dbg_declare: > > + case Intrinsic::dbg_value: > > + case Intrinsic::invariant_start: > > + case Intrinsic::invariant_end: > > + case Intrinsic::lifetime_start: > > + case Intrinsic::lifetime_end: > > case Intrinsic::memset: > > case Intrinsic::memcpy: > > case Intrinsic::memmove: > > - case Intrinsic::lifetime_start: > > - case Intrinsic::lifetime_end: > > - // SROA can usually chew through these intrinsics. 
> > - SROAReduction += InlineConstants::InstrCost; > > + case Intrinsic::objectsize: > > + case Intrinsic::ptr_annotation: > > + case Intrinsic::var_annotation: > > + // SROA can usually chew through these intrinsics and they have > no cost > > + // so don't pay the price of analyzing them in detail. > > return true; > > } > > } > > > > - // If there is some other strange instruction, we're not going to be > > - // able to do much if we inline this. > > + if (Function *F = CS.getCalledFunction()) { > > + if (F == CS.getInstruction()->getParent()->getParent()) { > > + // This flag will fully abort the analysis, so don't bother with > anything > > + // else. > > + IsRecursive = true; > > + return false; > > + } > > + > > + if (!callIsSmall(F)) { > > + // We account for the average 1 instruction per call argument > setup > > + // here. > > + Cost += CS.arg_size() * InlineConstants::InstrCost; > > + > > + // Everything other than inline ASM will also have a significant > cost > > + // merely from making the call. > > + if (!isa(CS.getCalledValue())) > > + Cost += InlineConstants::CallPenalty; > > + } > > + > > + return Base::visitCallSite(CS); > > + } > > + > > + // Otherwise we're in a very special case -- an indirect function > call. See > > + // if we can be particularly clever about this. > > + Value *Callee = CS.getCalledValue(); > > + > > + // First, pay the price of the argument setup. We account for the > average > > + // 1 instruction per call argument setup here. > > + Cost += CS.arg_size() * InlineConstants::InstrCost; > > + > > + // Next, check if this happens to be an indirect function call to a > known > > + // function in this inline context. If not, we've done all we can. > > + Function *F = > dyn_cast_or_null(SimplifiedValues.lookup(Callee)); > > + if (!F) > > + return Base::visitCallSite(CS); > > + > > + // If we have a constant that we are calling as a function, we can > peer > > + // through it and see the function target. 
This happens not > infrequently > > + // during devirtualization and so we want to give it a hefty bonus for > > + // inlining, but cap that bonus in the event that inlining wouldn't > pan > > + // out. Pretend to inline the function, with a custom threshold. > > + CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); > > + if (CA.analyzeCall(CS)) { > > + // We were able to inline the indirect call! Subtract the cost from > the > > + // bonus we want to apply, but don't go below zero. > > + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - > CA.getCost()); > > + } > > + > > + return Base::visitCallSite(CS); > > +} > > + > > +bool CallAnalyzer::visitInstruction(Instruction &I) { > > + // We found something we don't understand or can't handle. Mark any > SROA-able > > + // values in the operand list as no longer viable. > > + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; > ++OI) > > + disableSROA(*OI); > > + > > return false; > > } > > > > -unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca( > > - const CodeMetrics &Metrics, Value *V) { > > - if (!V->getType()->isPointerTy()) return 0; // Not a pointer > > - unsigned Reduction = 0; > > - unsigned SROAReduction = 0; > > - bool CanSROAAlloca = true; > > > > - SmallVector Worklist; > > - Worklist.push_back(V); > > - do { > > - Value *V = Worklist.pop_back_val(); > > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > > - UI != E; ++UI){ > > - Instruction *I = cast(*UI); > > +/// \brief Analyze a basic block for its contribution to the inline > cost. > > +/// > > +/// This method walks the analyzer over every instruction in the given > basic > > +/// block and accounts for their cost during inlining at this callsite. > It > > +/// aborts early if the threshold has been exceeded or an impossible to > inline > > +/// construct has been detected. It returns false if inlining is no > longer > > +/// viable, and true if inlining remains viable. 
> > +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { > > + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); > > + I != E; ++I) { > > + ++NumInstructions; > > + if (isa(I) || I->getType()->isVectorTy()) > > + ++NumVectorInstructions; > > + > > + // If the instruction simplified to a constant, there is no cost to > this > > + // instruction. Visit the instructions using our InstVisitor to > account for > > + // all of the per-instruction logic. The visit tree returns true if > we > > + // consumed the instruction in any way, and false if the > instruction's base > > + // cost should count against inlining. > > + if (Base::visit(I)) > > + ++NumInstructionsSimplified; > > + else > > + Cost += InlineConstants::InstrCost; > > > > - if (ICmpInst *ICI = dyn_cast(I)) > > - Reduction += countCodeReductionForAllocaICmp(Metrics, ICI); > > + // If visiting this instruction detected an uninlinable pattern, > abort. > > + if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) > > + return false; > > > > - if (CanSROAAlloca) > > - CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist, > > - SROAReduction); > > - } > > - } while (!Worklist.empty()); > > + if (NumVectorInstructions > NumInstructions/2) > > + VectorBonus = FiftyPercentVectorBonus; > > + else if (NumVectorInstructions > NumInstructions/10) > > + VectorBonus = TenPercentVectorBonus; > > + else > > + VectorBonus = 0; > > + > > + // Check if we've passed the threshold so we don't spin in huge basic > > + // blocks that will never inline. > > + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) > > + return false; > > + } > > > > - return Reduction + (CanSROAAlloca ? 
SROAReduction : 0); > > + return true; > > } > > > > -void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair( > > - const CodeMetrics &Metrics, DenseMap > &PointerArgs, > > - Value *V, unsigned ArgIdx) { > > - SmallVector Worklist; > > - Worklist.push_back(V); > > +/// \brief Compute the base pointer and cumulative constant offsets for > V. > > +/// > > +/// This strips all constant offsets off of V, leaving it the base > pointer, and > > +/// accumulates the total constant offset applied in the returned > constant. It > > +/// returns 0 if V is not a pointer, and returns the constant '0' if > there are > > +/// no constant offsets applied. > > +ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value > *&V) { > > + if (!TD || !V->getType()->isPointerTy()) > > + return 0; > > + > > + unsigned IntPtrWidth = TD->getPointerSizeInBits(); > > + APInt Offset = APInt::getNullValue(IntPtrWidth); > > + > > + // Even though we don't look through PHI nodes, we could be called on > an > > + // instruction in an unreachable block, which may be on a cycle. > > + SmallPtrSet Visited; > > + Visited.insert(V); > > do { > > - Value *V = Worklist.pop_back_val(); > > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); > > - UI != E; ++UI){ > > - Instruction *I = cast(*UI); > > - > > - if (GetElementPtrInst *GEP = dyn_cast(I)) { > > - // If the GEP has variable indices, we won't be able to do much > with it. > > - if (!GEP->hasAllConstantIndices()) > > - continue; > > - // Unless the GEP is in-bounds, some comparisons will be > non-constant. > > - // Fortunately, the real-world cases where this occurs uses > in-bounds > > - // GEPs, and so we restrict the optimization to them here. 
> > - if (!GEP->isInBounds()) > > - continue; > > + if (GEPOperator *GEP = dyn_cast(V)) { > > + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) > > + return 0; > > + V = GEP->getPointerOperand(); > > + } else if (Operator::getOpcode(V) == Instruction::BitCast) { > > + V = cast(V)->getOperand(0); > > + } else if (GlobalAlias *GA = dyn_cast(V)) { > > + if (GA->mayBeOverridden()) > > + break; > > + V = GA->getAliasee(); > > + } else { > > + break; > > + } > > + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); > > + } while (Visited.insert(V)); > > > > - // Constant indices just change the constant offset. Add the > resulting > > - // value both to our worklist for this argument, and to the set > of > > - // viable paired values with future arguments. > > - PointerArgs[GEP] = ArgIdx; > > - Worklist.push_back(GEP); > > - continue; > > - } > > + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); > > + return cast(ConstantInt::get(IntPtrTy, Offset)); > > +} > > > > - // Track pointer through casts. Even when the result is not a > pointer, it > > - // remains a constant relative to constants derived from other > constant > > - // pointers. > > - if (CastInst *CI = dyn_cast(I)) { > > - PointerArgs[CI] = ArgIdx; > > - Worklist.push_back(CI); > > - continue; > > - } > > +/// \brief Analyze a call site for potential inlining. > > +/// > > +/// Returns true if inlining this call is viable, and false if it is not > > +/// viable. It computes the cost and adjusts the threshold based on > numerous > > +/// factors and heuristics. If this method returns false but the > computed cost > > +/// is below the computed threshold, then inlining was forcibly > disabled by > > +/// some artifact of the routine. > > +bool CallAnalyzer::analyzeCall(CallSite CS) { > > + // Track whether the post-inlining function would have more than one > basic > > + // block. A single basic block is often intended for inlining. 
> Balloon the > > + // threshold by 50% until we pass the single-BB phase. > > + bool SingleBB = true; > > + int SingleBBBonus = Threshold / 2; > > + Threshold += SingleBBBonus; > > + > > + // Unless we are always-inlining, perform some tweaks to the cost and > > + // threshold based on the direct callsite information. > > + if (!AlwaysInline) { > > + // We want to more aggressively inline vector-dense kernels, so up > the > > + // threshold, and we'll lower it if the % of vector instructions > gets too > > + // low. > > + assert(NumInstructions == 0); > > + assert(NumVectorInstructions == 0); > > + FiftyPercentVectorBonus = Threshold; > > + TenPercentVectorBonus = Threshold / 2; > > + > > + // Subtract off one instruction per call argument as those will be > free after > > + // inlining. > > + Cost -= CS.arg_size() * InlineConstants::InstrCost; > > + > > + // If there is only one call of the function, and it has internal > linkage, > > + // the cost of inlining it drops dramatically. > > + if (F.hasLocalLinkage() && F.hasOneUse() && &F == > CS.getCalledFunction()) > > + Cost += InlineConstants::LastCallToStaticBonus; > > + > > + // If the instruction after the call, or if the normal destination > of the > > + // invoke is an unreachable instruction, the function is noreturn. > As such, > > + // there is little point in inlining this unless there is literally > zero cost. > > + if (InvokeInst *II = dyn_cast(CS.getInstruction())) { > > + if (isa(II->getNormalDest()->begin())) > > + Threshold = 1; > > + } else if > (isa(++BasicBlock::iterator(CS.getInstruction()))) > > + Threshold = 1; > > + > > + // If this function uses the coldcc calling convention, prefer not > to inline > > + // it. > > + if (F.getCallingConv() == CallingConv::Cold) > > + Cost += InlineConstants::ColdccPenalty; > > > > - // There are two instructions which produce a strict constant > value when > > - // applied to two related pointer values. Ignore everything else. 
> > - if (!isa(I) && I->getOpcode() != Instruction::Sub) > > - continue; > > - assert(I->getNumOperands() == 2); > > + // Check if we're done. This can happen due to bonuses and > penalties. > > + if (Cost > Threshold) > > + return false; > > + } > > > > - // Ensure that the two operands are in our set of potentially > paired > > - // pointers (or are derived from them). > > - Value *OtherArg = I->getOperand(0); > > - if (OtherArg == V) > > - OtherArg = I->getOperand(1); > > - DenseMap::const_iterator ArgIt > > - = PointerArgs.find(OtherArg); > > - if (ArgIt == PointerArgs.end()) > > - continue; > > - std::pair ArgPair(ArgIt->second, ArgIdx); > > - if (ArgPair.first > ArgPair.second) > > - std::swap(ArgPair.first, ArgPair.second); > > - > > - PointerArgPairWeights[ArgPair] > > - += countCodeReductionForConstant(Metrics, I); > > - } > > - } while (!Worklist.empty()); > > -} > > - > > -/// analyzeFunction - Fill in the current structure with information > gleaned > > -/// from the specified function. > > -void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, > > - const TargetData > *TD) { > > - Metrics.analyzeFunction(F, TD); > > - > > - // A function with exactly one return has it removed during the > inlining > > - // process (see InlineFunction), so don't count it. > > - // FIXME: This knowledge should really be encoded outside of > FunctionInfo. > > - if (Metrics.NumRets==1) > > - --Metrics.NumInsts; > > - > > - ArgumentWeights.reserve(F->arg_size()); > > - DenseMap PointerArgs; > > - unsigned ArgIdx = 0; > > - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I > != E; > > - ++I, ++ArgIdx) { > > - // Count how much code can be eliminated if one of the arguments is > > - // a constant or an alloca. 
> > - > ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, > I), > > - > countCodeReductionForAlloca(Metrics, I))); > > - > > - // If the argument is a pointer, also check for pairs of pointers > where > > - // knowing a fixed offset between them allows simplification. This > pattern > > - // arises mostly due to STL algorithm patterns where pointers are > used as > > - // random access iterators. > > - if (!I->getType()->isPointerTy()) > > - continue; > > - PointerArgs[I] = ArgIdx; > > - countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx); > > + if (F.empty()) > > + return true; > > + > > + // Track whether we've seen a return instruction. The first return > > + // instruction is free, as at least one will usually disappear in > inlining. > > + bool HasReturn = false; > > + > > + // Populate our simplified values by mapping from function arguments > to call > > + // arguments with known important simplifications. > > + CallSite::arg_iterator CAI = CS.arg_begin(); > > + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); > > + FAI != FAE; ++FAI, ++CAI) { > > + assert(CAI != CS.arg_end()); > > + if (Constant *C = dyn_cast(CAI)) > > + SimplifiedValues[FAI] = C; > > + > > + Value *PtrArg = *CAI; > > + if (ConstantInt *C = > stripAndComputeInBoundsConstantOffsets(PtrArg)) { > > + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); > > + > > + // We can SROA any pointer arguments derived from alloca > instructions. > > + if (isa(PtrArg)) { > > + SROAArgValues[FAI] = PtrArg; > > + SROAArgCosts[PtrArg] = 0; > > + } > > + } > > } > > -} > > + NumConstantArgs = SimplifiedValues.size(); > > + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); > > + NumAllocaArgs = SROAArgValues.size(); > > + > > + // The worklist of live basic blocks in the callee *after* inlining. 
> We avoid > > + // adding basic blocks of the callee which can be proven to be dead > for this > > + // particular call site in order to get more accurate cost estimates. > This > > + // requires a somewhat heavyweight iteration pattern: we need to walk > the > > + // basic blocks in a breadth-first order as we insert live > successors. To > > + // accomplish this, prioritizing for small iterations because we exit > after > > + // crossing our threshold, we use a small-size optimized SetVector. > > + typedef SetVector, > > + SmallPtrSet > > BBSetVector; > > + BBSetVector BBWorklist; > > + BBWorklist.insert(&F.getEntryBlock()); > > + // Note that we *must not* cache the size, this loop grows the > worklist. > > + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { > > + // Bail out the moment we cross the threshold. This means we'll > under-count > > + // the cost, but only when undercounting doesn't matter. > > + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) > > + break; > > > > -/// NeverInline - returns true if the function should never be inlined > into > > -/// any caller > > -bool InlineCostAnalyzer::FunctionInfo::NeverInline() { > > - return (Metrics.exposesReturnsTwice || Metrics.isRecursive || > > - Metrics.containsIndirectBr); > > -} > > - > > -// ConstantFunctionBonus - Figure out how much of a bonus we can get for > > -// possibly devirtualizing a function. We'll subtract the size of the > function > > -// we may wish to inline from the indirect call bonus providing a limit > on > > -// growth. Leave an upper limit of 0 for the bonus - we don't want to > penalize > > -// inlining because we decide we don't want to give a bonus for > > -// devirtualizing. > > -int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) > { > > - > > - // This could just be NULL. 
> > - if (!C) return 0; > > - > > - Function *F = dyn_cast(C); > > - if (!F) return 0; > > - > > - int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); > > - return (Bonus > 0) ? 0 : Bonus; > > -} > > - > > -// CountBonusForConstant - Figure out an approximation for how much > per-call > > -// performance boost we can expect if the specified value is constant. > > -int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { > > - unsigned Bonus = 0; > > - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != > E;++UI){ > > - User *U = *UI; > > - if (CallInst *CI = dyn_cast(U)) { > > - // Turning an indirect call into a direct call is a BIG win > > - if (CI->getCalledValue() == V) > > - Bonus += ConstantFunctionBonus(CallSite(CI), C); > > - } else if (InvokeInst *II = dyn_cast(U)) { > > - // Turning an indirect call into a direct call is a BIG win > > - if (II->getCalledValue() == V) > > - Bonus += ConstantFunctionBonus(CallSite(II), C); > > - } > > - // FIXME: Eliminating conditional branches and switches should > > - // also yield a per-call performance boost. > > - else { > > - // Figure out the bonuses that wll accrue due to simple constant > > - // propagation. > > - Instruction &Inst = cast(*U); > > - > > - // We can't constant propagate instructions which have effects or > > - // read memory. > > - // > > - // FIXME: It would be nice to capture the fact that a load from a > > - // pointer-to-constant-global is actually a *really* good thing > to zap. > > - // Unfortunately, we don't know the pointer that may get > propagated here, > > - // so we can't make this decision. 
> > - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || > > - isa(Inst)) > > - continue; > > + BasicBlock *BB = BBWorklist[Idx]; > > + if (BB->empty()) > > + continue; > > > > - bool AllOperandsConstant = true; > > - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) > > - if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != > V) { > > - AllOperandsConstant = false; > > - break; > > + // Handle the terminator cost here where we can track returns and > other > > + // function-wide constructs. > > + TerminatorInst *TI = BB->getTerminator(); > > + > > + // We never want to inline functions that contain an indirectbr. > This is > > + // incorrect because all the blockaddress's (in static global > initializers > > + // for example) would be referring to the original function, and > this indirect > > + // jump would jump from the inlined copy of the function into the > original > > + // function which is extremely undefined behavior. > > + // FIXME: This logic isn't really right; we can safely inline > functions > > + // with indirectbr's as long as no other function or global > references the > > + // blockaddress of a block within the current function. And as a > QOI issue, > > + // if someone is using a blockaddress without an indirectbr, and > that > > + // reference somehow ends up in another function or global, we > probably > > + // don't want to inline this function. > > + if (isa(TI)) > > + return false; > > + > > + if (!HasReturn && isa(TI)) > > + HasReturn = true; > > + else > > + Cost += InlineConstants::InstrCost; > > + > > + // Analyze the cost of this block. If we blow through the > threshold, this > > + // returns false, and we can bail on out. 
> > + if (!analyzeBlock(BB)) { > > + if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) > > + return false; > > + break; > > + } > > + > > + // Add in the live successors by first checking whether we have a > terminator > > + // that may be simplified based on the values simplified by this > call. > > + if (BranchInst *BI = dyn_cast(TI)) { > > + if (BI->isConditional()) { > > + Value *Cond = BI->getCondition(); > > + if (ConstantInt *SimpleCond > > + = > dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { > > + BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : > 0)); > > + continue; > > } > > + } > > + } else if (SwitchInst *SI = dyn_cast(TI)) { > > + Value *Cond = SI->getCondition(); > > + if (ConstantInt *SimpleCond > > + = > dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { > > + > BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); > > + continue; > > + } > > + } > > > > - if (AllOperandsConstant) > > - Bonus += CountBonusForConstant(&Inst); > > + // If we're unable to select a particular successor, just count all > of > > + // them. > > + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != > TSize; ++TIdx) > > + BBWorklist.insert(TI->getSuccessor(TIdx)); > > + > > + // If we had any successors at this point, then post-inlining is > likely to > > + // have them as well. Note that we assume any basic blocks which > existed > > + // due to branches or switches which folded above will also fold > after > > + // inlining. > > + if (SingleBB && TI->getNumSuccessors() > 1) { > > + // Take off the bonus we applied to the threshold. > > + Threshold -= SingleBBBonus; > > + SingleBB = false; > > } > > } > > > > - return Bonus; > > -} > > + Threshold += VectorBonus; > > > > -int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { > > - // Get information about the callee. > > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > > - > > - // If we haven't calculated this information yet, do so now. 
> > - if (CalleeFI->Metrics.NumBlocks == 0) > > - CalleeFI->analyzeFunction(Callee, TD); > > - > > - // InlineCost - This value measures how good of an inline candidate > this call > > - // site is to inline. A lower inline cost make is more likely for > the call to > > - // be inlined. This value may go negative. > > - // > > - int InlineCost = 0; > > - > > - // Compute any size reductions we can expect due to arguments being > passed into > > - // the function. > > - // > > - unsigned ArgNo = 0; > > - CallSite::arg_iterator I = CS.arg_begin(); > > - for (Function::arg_iterator FI = Callee->arg_begin(), FE = > Callee->arg_end(); > > - FI != FE; ++I, ++FI, ++ArgNo) { > > - > > - // If an alloca is passed in, inlining this function is likely to > allow > > - // significant future optimization possibilities (like scalar > promotion, and > > - // scalarization), so encourage the inlining of the function. > > - // > > - if (isa(I)) > > - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; > > - > > - // If this is a constant being passed into the function, use the > argument > > - // weights calculated for the callee to determine how much will be > folded > > - // away with this information. > > - else if (isa(I)) > > - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; > > - } > > - > > - const DenseMap, unsigned> > &ArgPairWeights > > - = CalleeFI->PointerArgPairWeights; > > - for (DenseMap, > unsigned>::const_iterator I > > - = ArgPairWeights.begin(), E = ArgPairWeights.end(); > > - I != E; ++I) > > - if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() > == > > - CS.getArgument(I->first.second)->stripInBoundsConstantOffsets()) > > - InlineCost -= I->second; > > - > > - // Each argument passed in has a cost at both the caller and the > callee > > - // sides. Measurements show that each argument costs about the same > as an > > - // instruction. 
> > - InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); > > - > > - // Now that we have considered all of the factors that make the call > site more > > - // likely to be inlined, look at factors that make us not want to > inline it. > > - > > - // Calls usually take a long time, so they make the inlining gain > smaller. > > - InlineCost += CalleeFI->Metrics.NumCalls * > InlineConstants::CallPenalty; > > - > > - // Look at the size of the callee. Each instruction counts as 5. > > - InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; > > - > > - return InlineCost; > > -} > > - > > -int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) > { > > - // Get information about the callee. > > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > > - > > - // If we haven't calculated this information yet, do so now. > > - if (CalleeFI->Metrics.NumBlocks == 0) > > - CalleeFI->analyzeFunction(Callee, TD); > > - > > - bool isDirectCall = CS.getCalledFunction() == Callee; > > - Instruction *TheCall = CS.getInstruction(); > > - int Bonus = 0; > > - > > - // If there is only one call of the function, and it has internal > linkage, > > - // make it almost guaranteed to be inlined. > > - // > > - if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) > > - Bonus += InlineConstants::LastCallToStaticBonus; > > - > > - // If the instruction after the call, or if the normal destination of > the > > - // invoke is an unreachable instruction, the function is noreturn. > As such, > > - // there is little point in inlining this. > > - if (InvokeInst *II = dyn_cast(TheCall)) { > > - if (isa(II->getNormalDest()->begin())) > > - Bonus += InlineConstants::NoreturnPenalty; > > - } else if (isa(++BasicBlock::iterator(TheCall))) > > - Bonus += InlineConstants::NoreturnPenalty; > > - > > - // If this function uses the coldcc calling convention, prefer not to > inline > > - // it. 
> > - if (Callee->getCallingConv() == CallingConv::Cold) > > - Bonus += InlineConstants::ColdccPenalty; > > - > > - // Add to the inline quality for properties that make the call > valuable to > > - // inline. This includes factors that indicate that the result of > inlining > > - // the function will be optimizable. Currently this just looks at > arguments > > - // passed into the function. > > - // > > - CallSite::arg_iterator I = CS.arg_begin(); > > - for (Function::arg_iterator FI = Callee->arg_begin(), FE = > Callee->arg_end(); > > - FI != FE; ++I, ++FI) > > - // Compute any constant bonus due to inlining we want to give here. > > - if (isa(I)) > > - Bonus += CountBonusForConstant(FI, cast(I)); > > - > > - return Bonus; > > + return AlwaysInline || Cost < Threshold; > > } > > > > -// getInlineCost - The heuristic used to determine if we should inline > the > > -// function call or not. > > -// > > -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) { > > - return getInlineCost(CS, CS.getCalledFunction()); > > +/// \brief Dump stats about this call's analysis. 
> > +void CallAnalyzer::dump() { > > +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << > "\n" > > + DEBUG_PRINT_STAT(NumConstantArgs); > > + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); > > + DEBUG_PRINT_STAT(NumAllocaArgs); > > + DEBUG_PRINT_STAT(NumConstantPtrCmps); > > + DEBUG_PRINT_STAT(NumConstantPtrDiffs); > > + DEBUG_PRINT_STAT(NumInstructionsSimplified); > > + DEBUG_PRINT_STAT(SROACostSavings); > > + DEBUG_PRINT_STAT(SROACostSavingsLost); > > +#undef DEBUG_PRINT_STAT > > } > > > > -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function > *Callee) { > > - Instruction *TheCall = CS.getInstruction(); > > - Function *Caller = TheCall->getParent()->getParent(); > > +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int > Threshold) { > > + Function *Callee = CS.getCalledFunction(); > > > > // Don't inline functions which can be redefined at link-time to mean > > // something else. Don't inline functions marked noinline or call > sites > > // marked noinline. > > - if (Callee->mayBeOverridden() || > Callee->hasFnAttr(Attribute::NoInline) || > > - CS.isNoInline()) > > + if (!Callee || Callee->mayBeOverridden() || > > + Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) > > return llvm::InlineCost::getNever(); > > > > - // Get information about the callee. > > - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; > > + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() > << "...\n"); > > > > - // If we haven't calculated this information yet, do so now. > > - if (CalleeFI->Metrics.NumBlocks == 0) > > - CalleeFI->analyzeFunction(Callee, TD); > > + CallAnalyzer CA(TD, *Callee, Threshold); > > + bool ShouldInline = CA.analyzeCall(CS); > > > > - // If we should never inline this, return a huge cost. > > - if (CalleeFI->NeverInline()) > > - return InlineCost::getNever(); > > + DEBUG(CA.dump()); > > > > - // FIXME: It would be nice to kill off CalleeFI->NeverInline. 
Then we > > - // could move this up and avoid computing the FunctionInfo for > > - // things we are going to just return always inline for. This > > - // requires handling setjmp somewhere else, however. > > - if (!Callee->isDeclaration() && > Callee->hasFnAttr(Attribute::AlwaysInline)) > > + // Check if there was a reason to force inlining or no inlining. > > + if (!ShouldInline && CA.getCost() < CA.getThreshold()) > > + return InlineCost::getNever(); > > + if (ShouldInline && CA.getCost() >= CA.getThreshold()) > > return InlineCost::getAlways(); > > > > - if (CalleeFI->Metrics.usesDynamicAlloca) { > > - // Get information about the caller. > > - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; > > - > > - // If we haven't calculated this information yet, do so now. > > - if (CallerFI.Metrics.NumBlocks == 0) { > > - CallerFI.analyzeFunction(Caller, TD); > > - > > - // Recompute the CalleeFI pointer, getting Caller could have > invalidated > > - // it. > > - CalleeFI = &CachedFunctionInfo[Callee]; > > - } > > - > > - // Don't inline a callee with dynamic alloca into a caller without > them. > > - // Functions containing dynamic alloca's are inefficient in various > ways; > > - // don't create more inefficiency. > > - if (!CallerFI.Metrics.usesDynamicAlloca) > > - return InlineCost::getNever(); > > - } > > - > > - // InlineCost - This value measures how good of an inline candidate > this call > > - // site is to inline. A lower inline cost make is more likely for > the call to > > - // be inlined. This value may go negative due to the fact that > bonuses > > - // are negative numbers. > > - // > > - int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, > Callee); > > - return llvm::InlineCost::get(InlineCost); > > -} > > - > > -// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should > use a > > -// higher threshold to determine if the function call should be inlined. 
> > -float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { > > - Function *Callee = CS.getCalledFunction(); > > - > > - // Get information about the callee. > > - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; > > - > > - // If we haven't calculated this information yet, do so now. > > - if (CalleeFI.Metrics.NumBlocks == 0) > > - CalleeFI.analyzeFunction(Callee, TD); > > - > > - float Factor = 1.0f; > > - // Single BB functions are often written to be inlined. > > - if (CalleeFI.Metrics.NumBlocks == 1) > > - Factor += 0.5f; > > - > > - // Be more aggressive if the function contains a good chunk (if it > mades up > > - // at least 10% of the instructions) of vector instructions. > > - if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) > > - Factor += 2.0f; > > - else if (CalleeFI.Metrics.NumVectorInsts > > CalleeFI.Metrics.NumInsts/10) > > - Factor += 1.5f; > > - return Factor; > > + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); > > } > > > > /// growCachedCostInfo - update the cached cost info for Caller after > Callee has > > /// been inlined. > > void > > InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function > *Callee) { > > - CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; > > - > > - // For small functions we prefer to recalculate the cost for better > accuracy. > > - if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { > > - resetCachedCostInfo(Caller); > > - return; > > - } > > - > > - // For large functions, we can save a lot of computation time by > skipping > > - // recalculations. > > - if (CallerMetrics.NumCalls > 0) > > - --CallerMetrics.NumCalls; > > - > > - if (Callee == 0) return; > > - > > - CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; > > - > > - // If we don't have metrics for the callee, don't recalculate them > just to > > - // update an approximation in the caller. Instead, just recalculate > the > > - // caller info from scratch. 
> > - if (CalleeMetrics.NumBlocks == 0) { > > - resetCachedCostInfo(Caller); > > - return; > > - } > > - > > - // Since CalleeMetrics were already calculated, we know that the > CallerMetrics > > - // reference isn't invalidated: both were in the DenseMap. > > - CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; > > - > > - // FIXME: If any of these three are true for the callee, the callee > was > > - // not inlined into the caller, so I think they're redundant here. > > - CallerMetrics.exposesReturnsTwice |= > CalleeMetrics.exposesReturnsTwice; > > - CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; > > - CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; > > - > > - CallerMetrics.NumInsts += CalleeMetrics.NumInsts; > > - CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; > > - CallerMetrics.NumCalls += CalleeMetrics.NumCalls; > > - CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; > > - CallerMetrics.NumRets += CalleeMetrics.NumRets; > > - > > - // analyzeBasicBlock counts each function argument as an inst. > > - if (CallerMetrics.NumInsts >= Callee->arg_size()) > > - CallerMetrics.NumInsts -= Callee->arg_size(); > > - else > > - CallerMetrics.NumInsts = 0; > > - > > - // We are not updating the argument weights. We have already > determined that > > - // Caller is a fairly large function, so we accept the loss of > precision. 
> > } > > > > /// clear - empty the cache of inline costs > > void InlineCostAnalyzer::clear() { > > - CachedFunctionInfo.clear(); > > } > > > > Modified: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp (original) > > +++ llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp Sat Mar 31 07:42:41 > 2012 > > @@ -59,10 +59,7 @@ > > // We still have to check the inline cost in case there are > reasons to > > // not inline which trump the always-inline attribute such as > setjmp and > > // indirectbr. > > - return CA.getInlineCost(CS); > > - } > > - float getInlineFudgeFactor(CallSite CS) { > > - return CA.getInlineFudgeFactor(CS); > > + return CA.getInlineCost(CS, getInlineThreshold(CS)); > > } > > void resetCachedCostInfo(Function *Caller) { > > CA.resetCachedCostInfo(Caller); > > > > Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original) > > +++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Sat Mar 31 07:42:41 > 2012 > > @@ -40,10 +40,7 @@ > > } > > static char ID; // Pass identification, replacement for typeid > > InlineCost getInlineCost(CallSite CS) { > > - return CA.getInlineCost(CS); > > - } > > - float getInlineFudgeFactor(CallSite CS) { > > - return CA.getInlineFudgeFactor(CS); > > + return CA.getInlineCost(CS, getInlineThreshold(CS)); > > } > > void resetCachedCostInfo(Function *Caller) { > > CA.resetCachedCostInfo(Caller); > > > > Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp > > URL: > 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original) > > +++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Sat Mar 31 07:42:41 2012 > > @@ -231,14 +231,10 @@ > > return false; > > } > > > > - int Cost = IC.getValue(); > > Function *Caller = CS.getCaller(); > > - int CurrentThreshold = getInlineThreshold(CS); > > - float FudgeFactor = getInlineFudgeFactor(CS); > > - int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); > > - if (Cost >= AdjThreshold) { > > - DEBUG(dbgs() << " NOT Inlining: cost=" << Cost > > - << ", thres=" << AdjThreshold > > + if (!IC) { > > + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() > > + << ", thres=" << (IC.getCostDelta() + IC.getCost()) > > << ", Call: " << *CS.getInstruction() << "\n"); > > return false; > > } > > @@ -255,10 +251,15 @@ > > // are used. Thus we will always have the opportunity to make local > inlining > > // decisions. Importantly the linkonce-ODR linkage covers inline > functions > > // and templates in C++. > > + // > > + // FIXME: All of this logic should be sunk into getInlineCost. It > relies on > > + // the internal implementation of the inline cost metrics rather than > > + // treating them as truly abstract units etc. > > if (Caller->hasLocalLinkage() || > > Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { > > int TotalSecondaryCost = 0; > > - bool outerCallsFound = false; > > + // The candidate cost to be imposed upon the current function. > > + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + > 1); > > // This bool tracks what happens if we do NOT inline C into B. > > bool callerWillBeRemoved = Caller->hasLocalLinkage(); > > // This bool tracks what happens if we DO inline C into B. 
> > @@ -276,26 +277,19 @@ > > } > > > > InlineCost IC2 = getInlineCost(CS2); > > - if (IC2.isNever()) > > + if (!IC2) { > > callerWillBeRemoved = false; > > - if (IC2.isAlways() || IC2.isNever()) > > + continue; > > + } > > + if (IC2.isAlways()) > > continue; > > > > - outerCallsFound = true; > > - int Cost2 = IC2.getValue(); > > - int CurrentThreshold2 = getInlineThreshold(CS2); > > - float FudgeFactor2 = getInlineFudgeFactor(CS2); > > - > > - if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) > > - callerWillBeRemoved = false; > > - > > - // See if we have this case. We subtract off the penalty > > - // for the call instruction, which we would be deleting. > > - if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && > > - Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= > > - (int)(CurrentThreshold2 * FudgeFactor2)) { > > + // See if inlining or original callsite would erase the cost > delta of > > + // this callsite. We subtract off the penalty for the call > instruction, > > + // which we would be deleting. 
> > + if (IC2.getCostDelta() <= CandidateCost) { > > inliningPreventsSomeOuterInline = true; > > - TotalSecondaryCost += Cost2; > > + TotalSecondaryCost += IC2.getCost(); > > } > > } > > // If all outer calls to Caller would get inlined, the cost for the > last > > @@ -305,17 +299,16 @@ > > if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) > > TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; > > > > - if (outerCallsFound && inliningPreventsSomeOuterInline && > > - TotalSecondaryCost < Cost) { > > - DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << > > - " Cost = " << Cost << > > + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < > IC.getCost()) { > > + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << > > + " Cost = " << IC.getCost() << > > ", outer Cost = " << TotalSecondaryCost << '\n'); > > return false; > > } > > } > > > > - DEBUG(dbgs() << " Inlining: cost=" << Cost > > - << ", thres=" << AdjThreshold > > + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() > > + << ", thres=" << (IC.getCostDelta() + IC.getCost()) > > << ", Call: " << *CS.getInstruction() << '\n'); > > return true; > > } > > > > Modified: llvm/trunk/test/Transforms/Inline/alloca-bonus.ll > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/alloca-bonus.ll?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/test/Transforms/Inline/alloca-bonus.ll (original) > > +++ llvm/trunk/test/Transforms/Inline/alloca-bonus.ll Sat Mar 31 > 07:42:41 2012 > > @@ -1,5 +1,7 @@ > > ; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s > > > > +target datalayout = "p:32:32" > > + > > declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr) > > > > @glbl = external global i32 > > @@ -15,8 +17,8 @@ > > define void @inner1(i32 *%ptr) { > > %A = load i32* %ptr > > store i32 0, i32* %ptr > > - %C = getelementptr 
i32* %ptr, i32 0 > > - %D = getelementptr i32* %ptr, i32 1 > > + %C = getelementptr inbounds i32* %ptr, i32 0 > > + %D = getelementptr inbounds i32* %ptr, i32 1 > > %E = bitcast i32* %ptr to i8* > > %F = select i1 false, i32* %ptr, i32* @glbl > > call void @llvm.lifetime.start(i64 0, i8* %E) > > @@ -35,8 +37,8 @@ > > define void @inner2(i32 *%ptr) { > > %A = load i32* %ptr > > store i32 0, i32* %ptr > > - %C = getelementptr i32* %ptr, i32 0 > > - %D = getelementptr i32* %ptr, i32 %A > > + %C = getelementptr inbounds i32* %ptr, i32 0 > > + %D = getelementptr inbounds i32* %ptr, i32 %A > > %E = bitcast i32* %ptr to i8* > > %F = select i1 false, i32* %ptr, i32* @glbl > > call void @llvm.lifetime.start(i64 0, i8* %E) > > @@ -93,7 +95,7 @@ > > ; %B poisons this call, scalar-repl can't handle that instruction. > However, we > > ; still want to detect that the icmp and branch *can* be handled. > > define void @inner4(i32 *%ptr, i32 %A) { > > - %B = getelementptr i32* %ptr, i32 %A > > + %B = getelementptr inbounds i32* %ptr, i32 %A > > %C = icmp eq i32* %ptr, null > > br i1 %C, label %bb.true, label %bb.false > > bb.true: > > @@ -122,3 +124,32 @@ > > bb.false: > > ret void > > } > > + > > +define void @outer5() { > > +; CHECK: @outer5 > > +; CHECK-NOT: call void @inner5 > > + %ptr = alloca i32 > > + call void @inner5(i1 false, i32* %ptr) > > + ret void > > +} > > + > > +; %D poisons this call, scalar-repl can't handle that instruction. > However, if > > +; the flag is set appropriately, the poisoning instruction is inside of > dead > > +; code, and so shouldn't be counted. 
> > +define void @inner5(i1 %flag, i32 *%ptr) { > > + %A = load i32* %ptr > > + store i32 0, i32* %ptr > > + %C = getelementptr inbounds i32* %ptr, i32 0 > > + br i1 %flag, label %if.then, label %exit > > + > > +if.then: > > + %D = getelementptr inbounds i32* %ptr, i32 %A > > + %E = bitcast i32* %ptr to i8* > > + %F = select i1 false, i32* %ptr, i32* @glbl > > + call void @llvm.lifetime.start(i64 0, i8* %E) > > + ret void > > + > > +exit: > > + ret void > > +} > > + > > > > Modified: llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll (original) > > +++ llvm/trunk/test/Transforms/Inline/dynamic_alloca_test.ll Sat Mar 31 > 07:42:41 2012 > > @@ -4,6 +4,11 @@ > > ; already have dynamic allocas. > > > > ; RUN: opt < %s -inline -S | FileCheck %s > > +; > > +; FIXME: This test is xfailed because the inline cost rewrite disabled > *all* > > +; inlining of functions which contain a dynamic alloca. It should be > re-enabled > > +; once that functionality is restored. 
> > +; XFAIL: * > > > > declare void @ext(i32*) > > > > > > Modified: llvm/trunk/test/Transforms/Inline/inline_constprop.ll > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline_constprop.ll?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/test/Transforms/Inline/inline_constprop.ll (original) > > +++ llvm/trunk/test/Transforms/Inline/inline_constprop.ll Sat Mar 31 > 07:42:41 2012 > > @@ -1,4 +1,4 @@ > > -; RUN: opt < %s -inline -S | FileCheck %s > > +; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s > > > > define internal i32 @callee1(i32 %A, i32 %B) { > > %C = sdiv i32 %A, %B > > @@ -14,17 +14,18 @@ > > } > > > > define i32 @caller2() { > > +; Check that we can constant-prop through instructions after inlining > callee21 > > +; to get constants in the inlined callsite to callee22. > > +; FIXME: Currently, the threshold is fixed at 20 because we don't > perform > > +; *recursive* cost analysis to realize that the nested call site will > definitely > > +; inline and be cheap. We should eventually do that and lower the > threshold here > > +; to 1. > > +; > > ; CHECK: @caller2 > > ; CHECK-NOT: call void @callee2 > > ; CHECK: ret > > > > -; We contrive to make this hard for *just* the inline pass to do in > order to > > -; simulate what can actually happen with large, complex functions > getting > > -; inlined. > > - %a = add i32 42, 0 > > - %b = add i32 48, 0 > > - > > - %x = call i32 @callee21(i32 %a, i32 %b) > > + %x = call i32 @callee21(i32 42, i32 48) > > ret i32 %x > > } > > > > @@ -41,49 +42,71 @@ > > br i1 %icmp, label %bb.true, label %bb.false > > bb.true: > > ; This block musn't be counted in the inline cost. 
> > - %ptr = call i8* @getptr() > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > - load volatile i8* %ptr > > + %x1 = add i32 %x, 1 > > + %x2 = add i32 %x1, 1 > > + %x3 = add i32 %x2, 1 > > + %x4 = add i32 %x3, 1 > > + %x5 = add i32 %x4, 1 > > + %x6 = add i32 %x5, 1 > > + %x7 = add i32 %x6, 1 > > + %x8 = add i32 %x7, 1 > > > > - ret i32 %x > > + ret i32 %x8 > > bb.false: > > ret i32 %x > > } > > + > > +define i32 @caller3() { > > +; Check that even if the expensive path is hidden behind several basic > blocks, > > +; it doesn't count toward the inline cost when constant-prop proves > those paths > > +; dead. 
> > +; > > +; CHECK: @caller3 > > +; CHECK-NOT: call > > +; CHECK: ret i32 6 > > + > > +entry: > > + %x = call i32 @callee3(i32 42, i32 48) > > + ret i32 %x > > +} > > + > > +define i32 @callee3(i32 %x, i32 %y) { > > + %sub = sub i32 %y, %x > > + %icmp = icmp ugt i32 %sub, 42 > > + br i1 %icmp, label %bb.true, label %bb.false > > + > > +bb.true: > > + %icmp2 = icmp ult i32 %sub, 64 > > + br i1 %icmp2, label %bb.true.true, label %bb.true.false > > + > > +bb.true.true: > > + ; This block musn't be counted in the inline cost. > > + %x1 = add i32 %x, 1 > > + %x2 = add i32 %x1, 1 > > + %x3 = add i32 %x2, 1 > > + %x4 = add i32 %x3, 1 > > + %x5 = add i32 %x4, 1 > > + %x6 = add i32 %x5, 1 > > + %x7 = add i32 %x6, 1 > > + %x8 = add i32 %x7, 1 > > + br label %bb.merge > > + > > +bb.true.false: > > + ; This block musn't be counted in the inline cost. > > + %y1 = add i32 %y, 1 > > + %y2 = add i32 %y1, 1 > > + %y3 = add i32 %y2, 1 > > + %y4 = add i32 %y3, 1 > > + %y5 = add i32 %y4, 1 > > + %y6 = add i32 %y5, 1 > > + %y7 = add i32 %y6, 1 > > + %y8 = add i32 %y7, 1 > > + br label %bb.merge > > + > > +bb.merge: > > + %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ] > > + ret i32 %result > > + > > +bb.false: > > + ret i32 %sub > > +} > > > > Modified: llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll (original) > > +++ llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll Sat Mar > 31 07:42:41 2012 > > @@ -71,3 +71,40 @@ > > call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), > i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp > > ret void > > } > > + > > + > > +; Check that a recursive function, when called with a constant that 
> makes the > > +; recursive path dead code can actually be inlined. > > +define i32 @fib(i32 %i) { > > +entry: > > + %is.zero = icmp eq i32 %i, 0 > > + br i1 %is.zero, label %zero.then, label %zero.else > > + > > +zero.then: > > + ret i32 0 > > + > > +zero.else: > > + %is.one = icmp eq i32 %i, 1 > > + br i1 %is.one, label %one.then, label %one.else > > + > > +one.then: > > + ret i32 1 > > + > > +one.else: > > + %i1 = sub i32 %i, 1 > > + %f1 = call i32 @fib(i32 %i1) > > + %i2 = sub i32 %i, 2 > > + %f2 = call i32 @fib(i32 %i2) > > + %f = add i32 %f1, %f2 > > + ret i32 %f > > +} > > + > > +define i32 @fib_caller() { > > +; CHECK: @fib_caller > > +; CHECK-NOT: call > > +; CHECK: ret > > + %f1 = call i32 @fib(i32 0) > > + %f2 = call i32 @fib(i32 1) > > + %result = add i32 %f1, %f2 > > + ret i32 %result > > +} > > > > Modified: llvm/trunk/test/Transforms/Inline/ptr-diff.ll > > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/ptr-diff.ll?rev=153812&r1=153811&r2=153812&view=diff > > > ============================================================================== > > --- llvm/trunk/test/Transforms/Inline/ptr-diff.ll (original) > > +++ llvm/trunk/test/Transforms/Inline/ptr-diff.ll Sat Mar 31 07:42:41 > 2012 > > @@ -1,5 +1,7 @@ > > ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s > > > > +target datalayout = "p:32:32" > > + > > define i32 @outer1() { > > ; CHECK: @outer1 > > ; CHECK-NOT: call > > > > > > _______________________________________________ > > llvm-commits mailing list > > llvm-commits at cs.uiuc.edu > > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > -David > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/2f470ec0/attachment-0001.html From pichet2000 at gmail.com Tue Apr 10 18:40:36 2012 From: pichet2000 at gmail.com (Francois Pichet) Date: Tue, 10 Apr 2012 19:40:36 -0400 Subject: [llvm-commits] [llvm] r154374 - in /llvm/trunk/utils/TableGen: CodeGenRegisters.cpp CodeGenRegisters.h RegisterInfoEmitter.cpp RegisterInfoEmitter.h In-Reply-To: <20120410022524.7FD2A2A6C066@llvm.org> References: <20120410022524.7FD2A2A6C066@llvm.org> Message-ID: On Mon, Apr 9, 2012 at 10:25 PM, Andrew Trick wrote: > Author: atrick > Date: Mon Apr  9 21:25:24 2012 > New Revision: 154374 > > URL: http://llvm.org/viewvc/llvm-project?rev=154374&view=rev > Log: > Added register unit sets to the target description. > > This is a new algorithm that finds sets of register units that can be > used to model register pressure. This handles arbitrary, overlapping > register classes. Each register class is associated with a (small) > list of pressure sets. These are the dimensions of pressure affected > by the register class's liveness. > > Modified: >    llvm/trunk/utils/TableGen/CodeGenRegisters.cpp >    llvm/trunk/utils/TableGen/CodeGenRegisters.h >    llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp >    llvm/trunk/utils/TableGen/RegisterInfoEmitter.h > > Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.cpp?rev=154374&r1=154373&r2=154374&view=diff > ============================================================================== > --- llvm/trunk/utils/TableGen/CodeGenRegisters.cpp (original) > +++ llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Mon Apr  9 21:25:24 2012 > @@ -1118,6 +1118,169 @@ >   } >  } > > +// Find a set in UniqueSets with the same elements as Set. > +// Return an iterator into UniqueSets. > +static std::vector::const_iterator > +findRegUnitSet(const std::vector &UniqueSets, > +               
const RegUnitSet &Set) { > +  std::vector::const_iterator > +    I = UniqueSets.begin(), E = UniqueSets.end(); > +  for(;I != E; ++I) { > +    if (I->Units == Set.Units) > +      break; > +  } > +  return I; > +} > + > +// Return true if the RUSubSet is a subset of RUSuperSet. > +static bool isRegUnitSubSet(const std::vector &RUSubSet, > +                            const std::vector &RUSuperSet) { > +  for (RegUnitSet::iterator SubIdx = RUSubSet.begin(), EndIdx = RUSubSet.end(), > +         SearchIdx = RUSuperSet.begin(), SearchEnd = RUSuperSet.end(); > +       SubIdx != EndIdx; ++SubIdx) { > +    SearchIdx = find(SearchIdx, SearchEnd, *SubIdx); > +    if (SearchIdx == SearchEnd) > +      return false; > +    ++SearchIdx; > +  } > +  return true; > +} > + > +// Iteratively prune unit sets. > +void CodeGenRegBank::pruneUnitSets() { > +  assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets"); > + > +  // Form an equivalence class of UnitSets with no significant difference. > +  IntEqClasses RepUnitSetIDs(RegUnitSets.size()); > +  for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size(); > +       SubIdx != EndIdx; ++SubIdx) { > +    const RegUnitSet &SubSet = RegUnitSets[SubIdx]; > +    for (unsigned SuperIdx = 0; SuperIdx != EndIdx; ++SuperIdx) { > +      if (SuperIdx == SubIdx) > +        continue; > + > +      const RegUnitSet &SuperSet = RegUnitSets[SuperIdx]; > +      if (isRegUnitSubSet(SubSet.Units, SuperSet.Units) > +          && (SubSet.Units.size() + 3 > SuperSet.Units.size())) { > +        RepUnitSetIDs.join(SubIdx, SuperIdx); > +      } > +    } > +  } > +  RepUnitSetIDs.compress(); > + > +  // Populate PrunedUnitSets with each equivalence class's superset. > +  std::vector PrunedUnitSets(RepUnitSetIDs.getNumClasses()); > +  for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) { > +    RegUnitSet &SuperSet = PrunedUnitSets[RepUnitSetIDs[i]]; > +    if (SuperSet.Units.size() < RegUnitSets[i].Units.size()) > +     
 SuperSet = RegUnitSets[i]; > +  } > +  RegUnitSets.swap(PrunedUnitSets); > +} > + > +// Create a RegUnitSet for each RegClass that contains all units in the class > +// including adopted units that are necessary to model register pressure. Then > +// iteratively compute RegUnitSets such that the union of any two overlapping > +// RegUnitSets is repreresented. > +// > +// RegisterInfoEmitter will map each RegClass to its RegUnitClass and any > +// RegUnitSet that is a superset of that RegUnitClass. > +void CodeGenRegBank::computeRegUnitSets() { > + > +  // Compute a unique RegUnitSet for each RegClass. > +  const ArrayRef &RegClasses = getRegClasses(); > +  unsigned NumRegClasses = RegClasses.size(); > +  for (unsigned RCIdx = 0, RCEnd = NumRegClasses; RCIdx != RCEnd; ++RCIdx) { > + > +    // Compute a sorted list of units in this class. > +    std::vector RegUnits; > +    const CodeGenRegister::Set &Regs = RegClasses[RCIdx]->getMembers(); > +    for (RegUnitIterator UnitI(Regs); UnitI.isValid(); ++UnitI) > +      RegUnits.push_back(*UnitI); > +    std::sort(RegUnits.begin(), RegUnits.end()); > + > +    // Speculatively grow the RegUnitSets to hold the new set. > +    RegUnitSets.resize(RegUnitSets.size() + 1); > +    RegUnitSets.back().Name = RegClasses[RCIdx]->getName(); > +    std::unique_copy(RegUnits.begin(), RegUnits.end(), > +                     std::back_inserter(RegUnitSets.back().Units)); > + > +    // Find an existing RegUnitSet. > +    std::vector::const_iterator SetI = > +      findRegUnitSet(RegUnitSets, RegUnitSets.back()); > +    if (SetI != llvm::prior(RegUnitSets.end())) > +      RegUnitSets.pop_back(); > +  } > + > +  // Iteratively prune unit sets. > +  pruneUnitSets(); > + > +  // Iterate over all unit sets, including new ones added by this loop. > +  unsigned NumRegUnitSubSets = RegUnitSets.size(); > +  for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) { > +    // In theory, this is combinatorial. 
In practice, it needs to be bounded > +    // by a small number of sets for regpressure to be efficient. > +    // If the assert is hit, we need to implement pruning. > +    assert(Idx < (2*NumRegUnitSubSets) && "runaway unit set inference"); > + > +    // Compare new sets with all original classes. > +    for (unsigned SearchIdx = (SearchIdx >= NumRegUnitSubSets) ? 0 : Idx+1; > +         SearchIdx != EndIdx; ++SearchIdx) { This doesn't look right; MSVC warning: 4>c:\dev\llvm\llvm_trunk2\utils\tablegen\codegenregisters.cpp(1225): warning C4700: uninitialized local variable 'SearchIdx' used From atrick at apple.com Tue Apr 10 18:53:32 2012 From: atrick at apple.com (Andrew Trick) Date: Tue, 10 Apr 2012 23:53:32 -0000 Subject: [llvm-commits] [llvm] r154452 - /llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Message-ID: <20120410235332.8D23F2A6C065@llvm.org> Author: atrick Date: Tue Apr 10 18:53:32 2012 New Revision: 154452 URL: http://llvm.org/viewvc/llvm-project?rev=154452&view=rev Log: TableGen/reginfo potential bug: typo from previous checkin. Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.cpp?rev=154452&r1=154451&r2=154452&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenRegisters.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Tue Apr 10 18:53:32 2012 @@ -1222,7 +1222,7 @@ assert(Idx < (2*NumRegUnitSubSets) && "runaway unit set inference"); // Compare new sets with all original classes. - for (unsigned SearchIdx = (SearchIdx >= NumRegUnitSubSets) ? 0 : Idx+1; + for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 
0 : Idx+1; SearchIdx != EndIdx; ++SearchIdx) { std::set Intersection; std::set_intersection(RegUnitSets[Idx].Units.begin(), From atrick at apple.com Tue Apr 10 18:55:22 2012 From: atrick at apple.com (Andrew Trick) Date: Tue, 10 Apr 2012 16:55:22 -0700 Subject: [llvm-commits] [llvm] r154374 - in /llvm/trunk/utils/TableGen: CodeGenRegisters.cpp CodeGenRegisters.h RegisterInfoEmitter.cpp RegisterInfoEmitter.h In-Reply-To: References: <20120410022524.7FD2A2A6C066@llvm.org> Message-ID: <0DA440FE-8A5C-4BA5-8148-A7E433B4B64C@apple.com> On Apr 10, 2012, at 4:40 PM, Francois Pichet wrote: >> >> + // Compare new sets with all original classes. >> + for (unsigned SearchIdx = (SearchIdx >= NumRegUnitSubSets) ? 0 : Idx+1; >> + SearchIdx != EndIdx; ++SearchIdx) { > > This doesn't look right; MSVC warning: > > 4>c:\dev\llvm\llvm_trunk2\utils\tablegen\codegenregisters.cpp(1225): > warning C4700: uninitialized local variable 'SearchIdx' used Good warning. I hadn't noticed that search/replace typo yet. Fixed in r154452. -Andy -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/2b924457/attachment.html From stoklund at 2pi.dk Tue Apr 10 19:00:24 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 11 Apr 2012 00:00:24 -0000 Subject: [llvm-commits] [llvm] r154453 - /llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll Message-ID: <20120411000024.D8E7E2A6C065@llvm.org> Author: stoklund Date: Tue Apr 10 19:00:24 2012 New Revision: 154453 URL: http://llvm.org/viewvc/llvm-project?rev=154453&view=rev Log: Fix test to be register assignment invariant. 
Modified: llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll Modified: llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll?rev=154453&r1=154452&r2=154453&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2012-02-20-MachineCPBug.ll Tue Apr 10 19:00:24 2012 @@ -52,7 +52,8 @@ for.body9.i: ; preds = %for.body9.i, %for.body9.lr.ph.i ; CHECK: %for.body9.i -; CHECK: movb %al, %ch +; CHECK: movb +; CHECK: shrdl %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ] %add10.i = add i32 %i6.02.i, %div.i %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i From stoklund at 2pi.dk Tue Apr 10 19:00:26 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 11 Apr 2012 00:00:26 -0000 Subject: [llvm-commits] [llvm] r154454 - /llvm/trunk/lib/CodeGen/MachineLICM.cpp Message-ID: <20120411000026.CDEDC2A6C065@llvm.org> Author: stoklund Date: Tue Apr 10 19:00:26 2012 New Revision: 154454 URL: http://llvm.org/viewvc/llvm-project?rev=154454&view=rev Log: Only check for PHI uses inside the current loop. Hoisting a value that is used by a PHI in the loop will introduce a copy because the live range is extended to cross the PHI. The same applies to PHIs in exit blocks. Also use this opportunity to make HasLoopPHIUse() non-recursive. 
Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=154454&r1=154453&r2=154454&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Tue Apr 10 19:00:26 2012 @@ -80,6 +80,14 @@ MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + // Exit blocks for CurLoop. + SmallVector ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } + // Track 'estimated' register pressure. SmallSet RegSeen; SmallVector RegPressure; @@ -182,9 +190,9 @@ /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -348,6 +356,7 @@ while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -356,6 +365,8 @@ continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -955,22 +966,40 @@ } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. 
-bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. +bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -1182,15 +1211,10 @@ return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. 
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; - } + // If result(s) of this instruction is used by PHIs inside the loop, then + // don't hoist it because it will introduce an extra copy. + if (HasLoopPHIUse(&MI)) + return false; return true; } From stoklund at 2pi.dk Tue Apr 10 19:00:28 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 11 Apr 2012 00:00:28 -0000 Subject: [llvm-commits] [llvm] r154455 - /llvm/trunk/lib/CodeGen/MachineLICM.cpp Message-ID: <20120411000028.C0C4D2A6C065@llvm.org> Author: stoklund Date: Tue Apr 10 19:00:28 2012 New Revision: 154455 URL: http://llvm.org/viewvc/llvm-project?rev=154455&view=rev Log: Tweak MachineLICM heuristics for cheap instructions. Allow cheap instructions to be hoisted if they are register pressure neutral or better. This happens if the instruction is the last loop use of another virtual register. Only expensive instructions are allowed to increase loop register pressure. Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=154455&r1=154454&r2=154455&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Tue Apr 10 19:00:28 2012 @@ -205,7 +205,7 @@ /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. 
- bool CanCauseHighRegPressure(DenseMap &Cost); + bool CanCauseHighRegPressure(DenseMap &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -1067,7 +1067,8 @@ /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost, + bool CheapInstr) { for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { if (CI->second <= 0) @@ -1076,6 +1077,12 @@ unsigned RCId = CI->first; unsigned Limit = RegLimit[RCId]; int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) @@ -1138,83 +1145,96 @@ if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. 
Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. + + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; + + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. 
+ // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? + DenseMap Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + Cost[RCId] -= RCCost; } + } - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. 
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } - // High register pressure situation, only hoist if the instruction is going - // to be remat'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs inside the loop, then - // don't hoist it because it will introduce an extra copy. - if (HasLoopPHIUse(&MI)) + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. 
+ if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; + } return true; } From evan.cheng at apple.com Tue Apr 10 19:13:00 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 00:13:00 -0000 Subject: [llvm-commits] [llvm] r154456 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMScheduleA8.td lib/Target/ARM/ARMScheduleA9.td lib/Target/ARM/ARMScheduleV6.td lib/Target/ARM/ARMSubtarget.h lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/vfp4.s test/MC/Disassembler/ARM/vfp4.txt Message-ID: <20120411001300.D3F2E2A6C065@llvm.org> Author: evancheng Date: Tue Apr 10 19:13:00 2012 New Revision: 154456 URL: http://llvm.org/viewvc/llvm-project?rev=154456&view=rev Log: Fix a number of problems with ARM fused multiply add/subtract instructions. 1. The new instruction itinerary entries are not properly described. 2. The asm parser can't handle vfms and vfnms. 3. There were no assembler, disassembler test cases. 4. HasNEON2 has the wrong assembler predicate. 
rdar://10139676 Added: llvm/trunk/test/MC/ARM/vfp4.s llvm/trunk/test/MC/Disassembler/ARM/vfp4.txt Modified: llvm/trunk/lib/Target/ARM/ARM.td llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMScheduleA8.td llvm/trunk/lib/Target/ARM/ARMScheduleA9.td llvm/trunk/lib/Target/ARM/ARMScheduleV6.td llvm/trunk/lib/Target/ARM/ARMSubtarget.h llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/ARM.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td (original) +++ llvm/trunk/lib/Target/ARM/ARM.td Tue Apr 10 19:13:00 2012 @@ -76,8 +76,6 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; -// Allow more precision in FP computation -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; // Disable 32-bit to 16-bit narrowing for experimentation. 
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue Apr 10 19:13:00 2012 @@ -181,11 +181,11 @@ AssemblerPredicate<"FeatureVFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4">; -def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, - AssemblerPredicate<"FeatureNEON2">; + AssemblerPredicate<"FeatureNEON,FeatureVFP4">; def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; @@ -221,6 +221,9 @@ def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +// Allow more precision in FP computation +def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Apr 10 19:13:00 2012 @@ -4115,7 +4115,6 @@ "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; - // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, @@ -4136,10 +4135,10 @@ // Match @llvm.fma.* intrinsics def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)), (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON, HasVFP4]>; + Requires<[HasNEON2]>; def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)), (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON, HasVFP4]>; + Requires<[HasNEON2]>; // Vector Subtract Operations. 
@@ -5497,9 +5496,9 @@ def : N3VSMulOpPat, Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; def : N3VSMulOpPat, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasNEON2, UseNEONForFP, FPContractions]>; def : N3VSMulOpPat, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasNEON2, UseNEONForFP, FPContractions]>; def : N2VSPat; def : N2VSPat; def : N3VSPat; Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA8.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA8.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMScheduleA8.td (original) +++ llvm/trunk/lib/Target/ARM/ARMScheduleA8.td Tue Apr 10 19:13:00 2012 @@ -324,6 +324,15 @@ InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // // Single-precision FP DIV InstrItinData, InstrStage<20, [A8_NPipe], 0>, @@ -860,6 +869,16 @@ InstrItinData, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // // Double-register Reciprical Step InstrItinData, InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, Modified: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleA9.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/ARM/ARMScheduleA9.td (original) +++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td Tue Apr 10 19:13:00 2012 @@ -604,6 +604,22 @@ InstrStage<2, [A9_NPipe]>], [9, 1, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [8, 1, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [9, 1, 1, 1]>, + // // Single-precision FP DIV InstrItinData, InstrStage<1, [A9_MUX0], 0>, @@ -1697,6 +1713,26 @@ InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [6, 3, 2, 1]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe]>], + [8, 4, 2, 1]>, + // // Double-register Reciprical Step InstrItinData, InstrStage<1, [A9_MUX0], 0>, Modified: llvm/trunk/lib/Target/ARM/ARMScheduleV6.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMScheduleV6.td?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMScheduleV6.td (original) +++ llvm/trunk/lib/Target/ARM/ARMScheduleV6.td Tue Apr 10 19:13:00 2012 @@ -243,6 +243,12 @@ // Double-precision FP MAC InstrItinData], 
[9, 2, 2, 2]>, // + // Single-precision Fused FP MAC + InstrItinData], [9, 2, 2, 2]>, + // + // Double-precision Fused FP MAC + InstrItinData], [9, 2, 2, 2]>, + // // Single-precision FP DIV InstrItinData], [20, 2, 2]>, // Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original) +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Tue Apr 10 19:13:00 2012 @@ -45,7 +45,7 @@ bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEON2 - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=154456&r1=154455&r2=154456&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Apr 10 19:13:00 2012 @@ -4659,6 +4659,7 @@ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4702,6 +4703,7 @@ Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + Mnemonic == "vfm" || Mnemonic == "vfnm" || (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || 
Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"))) { Added: llvm/trunk/test/MC/ARM/vfp4.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/vfp4.s?rev=154456&view=auto ============================================================================== --- llvm/trunk/test/MC/ARM/vfp4.s (added) +++ llvm/trunk/test/MC/ARM/vfp4.s Tue Apr 10 19:13:00 2012 @@ -0,0 +1,50 @@ +@ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM +@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB + + @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee] +@ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b] +vfma.f64 d16, d18, d17 + +@ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee] +@ THUMB: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a] +vfma.f32 s2, s4, s0 + +@ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2] +@ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c] +vfma.f32 d16, d18, d17 + +@ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2] +@ THUMB: vfma.f32 q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c] +vfma.f32 q2, q4, q0 + +@ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee] +@ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b] +vfnma.f64 d16, d18, d17 + +@ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee] +@ THUMB: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a] +vfnma.f32 s2, s4, s0 + +@ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee] +@ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b] +vfms.f64 d16, d18, d17 + +@ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee] +@ THUMB: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a] +vfms.f32 s2, s4, s0 + +@ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2] +@ THUMB: vfms.f32 d16, d18, d17 @ encoding: 
[0x62,0xef,0xb1,0x0c] +vfms.f32 d16, d18, d17 + +@ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2] +@ THUMB: vfms.f32 q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c] +vfms.f32 q2, q4, q0 + +@ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee] +@ THUMB: vfnms.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xa1,0x0b] +vfnms.f64 d16, d18, d17 + +@ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee] +@ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a] +vfnms.f32 s2, s4, s0 Added: llvm/trunk/test/MC/Disassembler/ARM/vfp4.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/ARM/vfp4.txt?rev=154456&view=auto ============================================================================== --- llvm/trunk/test/MC/Disassembler/ARM/vfp4.txt (added) +++ llvm/trunk/test/MC/Disassembler/ARM/vfp4.txt Tue Apr 10 19:13:00 2012 @@ -0,0 +1,37 @@ +# RUN: llvm-mc < %s -triple thumbv7-unknown-unknown --disassemble -mattr=+neon,+vfp4 | FileCheck %s + +# CHECK: vfma.f64 d16, d18, d17 +0xe2 0xee 0xa1 0x0b + +# CHECK: vfma.f32 s2, s4, s0 +0xa2 0xee 0x00 0x1a + +# CHECK: vfma.f32 d16, d18, d17 +0x42 0xef 0xb1 0x0c + +# CHECK: vfma.f32 q2, q4, q0 +0x08 0xef 0x50 0x4c + +# CHECK: vfnms.f64 d16, d18, d17 +0xd2 0xee 0xa1 0x0b + +# CHECK: vfnms.f32 s2, s4, s0 +0x92 0xee 0x00 0x1a + +# CHECK: vfms.f64 d16, d18, d17 +0xe2 0xee 0xe1 0x0b + +# CHECK: vfms.f32 s2, s4, s0 +0xa2 0xee 0x40 0x1a + +# CHECK: vfms.f32 d16, d18, d17 +0x62 0xef 0xb1 0x0c + +# CHECK: vfms.f32 q2, q4, q0 +0x28 0xef 0x50 0x4c + +# CHECK: vfnma.f64 d16, d18, d17 +0xd2 0xee 0xe1 0x0b + +# CHECK: vfnma.f32 s2, s4, s0 +0x92 0xee 0x40 0x1a From grosbach at apple.com Tue Apr 10 19:15:16 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 11 Apr 2012 00:15:16 -0000 Subject: [llvm-commits] [llvm] r154457 - in /llvm/trunk: lib/Target/ARM/ARMInstrThumb.td lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/basic-thumb-instructions.s Message-ID: 
<20120411001516.E23B92A6C065@llvm.org> Author: grosbach Date: Tue Apr 10 19:15:16 2012 New Revision: 154457 URL: http://llvm.org/viewvc/llvm-project?rev=154457&view=rev Log: ARM add missing Thumb1 two-operand aliases for shift-by-immediate. rdar://11222742 Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/basic-thumb-instructions.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=154457&r1=154456&r2=154457&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue Apr 10 19:15:16 2012 @@ -1407,3 +1407,11 @@ def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + +// Implied destination operand forms for shifts. +def : tInstAlias<"lsl${s}${p} $Rdm, $imm", + (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>; +def : tInstAlias<"lsr${s}${p} $Rdm, $imm", + (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; +def : tInstAlias<"asr${s}${p} $Rdm, $imm", + (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=154457&r1=154456&r2=154457&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Tue Apr 10 19:15:16 2012 @@ -6650,6 +6650,37 @@ return true; } + // Handle encoding choice for the shift-immediate instructions. 
+ case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: { + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !(static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLri: NewOpc = ARM::tLSLri; break; + case ARM::t2LSRri: NewOpc = ARM::tLSRri; break; + case ARM::t2ASRri: NewOpc = ARM::tASRri; break; + } + // The Thumb1 operands aren't in the same order. Awesome, eh? + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + // Handle the Thumb2 mode MOV complex aliases. 
case ARM::t2MOVsr: case ARM::t2MOVSsr: { Modified: llvm/trunk/test/MC/ARM/basic-thumb-instructions.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/basic-thumb-instructions.s?rev=154457&r1=154456&r2=154457&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/basic-thumb-instructions.s (original) +++ llvm/trunk/test/MC/ARM/basic-thumb-instructions.s Tue Apr 10 19:15:16 2012 @@ -97,10 +97,16 @@ asrs r2, r3, #32 asrs r2, r3, #5 asrs r2, r3, #1 + asrs r5, #21 + asrs r5, r5, #21 + asrs r3, r5, #21 @ CHECK: asrs r2, r3, #32 @ encoding: [0x1a,0x10] @ CHECK: asrs r2, r3, #5 @ encoding: [0x5a,0x11] @ CHECK: asrs r2, r3, #1 @ encoding: [0x5a,0x10] +@ CHECK: asrs r5, r5, #21 @ encoding: [0x6d,0x15] +@ CHECK: asrs r5, r5, #21 @ encoding: [0x6d,0x15] +@ CHECK: asrs r3, r5, #21 @ encoding: [0x6b,0x15] @------------------------------------------------------------------------------ @@ -319,9 +325,15 @@ @------------------------------------------------------------------------------ lsls r4, r5, #0 lsls r4, r5, #4 + lsls r3, #12 + lsls r3, r3, #12 + lsls r1, r3, #12 @ CHECK: lsls r4, r5, #0 @ encoding: [0x2c,0x00] @ CHECK: lsls r4, r5, #4 @ encoding: [0x2c,0x01] +@ CHECK: lsls r3, r3, #12 @ encoding: [0x1b,0x03] +@ CHECK: lsls r3, r3, #12 @ encoding: [0x1b,0x03] +@ CHECK: lsls r1, r3, #12 @ encoding: [0x19,0x03] @------------------------------------------------------------------------------ @@ -337,9 +349,15 @@ @------------------------------------------------------------------------------ lsrs r1, r3, #1 lsrs r1, r3, #32 + lsrs r4, #20 + lsrs r4, r4, #20 + lsrs r2, r4, #20 @ CHECK: lsrs r1, r3, #1 @ encoding: [0x59,0x08] @ CHECK: lsrs r1, r3, #32 @ encoding: [0x19,0x08] +@ CHECK: lsrs r4, r4, #20 @ encoding: [0x24,0x0d] +@ CHECK: lsrs r4, r4, #20 @ encoding: [0x24,0x0d] +@ CHECK: lsrs r2, r4, #20 @ encoding: [0x22,0x0d] @------------------------------------------------------------------------------ From 
enderby at apple.com Tue Apr 10 19:25:40 2012 From: enderby at apple.com (Kevin Enderby) Date: Wed, 11 Apr 2012 00:25:40 -0000 Subject: [llvm-commits] [llvm] r154459 - in /llvm/trunk: lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/Disassembler/ARM/neon.txt test/MC/Disassembler/ARM/neont2.txt Message-ID: <20120411002540.5E4B92A6C065@llvm.org> Author: enderby Date: Tue Apr 10 19:25:40 2012 New Revision: 154459 URL: http://llvm.org/viewvc/llvm-project?rev=154459&view=rev Log: Fix ARM disassembly of VLD instructions with writebacks. And add a test case for all opcodes handled by DecodeVLDInstruction() in ARMDisassembler.cpp. Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp llvm/trunk/test/MC/Disassembler/ARM/neon.txt llvm/trunk/test/MC/Disassembler/ARM/neont2.txt Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=154459&r1=154458&r2=154459&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original) +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Tue Apr 10 19:25:40 2012 @@ -2262,6 +2262,8 @@ case ARM::VLD2b8wb_register: case ARM::VLD2b16wb_register: case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2330,6 +2332,16 @@ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; Modified: llvm/trunk/test/MC/Disassembler/ARM/neon.txt URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/ARM/neon.txt?rev=154459&r1=154458&r2=154459&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/ARM/neon.txt (original) +++ llvm/trunk/test/MC/Disassembler/ARM/neon.txt Tue Apr 10 19:25:40 2012 @@ -2061,3 +2061,185 @@ # CHECK: vst4.16 {d8, d10, d12, d14}, [r4] 0x8f 0x81 0x04 0xf4 # CHECK: vst4.32 {d8, d10, d12, d14}, [r4] + +# rdar://11204059 +0x0d 0x87 0x24 0xf4 +# CHECK: vld1.8 {d8}, [r4]! +0x4d 0x87 0x24 0xf4 +# CHECK: vld1.16 {d8}, [r4]! +0x8d 0x87 0x24 0xf4 +# CHECK: vld1.32 {d8}, [r4]! +0xcd 0x87 0x24 0xf4 +# CHECK: vld1.64 {d8}, [r4]! +0x06 0x87 0x24 0xf4 +# CHECK: vld1.8 {d8}, [r4], r6 +0x46 0x87 0x24 0xf4 +# CHECK: vld1.16 {d8}, [r4], r6 +0x86 0x87 0x24 0xf4 +# CHECK: vld1.32 {d8}, [r4], r6 +0xc6 0x87 0x24 0xf4 +# CHECK: vld1.64 {d8}, [r4], r6 +0x0d 0x8a 0x24 0xf4 +# CHECK: vld1.8 {d8, d9}, [r4]! +0x4d 0x8a 0x24 0xf4 +# CHECK: vld1.16 {d8, d9}, [r4]! +0x8d 0x8a 0x24 0xf4 +# CHECK: vld1.32 {d8, d9}, [r4]! +0xcd 0x8a 0x24 0xf4 +# CHECK: vld1.64 {d8, d9}, [r4]! +0x06 0x8a 0x24 0xf4 +# CHECK: vld1.8 {d8, d9}, [r4], r6 +0x46 0x8a 0x24 0xf4 +# CHECK: vld1.16 {d8, d9}, [r4], r6 +0x86 0x8a 0x24 0xf4 +# CHECK: vld1.32 {d8, d9}, [r4], r6 +0xc6 0x8a 0x24 0xf4 +# CHECK: vld1.64 {d8, d9}, [r4], r6 +0x0d 0x86 0x24 0xf4 +# CHECK: vld1.8 {d8, d9, d10}, [r4]! +0x4d 0x86 0x24 0xf4 +# CHECK: vld1.16 {d8, d9, d10}, [r4]! +0x8d 0x86 0x24 0xf4 +# CHECK: vld1.32 {d8, d9, d10}, [r4]! +0xcd 0x86 0x24 0xf4 +# CHECK: vld1.64 {d8, d9, d10}, [r4]! +0x06 0x86 0x24 0xf4 +# CHECK: vld1.8 {d8, d9, d10}, [r4], r6 +0x46 0x86 0x24 0xf4 +# CHECK: vld1.16 {d8, d9, d10}, [r4], r6 +0x86 0x86 0x24 0xf4 +# CHECK: vld1.32 {d8, d9, d10}, [r4], r6 +0xc6 0x86 0x24 0xf4 +# CHECK: vld1.64 {d8, d9, d10}, [r4], r6 +0x0d 0x82 0x24 0xf4 +# CHECK: vld1.8 {d8, d9, d10, d11}, [r4]! +0x4d 0x82 0x24 0xf4 +# CHECK: vld1.16 {d8, d9, d10, d11}, [r4]! 
+0x8d 0x82 0x24 0xf4 +# CHECK: vld1.32 {d8, d9, d10, d11}, [r4]! +0xcd 0x82 0x24 0xf4 +# CHECK: vld1.64 {d8, d9, d10, d11}, [r4]! +0x06 0x82 0x24 0xf4 +# CHECK: vld1.8 {d8, d9, d10, d11}, [r4], r6 +0x46 0x82 0x24 0xf4 +# CHECK: vld1.16 {d8, d9, d10, d11}, [r4], r6 +0x86 0x82 0x24 0xf4 +# CHECK: vld1.32 {d8, d9, d10, d11}, [r4], r6 +0xc6 0x82 0x24 0xf4 +# CHECK: vld1.64 {d8, d9, d10, d11}, [r4], r6 +0x0d 0x88 0x24 0xf4 +# CHECK: vld2.8 {d8, d9}, [r4]! +0x4d 0x88 0x24 0xf4 +# CHECK: vld2.16 {d8, d9}, [r4]! +0x8d 0x88 0x24 0xf4 +# CHECK: vld2.32 {d8, d9}, [r4]! +0x06 0x88 0x24 0xf4 +# CHECK: vld2.8 {d8, d9}, [r4], r6 +0x46 0x88 0x24 0xf4 +# CHECK: vld2.16 {d8, d9}, [r4], r6 +0x86 0x88 0x24 0xf4 +# CHECK: vld2.32 {d8, d9}, [r4], r6 +0x0d 0x89 0x24 0xf4 +# CHECK: vld2.8 {d8, d10}, [r4]! +0x4d 0x89 0x24 0xf4 +# CHECK: vld2.16 {d8, d10}, [r4]! +0x8d 0x89 0x24 0xf4 +# CHECK: vld2.32 {d8, d10}, [r4]! +0x06 0x89 0x24 0xf4 +# CHECK: vld2.8 {d8, d10}, [r4], r6 +0x46 0x89 0x24 0xf4 +# CHECK: vld2.16 {d8, d10}, [r4], r6 +0x86 0x89 0x24 0xf4 +# CHECK: vld2.32 {d8, d10}, [r4], r6 +0x0d 0x84 0x24 0xf4 +# CHECK: vld3.8 {d8, d9, d10}, [r4]! +0x4d 0x84 0x24 0xf4 +# CHECK: vld3.16 {d8, d9, d10}, [r4]! +0x8d 0x84 0x24 0xf4 +# CHECK: vld3.32 {d8, d9, d10}, [r4]! +0x06 0x85 0x24 0xf4 +# CHECK: vld3.8 {d8, d10, d12}, [r4], r6 +0x46 0x85 0x24 0xf4 +# CHECK: vld3.16 {d8, d10, d12}, [r4], r6 +0x86 0x85 0x24 0xf4 +# CHECK: vld3.32 {d8, d10, d12}, [r4], r6 +0x0d 0x80 0x24 0xf4 +# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]! +0x4d 0x80 0x24 0xf4 +# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]! +0x8d 0x80 0x24 0xf4 +# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]! 
+0x06 0x81 0x24 0xf4 +# CHECK: vld4.8 {d8, d10, d12, d14}, [r4], r6 +0x46 0x81 0x24 0xf4 +# CHECK: vld4.16 {d8, d10, d12, d14}, [r4], r6 +0x86 0x81 0x24 0xf4 +# CHECK: vld4.32 {d8, d10, d12, d14}, [r4], r6 +0x4f 0x8a 0x24 0xf4 +# CHECK: vld1.16 {d8, d9}, [r4] +0x8f 0x8a 0x24 0xf4 +# CHECK: vld1.32 {d8, d9}, [r4] +0xcf 0x8a 0x24 0xf4 +# CHECK: vld1.64 {d8, d9}, [r4] +0x0f 0x8a 0x24 0xf4 +# CHECK: vld1.8 {d8, d9}, [r4] +0x4f 0x88 0x24 0xf4 +# CHECK: vld2.16 {d8, d9}, [r4] +0x8f 0x88 0x24 0xf4 +# CHECK: vld2.32 {d8, d9}, [r4] +0x0f 0x88 0x24 0xf4 +# CHECK: vld2.8 {d8, d9}, [r4] +0x4d 0x88 0x24 0xf4 +# CHECK: vld2.16 {d8, d9}, [r4]! +0x46 0x88 0x24 0xf4 +# CHECK: vld2.16 {d8, d9}, [r4], r6 +0x8d 0x88 0x24 0xf4 +# CHECK: vld2.32 {d8, d9}, [r4]! +0x86 0x88 0x24 0xf4 +# CHECK: vld2.32 {d8, d9}, [r4], r6 +0x0d 0x88 0x24 0xf4 +# CHECK: vld2.8 {d8, d9}, [r4]! +0x06 0x88 0x24 0xf4 +# CHECK: vld2.8 {d8, d9}, [r4], r6 +0x4f 0x89 0x24 0xf4 +# CHECK: vld2.16 {d8, d10}, [r4] +0x8f 0x89 0x24 0xf4 +# CHECK: vld2.32 {d8, d10}, [r4] +0x0f 0x89 0x24 0xf4 +# CHECK: vld2.8 {d8, d10}, [r4] +0x4d 0x83 0x24 0xf4 +# CHECK: vld2.16 {d8, d9, d10, d11}, [r4]! +0x46 0x83 0x24 0xf4 +# CHECK: vld2.16 {d8, d9, d10, d11}, [r4], r6 +0x8d 0x83 0x24 0xf4 +# CHECK: vld2.32 {d8, d9, d10, d11}, [r4]! +0x86 0x83 0x24 0xf4 +# CHECK: vld2.32 {d8, d9, d10, d11}, [r4], r6 +0x0d 0x83 0x24 0xf4 +# CHECK: vld2.8 {d8, d9, d10, d11}, [r4]! 
+0x06 0x83 0x24 0xf4 +# CHECK: vld2.8 {d8, d9, d10, d11}, [r4], r6 +0x0f 0x84 0x24 0xf4 +# CHECK: vld3.8 {d8, d9, d10}, [r4] +0x4f 0x84 0x24 0xf4 +# CHECK: vld3.16 {d8, d9, d10}, [r4] +0x8f 0x84 0x24 0xf4 +# CHECK: vld3.32 {d8, d9, d10}, [r4] +0x0f 0x80 0x24 0xf4 +# CHECK: vld4.8 {d8, d9, d10, d11}, [r4] +0x4f 0x80 0x24 0xf4 +# CHECK: vld4.16 {d8, d9, d10, d11}, [r4] +0x8f 0x80 0x24 0xf4 +# CHECK: vld4.32 {d8, d9, d10, d11}, [r4] +0x0f 0x85 0x24 0xf4 +# CHECK: vld3.8 {d8, d10, d12}, [r4] +0x4f 0x85 0x24 0xf4 +# CHECK: vld3.16 {d8, d10, d12}, [r4] +0x8f 0x85 0x24 0xf4 +# CHECK: vld3.32 {d8, d10, d12}, [r4] +0x0f 0x81 0x24 0xf4 +# CHECK: vld4.8 {d8, d10, d12, d14}, [r4] +0x4f 0x81 0x24 0xf4 +# CHECK: vld4.16 {d8, d10, d12, d14}, [r4] +0x8f 0x81 0x24 0xf4 +# CHECK: vld4.32 {d8, d10, d12, d14}, [r4] Modified: llvm/trunk/test/MC/Disassembler/ARM/neont2.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/ARM/neont2.txt?rev=154459&r1=154458&r2=154459&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/ARM/neont2.txt (original) +++ llvm/trunk/test/MC/Disassembler/ARM/neont2.txt Tue Apr 10 19:25:40 2012 @@ -1778,3 +1778,185 @@ # CHECK: vst4.16 {d8, d10, d12, d14}, [r4] 0x04 0xf9 0x8f 0x81 # CHECK: vst4.32 {d8, d10, d12, d14}, [r4] + +# rdar://11204059 +0x24 0xf9 0x0d 0x87 +# CHECK: vld1.8 {d8}, [r4]! +0x24 0xf9 0x4d 0x87 +# CHECK: vld1.16 {d8}, [r4]! +0x24 0xf9 0x8d 0x87 +# CHECK: vld1.32 {d8}, [r4]! +0x24 0xf9 0xcd 0x87 +# CHECK: vld1.64 {d8}, [r4]! +0x24 0xf9 0x06 0x87 +# CHECK: vld1.8 {d8}, [r4], r6 +0x24 0xf9 0x46 0x87 +# CHECK: vld1.16 {d8}, [r4], r6 +0x24 0xf9 0x86 0x87 +# CHECK: vld1.32 {d8}, [r4], r6 +0x24 0xf9 0xc6 0x87 +# CHECK: vld1.64 {d8}, [r4], r6 +0x24 0xf9 0x0d 0x8a +# CHECK: vld1.8 {d8, d9}, [r4]! +0x24 0xf9 0x4d 0x8a +# CHECK: vld1.16 {d8, d9}, [r4]! +0x24 0xf9 0x8d 0x8a +# CHECK: vld1.32 {d8, d9}, [r4]! +0x24 0xf9 0xcd 0x8a +# CHECK: vld1.64 {d8, d9}, [r4]! 
+0x24 0xf9 0x06 0x8a +# CHECK: vld1.8 {d8, d9}, [r4], r6 +0x24 0xf9 0x46 0x8a +# CHECK: vld1.16 {d8, d9}, [r4], r6 +0x24 0xf9 0x86 0x8a +# CHECK: vld1.32 {d8, d9}, [r4], r6 +0x24 0xf9 0xc6 0x8a +# CHECK: vld1.64 {d8, d9}, [r4], r6 +0x24 0xf9 0x0d 0x86 +# CHECK: vld1.8 {d8, d9, d10}, [r4]! +0x24 0xf9 0x4d 0x86 +# CHECK: vld1.16 {d8, d9, d10}, [r4]! +0x24 0xf9 0x8d 0x86 +# CHECK: vld1.32 {d8, d9, d10}, [r4]! +0x24 0xf9 0xcd 0x86 +# CHECK: vld1.64 {d8, d9, d10}, [r4]! +0x24 0xf9 0x06 0x86 +# CHECK: vld1.8 {d8, d9, d10}, [r4], r6 +0x24 0xf9 0x46 0x86 +# CHECK: vld1.16 {d8, d9, d10}, [r4], r6 +0x24 0xf9 0x86 0x86 +# CHECK: vld1.32 {d8, d9, d10}, [r4], r6 +0x24 0xf9 0xc6 0x86 +# CHECK: vld1.64 {d8, d9, d10}, [r4], r6 +0x24 0xf9 0x0d 0x82 +# CHECK: vld1.8 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x4d 0x82 +# CHECK: vld1.16 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x8d 0x82 +# CHECK: vld1.32 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0xcd 0x82 +# CHECK: vld1.64 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x06 0x82 +# CHECK: vld1.8 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x46 0x82 +# CHECK: vld1.16 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x86 0x82 +# CHECK: vld1.32 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0xc6 0x82 +# CHECK: vld1.64 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x0d 0x88 +# CHECK: vld2.8 {d8, d9}, [r4]! +0x24 0xf9 0x4d 0x88 +# CHECK: vld2.16 {d8, d9}, [r4]! +0x24 0xf9 0x8d 0x88 +# CHECK: vld2.32 {d8, d9}, [r4]! +0x24 0xf9 0x06 0x88 +# CHECK: vld2.8 {d8, d9}, [r4], r6 +0x24 0xf9 0x46 0x88 +# CHECK: vld2.16 {d8, d9}, [r4], r6 +0x24 0xf9 0x86 0x88 +# CHECK: vld2.32 {d8, d9}, [r4], r6 +0x24 0xf9 0x0d 0x89 +# CHECK: vld2.8 {d8, d10}, [r4]! +0x24 0xf9 0x4d 0x89 +# CHECK: vld2.16 {d8, d10}, [r4]! +0x24 0xf9 0x8d 0x89 +# CHECK: vld2.32 {d8, d10}, [r4]! +0x24 0xf9 0x06 0x89 +# CHECK: vld2.8 {d8, d10}, [r4], r6 +0x24 0xf9 0x46 0x89 +# CHECK: vld2.16 {d8, d10}, [r4], r6 +0x24 0xf9 0x86 0x89 +# CHECK: vld2.32 {d8, d10}, [r4], r6 +0x24 0xf9 0x0d 0x84 +# CHECK: vld3.8 {d8, d9, d10}, [r4]! 
+0x24 0xf9 0x4d 0x84 +# CHECK: vld3.16 {d8, d9, d10}, [r4]! +0x24 0xf9 0x8d 0x84 +# CHECK: vld3.32 {d8, d9, d10}, [r4]! +0x24 0xf9 0x06 0x85 +# CHECK: vld3.8 {d8, d10, d12}, [r4], r6 +0x24 0xf9 0x46 0x85 +# CHECK: vld3.16 {d8, d10, d12}, [r4], r6 +0x24 0xf9 0x86 0x85 +# CHECK: vld3.32 {d8, d10, d12}, [r4], r6 +0x24 0xf9 0x0d 0x80 +# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x4d 0x80 +# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x8d 0x80 +# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x06 0x81 +# CHECK: vld4.8 {d8, d10, d12, d14}, [r4], r6 +0x24 0xf9 0x46 0x81 +# CHECK: vld4.16 {d8, d10, d12, d14}, [r4], r6 +0x24 0xf9 0x86 0x81 +# CHECK: vld4.32 {d8, d10, d12, d14}, [r4], r6 +0x24 0xf9 0x4f 0x8a +# CHECK: vld1.16 {d8, d9}, [r4] +0x24 0xf9 0x8f 0x8a +# CHECK: vld1.32 {d8, d9}, [r4] +0x24 0xf9 0xcf 0x8a +# CHECK: vld1.64 {d8, d9}, [r4] +0x24 0xf9 0x0f 0x8a +# CHECK: vld1.8 {d8, d9}, [r4] +0x24 0xf9 0x4f 0x88 +# CHECK: vld2.16 {d8, d9}, [r4] +0x24 0xf9 0x8f 0x88 +# CHECK: vld2.32 {d8, d9}, [r4] +0x24 0xf9 0x0f 0x88 +# CHECK: vld2.8 {d8, d9}, [r4] +0x24 0xf9 0x4d 0x88 +# CHECK: vld2.16 {d8, d9}, [r4]! +0x24 0xf9 0x46 0x88 +# CHECK: vld2.16 {d8, d9}, [r4], r6 +0x24 0xf9 0x8d 0x88 +# CHECK: vld2.32 {d8, d9}, [r4]! +0x24 0xf9 0x86 0x88 +# CHECK: vld2.32 {d8, d9}, [r4], r6 +0x24 0xf9 0x0d 0x88 +# CHECK: vld2.8 {d8, d9}, [r4]! +0x24 0xf9 0x06 0x88 +# CHECK: vld2.8 {d8, d9}, [r4], r6 +0x24 0xf9 0x4f 0x89 +# CHECK: vld2.16 {d8, d10}, [r4] +0x24 0xf9 0x8f 0x89 +# CHECK: vld2.32 {d8, d10}, [r4] +0x24 0xf9 0x0f 0x89 +# CHECK: vld2.8 {d8, d10}, [r4] +0x24 0xf9 0x4d 0x83 +# CHECK: vld2.16 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x46 0x83 +# CHECK: vld2.16 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x8d 0x83 +# CHECK: vld2.32 {d8, d9, d10, d11}, [r4]! +0x24 0xf9 0x86 0x83 +# CHECK: vld2.32 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x0d 0x83 +# CHECK: vld2.8 {d8, d9, d10, d11}, [r4]! 
+0x24 0xf9 0x06 0x83 +# CHECK: vld2.8 {d8, d9, d10, d11}, [r4], r6 +0x24 0xf9 0x0f 0x84 +# CHECK: vld3.8 {d8, d9, d10}, [r4] +0x24 0xf9 0x4f 0x84 +# CHECK: vld3.16 {d8, d9, d10}, [r4] +0x24 0xf9 0x8f 0x84 +# CHECK: vld3.32 {d8, d9, d10}, [r4] +0x24 0xf9 0x0f 0x80 +# CHECK: vld4.8 {d8, d9, d10, d11}, [r4] +0x24 0xf9 0x4f 0x80 +# CHECK: vld4.16 {d8, d9, d10, d11}, [r4] +0x24 0xf9 0x8f 0x80 +# CHECK: vld4.32 {d8, d9, d10, d11}, [r4] +0x24 0xf9 0x0f 0x85 +# CHECK: vld3.8 {d8, d10, d12}, [r4] +0x24 0xf9 0x4f 0x85 +# CHECK: vld3.16 {d8, d10, d12}, [r4] +0x24 0xf9 0x8f 0x85 +# CHECK: vld3.32 {d8, d10, d12}, [r4] +0x24 0xf9 0x0f 0x81 +# CHECK: vld4.8 {d8, d10, d12, d14}, [r4] +0x24 0xf9 0x4f 0x81 +# CHECK: vld4.16 {d8, d10, d12, d14}, [r4] +0x24 0xf9 0x8f 0x81 +# CHECK: vld4.32 {d8, d10, d12, d14}, [r4] From isanbard at gmail.com Tue Apr 10 19:34:58 2012 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 11 Apr 2012 00:34:58 -0000 Subject: [llvm-commits] [test-suite] r154461 - /test-suite/trunk/MultiSource/Applications/JM/lencod/global.h Message-ID: <20120411003458.0C3082A6C065@llvm.org> Author: void Date: Tue Apr 10 19:34:57 2012 New Revision: 154461 URL: http://llvm.org/viewvc/llvm-project?rev=154461&view=rev Log: Increase the file name size so that some long file names won't cause a failure. Modified: test-suite/trunk/MultiSource/Applications/JM/lencod/global.h Modified: test-suite/trunk/MultiSource/Applications/JM/lencod/global.h URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/JM/lencod/global.h?rev=154461&r1=154460&r2=154461&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Applications/JM/lencod/global.h (original) +++ test-suite/trunk/MultiSource/Applications/JM/lencod/global.h Tue Apr 10 19:34:57 2012 @@ -595,7 +595,7 @@ int frame_ctr; //!< number of coded frames } SNRParameters; -#define FILE_NAME_SIZE 200 +#define FILE_NAME_SIZE 256 //! 
all input parameters typedef struct { From cdavis at mymail.mines.edu Tue Apr 10 19:43:10 2012 From: cdavis at mymail.mines.edu (Charles Davis) Date: Tue, 10 Apr 2012 18:43:10 -0600 Subject: [llvm-commits] [PATCH] Add RETW and LRETW instructions In-Reply-To: <7FB5E475-31FA-440C-B9B0-87164CBE36CD@mymail.mines.edu> References: <978783A3-2ACD-4820-AD7C-8BDDB230A0D6@mymail.mines.edu> <00800EB8-9E4F-4E44-82E3-02A295257FEF@apple.com> <19916F2D-8A6D-4A52-B2BA-012E877D39D4@mymail.mines.edu> <20120324224459.GA2197@britannica.bec.de> <55C446C0-C8BC-42F0-80D7-82CF7E529BC7@mymail.mines.edu> <7FB5E475-31FA-440C-B9B0-87164CBE36CD@mymail.mines.edu> Message-ID: Ping... Chip -------------- next part -------------- A non-text attachment was scrubbed... Name: retw.patch Type: application/octet-stream Size: 3486 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/77ec6a58/attachment.obj -------------- next part -------------- On Apr 1, 2012, at 5:26 PM, Charles Davis wrote: > Ping... > > Patch reattached. > > Chip > On Mar 24, 2012, at 4:50 PM, Charles Davis wrote: > >> >> On Mar 24, 2012, at 4:44 PM, Joerg Sonnenberger wrote: >> >>> On Sat, Mar 24, 2012 at 03:32:44PM -0600, Charles Davis wrote: >>>> Anyway, here is a patch that addresses all of Kay and Joerg's concerns. >>> >>> Still no test case :) >> Oops, heh heh... :). This patch should have the test cases I added. >> >> Chip From isanbard at gmail.com Tue Apr 10 19:50:41 2012 From: isanbard at gmail.com (Bill Wendling) Date: Wed, 11 Apr 2012 00:50:41 -0000 Subject: [llvm-commits] [test-suite] r154464 - /test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c Message-ID: <20120411005041.DC3622A6C065@llvm.org> Author: void Date: Tue Apr 10 19:50:41 2012 New Revision: 154464 URL: http://llvm.org/viewvc/llvm-project?rev=154464&view=rev Log: Increase the file name size so that some long file names won't cause a failure. 
Modified: test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c Modified: test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c?rev=154464&r1=154463&r2=154464&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c (original) +++ test-suite/trunk/MultiSource/Benchmarks/MallocBench/gs/gs.c Tue Apr 10 19:50:41 2012 @@ -197,7 +197,7 @@ /* Process file names */ void argproc(char *arg, int index) -{ char line[200]; +{ char line[256]; int code; /* Finish initialization */ init2(); From chandlerc at gmail.com Tue Apr 10 19:56:29 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Wed, 11 Apr 2012 01:56:29 +0100 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: References: <20120331124242.93B692A6C065@llvm.org> Message-ID: On Wed, Apr 11, 2012 at 12:31 AM, Chandler Carruth wrote: > I'll start looking for smoking guns right away though. This looks very much like the previous cases where inliner changes caused compile-time regressions. Looking at x86-64 of sqlite3, the profile with the trunk clang shows only 3.5% of all the time in the inline cost analysis. That's a bit higher than I would like (I've got some ideas to shrink it on two fronts that I will implement right away), but it's not likely responsible for the near 10% regression you're seeing; this function wasn't even free before. However, I'm seeing time spread pretty well between: JumpThreading, the RA, CorrelatedValueProp, GVN, and InstCombine. This looks like increased kicking in of the host of scalar optimizations giving us a broad slight slowdown. I'm still working on doing before/after profile comparisons and other things to see if I can tease out the culprit here. 
I also see several places where we can recoup a few percent in all likelihood; I'll try to tackle those if I can. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/8bf1b952/attachment-0001.html From evan.cheng at apple.com Tue Apr 10 20:03:11 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 01:03:11 -0000 Subject: [llvm-commits] [llvm] r154466 - in /llvm/trunk/test/CodeGen/ARM: fma.ll fusedMAC.ll Message-ID: <20120411010311.6FEFB2A6C065@llvm.org> Author: evancheng Date: Tue Apr 10 20:03:11 2012 New Revision: 154466 URL: http://llvm.org/viewvc/llvm-project?rev=154466&view=rev Log: Merge fma.ll into fusedMAC.ll Removed: llvm/trunk/test/CodeGen/ARM/fma.ll Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Removed: llvm/trunk/test/CodeGen/ARM/fma.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fma.ll?rev=154465&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fma.ll (original) +++ llvm/trunk/test/CodeGen/ARM/fma.ll (removed) @@ -1,30 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp4 | FileCheck %s - -define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { -entry: -; CHECK: test_f32 -; CHECK: vfma.f32 - %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone - ret float %call -} - -define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { -entry: -; CHECK: test_f64 -; CHECK: vfma.f64 - %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone - ret double %call -} - -define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { -entry: -; CHECK: test_v2f32 -; CHECK: vfma.f32 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind - ret <2 x float> %0 -} 
- -declare float @llvm.fma.f32(float, float, float) nounwind readnone -declare double @llvm.fma.f64(double, double, double) nounwind readnone - -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=154466&r1=154465&r2=154466&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (original) +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Tue Apr 10 20:03:11 2012 @@ -98,3 +98,31 @@ %sub = fsub <4 x float> %a, %mul ret <4 x float> %sub } + +define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: +; CHECK: test_f32 +; CHECK: vfma.f32 + %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %call +} + +define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_f64 +; CHECK: vfma.f64 + %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %call +} + +define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { +entry: +; CHECK: test_v2f32 +; CHECK: vfma.f32 + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind + ret <2 x float> %0 +} + +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone From wendling at apple.com Tue Apr 10 20:06:23 2012 From: wendling at apple.com (Bill Wendling) Date: Tue, 10 Apr 2012 18:06:23 -0700 Subject: [llvm-commits] [PATCH] Add RETW and LRETW instructions In-Reply-To: References: <978783A3-2ACD-4820-AD7C-8BDDB230A0D6@mymail.mines.edu> 
<00800EB8-9E4F-4E44-82E3-02A295257FEF@apple.com> <19916F2D-8A6D-4A52-B2BA-012E877D39D4@mymail.mines.edu> <20120324224459.GA2197@britannica.bec.de> <55C446C0-C8BC-42F0-80D7-82CF7E529BC7@mymail.mines.edu> <7FB5E475-31FA-440C-B9B0-87164CBE36CD@mymail.mines.edu> Message-ID: LGTM. -bw On Apr 10, 2012, at 5:43 PM, Charles Davis wrote: > Ping... > > Chip > > > On Apr 1, 2012, at 5:26 PM, Charles Davis wrote: > >> Ping... >> >> Patch reattached. >> >> Chip >> On Mar 24, 2012, at 4:50 PM, Charles Davis wrote: >> >>> >>> On Mar 24, 2012, at 4:44 PM, Joerg Sonnenberger wrote: >>> >>>> On Sat, Mar 24, 2012 at 03:32:44PM -0600, Charles Davis wrote: >>>>> Anyway, here is a patch that addresses all of Kay and Joerg's concerns. >>>> >>>> Still no test case :) >>> Oops, heh heh... :). This patch should have the test cases I added. >>> >>> Chip > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From cdavis at mines.edu Tue Apr 10 20:10:53 2012 From: cdavis at mines.edu (Charles Davis) Date: Wed, 11 Apr 2012 01:10:53 -0000 Subject: [llvm-commits] [llvm] r154468 - in /llvm/trunk: lib/Target/X86/X86InstrControl.td test/MC/Disassembler/X86/intel-syntax.txt test/MC/X86/intel-syntax-encoding.s test/MC/X86/x86-32.s test/MC/X86/x86-64.s Message-ID: <20120411011053.D8B992A6C065@llvm.org> Author: cdavis Date: Tue Apr 10 20:10:53 2012 New Revision: 154468 URL: http://llvm.org/viewvc/llvm-project?rev=154468&view=rev Log: Add retw and lretw instructions. Also, fix Intel syntax parsing for all ret instructions. 
Modified: llvm/trunk/lib/Target/X86/X86InstrControl.td llvm/trunk/test/MC/Disassembler/X86/intel-syntax.txt llvm/trunk/test/MC/X86/intel-syntax-encoding.s llvm/trunk/test/MC/X86/x86-32.s llvm/trunk/test/MC/X86/x86-64.s Modified: llvm/trunk/lib/Target/X86/X86InstrControl.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrControl.td?rev=154468&r1=154467&r2=154468&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrControl.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrControl.td Tue Apr 10 20:10:53 2012 @@ -21,20 +21,25 @@ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; + def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops), + "ret{w}", + [], IIC_RET>, OpSize; def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), - "retw\t$amt", + "ret{w}\t$amt", [], IIC_RET_IMM>, OpSize; def LRETL : I <0xCB, RawFrm, (outs), (ins), - "lretl", [], IIC_RET>; + "{l}ret{l|f}", [], IIC_RET>; + def LRETW : I <0xCB, RawFrm, (outs), (ins), + "{l}ret{w|f}", [], IIC_RET>, OpSize; def LRETQ : RI <0xCB, RawFrm, (outs), (ins), - "lretq", [], IIC_RET>; + "{l}ret{q|f}", [], IIC_RET>; def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", [], IIC_RET>; + "{l}ret{l|f}\t$amt", [], IIC_RET>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lretw\t$amt", [], IIC_RET>, OpSize; + "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize; } // Unconditional branches. 
Modified: llvm/trunk/test/MC/Disassembler/X86/intel-syntax.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/intel-syntax.txt?rev=154468&r1=154467&r2=154468&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/X86/intel-syntax.txt (original) +++ llvm/trunk/test/MC/Disassembler/X86/intel-syntax.txt Tue Apr 10 20:10:53 2012 @@ -99,3 +99,9 @@ # CHECK: iretq 0x48 0xcf +# CHECK: ret +0x66 0xc3 + +# CHECK: retf +0x66 0xcb + Modified: llvm/trunk/test/MC/X86/intel-syntax-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax-encoding.s?rev=154468&r1=154467&r2=154468&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/intel-syntax-encoding.s (original) +++ llvm/trunk/test/MC/X86/intel-syntax-encoding.s Tue Apr 10 20:10:53 2012 @@ -42,3 +42,16 @@ // CHECK: encoding: [0x0f,0xc2,0xd1,0x01] cmpltps XMM2, XMM1 + +// CHECK: encoding: [0xc3] + ret + +// CHECK: encoding: [0xcb] + retf + +// CHECK: encoding: [0xc2,0x08,0x00] + ret 8 + +// CHECK: encoding: [0xca,0x08,0x00] + retf 8 + Modified: llvm/trunk/test/MC/X86/x86-32.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-32.s?rev=154468&r1=154467&r2=154468&view=diff ============================================================================== --- llvm/trunk/test/MC/X86/x86-32.s (original) +++ llvm/trunk/test/MC/X86/x86-32.s Tue Apr 10 20:10:53 2012 @@ -990,3 +990,11 @@ // CHECK: xchgl %ecx, %eax // CHECK: encoding: [0x91] xchgl %eax, %ecx + +// CHECK: retw +// CHECK: encoding: [0x66,0xc3] +retw + +// CHECK: lretw +// CHECK: encoding: [0x66,0xcb] +lretw Modified: llvm/trunk/test/MC/X86/x86-64.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64.s?rev=154468&r1=154467&r2=154468&view=diff ============================================================================== --- 
llvm/trunk/test/MC/X86/x86-64.s (original) +++ llvm/trunk/test/MC/X86/x86-64.s Tue Apr 10 20:10:53 2012 @@ -50,6 +50,9 @@ // CHECK: ret ret +// CHECK: retw + retw + // FIXME: Check that this matches SUB32ri8 // CHECK: subl $1, %eax subl $1, %eax @@ -841,6 +844,7 @@ lretq // CHECK: lretq # encoding: [0x48,0xcb] lretl // CHECK: lretl # encoding: [0xcb] lret // CHECK: lretl # encoding: [0xcb] +lretw // CHECK: lretw # encoding: [0x66,0xcb] // rdar://8403907 sysret From cdavis at mymail.mines.edu Tue Apr 10 20:11:22 2012 From: cdavis at mymail.mines.edu (Charles Davis) Date: Tue, 10 Apr 2012 19:11:22 -0600 Subject: [llvm-commits] [PATCH] Add RETW and LRETW instructions In-Reply-To: References: <978783A3-2ACD-4820-AD7C-8BDDB230A0D6@mymail.mines.edu> <00800EB8-9E4F-4E44-82E3-02A295257FEF@apple.com> <19916F2D-8A6D-4A52-B2BA-012E877D39D4@mymail.mines.edu> <20120324224459.GA2197@britannica.bec.de> <55C446C0-C8BC-42F0-80D7-82CF7E529BC7@mymail.mines.edu> <7FB5E475-31FA-440C-B9B0-87164CBE36CD@mymail.mines.edu> Message-ID: <402C4DD0-0E8E-49F7-AFA9-64FB4F9F5480@mymail.mines.edu> Thanks, committed r154468. Chip On Apr 10, 2012, at 7:06 PM, Bill Wendling wrote: > LGTM. > > -bw > > On Apr 10, 2012, at 5:43 PM, Charles Davis wrote: > >> Ping... >> >> Chip >> >> >> On Apr 1, 2012, at 5:26 PM, Charles Davis wrote: >> >>> Ping... >>> >>> Patch reattached. >>> >>> Chip >>> On Mar 24, 2012, at 4:50 PM, Charles Davis wrote: >>> >>>> >>>> On Mar 24, 2012, at 4:44 PM, Joerg Sonnenberger wrote: >>>> >>>>> On Sat, Mar 24, 2012 at 03:32:44PM -0600, Charles Davis wrote: >>>>>> Anyway, here is a patch that addresses all of Kay and Joerg's concerns. >>>>> >>>>> Still no test case :) >>>> Oops, heh heh... :). This patch should have the test cases I added. 
>>>> >>>> Chip >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From evan.cheng at apple.com Tue Apr 10 20:21:26 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 01:21:26 -0000 Subject: [llvm-commits] [llvm] r154469 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td test/CodeGen/ARM/fusedMAC.ll Message-ID: <20120411012126.133D92A6C065@llvm.org> Author: evancheng Date: Tue Apr 10 20:21:25 2012 New Revision: 154469 URL: http://llvm.org/viewvc/llvm-project?rev=154469&view=rev Log: Match (fneg (fma) to vfnma. rdar://10139676 Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=154469&r1=154468&r2=154469&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Tue Apr 10 20:21:25 2012 @@ -1140,6 +1140,14 @@ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; +// Match @llvm.fma.* intrinsics +def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=154469&r1=154468&r2=154469&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll 
(original) +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Tue Apr 10 20:21:25 2012 @@ -99,30 +99,49 @@ ret <4 x float> %sub } -define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { +define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp { entry: -; CHECK: test_f32 +; CHECK: test_fma_f32 ; CHECK: vfma.f32 %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone ret float %call } -define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { +define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: -; CHECK: test_f64 +; CHECK: test_fma_f64 ; CHECK: vfma.f64 %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone ret double %call } -define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { +define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { entry: -; CHECK: test_v2f32 +; CHECK: test_fma_v2f32 ; CHECK: vfma.f32 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind ret <2 x float> %0 } +define float @test_fnma_f32(float %a, float %b, float %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f32 +; CHECK: vfnma.f32 + %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + %tmp1 = fsub float -0.0, %call + %tmp2 = fsub float %tmp1, %c + ret float %tmp2 +} + +define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f64 +; CHECK: vfnma.f64 + %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + %tmp = fsub double -0.0, %call + ret double %tmp +} + declare float @llvm.fma.f32(float, float, float) nounwind readnone declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, 
<2 x float>) nounwind readnone From gkistanova at gmail.com Tue Apr 10 20:26:24 2012 From: gkistanova at gmail.com (Galina Kistanova) Date: Tue, 10 Apr 2012 18:26:24 -0700 Subject: [llvm-commits] Buildmaster will be restarted today after 6 PM Pacific In-Reply-To: References: Message-ID: Hello everyone, I hope the issue with the wrong blame list for build failures is fixed by now. But please keep an eye on this and in case of wrong behaviour report it to me. Thanks Galina On Tue, Apr 10, 2012 at 12:43 PM, Galina Kistanova wrote: > Hello everyone, > Buildmaster will be restarted today after 6 PM Pacific to pick up a small > fix for wrong blame lists. > As Duncan noticed, buildbot sometimes makes the wrong blame list for build > failures. > > Thanks > > Galina > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/b3350de1/attachment.html From craig.topper at gmail.com Tue Apr 10 22:06:36 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 11 Apr 2012 03:06:36 -0000 Subject: [llvm-commits] [llvm] r154473 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <20120411030636.272A72A6C065@llvm.org> Author: ctopper Date: Tue Apr 10 22:06:35 2012 New Revision: 154473 URL: http://llvm.org/viewvc/llvm-project?rev=154473&view=rev Log: Optimize code a bit by calling push_back only once in some loops. Reduces compiled code size a bit.
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=154473&r1=154472&r2=154473&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Apr 10 22:06:35 2012 @@ -2804,11 +2804,11 @@ } // Utility for visitShuffleVector - Return true if every element in Mask, -// begining // from position Pos and ending in Pos+Size, falls within the +// begining from position Pos and ending in Pos+Size, falls within the // specified sequential range [L, L+Pos). or is undef. static bool isSequentialInRange(const SmallVectorImpl &Mask, - int Pos, int Size, int Low) { - for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; @@ -2878,10 +2878,9 @@ SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2952,12 +2951,13 @@ SmallVector MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } 
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2973,22 +2973,20 @@ EVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), From atrick at apple.com Tue Apr 10 22:19:15 2012 From: atrick at apple.com (Andrew Trick) Date: Wed, 11 Apr 2012 03:19:15 -0000 Subject: [llvm-commits] [llvm] r154474 - /llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Message-ID: <20120411031915.B7EF32A6C065@llvm.org> Author: atrick Date: Tue Apr 10 22:19:15 2012 New Revision: 154474 URL: http://llvm.org/viewvc/llvm-project?rev=154474&view=rev Log: Table-generated register pressure fixes. Handle mixing allocatable and unallocatable register gracefully. Simplify the pruning of register unit sets. 
Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.cpp?rev=154474&r1=154473&r2=154474&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenRegisters.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Tue Apr 10 22:19:15 2012 @@ -945,22 +945,34 @@ // For simplicitly make the SetID the same as EnumValue. IntEqClasses UberSetIDs(Registers.size()+1); + std::set AllocatableRegs; for (unsigned i = 0, e = RegBank.getRegClasses().size(); i != e; ++i) { + CodeGenRegisterClass *RegClass = RegBank.getRegClasses()[i]; + if (!RegClass->Allocatable) + continue; + const CodeGenRegister::Set &Regs = RegClass->getMembers(); - if (Regs.empty()) continue; + if (Regs.empty()) + continue; unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue); assert(USetID && "register number 0 is invalid"); - // combine non-allocatable classes - if (!RegClass->Allocatable) { - UberSetIDs.join(0, USetID); - USetID = 0; - } + AllocatableRegs.insert((*Regs.begin())->EnumValue); for (CodeGenRegister::Set::const_iterator I = llvm::next(Regs.begin()), - E = Regs.end(); I != E; ++I) + E = Regs.end(); I != E; ++I) { + AllocatableRegs.insert((*I)->EnumValue); UberSetIDs.join(USetID, (*I)->EnumValue); + } + } + // Combine non-allocatable regs. + for (unsigned i = 0, e = Registers.size(); i != e; ++i) { + unsigned RegNum = Registers[i]->EnumValue; + if (AllocatableRegs.count(RegNum)) + continue; + + UberSetIDs.join(0, RegNum); } UberSetIDs.compress(); @@ -1155,29 +1167,34 @@ assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets"); // Form an equivalence class of UnitSets with no significant difference. - IntEqClasses RepUnitSetIDs(RegUnitSets.size()); + // Populate PrunedUnitSets with each equivalence class's superset. 
+ std::vector PrunedUnitSets; for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size(); SubIdx != EndIdx; ++SubIdx) { const RegUnitSet &SubSet = RegUnitSets[SubIdx]; - for (unsigned SuperIdx = 0; SuperIdx != EndIdx; ++SuperIdx) { + unsigned SuperIdx = 0; + for (; SuperIdx != EndIdx; ++SuperIdx) { if (SuperIdx == SubIdx) continue; - - const RegUnitSet &SuperSet = RegUnitSets[SuperIdx]; - if (isRegUnitSubSet(SubSet.Units, SuperSet.Units) - && (SubSet.Units.size() + 3 > SuperSet.Units.size())) { - RepUnitSetIDs.join(SubIdx, SuperIdx); + const RegUnitSet *SuperSet = 0; + if (SuperIdx > SubIdx) + SuperSet = &RegUnitSets[SuperIdx]; + else { + // Compare with already-pruned sets. + if (SuperIdx >= PrunedUnitSets.size()) + continue; + SuperSet = &PrunedUnitSets[SuperIdx]; + } + if (isRegUnitSubSet(SubSet.Units, SuperSet->Units) + && (SubSet.Units.size() + 3 > SuperSet->Units.size())) { + break; } } - } - RepUnitSetIDs.compress(); - - // Populate PrunedUnitSets with each equivalence class's superset. - std::vector PrunedUnitSets(RepUnitSetIDs.getNumClasses()); - for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) { - RegUnitSet &SuperSet = PrunedUnitSets[RepUnitSetIDs[i]]; - if (SuperSet.Units.size() < RegUnitSets[i].Units.size()) - SuperSet = RegUnitSets[i]; + if (SuperIdx != EndIdx) + continue; + PrunedUnitSets.resize(PrunedUnitSets.size()+1); + PrunedUnitSets.back().Name = RegUnitSets[SubIdx].Name; + PrunedUnitSets.back().Units.swap(RegUnitSets[SubIdx].Units); } RegUnitSets.swap(PrunedUnitSets); } @@ -1195,6 +1212,8 @@ const ArrayRef &RegClasses = getRegClasses(); unsigned NumRegClasses = RegClasses.size(); for (unsigned RCIdx = 0, RCEnd = NumRegClasses; RCIdx != RCEnd; ++RCIdx) { + if (!RegClasses[RCIdx]->Allocatable) + continue; // Speculatively grow the RegUnitSets to hold the new set. RegUnitSets.resize(RegUnitSets.size() + 1); @@ -1253,12 +1272,15 @@ } } - // Iteratively prune unit sets again after inferring supersets. 
+ // Iteratively prune unit sets after inferring supersets. pruneUnitSets(); // For each register class, list the UnitSets that are supersets. RegClassUnitSets.resize(NumRegClasses); for (unsigned RCIdx = 0, RCEnd = NumRegClasses; RCIdx != RCEnd; ++RCIdx) { + if (!RegClasses[RCIdx]->Allocatable) + continue; + // Recompute the sorted list of units in this class. std::vector RegUnits; buildRegUnitSet(RegClasses[RCIdx]->getMembers(), RegUnits); @@ -1273,6 +1295,7 @@ if (isRegUnitSubSet(RegUnits, RegUnitSets[USIdx].Units)) RegClassUnitSets[RCIdx].push_back(USIdx); } + assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass"); } } From atrick at apple.com Tue Apr 10 23:31:33 2012 From: atrick at apple.com (Andrew Trick) Date: Wed, 11 Apr 2012 04:31:33 -0000 Subject: [llvm-commits] [llvm] r154477 - /llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp Message-ID: <20120411043133.DDC6D2A6C065@llvm.org> Author: atrick Date: Tue Apr 10 23:31:33 2012 New Revision: 154477 URL: http://llvm.org/viewvc/llvm-project?rev=154477&view=rev Log: Tablegen'd regpressure: emit the weighted pressure limit. 
Modified: llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp Modified: llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp?rev=154477&r1=154476&r2=154477&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp Tue Apr 10 23:31:33 2012 @@ -152,7 +152,13 @@ << "getRegPressureSetLimit(unsigned Idx) const {\n" << " static const unsigned PressureLimitTable[] = {\n"; for (unsigned i = 0; i < NumSets; ++i ) { - OS << " " << RegBank.getRegPressureSet(i).Units.size() + const RegUnitSet &RegUnits = RegBank.getRegPressureSet(i); + unsigned Weight = 0; + for (RegUnitSet::iterator + I = RegUnits.Units.begin(), E = RegUnits.Units.end(); I != E; ++I) { + Weight += RegBank.getRegUnitWeight(*I); + } + OS << " " << Weight << ", \t// " << i << ": " << RegBank.getRegPressureSet(i).Name << "\n"; } OS << " 0 };\n" From craig.topper at gmail.com Tue Apr 10 23:34:11 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 11 Apr 2012 04:34:11 -0000 Subject: [llvm-commits] [llvm] r154478 - in /llvm/trunk/lib/CodeGen/SelectionDAG: SelectionDAGBuilder.cpp SelectionDAGBuilder.h Message-ID: <20120411043411.F30442A6C065@llvm.org> Author: ctopper Date: Tue Apr 10 23:34:11 2012 New Revision: 154478 URL: http://llvm.org/viewvc/llvm-project?rev=154478&view=rev Log: Inline implVisitAluOverflow by introducing a nested switch to convert the intrinsic to an nodetype. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=154478&r1=154477&r2=154478&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Apr 10 23:34:11 2012 @@ -3629,17 +3629,6 @@ return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4867,6 +4856,7 @@ case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -5099,18 +5089,28 @@ return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h?rev=154478&r1=154477&r2=154478&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Tue Apr 10 
23:34:11 2012 @@ -556,8 +556,6 @@ void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); From craig.topper at gmail.com Tue Apr 10 23:55:51 2012 From: craig.topper at gmail.com (Craig Topper) Date: Wed, 11 Apr 2012 04:55:51 -0000 Subject: [llvm-commits] [llvm] r154479 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <20120411045551.9D9C22A6C065@llvm.org> Author: ctopper Date: Tue Apr 10 23:55:51 2012 New Revision: 154479 URL: http://llvm.org/viewvc/llvm-project?rev=154479&view=rev Log: Fix an overly indented line. Remove an 'else' after an 'if' that returns. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=154479&r1=154478&r2=154479&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Apr 10 23:55:51 2012 @@ -4388,9 +4388,8 @@ const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4419,7 +4418,7 @@ // Some arguments' frame index is recorded during argument lowering. 
Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) From evan.cheng at apple.com Wed Apr 11 00:33:08 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 05:33:08 -0000 Subject: [llvm-commits] [llvm] r154480 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMSubtarget.h test/MC/ARM/vfp4.s Message-ID: <20120411053308.3EBB02A6C065@llvm.org> Author: evancheng Date: Wed Apr 11 00:33:07 2012 New Revision: 154480 URL: http://llvm.org/viewvc/llvm-project?rev=154480&view=rev Log: Clean up ARM fused multiply + add/sub support some more: rename some isel predicates. Also remove NEON2 since it's not really useful and it is confusing. If NEON + VFP4 implies NEON2 but NEON2 doesn't imply NEON + VFP4, what does it really mean? 
rdar://10139676 Modified: llvm/trunk/lib/Target/ARM/ARM.td llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrVFP.td llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp llvm/trunk/lib/Target/ARM/ARMSubtarget.h llvm/trunk/test/MC/ARM/vfp4.s Modified: llvm/trunk/lib/Target/ARM/ARM.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td (original) +++ llvm/trunk/lib/Target/ARM/ARM.td Wed Apr 11 00:33:07 2012 @@ -38,9 +38,6 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; -def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true", - "Enable Advanced SIMD2 instructions", - [FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", Modified: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp Wed Apr 11 00:33:07 2012 @@ -728,7 +728,7 @@ if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasNEON2()) + if (Subtarget->hasVFP4()) AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); else Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=154480&r1=154479&r2=154480&view=diff 
============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Wed Apr 11 00:33:07 2012 @@ -181,12 +181,8 @@ AssemblerPredicate<"FeatureVFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4">; -def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; -def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, - AssemblerPredicate<"FeatureNEON,FeatureVFP4">; -def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -221,8 +217,10 @@ def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; -// Allow more precision in FP computation -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">; +def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr 11 00:33:07 2012 @@ -4005,10 +4005,10 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4063,10 +4063,10 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4118,27 +4118,27 @@ // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)), (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, - Requires<[HasNEON2]>; + Requires<[HasVFP4]>; def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)), (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, - Requires<[HasNEON2]>; + Requires<[HasVFP4]>; // Vector Subtract Operations. 
@@ -5492,13 +5492,13 @@ def : N3VSPat; def : N3VSPat; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEON2, UseNEONForFP, FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat, - Requires<[HasNEON2, UseNEONForFP, FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat; def : N2VSPat; def : N3VSPat; Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed Apr 11 00:33:07 2012 @@ -950,7 +950,7 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -958,7 +958,7 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -966,10 +966,10 @@ def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -977,7 +977,7 @@ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -985,7 +985,7 @@ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -993,10 +993,10 @@ def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1004,7 +1004,7 @@ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1012,7 +1012,7 @@ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1020,10 +1020,10 @@ def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1031,14 +1031,14 @@ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1046,10 +1046,10 @@ def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; //===----------------------------------------------------------------------===// // Fused FP Multiply-Accumulate Operations. 
@@ -1060,7 +1060,7 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1068,17 +1068,17 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)), @@ -1094,7 +1094,7 @@ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1102,17 +1102,17 @@ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1120,7 +1120,7 @@ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1128,17 +1128,17 @@ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), @@ -1154,24 +1154,24 @@ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; //===----------------------------------------------------------------------===// // FP Conditional moves. 
Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Wed Apr 11 00:33:07 2012 @@ -48,7 +48,6 @@ , HasVFPv3(false) , HasVFPv4(false) , HasNEON(false) - , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original) +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Wed Apr 11 00:33:07 2012 @@ -45,13 +45,12 @@ bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEON2 - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; bool HasNEON; - bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. 
Use the method useNEONForSinglePrecisionFP() to @@ -205,7 +204,6 @@ bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } - bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } Modified: llvm/trunk/test/MC/ARM/vfp4.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/vfp4.s?rev=154480&r1=154479&r2=154480&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/vfp4.s (original) +++ llvm/trunk/test/MC/ARM/vfp4.s Wed Apr 11 00:33:07 2012 @@ -1,7 +1,7 @@ @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB - @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee] +@ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee] @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b] vfma.f64 d16, d18, d17 From craig.topper at gmail.com Wed Apr 11 00:52:29 2012 From: craig.topper at gmail.com (Craig Topper) Date: Tue, 10 Apr 2012 22:52:29 -0700 Subject: [llvm-commits] Review Request: VPERM optimization for AVX2 In-Reply-To: References: Message-ID: Functionality wise, this looks fine. Some style comments below. This comment is stale + // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate + // independently on 128-bit lanes. + unsigned NumElts = VT.getVectorNumElements(); 80 columns + SDValue res = DAG.getNode(VT.isInteger()? X86ISD::VPERMD : X86ISD::VPERMPS, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, &permclMask[0], 8), V1); + return res; + } + if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64)) { + return getTargetShuffleNode(VT.isInteger()? 
X86ISD::VPERMQ : X86ISD::VPERMPD, dl, VT, V1, + getShuffleCLImmediate(SVOp), DAG); + } Dangling space after the else. What is the 0x80 value? The instruction only uses the lower 3-bits of each value. Probably cleaner codewise to make the value part conditional and not repeat the push_back and getConstant calls twice. + if (M[i] < 0) + permclMask.push_back(DAG.getConstant(0x80, MVT::i32)); + else + permclMask.push_back(DAG.getConstant(M[i], MVT::i32)); + } What does the "CL" here stand for? +/// getShuffleCLImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. +/// Handles 256-bit. +static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { Either align the SDTShuff2Opl part across all 5 rows or remove the extra spaces from VPERMD/VPERMPS/VPERMQ def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; +def X86VPermd : SDNode<"X86ISD::VPERMD", SDTShuff2Op>; +def X86VPermps : SDNode<"X86ISD::VPERMPS", SDTShuff2Op>; +def X86VPermq : SDNode<"X86ISD::VPERMQ", SDTShuff2OpI>; +def X86VPermpd : SDNode<"X86ISD::VPERMPD", SDTShuff2OpI>; ~Craig On Tue, Apr 10, 2012 at 4:22 AM, Demikhovsky, Elena < elena.demikhovsky at intel.com> wrote: > I added VPERMQ/VPERMD/VPERMPD/VPERMPS patterns. Please review. > > > - Elena > > > --------------------------------------------------------------------- > Intel Israel (74) Limited > > This e-mail and any attachments may contain confidential material for > the sole use of the intended recipient(s). Any review or distribution > by others is strictly prohibited. If you are not the intended > recipient, please contact the sender and delete all copies. > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > -- ~Craig -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120410/6198b968/attachment.html From ck at remobjects.com Wed Apr 11 01:14:05 2012 From: ck at remobjects.com (Carlo Kok) Date: Wed, 11 Apr 2012 08:14:05 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: References: <4F7EF0D1.4020807@remobjects.com> Message-ID: <4F85212D.1070100@remobjects.com> Op 4/10/2012 9:41 PM, Hans Wennborg schreef: > (Including the list this time, sorry for the mess.) > > On Fri, Apr 6, 2012 at 14:34, Carlo Kok wrote: >> Attached is a fairly simple patch for llvm-c that exposes: >> * Target class >> * TargetMachine class >> >> To allow for emitting binary and assembly. >> >> This is the first time I try to contribute to llvm, if there's something >> wrong with my patch, let me know. > > Hi Carlo, > > Thanks for this! I've been annoyed in a project of my own that I > couldn't use the C bindings to emit objects or assembly. > > I'm not very familiar with the classes you are wrapping, so someone > else should take a look too, but I have read through your patch and > made some comments below. Attached is an updated version with your recommendations. Thanks! > > Thanks, > Hans > > >> +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ > > Shouldn't it be C rather than C++ at the top? > >> +|* This header declares the C interface to libLLVMBitReader.a, which *| >> +|* implements input of the LLVM bitcode format. *| >> +|* *| >> +|* Many exotic languages can interoperate with C code but have a harder time *| >> +|* with C++ due to name mangling. So in addition to C, this interface enables *| >> +|* tools written in such languages. *| > > I don't think this second paragraph is necessary? All the llvm-c includes have this header, I just copied it. 
-------------- next part -------------- Index: include/llvm-c/TargetMachine.h =================================================================== --- include/llvm-c/TargetMachine.h (revision 0) +++ include/llvm-c/TargetMachine.h (working copy) @@ -0,0 +1,129 @@ +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMBitReader.a, which *| +|* implements input of the LLVM bitcode format. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TARGETMACHINE_H +#define LLVM_C_TARGETMACHINE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef struct LLVMTargetMachine *LLVMTargetMachineRef; +typedef struct LLVMTarget *LLVMTargetRef; + +typedef enum { + LLVMCodeGenLevelNone, + LLVMCodeGenLevelLess, + LLVMCodeGenLevelDefault, + LLVMCodeGenLevelAggressive +} LLVMCodeGenOptLevel; + +typedef enum { + LLVMRelocDefault, + LLVMRelocStatic, + LLVMRelocPIC, + LLVMRelocDynamicNoPic +} LLVMRelocMode; + +typedef enum { + LLVMCodeModelDefault, + LLVMCodeModelJITDefault, + LLVMCodeModelSmall, + LLVMCodeModelKernel, + LLVMCodeModelMedium, + LLVMCodeModelLarge +} LLVMCodeModel; + +typedef enum { + LLVMAssemblyFile, + LLVMObjectFile +} LLVMCodeGenFileType; + +/** Returns the first llvm::Target in the registered targets list. 
*/ +LLVMTargetRef LLVMGetFirstTarget(); +/** Returns the next llvm::Target given a previous one (or null if there's none) */ +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T); + +/*===-- Target -------------------------------------------------------===*/ +/** Returns the name of a target. See llvm::Target::getName */ +const char *LLVMGetTargetName(LLVMTargetRef T); + +/** Returns the description of a target. See llvm::Target::getDescription */ +const char *LLVMGetTargetDescription(LLVMTargetRef T); + +/** Returns if the target has a JIT */ +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T); + +/** Returns if the target has a TargetMachine associated */ +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T); + +/** Returns if the target as an ASM backend (required for emitting output) */ +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T); + +/*===-- Target Machine -------------------------------------------------------===*/ +/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */ +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel); + +/** Dispose the LLVMTargetMachineRef instance generated by LLVMCreateTargetMachine. */ +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T); + +/** Returns the Target used in a TargetMachine */ +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T); + +/** Returns the triple used creating this target machine. See llvm::TargetMachine::getTriple. The result needs to be Disposed with LLVMDisposeMessage. */ +char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T); + +/** Returns the cpu used creating this target machine. See llvm::TargetMachine::getCPU. The result needs to be Disposed with LLVMDisposeMessage. */ +char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); + +/** Returns the feature string used creating this target machine. See llvm::TargetMachine::getFeatureString. 
The result needs to be Disposed with LLVMDisposeMessage. */ +char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); + +/** Returns the llvm::TargetData used for this llvm:TargetMachine. */ +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); + +/** Emits an asm or object file for the given module to the filename. This wraps several c++ only classes (among them a file stream). Returns any error in ErrorMessage */ +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + + + + +#ifdef __cplusplus +} + +namespace llvm { + class TargetMachine; + class Target; + + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast(P); + } + inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast(P); + } + inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast(const_cast(P)); + } + inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast(const_cast(P)); + } +} +#endif + +#endif Index: lib/Target/CMakeLists.txt =================================================================== --- lib/Target/CMakeLists.txt (revision 153955) +++ lib/Target/CMakeLists.txt (working copy) @@ -9,6 +9,7 @@ TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) Index: lib/Target/TargetMachineC.cpp =================================================================== --- lib/Target/TargetMachineC.cpp (revision 0) +++ lib/Target/TargetMachineC.cpp (working copy) @@ -0,0 +1,134 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include +#include +#include + +using namespace llvm; + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel) { + Reloc::Model RM = static_cast(Reloc); + CodeModel::Model CM = static_cast(CodeModel); + CodeGenOpt::Level OL = static_cast(Level); + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return 
wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { +// Inspired by the D binding + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType ft = static_cast(codegen); + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} From nadav.rotem at intel.com Wed Apr 11 01:40:27 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Wed, 11 Apr 2012 06:40:27 -0000 Subject: [llvm-commits] [llvm] r154483 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx-shuffle.ll test/CodeGen/X86/vec_shuffle-20.ll Message-ID: 
<20120411064027.CE0402A6C065@llvm.org> Author: nadav Date: Wed Apr 11 01:40:27 2012 New Revision: 154483 URL: http://llvm.org/viewvc/llvm-project?rev=154483&view=rev Log: Reapply 154396 after fixing a test. Original message: Modify the code that lowers shuffles to blends from using blendvXX to vblendXX. blendV uses a register for the selection while Vblend uses an immediate. On sandybridge they still have the same latency and execute on the same execution ports. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/X86/avx-shuffle.ll llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 11 01:40:27 2012 @@ -5391,59 +5391,75 @@ SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - LLVMContext *Context = DAG.getContext(); EVT VT = Op.getValueType(); EVT InVT = V1.getValueType(); - EVT EltVT = VT.getVectorElementType(); - unsigned EltSize = EltVT.getSizeInBits(); int MaskSize = VT.getVectorNumElements(); int InSize = InVT.getVectorNumElements(); - // TODO: At the moment we only use AVX blends. We could also use SSE4 blends. 
- if (!Subtarget->hasAVX()) + if (!Subtarget->hasSSE41()) return SDValue(); if (MaskSize != InSize) return SDValue(); - SmallVector MaskVals; - ConstantInt *Zero = ConstantInt::get(*Context, APInt(EltSize, 0)); - ConstantInt *NegOne = ConstantInt::get(*Context, APInt(EltSize, -1)); + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid Op Number"); + + unsigned MaskVals = 0; for (int i = 0; i < MaskSize; ++i) { int EltIdx = SVOp->getMaskElt(i); if (EltIdx == i || EltIdx == -1) - MaskVals.push_back(NegOne); + MaskVals |= (1<getType()); - assert(MaskTy.getSizeInBits() == VT.getSizeInBits() && "Invalid mask size"); - SDValue MaskIdx = DAG.getConstantPool(MaskC, PtrTy); - unsigned Alignment = cast(MaskIdx)->getAlignment(); - SDValue Mask = DAG.getLoad(MaskTy, dl, DAG.getEntryNode(), MaskIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - - if (Subtarget->hasAVX2() && MaskTy == MVT::v32i8) - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - - if (Subtarget->hasAVX()) { - switch (MaskTy.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v16i8: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v8i32: - case MVT::v4i64: - return DAG.getNode(ISD::VSELECT, dl, VT, Mask, V1, V2); - 
} - } - - return SDValue(); + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -11050,6 +11066,9 @@ case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Apr 11 01:40:27 2012 @@ -175,9 +175,14 @@ /// PSIGN - Copy integer sign. PSIGN, - /// BLEND family of opcodes + /// BLENDV - Blend where the selector is an XMM. BLENDV, + /// BLENDxx - Blend where the selector is an immediate. + BLENDPW, + BLENDPS, + BLENDPD, + /// HADD - Integer horizontal add. 
HADD, Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Apr 11 01:40:27 2012 @@ -126,6 +126,8 @@ SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, +SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -158,6 +160,10 @@ def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Apr 11 01:40:27 2012 @@ -6735,12 +6735,22 @@ def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), (v4f64 VR256:$src2))), (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + + def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2), + (imm:$mask))), + (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>; + def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2), + (imm:$mask))), + (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>; } let Predicates 
= [HasAVX2] in { def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), (v32i8 VR256:$src2))), (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; + def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2), + (imm:$mask))), + (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>; } /// SS41I_ternary_int - SSE 4.1 ternary operator @@ -6789,6 +6799,17 @@ def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), (v2f64 VR128:$src2))), (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; + + def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2), + (imm:$mask))), + (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2), + (imm:$mask))), + (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>; + def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2), + (imm:$mask))), + (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>; + } let Predicates = [HasAVX] in Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Wed Apr 11 01:40:27 2012 @@ -164,7 +164,7 @@ } ; CHECK: blend1 -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -172,7 +172,7 @@ } ; CHECK: blend2 -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -180,7 +180,7 @@ } ; CHECK: blend2a -; CHECK: vblendvps +; CHECK: vblendps ; CHECK: ret define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline { %t = 
shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -188,7 +188,7 @@ } ; CHECK: blend3 -; CHECK-NOT: vblendvps +; CHECK-NOT: vblendps ; CHECK: ret define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline { %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -196,7 +196,7 @@ } ; CHECK: blend4 -; CHECK: vblendvpd +; CHECK: vblendpd ; CHECK: ret define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline { %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> Modified: llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll?rev=154483&r1=154482&r2=154483&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll (original) +++ llvm/trunk/test/CodeGen/X86/vec_shuffle-20.ll Wed Apr 11 01:40:27 2012 @@ -1,4 +1,4 @@ -; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3 +; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { entry: From evan.cheng at apple.com Wed Apr 11 01:59:47 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 06:59:47 -0000 Subject: [llvm-commits] [llvm] r154484 - in /llvm/trunk: lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td test/CodeGen/ARM/fusedMAC.ll Message-ID: <20120411065947.695082A6C065@llvm.org> Author: evancheng Date: Wed Apr 11 01:59:47 2012 New Revision: 154484 URL: http://llvm.org/viewvc/llvm-project?rev=154484&view=rev Log: Add more fused mul+add/sub patterns. 
rdar://10139676 Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrVFP.td llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154484&r1=154483&r2=154484&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr 11 01:59:47 2012 @@ -4133,12 +4133,18 @@ Requires<[HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)), +def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, Requires<[HasVFP4]>; -def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)), +def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=154484&r1=154483&r2=154484&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed Apr 11 01:59:47 2012 @@ -1081,10 +1081,10 @@ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)), +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)), +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1114,6 +1114,22 @@ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", @@ -1141,12 +1157,20 @@ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; def : 
Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1173,6 +1197,22 @@ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vnfms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + //===----------------------------------------------------------------------===// // FP Conditional moves. 
// Modified: llvm/trunk/test/CodeGen/ARM/fusedMAC.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fusedMAC.ll?rev=154484&r1=154483&r2=154484&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/fusedMAC.ll (original) +++ llvm/trunk/test/CodeGen/ARM/fusedMAC.ll Wed Apr 11 01:59:47 2012 @@ -103,43 +103,81 @@ entry: ; CHECK: test_fma_f32 ; CHECK: vfma.f32 - %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone - ret float %call + %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %tmp1 } define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: ; CHECK: test_fma_f64 ; CHECK: vfma.f64 - %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone - ret double %call + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %tmp1 } define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { entry: ; CHECK: test_fma_v2f32 ; CHECK: vfma.f32 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind - ret <2 x float> %0 + %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind + ret <2 x float> %tmp1 } -define float @test_fnma_f32(float %a, float %b, float %c) nounwind readnone ssp { +define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: -; CHECK: test_fnma_f32 -; CHECK: vfnma.f32 - %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone - %tmp1 = fsub float -0.0, %call - %tmp2 = fsub float %tmp1, %c - ret float %tmp2 +; CHECK: test_fms_f64 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone + ret double 
%tmp2 +} + +define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fms_f64_2 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64_2 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + ret double %tmp2 } define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: ; CHECK: test_fnma_f64 ; CHECK: vfnma.f64 - %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone - %tmp = fsub double -0.0, %call - ret double %tmp + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + %tmp2 = fsub double -0.0, %tmp1 + ret double %tmp2 +} + +define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f64_2 +; CHECK: vfnma.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = fsub double -0.0, %c + %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone + ret double %tmp3 } declare float @llvm.fma.f32(float, float, float) nounwind readnone From etherzhhb at gmail.com Wed Apr 11 02:43:14 2012 From: etherzhhb at gmail.com (Hongbin Zheng) Date: Wed, 11 Apr 2012 07:43:14 -0000 Subject: [llvm-commits] [polly] r154486 - /polly/trunk/Makefile Message-ID: 
<20120411074314.64E922A6C066@llvm.org> Author: ether Date: Wed Apr 11 02:43:13 2012 New Revision: 154486 URL: http://llvm.org/viewvc/llvm-project?rev=154486&view=rev Log: Revert "Fix a bug introduced by r153739: We are not able to provide the correct" This reverts commit 2c6bdbf972ac966498489d30a33bfd252df9107d. Modified: polly/trunk/Makefile Modified: polly/trunk/Makefile URL: http://llvm.org/viewvc/llvm-project/polly/trunk/Makefile?rev=154486&r1=154485&r2=154486&view=diff ============================================================================== --- polly/trunk/Makefile (original) +++ polly/trunk/Makefile Wed Apr 11 02:43:13 2012 @@ -8,7 +8,7 @@ # Indicates our relative path to the top of the project's root directory. # LEVEL = . -DIRS = lib tools +DIRS = lib test tools EXTRA_DIST = include # From etherzhhb at gmail.com Wed Apr 11 02:43:24 2012 From: etherzhhb at gmail.com (Hongbin Zheng) Date: Wed, 11 Apr 2012 07:43:24 -0000 Subject: [llvm-commits] [polly] r154487 - /polly/trunk/test/Makefile Message-ID: <20120411074325.7B1B72A6C066@llvm.org> Author: ether Date: Wed Apr 11 02:43:24 2012 New Revision: 154487 URL: http://llvm.org/viewvc/llvm-project?rev=154487&view=rev Log: Revert "Make the "all" target depend on polly-test, so that users can run regression" This reverts commit 97bd8d50881000c11b65b0e033996ec5f57bcd15. 
Modified: polly/trunk/test/Makefile Modified: polly/trunk/test/Makefile URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Makefile?rev=154487&r1=154486&r2=154487&view=diff ============================================================================== --- polly/trunk/test/Makefile (original) +++ polly/trunk/test/Makefile Wed Apr 11 02:43:24 2012 @@ -31,7 +31,6 @@ LIT_ARGS += "--vg" endif -all:: polly-test polly-test:: lit.site.cfg polly-lib @ echo '--- Running polly tests for $(TARGET_TRIPLE) ---' From etherzhhb at gmail.com Wed Apr 11 02:44:56 2012 From: etherzhhb at gmail.com (Hongbin Zheng) Date: Wed, 11 Apr 2012 15:44:56 +0800 Subject: [llvm-commits] [polly] r153739 - /polly/trunk/test/Makefile In-Reply-To: References: <20120330092717.037AF2A6C066@llvm.org> Message-ID: Hi Sebastian, Done, i think i should find another way to ensure the LLVMPolly.so is up to date before the regression tests run. best regards ether On Wed, Apr 11, 2012 at 1:05 AM, Sebastian Pop wrote: > On Thu, Apr 5, 2012 at 10:58 PM, Hongbin Zheng wrote: >> Hi Sebastian, >> >> Fixed in r154162, sorry for this. >> >> best regards >> ether >> On Fri, Apr 6, 2012 at 5:22 AM, Sebastian Pop wrote: >>> On Fri, Mar 30, 2012 at 4:27 AM, Hongbin Zheng wrote: >>>> Author: ether >>>> Date: Fri Mar 30 04:27:16 2012 >>>> New Revision: 153739 >>>> >>>> URL: http://llvm.org/viewvc/llvm-project?rev=153739&view=rev >>>> Log: >>>> Make the "all" target depend on polly-test, so that users can run regression >>>> ?tests by simply typing "make -C tools/polly/test", like llvm's regression >>>> ?tests. >>>> >>>> Modified: >>>> ? 
?polly/trunk/test/Makefile >>>> >>>> Modified: polly/trunk/test/Makefile >>>> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Makefile?rev=153739&r1=153738&r2=153739&view=diff >>>> ============================================================================== >>>> --- polly/trunk/test/Makefile (original) >>>> +++ polly/trunk/test/Makefile Fri Mar 30 04:27:16 2012 >>>> @@ -31,6 +31,7 @@ >>>> ? LIT_ARGS += "--vg" >>>> ?endif >>>> >>>> +all:: polly-test >>> >>> Several of my automatic tests have failed because of this change: when >>> make has finished building the polly/lib, it automatically goes to >>> make all in the test dir, and because of this change, it starts >>> testing polly, even though the rest of llvm and clang have not >>> finished to be built. >>> >>> Please fix this by not including the test dir in the polly top level >>> Makefile DIRS variable: >> Or do you want to revert this commit? > > Yes, let's revert this commit. > >>> >>> diff --git a/Makefile b/Makefile >>> index 2ad5b36..665c13a 100644 >>> --- a/Makefile >>> +++ b/Makefile >>> @@ -8,7 +8,7 @@ >>> ?# Indicates our relative path to the top of the project's root directory. >>> ?# >>> ?LEVEL = . >>> -DIRS = lib test tools >>> +DIRS = lib tools >>> ?EXTRA_DIST = include > > This change removed the test dir from the build dir, making it impossible to > test Polly with a make polly-test -C tools/polly/test/ > > What about reverting both this and your previous change? 
> > Thanks, > Sebastian > -- > Qualcomm Innovation Center, Inc is a member of Code Aurora Forum From baldrick at free.fr Wed Apr 11 02:48:21 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 09:48:21 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F85212D.1070100@remobjects.com> References: <4F7EF0D1.4020807@remobjects.com> <4F85212D.1070100@remobjects.com> Message-ID: <4F853745.5080709@free.fr> Hi Carlo, > --- include/llvm-c/TargetMachine.h (revision 0) > +++ include/llvm-c/TargetMachine.h (working copy) ... > +|* This header declares the C interface to libLLVMBitReader.a, which *| > +|* implements input of the LLVM bitcode format. *| ^ This description is not correct. ... > +/*===-- Target Machine -------------------------------------------------------===*/ > +/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */ > +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel); This and a bunch of other lines are too long: maximum line length is 80 characters. ... > +/** Returns the triple used creating this target machine. See llvm::TargetMachine::getTriple. The result needs to be Disposed with LLVMDisposeMessage. */ No need for a capital D on Disposed. ... > +/** Emits an asm or object file for the given module to the filename. This wraps several c++ only classes (among them a file stream). Returns any error in ErrorMessage */ Does ErrorMessage need to be disposed of with LLVMDisposeMessage? > --- lib/Target/TargetMachineC.cpp (revision 0) > +++ lib/Target/TargetMachineC.cpp (working copy) ... > +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, LLVMCodeModel CodeModel) { ^ More lines that are too long. 
Also, there seems to be trailing whitespace here and there. > + Reloc::Model RM = static_cast<Reloc::Model>(Reloc); > + CodeModel::Model CM = static_cast<CodeModel::Model>(CodeModel); > + CodeGenOpt::Level OL = static_cast<CodeGenOpt::Level>(Level); ^ You shouldn't be using static_cast on these enums: if the LLVM enum is reordered or changed one day this needs to still work without reordering the llvm-c enum. You need to use switches: switch (Reloc) { case CPP_XYZ: val = C_XYZ; break; ... } etc. > +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { > +// Inspired by the D binding If you are grateful to D then mention it in the commit message not here. Ciao, Duncan. From baldrick at free.fr Wed Apr 11 03:13:47 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 08:13:47 -0000 Subject: [llvm-commits] [llvm] r154488 - /llvm/trunk/include/llvm/Target/TargetOptions.h Message-ID: <20120411081347.C18852A6C065@llvm.org> Author: baldrick Date: Wed Apr 11 03:13:47 2012 New Revision: 154488 URL: http://llvm.org/viewvc/llvm-project?rev=154488&view=rev Log: Comment typo fix. Modified: llvm/trunk/include/llvm/Target/TargetOptions.h Modified: llvm/trunk/include/llvm/Target/TargetOptions.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetOptions.h?rev=154488&r1=154487&r2=154488&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetOptions.h (original) +++ llvm/trunk/include/llvm/Target/TargetOptions.h Wed Apr 11 03:13:47 2012 @@ -24,7 +24,7 @@ // Possible float ABI settings. Used with FloatABIType in TargetOptions.h. namespace FloatABI { enum ABIType { - Default, // Target-specific (either soft of hard depending on triple, etc). + Default, // Target-specific (either soft or hard depending on triple, etc). Soft, // Soft float. Hard // Hard float.
}; From baldrick at free.fr Wed Apr 11 03:20:12 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 08:20:12 -0000 Subject: [llvm-commits] [dragonegg] r154489 - /dragonegg/trunk/src/Backend.cpp Message-ID: <20120411082012.3ADC32A6C065@llvm.org> Author: baldrick Date: Wed Apr 11 03:20:11 2012 New Revision: 154489 URL: http://llvm.org/viewvc/llvm-project?rev=154489&view=rev Log: Hook up the -fpie/-fPIE flags. Modified: dragonegg/trunk/src/Backend.cpp Modified: dragonegg/trunk/src/Backend.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/Backend.cpp?rev=154489&r1=154488&r2=154489&view=diff ============================================================================== --- dragonegg/trunk/src/Backend.cpp (original) +++ dragonegg/trunk/src/Backend.cpp Wed Apr 11 03:20:11 2012 @@ -447,6 +447,7 @@ Options.NoInfsFPMath = flag_finite_math_only; Options.NoNaNsFPMath = flag_finite_math_only; Options.NoZerosInBSS = !flag_zero_initialized_in_bss; + Options.PositionIndependentExecutable = flag_pie; #if (GCC_MINOR > 5) Options.EnableSegmentedStacks = flag_split_stack; #endif From ck at remobjects.com Wed Apr 11 03:25:15 2012 From: ck at remobjects.com (Carlo Kok) Date: Wed, 11 Apr 2012 10:25:15 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F853745.5080709@free.fr> References: <4F7EF0D1.4020807@remobjects.com> <4F85212D.1070100@remobjects.com> <4F853745.5080709@free.fr> Message-ID: <4F853FEB.3040603@remobjects.com> Op 4/11/2012 9:48 AM, Duncan Sands schreef: > Hi Carlo, > >> --- include/llvm-c/TargetMachine.h (revision 0) >> +++ include/llvm-c/TargetMachine.h (working copy) > ... > >> +|* This header declares the C interface to libLLVMBitReader.a, which *| >> +|* implements input of the LLVM bitcode format. *| > > ^ This description is not correct. Thanks Duncan, all addressed in this updated latest patch. 
Updated description: Attached is a fairly simple patch for llvm-c that exposes: * Target class * TargetMachine class To allow for emitting binary and assembly. Emitting was inspired by but not based on the D llvm bindings. -------------- next part -------------- Index: include/llvm-c/TargetMachine.h =================================================================== --- include/llvm-c/TargetMachine.h (revision 0) +++ include/llvm-c/TargetMachine.h (working copy) @@ -0,0 +1,142 @@ +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to the Target and TargetMachine *| +|* classes, which can be used to generate assembly or object files. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TARGETMACHINE_H +#define LLVM_C_TARGETMACHINE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef struct LLVMTargetMachine *LLVMTargetMachineRef; +typedef struct LLVMTarget *LLVMTargetRef; + +typedef enum { + LLVMCodeGenLevelNone, + LLVMCodeGenLevelLess, + LLVMCodeGenLevelDefault, + LLVMCodeGenLevelAggressive +} LLVMCodeGenOptLevel; + +typedef enum { + LLVMRelocDefault, + LLVMRelocStatic, + LLVMRelocPIC, + LLVMRelocDynamicNoPic +} LLVMRelocMode; + +typedef enum { + LLVMCodeModelDefault, + LLVMCodeModelJITDefault, + LLVMCodeModelSmall, + LLVMCodeModelKernel, + LLVMCodeModelMedium, + LLVMCodeModelLarge +} LLVMCodeModel; + +typedef enum { + LLVMAssemblyFile, + LLVMObjectFile +} LLVMCodeGenFileType; + +/** Returns the first llvm::Target in the registered targets list. */ +LLVMTargetRef LLVMGetFirstTarget(); +/** Returns the next llvm::Target given a previous one (or null if there's none) */ +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T); + +/*===-- Target ------------------------------------------------------------===*/ +/** Returns the name of a target. See llvm::Target::getName */ +const char *LLVMGetTargetName(LLVMTargetRef T); + +/** Returns the description of a target. See llvm::Target::getDescription */ +const char *LLVMGetTargetDescription(LLVMTargetRef T); + +/** Returns if the target has a JIT */ +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T); + +/** Returns if the target has a TargetMachine associated */ +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T); + +/** Returns if the target as an ASM backend (required for emitting output) */ +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T); + +/*===-- Target Machine ----------------------------------------------------===*/ +/** Creates a new llvm::TargetMachine. 
See llvm::Target::createTargetMachine */ +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, + char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel); + +/** Dispose the LLVMTargetMachineRef instance generated by + LLVMCreateTargetMachine. */ +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T); + +/** Returns the Target used in a TargetMachine */ +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T); + +/** Returns the triple used creating this target machine. See + llvm::TargetMachine::getTriple. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T); + +/** Returns the cpu used creating this target machine. See + llvm::TargetMachine::getCPU. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); + +/** Returns the feature string used creating this target machine. See + llvm::TargetMachine::getFeatureString. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); + +/** Returns the llvm::TargetData used for this llvm:TargetMachine. */ +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); + +/** Emits an asm or object file for the given module to the filename. This + wraps several c++ only classes (among them a file stream). Returns any + error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. 
*/ +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + + + + +#ifdef __cplusplus +} + +namespace llvm { + class TargetMachine; + class Target; + + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast(P); + } + inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast(P); + } + inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast( + const_cast(P)); + } + inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast(const_cast(P)); + } +} +#endif + +#endif Index: lib/Target/CMakeLists.txt =================================================================== --- lib/Target/CMakeLists.txt (revision 153955) +++ lib/Target/CMakeLists.txt (working copy) @@ -9,6 +9,7 @@ TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) Index: lib/Target/TargetMachineC.cpp =================================================================== --- lib/Target/TargetMachineC.cpp (revision 0) +++ lib/Target/TargetMachineC.cpp (working copy) @@ -0,0 +1,196 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include +#include +#include + +using namespace llvm; + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, + char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel) { + Reloc::Model RM; + switch (Reloc){ + case LLVMRelocStatic: + RM = Reloc::Model::Static; + break; + case LLVMRelocPIC: + RM = Reloc::Model::PIC_; + break; + case LLVMRelocDynamicNoPic: + RM = Reloc::Model::DynamicNoPIC; + break; + default: + RM = Reloc::Model::Default; + break; + } + + CodeModel::Model CM; + switch (CodeModel) { + case LLVMCodeModelJITDefault: + CM = CodeModel::Model::JITDefault; + break; + case LLVMCodeModelSmall: + CM = 
CodeModel::Model::Small; + break; + case LLVMCodeModelKernel: + CM = CodeModel::Model::Kernel; + break; + case LLVMCodeModelMedium: + CM = CodeModel::Model::Medium; + break; + case LLVMCodeModelLarge: + CM = CodeModel::Model::Large; + break; + default: + CM = CodeModel::Model::Default; + break; + } + CodeGenOpt::Level OL; + + switch (Level) { + case LLVMCodeGenLevelNone: + OL = CodeGenOpt::Level::None; + break; + case LLVMCodeGenLevelLess: + OL = CodeGenOpt::Level::Less; + break; + case LLVMCodeGenLevelAggressive: + OL = CodeGenOpt::Level::Aggressive; + break; + default: + OL = CodeGenOpt::Level::Default; + break; + } + + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, + CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + 
pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType ft; + switch (codegen) { + case LLVMAssemblyFile: + ft = TargetMachine::CodeGenFileType::CGFT_AssemblyFile; + break; + default: + ft = TargetMachine::CodeGenFileType::CGFT_ObjectFile; + break; + } + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} From nadav.rotem at intel.com Wed Apr 11 03:26:11 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Wed, 11 Apr 2012 08:26:11 -0000 Subject: [llvm-commits] [llvm] r154490 - /llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Message-ID: <20120411082611.F01AB2A6C065@llvm.org> Author: nadav Date: Wed Apr 11 03:26:11 2012 New Revision: 154490 URL: http://llvm.org/viewvc/llvm-project?rev=154490&view=rev Log: Reapply 154397. Original message: Fix a dagcombine optimization which assumes that the vsetcc result type is always of the same size as the compared values. This is true for SSE/AVX/NEON but not for all targets. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=154490&r1=154489&r2=154490&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Apr 11 03:26:11 2012 @@ -4354,12 +4354,17 @@ // Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4373,11 +4378,13 @@ EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } From baldrick at free.fr Wed Apr 11 03:54:30 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 10:54:30 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F853FEB.3040603@remobjects.com> References: <4F7EF0D1.4020807@remobjects.com> <4F85212D.1070100@remobjects.com> <4F853745.5080709@free.fr> <4F853FEB.3040603@remobjects.com> Message-ID: <4F8546C6.5050704@free.fr> Hi Carlo, it fails to compile (gcc-4.6): TargetMachineC.cpp:66:19: error: 'llvm::Reloc::Model' is not a class or namespace Ciao, Duncan. From baldrick at free.fr Wed Apr 11 04:17:48 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 09:17:48 -0000 Subject: [llvm-commits] [dragonegg] r154491 - /dragonegg/trunk/TODO Message-ID: <20120411091748.9209B2A6C065@llvm.org> Author: baldrick Date: Wed Apr 11 04:17:48 2012 New Revision: 154491 URL: http://llvm.org/viewvc/llvm-project?rev=154491&view=rev Log: Decent type based alias analysis info is now generated, however more could be done. Add a note about this.
Modified: dragonegg/trunk/TODO Modified: dragonegg/trunk/TODO URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/TODO?rev=154491&r1=154490&r2=154491&view=diff ============================================================================== --- dragonegg/trunk/TODO (original) +++ dragonegg/trunk/TODO Wed Apr 11 04:17:48 2012 @@ -103,4 +103,6 @@ Add support for non-temporal stores. -Generate LLVM type based alias analysis information from gcc's. +Add type based alias analysis tags in more cases. Extend LLVM's tbaa +representation so it can represent a DAG and generate tags for struct +types too. From ck at remobjects.com Wed Apr 11 04:53:41 2012 From: ck at remobjects.com (Carlo Kok) Date: Wed, 11 Apr 2012 11:53:41 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F8546C6.5050704@free.fr> References: <4F7EF0D1.4020807@remobjects.com> <4F85212D.1070100@remobjects.com> <4F853745.5080709@free.fr> <4F853FEB.3040603@remobjects.com> <4F8546C6.5050704@free.fr> Message-ID: <4F8554A5.2070608@remobjects.com> Op 4/11/2012 10:54 AM, Duncan Sands schreef: > Hi Carlo, it fails to compile (gcc-4.6): > > TargetMachineC.cpp:66:19: error: 'llvm::Reloc::Model' is not a class or namespace > Hi, Sorry about that, seems MSVC allows more than GCC. Fixed in attached. tested with mingw/gcc too now. -- Carlo Kok -------------- next part -------------- Index: include/llvm-c/TargetMachine.h =================================================================== --- include/llvm-c/TargetMachine.h (revision 0) +++ include/llvm-c/TargetMachine.h (working copy) @@ -0,0 +1,142 @@ +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details.
*| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to the Target and TargetMachine *| +|* classes, which can be used to generate assembly or object files. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TARGETMACHINE_H +#define LLVM_C_TARGETMACHINE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef struct LLVMTargetMachine *LLVMTargetMachineRef; +typedef struct LLVMTarget *LLVMTargetRef; + +typedef enum { + LLVMCodeGenLevelNone, + LLVMCodeGenLevelLess, + LLVMCodeGenLevelDefault, + LLVMCodeGenLevelAggressive +} LLVMCodeGenOptLevel; + +typedef enum { + LLVMRelocDefault, + LLVMRelocStatic, + LLVMRelocPIC, + LLVMRelocDynamicNoPic +} LLVMRelocMode; + +typedef enum { + LLVMCodeModelDefault, + LLVMCodeModelJITDefault, + LLVMCodeModelSmall, + LLVMCodeModelKernel, + LLVMCodeModelMedium, + LLVMCodeModelLarge +} LLVMCodeModel; + +typedef enum { + LLVMAssemblyFile, + LLVMObjectFile +} LLVMCodeGenFileType; + +/** Returns the first llvm::Target in the registered targets list. */ +LLVMTargetRef LLVMGetFirstTarget(); +/** Returns the next llvm::Target given a previous one (or null if there's none) */ +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T); + +/*===-- Target ------------------------------------------------------------===*/ +/** Returns the name of a target. See llvm::Target::getName */ +const char *LLVMGetTargetName(LLVMTargetRef T); + +/** Returns the description of a target. 
See llvm::Target::getDescription */ +const char *LLVMGetTargetDescription(LLVMTargetRef T); + +/** Returns if the target has a JIT */ +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T); + +/** Returns if the target has a TargetMachine associated */ +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T); + +/** Returns if the target as an ASM backend (required for emitting output) */ +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T); + +/*===-- Target Machine ----------------------------------------------------===*/ +/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */ +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, + char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel); + +/** Dispose the LLVMTargetMachineRef instance generated by + LLVMCreateTargetMachine. */ +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T); + +/** Returns the Target used in a TargetMachine */ +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T); + +/** Returns the triple used creating this target machine. See + llvm::TargetMachine::getTriple. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T); + +/** Returns the cpu used creating this target machine. See + llvm::TargetMachine::getCPU. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); + +/** Returns the feature string used creating this target machine. See + llvm::TargetMachine::getFeatureString. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); + +/** Returns the llvm::TargetData used for this llvm:TargetMachine. */ +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); + +/** Emits an asm or object file for the given module to the filename. This + wraps several c++ only classes (among them a file stream). 
Returns any + error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */ +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + + + + +#ifdef __cplusplus +} + +namespace llvm { + class TargetMachine; + class Target; + + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast(P); + } + inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast(P); + } + inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast( + const_cast(P)); + } + inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast(const_cast(P)); + } +} +#endif + +#endif Index: lib/Target/CMakeLists.txt =================================================================== --- lib/Target/CMakeLists.txt (revision 153955) +++ lib/Target/CMakeLists.txt (working copy) @@ -9,6 +9,7 @@ TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) Index: lib/Target/TargetMachineC.cpp =================================================================== --- lib/Target/TargetMachineC.cpp (revision 0) +++ lib/Target/TargetMachineC.cpp (working copy) @@ -0,0 +1,197 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include +#include +#include + +using namespace llvm; + + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, + char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel) { + Reloc::Model RM; + switch (Reloc){ + case LLVMRelocStatic: + RM = Reloc::Static; + break; + case LLVMRelocPIC: + RM = Reloc::PIC_; + break; + case LLVMRelocDynamicNoPic: + RM = Reloc::DynamicNoPIC; + break; + default: + RM = Reloc::Default; + break; + } + + CodeModel::Model CM; + switch (CodeModel) { + case LLVMCodeModelJITDefault: + CM = CodeModel::JITDefault; + break; + case LLVMCodeModelSmall: + CM = CodeModel::Small; + break; + case 
LLVMCodeModelKernel: + CM = CodeModel::Kernel; + break; + case LLVMCodeModelMedium: + CM = CodeModel::Medium; + break; + case LLVMCodeModelLarge: + CM = CodeModel::Large; + break; + default: + CM = CodeModel::Default; + break; + } + CodeGenOpt::Level OL; + + switch (Level) { + case LLVMCodeGenLevelNone: + OL = CodeGenOpt::None; + break; + case LLVMCodeGenLevelLess: + OL = CodeGenOpt::Less; + break; + case LLVMCodeGenLevelAggressive: + OL = CodeGenOpt::Aggressive; + break; + default: + OL = CodeGenOpt::Default; + break; + } + + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, + CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType ft; + switch (codegen) { + case 
LLVMAssemblyFile: + ft = TargetMachine::CGFT_AssemblyFile; + break; + default: + ft = TargetMachine::CGFT_ObjectFile; + break; + } + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} From chandlerc at gmail.com Wed Apr 11 05:15:10 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Wed, 11 Apr 2012 10:15:10 -0000 Subject: [llvm-commits] [llvm] r154492 - in /llvm/trunk/lib: Analysis/InlineCost.cpp Transforms/IPO/Inliner.cpp Message-ID: <20120411101510.B10842A6C065@llvm.org> Author: chandlerc Date: Wed Apr 11 05:15:10 2012 New Revision: 154492 URL: http://llvm.org/viewvc/llvm-project?rev=154492&view=rev Log: Add two statistics to help track how we are computing the inline cost. Yea, 'NumCallerCallersAnalyzed' isn't a great name, suggestions welcome. 
Modified: llvm/trunk/lib/Analysis/InlineCost.cpp llvm/trunk/lib/Transforms/IPO/Inliner.cpp Modified: llvm/trunk/lib/Analysis/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=154492&r1=154491&r2=154492&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) +++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed Apr 11 05:15:10 2012 @@ -29,9 +29,12 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -802,6 +805,8 @@ /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. bool CallAnalyzer::analyzeCall(CallSite CS) { + ++NumCallsAnalyzed; + // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. Balloon the // threshold by 50% until we pass the single-BB phase. Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=154492&r1=154491&r2=154492&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Wed Apr 11 05:15:10 2012 @@ -36,6 +36,11 @@ STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); +// This weirdly named statistic tracks the number of times that, when attempting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + static cl::opt InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -277,6 +282,7 @@ } InlineCost IC2 = getInlineCost(CS2); + ++NumCallerCallersAnalyzed; if (!IC2) { callerWillBeRemoved = false; continue; From baldrick at free.fr Wed Apr 11 05:25:24 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 10:25:24 -0000 Subject: [llvm-commits] [llvm] r154493 - in /llvm/trunk: include/llvm-c/TargetMachine.h lib/Target/CMakeLists.txt lib/Target/TargetMachineC.cpp Message-ID: <20120411102524.E4C1F2A6C065@llvm.org> Author: baldrick Date: Wed Apr 11 05:25:24 2012 New Revision: 154493 URL: http://llvm.org/viewvc/llvm-project?rev=154493&view=rev Log: Add a C binding to the Target and TargetMachine classes to allow for emitting binary and assembly. Patch by Carlo Kok. Emitting was inspired by but not based on the D llvm bindings. Added: llvm/trunk/include/llvm-c/TargetMachine.h llvm/trunk/lib/Target/TargetMachineC.cpp Modified: llvm/trunk/lib/Target/CMakeLists.txt Added: llvm/trunk/include/llvm-c/TargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/TargetMachine.h?rev=154493&view=auto ============================================================================== --- llvm/trunk/include/llvm-c/TargetMachine.h (added) +++ llvm/trunk/include/llvm-c/TargetMachine.h Wed Apr 11 05:25:24 2012 @@ -0,0 +1,142 @@ +/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. 
*| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to the Target and TargetMachine *| +|* classes, which can be used to generate assembly or object files. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TARGETMACHINE_H +#define LLVM_C_TARGETMACHINE_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif +typedef struct LLVMTargetMachine *LLVMTargetMachineRef; +typedef struct LLVMTarget *LLVMTargetRef; + +typedef enum { + LLVMCodeGenLevelNone, + LLVMCodeGenLevelLess, + LLVMCodeGenLevelDefault, + LLVMCodeGenLevelAggressive +} LLVMCodeGenOptLevel; + +typedef enum { + LLVMRelocDefault, + LLVMRelocStatic, + LLVMRelocPIC, + LLVMRelocDynamicNoPic +} LLVMRelocMode; + +typedef enum { + LLVMCodeModelDefault, + LLVMCodeModelJITDefault, + LLVMCodeModelSmall, + LLVMCodeModelKernel, + LLVMCodeModelMedium, + LLVMCodeModelLarge +} LLVMCodeModel; + +typedef enum { + LLVMAssemblyFile, + LLVMObjectFile +} LLVMCodeGenFileType; + +/** Returns the first llvm::Target in the registered targets list. */ +LLVMTargetRef LLVMGetFirstTarget(); +/** Returns the next llvm::Target given a previous one (or null if there's none) */ +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T); + +/*===-- Target ------------------------------------------------------------===*/ +/** Returns the name of a target. See llvm::Target::getName */ +const char *LLVMGetTargetName(LLVMTargetRef T); + +/** Returns the description of a target. 
See llvm::Target::getDescription */ +const char *LLVMGetTargetDescription(LLVMTargetRef T); + +/** Returns if the target has a JIT */ +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T); + +/** Returns if the target has a TargetMachine associated */ +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T); + +/** Returns if the target as an ASM backend (required for emitting output) */ +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T); + +/*===-- Target Machine ----------------------------------------------------===*/ +/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */ +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple, + char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel); + +/** Dispose the LLVMTargetMachineRef instance generated by + LLVMCreateTargetMachine. */ +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T); + +/** Returns the Target used in a TargetMachine */ +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T); + +/** Returns the triple used creating this target machine. See + llvm::TargetMachine::getTriple. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T); + +/** Returns the cpu used creating this target machine. See + llvm::TargetMachine::getCPU. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); + +/** Returns the feature string used creating this target machine. See + llvm::TargetMachine::getFeatureString. The result needs to be disposed with + LLVMDisposeMessage. */ +char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); + +/** Returns the llvm::TargetData used for this llvm:TargetMachine. */ +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); + +/** Emits an asm or object file for the given module to the filename. This + wraps several c++ only classes (among them a file stream). 
Returns any + error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */ +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage); + + + + +#ifdef __cplusplus +} + +namespace llvm { + class TargetMachine; + class Target; + + inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast(P); + } + inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast(P); + } + inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast( + const_cast(P)); + } + inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast(const_cast(P)); + } +} +#endif + +#endif Modified: llvm/trunk/lib/Target/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CMakeLists.txt?rev=154493&r1=154492&r2=154493&view=diff ============================================================================== --- llvm/trunk/lib/Target/CMakeLists.txt (original) +++ llvm/trunk/lib/Target/CMakeLists.txt Wed Apr 11 05:25:24 2012 @@ -9,6 +9,7 @@ TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) Added: llvm/trunk/lib/Target/TargetMachineC.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetMachineC.cpp?rev=154493&view=auto ============================================================================== --- llvm/trunk/lib/Target/TargetMachineC.cpp (added) +++ llvm/trunk/lib/Target/TargetMachineC.cpp Wed Apr 11 05:25:24 2012 @@ -0,0 +1,197 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include +#include +#include + +using namespace llvm; + + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, + char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel) { + Reloc::Model RM; + switch (Reloc){ + case LLVMRelocStatic: + RM = Reloc::Static; + break; + case LLVMRelocPIC: + RM = Reloc::PIC_; + break; + case LLVMRelocDynamicNoPic: + RM = Reloc::DynamicNoPIC; + break; + default: + RM = Reloc::Default; + break; + } + + CodeModel::Model CM; + switch (CodeModel) { + case LLVMCodeModelJITDefault: + CM = CodeModel::JITDefault; + break; + case LLVMCodeModelSmall: + CM = CodeModel::Small; + break; + case 
LLVMCodeModelKernel: + CM = CodeModel::Kernel; + break; + case LLVMCodeModelMedium: + CM = CodeModel::Medium; + break; + case LLVMCodeModelLarge: + CM = CodeModel::Large; + break; + default: + CM = CodeModel::Default; + break; + } + CodeGenOpt::Level OL; + + switch (Level) { + case LLVMCodeGenLevelNone: + OL = CodeGenOpt::None; + break; + case LLVMCodeGenLevelLess: + OL = CodeGenOpt::Less; + break; + case LLVMCodeGenLevelAggressive: + OL = CodeGenOpt::Aggressive; + break; + default: + OL = CodeGenOpt::Default; + break; + } + + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, + CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType ft; + switch (codegen) { + case 
LLVMAssemblyFile: + ft = TargetMachine::CGFT_AssemblyFile; + break; + default: + ft = TargetMachine::CGFT_ObjectFile; + break; + } + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} From baldrick at free.fr Wed Apr 11 05:27:57 2012 From: baldrick at free.fr (Duncan Sands) Date: Wed, 11 Apr 2012 12:27:57 +0200 Subject: [llvm-commits] Patch for llvm-c to expose targetmachine & allow emit obj/asm files In-Reply-To: <4F8554A5.2070608@remobjects.com> References: <4F7EF0D1.4020807@remobjects.com> <4F85212D.1070100@remobjects.com> <4F853745.5080709@free.fr> <4F853FEB.3040603@remobjects.com> <4F8546C6.5050704@free.fr> <4F8554A5.2070608@remobjects.com> Message-ID: <4F855CAD.1020207@free.fr> Hi Carlo, > Sorry about that, seems MSVC allows more than GCC. Fixed in attached. tested > with mingw/gcc too now. thanks for the patch. Added in commit 154493. Ciao, Duncan. 
From nadav.rotem at intel.com Wed Apr 11 06:05:21 2012 From: nadav.rotem at intel.com (Nadav Rotem) Date: Wed, 11 Apr 2012 11:05:21 -0000 Subject: [llvm-commits] [llvm] r154494 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20120411110521.AF7C02A6C065@llvm.org> Author: nadav Date: Wed Apr 11 06:05:21 2012 New Revision: 154494 URL: http://llvm.org/viewvc/llvm-project?rev=154494&view=rev Log: remove unused argument Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=154494&r1=154493&r2=154494&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 11 06:05:21 2012 @@ -5386,7 +5386,7 @@ // Try to lower a shuffle node into a simple blend instruction. static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG, EVT PtrTy) { + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); @@ -6624,7 +6624,7 @@ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG, getPointerTy()); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG); if (BlendOp.getNode()) return BlendOp; From anton at korobeynikov.info Wed Apr 11 06:40:10 2012 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 11 Apr 2012 15:40:10 +0400 Subject: [llvm-commits] [llvm] r154480 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMS Message-ID: Hello Evan, > Clean up ARM fused multiply 
+ add/sub support some more: rename some isel > predicates. > Also remove NEON2 since it's not really useful and it is confusing. If > NEON + VFP4 implies NEON2 but NEON2 doesn't imply NEON + VFP4, what does it > really mean? There was the discussion about this in ML, consider checking it. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From Tim.Northover at arm.com Wed Apr 11 07:04:16 2012 From: Tim.Northover at arm.com (Tim Northover) Date: Wed, 11 Apr 2012 13:04:16 +0100 Subject: [llvm-commits] [cfe-commits] [LLVMdev] [Patch?] Fix handling of ARM homogenous aggregates In-Reply-To: References: <201204101045.38068.Tim.Northover@arm.com> Message-ID: <201204111304.16785.Tim.Northover@arm.com> On Tuesday 10 Apr 2012 21:35:55 Anton Korobeynikov wrote: > Hi Tim > > > I'm not sure I follow this point. Is preserving the source language a bad > > thing for some reason I'm missing? Certainly, if it affects optimisation > > it would be. > > Let's consider one example: > > union { > float foo[4]; > int bar[3]; > }; > > This is definitely not a HFA. However, such a union can be represented > via several different things in LLVM IR: [4 x float], [4 x i32], [32 x > i8] (all involving bitcasts to access one of the fields of a union). > And here we have a problem: 4 x float can be thought as HFA at IR > level, however it's certainly not since the HFA rules are worded using > C-level constructs and not IR-level. I'd say the bulk of the ABI is specified in simpler terms than the C language, much closer to LLVM's IR (in fact, in at least one respect higher level than C: arrays can be first-class argument types). Only after the actual rules have been given does the ABI say what the C/C++ mapping to these concepts is. Presumably other languages that want to be compatible will define their own mapping. It's a two-phase approach which seems fairly well-suited to LLVM's IR and structure. 
> So, my point is that IR is not expressible enough to capture all > source information necessary to model ABI properly. Do you have good > solution for this problem? I think it's expressive enough to provide an interface for each category the ABI cares about though: + Integer types of various widths and alignments. + Floating types, similarly. + Vectors as above. + Composite types that are HFAs. + (In the 64-bit case) Composite types less than 16 bytes in size. + Non-HFA, non-small composite types. In this example I'd say clang's job (ideally) would be to represent the union using some type in the final category ([4 x i32] is probably sufficient in the 32-bit world right now, because it turns out the ABI doesn't care about splitting between registers and stack). This kind of issue is always going to be present: any front-end is going to have to lower its internal representation to some LLVM type and discard information doing so, but I think it's neater if that's all it has to do. We should have a chat about this at the conference later. I'm in favour of the backend solution, but could certainly live with the other. I think deciding the correct approach is the most important thing. Tim. From benny.kra at googlemail.com Wed Apr 11 09:06:40 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Wed, 11 Apr 2012 14:06:40 -0000 Subject: [llvm-commits] [llvm] r154495 - in /llvm/trunk/lib: MC/MCDwarf.cpp VMCore/DebugLoc.cpp Message-ID: <20120411140640.298AE2A6C065@llvm.org> Author: d0k Date: Wed Apr 11 09:06:39 2012 New Revision: 154495 URL: http://llvm.org/viewvc/llvm-project?rev=154495&view=rev Log: Compute hashes directly with hash_combine instead of taking a detour through FoldingSetNodeID. 
Modified: llvm/trunk/lib/MC/MCDwarf.cpp llvm/trunk/lib/VMCore/DebugLoc.cpp Modified: llvm/trunk/lib/MC/MCDwarf.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCDwarf.cpp?rev=154495&r1=154494&r2=154495&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCDwarf.cpp (original) +++ llvm/trunk/lib/MC/MCDwarf.cpp Wed Apr 11 09:06:39 2012 @@ -21,7 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -1361,12 +1361,10 @@ return CIEKey::getTombstoneKey(); } static unsigned getHashValue(const CIEKey &Key) { - FoldingSetNodeID ID; - ID.AddPointer(Key.Personality); - ID.AddInteger(Key.PersonalityEncoding); - ID.AddInteger(Key.LsdaEncoding); - ID.AddBoolean(Key.IsSignalFrame); - return ID.ComputeHash(); + return static_cast(hash_combine(Key.Personality, + Key.PersonalityEncoding, + Key.LsdaEncoding, + Key.IsSignalFrame)); } static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) { Modified: llvm/trunk/lib/VMCore/DebugLoc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/DebugLoc.cpp?rev=154495&r1=154494&r2=154495&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/DebugLoc.cpp (original) +++ llvm/trunk/lib/VMCore/DebugLoc.cpp Wed Apr 11 09:06:39 2012 @@ -173,10 +173,7 @@ } unsigned DenseMapInfo::getHashValue(const DebugLoc &Key) { - FoldingSetNodeID ID; - ID.AddInteger(Key.LineCol); - ID.AddInteger(Key.ScopeIdx); - return ID.ComputeHash(); + return static_cast(hash_combine(Key.LineCol, Key.ScopeIdx)); } bool DenseMapInfo::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) { From benny.kra at googlemail.com Wed Apr 11 09:06:54 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Wed, 11 Apr 2012 
14:06:54 -0000 Subject: [llvm-commits] [llvm] r154497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Metadata.cpp Message-ID: <20120411140654.BD9F82A6C066@llvm.org> Author: d0k Date: Wed Apr 11 09:06:54 2012 New Revision: 154497 URL: http://llvm.org/viewvc/llvm-project?rev=154497&view=rev Log: Cache the hash value of the operands in the MDNode. FoldingSet is implemented as a chained hash table. When there is a hash collision during insertion, which is common as we fill the table until a load factor of 2.0 is hit, we walk the chained elements, comparing every operand with the new element's operands. This can be very expensive if the MDNode has many operands. We sacrifice a word of space in MDNode to cache the full hash value, reducing compares on collision to a minimum. MDNode grows from 28 to 32 bytes + operands on x86. On x86_64 the new bits fit nicely into existing padding, not growing the struct at all. The actual speedup depends a lot on the test case and is typically between 1% and 2% for C++ code with clang -c -O0 -g. Modified: llvm/trunk/include/llvm/Metadata.h llvm/trunk/lib/VMCore/LLVMContextImpl.h llvm/trunk/lib/VMCore/Metadata.cpp Modified: llvm/trunk/include/llvm/Metadata.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Metadata.h?rev=154497&r1=154496&r2=154497&view=diff ============================================================================== --- llvm/trunk/include/llvm/Metadata.h (original) +++ llvm/trunk/include/llvm/Metadata.h Wed Apr 11 09:06:54 2012 @@ -75,6 +75,10 @@ void operator=(const MDNode &); // DO NOT IMPLEMENT friend class MDNodeOperand; friend class LLVMContextImpl; + friend struct FoldingSetTrait; + + /// NumOperands - If the MDNode is uniqued cache the hash to speed up lookup. + unsigned Hash; /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the /// end of this MDNode. 
Modified: llvm/trunk/lib/VMCore/LLVMContextImpl.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/LLVMContextImpl.h?rev=154497&r1=154496&r2=154497&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/LLVMContextImpl.h (original) +++ llvm/trunk/lib/VMCore/LLVMContextImpl.h Wed Apr 11 09:06:54 2012 @@ -194,6 +194,26 @@ } }; +// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a +// shortcut to avoid comparing all operands. +template<> struct FoldingSetTrait : DefaultFoldingSetTrait { + static bool Equals(const MDNode &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID) { + assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?"); + // First, check if the cached hashes match. If they don't we can skip the + // expensive operand walk. + if (X.Hash != IDHash) + return false; + + // If they match we have to compare the operands. + X.Profile(TempID); + return TempID == ID; + } + static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) { + return X.Hash; // Return cached hash. + } +}; + /// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps /// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp. class DebugRecVH : public CallbackVH { Modified: llvm/trunk/lib/VMCore/Metadata.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Metadata.cpp?rev=154497&r1=154496&r2=154497&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Metadata.cpp (original) +++ llvm/trunk/lib/VMCore/Metadata.cpp Wed Apr 11 09:06:54 2012 @@ -250,6 +250,9 @@ void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, isFunctionLocal); + // Cache the operand hash. + N->Hash = ID.ComputeHash(); + // InsertPoint will have been set by the FindNodeOrInsertPos call. 
pImpl->MDNodeSet.InsertNode(N, InsertPoint); @@ -373,6 +376,8 @@ return; } + // Cache the operand hash. + Hash = ID.ComputeHash(); // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(this, InsertPoint); From benny.kra at googlemail.com Wed Apr 11 09:06:48 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Wed, 11 Apr 2012 14:06:48 -0000 Subject: [llvm-commits] [llvm] r154496 - in /llvm/trunk: include/llvm/ADT/FoldingSet.h include/llvm/Analysis/ScalarEvolution.h lib/Support/FoldingSet.cpp Message-ID: <20120411140648.1B8822A6C065@llvm.org> Author: d0k Date: Wed Apr 11 09:06:47 2012 New Revision: 154496 URL: http://llvm.org/viewvc/llvm-project?rev=154496&view=rev Log: FoldingSet: Push the hash through FoldingSetTraits::Equals, so clients can use it. Modified: llvm/trunk/include/llvm/ADT/FoldingSet.h llvm/trunk/include/llvm/Analysis/ScalarEvolution.h llvm/trunk/lib/Support/FoldingSet.cpp Modified: llvm/trunk/include/llvm/ADT/FoldingSet.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/FoldingSet.h?rev=154496&r1=154495&r2=154496&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/FoldingSet.h (original) +++ llvm/trunk/include/llvm/ADT/FoldingSet.h Wed Apr 11 09:06:47 2012 @@ -193,12 +193,11 @@ virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0; /// NodeEquals - Instantiations of the FoldingSet template implement /// this function to compare the given node with the given ID. - virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, + virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const=0; - /// NodeEquals - Instantiations of the FoldingSet template implement + /// ComputeNodeHash - Instantiations of the FoldingSet template implement /// this function to compute a hash value for the given node. 
- virtual unsigned ComputeNodeHash(Node *N, - FoldingSetNodeID &TempID) const = 0; + virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0; }; //===----------------------------------------------------------------------===// @@ -220,7 +219,7 @@ // to compute a temporary ID if necessary. The default implementation // just calls Profile and does a regular comparison. Implementations // can override this to provide more efficient implementations. - static inline bool Equals(T &X, const FoldingSetNodeID &ID, + static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID); // ComputeHash - Compute a hash value for X, using TempID to @@ -249,7 +248,7 @@ static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) { X.Profile(ID, Context); } - static inline bool Equals(T &X, const FoldingSetNodeID &ID, + static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID, Ctx Context); static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID, Ctx Context); @@ -344,7 +343,7 @@ template inline bool DefaultFoldingSetTrait::Equals(T &X, const FoldingSetNodeID &ID, - FoldingSetNodeID &TempID) { + unsigned IDHash, FoldingSetNodeID &TempID) { FoldingSetTrait::Profile(X, TempID); return TempID == ID; } @@ -358,6 +357,7 @@ inline bool DefaultContextualFoldingSetTrait::Equals(T &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID, Ctx Context) { ContextualFoldingSetTrait::Profile(X, TempID, Context); @@ -387,15 +387,14 @@ } /// NodeEquals - Instantiations may optionally provide a way to compare a /// node with a specified ID. 
- virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, + virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); - return FoldingSetTrait::Equals(*TN, ID, TempID); + return FoldingSetTrait::Equals(*TN, ID, IDHash, TempID); } - /// NodeEquals - Instantiations may optionally provide a way to compute a + /// ComputeNodeHash - Instantiations may optionally provide a way to compute a /// hash value directly from a node. - virtual unsigned ComputeNodeHash(Node *N, - FoldingSetNodeID &TempID) const { + virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); return FoldingSetTrait::ComputeHash(*TN, TempID); } @@ -465,10 +464,11 @@ ContextualFoldingSetTrait::Profile(*TN, ID, Context); } virtual bool NodeEquals(FoldingSetImpl::Node *N, - const FoldingSetNodeID &ID, + const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) const { T *TN = static_cast(N); - return ContextualFoldingSetTrait::Equals(*TN, ID, TempID, Context); + return ContextualFoldingSetTrait::Equals(*TN, ID, IDHash, TempID, + Context); } virtual unsigned ComputeNodeHash(FoldingSetImpl::Node *N, FoldingSetNodeID &TempID) const { Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=154496&r1=154495&r2=154496&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original) +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Wed Apr 11 09:06:47 2012 @@ -140,7 +140,7 @@ ID = X.FastID; } static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, - FoldingSetNodeID &TempID) { + unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) { Modified: llvm/trunk/lib/Support/FoldingSet.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/FoldingSet.cpp?rev=154496&r1=154495&r2=154496&view=diff ============================================================================== --- llvm/trunk/lib/Support/FoldingSet.cpp (original) +++ llvm/trunk/lib/Support/FoldingSet.cpp Wed Apr 11 09:06:47 2012 @@ -265,15 +265,15 @@ FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - - void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); + unsigned IDHash = ID.ComputeHash(); + void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; InsertPos = 0; FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { - if (NodeEquals(NodeInBucket, ID, TempID)) + if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) return NodeInBucket; TempID.clear(); From james.molloy at arm.com Wed Apr 11 10:10:49 2012 From: james.molloy at arm.com (James Molloy) Date: Wed, 11 Apr 2012 15:10:49 -0000 Subject: [llvm-commits] [www] r154498 - /www/trunk/devmtg/2012-04-12/index.html Message-ID: <20120411151049.88D452A6C065@llvm.org> Author: jamesm Date: Wed Apr 11 10:10:49 2012 New Revision: 154498 URL: http://llvm.org/viewvc/llvm-project?rev=154498&view=rev Log: Update euro-llvm devmtg page. Modified: www/trunk/devmtg/2012-04-12/index.html Modified: www/trunk/devmtg/2012-04-12/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/devmtg/2012-04-12/index.html?rev=154498&r1=154497&r2=154498&view=diff ============================================================================== --- www/trunk/devmtg/2012-04-12/index.html (original) +++ www/trunk/devmtg/2012-04-12/index.html Wed Apr 11 10:10:49 2012 @@ -18,12 +18,11 @@

    Announcements

      -
    • Registration is still open - email Euro-LLVM at arm.com to register! Don't forget to state whether you'll be wanting dinner and any dietary requirements you have.
    • -
    • BoF registration is now open - email Euro-LLVM at arm.com with a title, short description and the name of a BoF leader.
    • -
    • Some accommodations at the Hotel Russell has been subsidised - click here to book online. Note that we have already paid for these rooms so please do make use of this service!
    • +
    • Registration is now closed!
    • +
    • We still have room for more lightning talks - please email Euro-LLVM at arm.com.
    -

    Accepted presentations

    +

    Presentations

    @@ -31,19 +30,18 @@ - - + + -
    AuthorTitle
    Michael Spencer
    Sony Computer Entertainment America
    lld - the LLVM Linker
    Mark Charlebois
    QuIC
    Building Linux with LLVM
    Hal Finkel
    Argonne National Laboratory
    Autovectorization with LLVM
    Wayne Palmer
    Barclays Capital
    Verifying Serialisability and Thread-safety Markup using Clang (Barclays Capital Quantitative Analytics Library)
    Eli Bendersky
    Intel
    MCJIT Note that Eli's presentation has been accepted, but he has not yet fully confirmed that he will be able to attend.
    Wayne Palmer
    Barclays Capital
    Generating Serialisation Code with Clang
    Eli Bendersky
    Intel
    MCJIT
    Manuel Klimek
    Google
    Refactoring C++ with Clang
    Pablo Barrio
    Univ. Politécnica de Madrid
    Turning control flow graphs into function call graphs: transformation of partitioned codes for execution in heterogeneous architectures
    Richard Barton
    ARM
    Guaranteeing the correctness of MC for ARM
    Igor Bohm
    University of Edinburgh
    Reducing dynamic compilation latency - concurrent and parallel dynamic compilation
    Ralf Karrenberg & Sebastian Hack
    Saarland University
    Improving Performance of OpenCL on CPUs
    Reed Kotler
    MIPS
    Anatomy of the MIPS LLVM Port
    +1 still to decide.
    -

    Accepted workshops / tutorials

    +

    Workshops

    @@ -51,62 +49,38 @@
    AuthorTitle
    David ChisnallWhat LLVM can do for you
    -
    - -

    Registration is now open - email Euro-LLVM at ARM.com

    - -

    Dear LLVM user,

    -

    We are proud to announce the second European LLVM event on April 12-13 2012 in London, UK, starting at noon on April 12th . -This will be a full one-day conference with the intention and aim of exposing new developments and supporting and strengthening the network of LLVM developers around Europe. The format will resemble that of the previous meeting held in London in September 2011 but with more time for presentations and networking. The meeting is open to anyone from corporate to academia, professionals to enthusiasts, and is not in any way limited to those from Europe - people from other regions are welcome.

    - -

    Format

    - -

    We intend the conference to consist of one full day of presentations and BoFs running from noon to noon (12pm Thursday - 12pm Friday), followed by optional workshops on Friday afternoon to cover broader technical areas and help familiarise attendees with LLVM internals and use cases.

    - -

    This format depends on receiving sufficient presentation and workshop proposals.

    - -

    Call for Speakers, Posters, Demos

    - -

    We invite academic, industrial and hobbyist speakers to present their work on developing or using LLVM and Clang. We invite abstracts for technical presentations, posters, workshops, demonstrations and BoFs relating to LLVM/Clang development and use. Material will be chosen to cover a broad spectrum of themes and topics at various depths, some technical deep-diving, some surface-scratching.

    - -

    We are looking for: -

      -
    • Keynote speakers.
    • -
    • Technical presentations (30 minutes plus questions and discussion) relation to LLVM and Clang.
    • -
    • Presentations relating to academic or commercial use of LLVM and Clang.
    • -
    • Workshops and in-depth tutorials (1-2 hours - please specify in your abstract).
    • -
    -

    - -

    The deadline for receiving an extended abstract is February 10th, 2012. Speakers will be notified of acceptance or rejection before February 24th. The final submission deadline is March 30th. Slides and posters must be in PDF format.

    - -

    Please note that presentation materials and videos for the technical sessions will be posted on llvm.org after the conference.

    - -

    Organisation

    - -

    We'll be discussing the organisation of the event on the main LLVM mailing list (llvmdev at cs.uiuc.edu), and we welcome suggestions and help.

    - -

    Registration

    - -

    Registration is now open and is via email (Euro-LLVM at arm.com), on a first-come-first-served basis, free of charge. Please send your details (name, email, company/institution) and we'll publish this on the official LLVM website (unless requested otherwise). Attendance will be limited to about 100 people.

    - -

    A dinner will be provided on Thursday evening - this will also be free of charge. Please include whether you'll be attending the dinner in your registration email. The venue is a hotel and we will make some rooms available at a discounted price at https://hotelres.vbookings.co.uk/b/armeullvm/.

    - -

    Financial Support

    - -

    There may be a possibility of limited funding to help students or contributors who could not otherwise attend the conference. This will depend on overall sponsorship and companies' interest in supporting the event. Those who are funded will be required to present something at the meeting (a poster, chair a BoF, a lightning talk, a full presentation...) and may have other obligations to the sponsor (e.g. writing a blog post or a trip report).

    - -

    If you need funding to attend the meeting, or can help sponsor, please tell us in your registration email (to Euro-LLVM at arm.com).

    - -

    About LLVM

    +

    Schedule

    -

    The Low-Level Virtual Machine (LLVM) is a collection of libraries and tools that make it easy to build compilers, optimizers, Just-In-Time code generators, and many other compiler-related programs. LLVM uses a single, language-independent virtual instruction set both as an offline code representation (to communicate code between compiler phases and to run-time systems) and as the compiler internal representation (to analyse and transform programs). This persistent code representation allows a common set of sophisticated compiler techniques to be applied at compile-time, link-time, install-time, run- time, or "idle-time" (between program runs).

    +The schedule is now available here. -

    The strengths of the LLVM infrastructure are its extremely simple design (which makes it easy to understand and use), source-language independence, powerful mid-level optimizer, automated compiler debugging support, extensibility, and its stability and reliability. LLVM is currently being used to host a wide variety of academic research projects and commercial projects.

    +

    Latest communication

    -

    For more information, please visit http://llvm.org.

    +

    Hi,

    + +

    The first EU LLVM conference is tomorrow! You're receiving this email because you're registered – if you're no longer interested in coming please reply back ASAP as we have a waiting list!

    + +

    Ideally you should all know this already but the event is being held at the Hotel Russell, Russell Square, London WC1B 5BE. From the Russell Square tube station, exit the station turning left, then turn left again and the hotel is immediately on your left (turn 270 degrees counterclockwise whilst walking forwards...)

    + +

    The event will be on the ground floor towards the left as you come in the main door, but there should be signage.

    + +

    Please note, we have a slot available for an impromptu BoF or extra lightning talks - this will be announced at the introduction and anyone is welcome to suggest a topic. We also have plenty of available slots for lightning talks still (we only have 2 so far!) so if you have something prepared or need a soapbox for your (targeted, to-the-point) rant, please let one of the organisers know when you turn up :)

    + +

    Event registration begins at 12:00 noon. There will be a buffet lunch served from 12:30, followed by the conference opening at 13:30. The schedule is attached.

    + +

    The main presentations end at 12:45 on Friday, after which (and after lunch) there will be optional workshops. We'll be asking you when you sign up to register for these, in order to give their organisers some sort of heads-up as to expected numbers. You are by no means pinned down to either attending or not attending based on this! :) David Chisnall's workshop is expected to be longer than Anton's, so if you wish to attend that the expected finishing time is 15:30 +/- delta-t

    + +

    We'll be handing out feedback forms at opportune moments throughout the conference - these contain ratings for each individual presentation as well as aspects of the event itself - please fill these in, they are invaluable for producing a quality event next year. Please also don't pull any punches - This is the first event on this scale we have put on, so for sure there will be areas where we will be lacking.

    + +

    There will be complimentary wireless available for the 24h period from 12:00 noon Thursday to 12:00 noon Friday. More information about this will be provided on registration. Note that if you are staying at the hotel overnight, the wireless coverage does not extend upstairs - you will have wired Ethernet access in your room. Plenty of power sockets should be supplied throughout the main meeting room.

    + +

    If you have any questions, please don't hesitate to contact me. I'll also be the slightly flustered looking bloke in the ARM t-shirt tomorrow.

    + +

    See you all tomorrow!

    + +

    Cheers,

    + +

    James

    -

    -The Euro-LLVM 2012 committee


    From stoklund at 2pi.dk Wed Apr 11 10:20:50 2012 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 11 Apr 2012 08:20:50 -0700 Subject: [llvm-commits] [llvm] r154497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Metadata.cpp In-Reply-To: <20120411140654.BD9F82A6C066@llvm.org> References: <20120411140654.BD9F82A6C066@llvm.org> Message-ID: <7188425B-8D6E-4480-A9E0-1E956E674BA7@2pi.dk> On Apr 11, 2012, at 7:06 AM, Benjamin Kramer wrote: > Author: d0k > Date: Wed Apr 11 09:06:54 2012 > New Revision: 154497 > > URL: http://llvm.org/viewvc/llvm-project?rev=154497&view=rev > Log: > Cache the hash value of the operands in the MDNode. > > FoldingSet is implemented as a chained hash table. When there is a hash > collision during insertion, which is common as we fill the table until a > load factor of 2.0 is hit, we walk the chained elements, comparing every > operand with the new element's operands. This can be very expensive if the > MDNode has many operands. > > We sacrifice a word of space in MDNode to cache the full hash value, reducing > compares on collision to a minimum. MDNode grows from 28 to 32 bytes + operands > on x86. On x86_64 the new bits fit nicely into existing padding, not growing > the struct at all. > > The actual speedup depends a lot on the test case and is typically between > 1% and 2% for C++ code with clang -c -O0 -g. Neat! I would suggest one tweak: All the nodes in a hash table chain are going to have identical low bits in the hash value. If you compute a 64-bit hash value and store the high 32 bits in the node while using the low bits to index the hash table, you can lower the probability of collisions even further. 
/jakob From benny.kra at googlemail.com Wed Apr 11 10:32:20 2012 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Wed, 11 Apr 2012 17:32:20 +0200 Subject: [llvm-commits] [llvm] r154497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Metadata.cpp In-Reply-To: <7188425B-8D6E-4480-A9E0-1E956E674BA7@2pi.dk> References: <20120411140654.BD9F82A6C066@llvm.org> <7188425B-8D6E-4480-A9E0-1E956E674BA7@2pi.dk> Message-ID: <949A1FE0-C967-44DF-8E6F-67D74312DCE8@googlemail.com> On 11.04.2012, at 17:20, Jakob Stoklund Olesen wrote: > > On Apr 11, 2012, at 7:06 AM, Benjamin Kramer wrote: > >> Author: d0k >> Date: Wed Apr 11 09:06:54 2012 >> New Revision: 154497 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=154497&view=rev >> Log: >> Cache the hash value of the operands in the MDNode. >> >> FoldingSet is implemented as a chained hash table. When there is a hash >> collision during insertion, which is common as we fill the table until a >> load factor of 2.0 is hit, we walk the chained elements, comparing every >> operand with the new element's operands. This can be very expensive if the >> MDNode has many operands. >> >> We sacrifice a word of space in MDNode to cache the full hash value, reducing >> compares on collision to a minimum. MDNode grows from 28 to 32 bytes + operands >> on x86. On x86_64 the new bits fit nicely into existing padding, not growing >> the struct at all. >> >> The actual speedup depends a lot on the test case and is typically between >> 1% and 2% for C++ code with clang -c -O0 -g. > > Neat! > > I would suggest one tweak: > > All the nodes in a hash table chain are going to have identical low bits in the hash value. If you compute a 64-bit hash value and store the high 32 bits in the node while using the low bits to index the hash table, you can lower the probability of collisions even further. 
I thought about that, our hashing infrastructure computes a size_t hash so we could easily take the upper half of it on x86_64. OTOH we can't reuse the cached hash value when the FoldingSet grows if we store the upper bits, which would probably eat away any speedup from the saved collisions :/ - Ben > > /jakob > From sylvestre at debian.org Wed Apr 11 10:35:37 2012 From: sylvestre at debian.org (Sylvestre Ledru) Date: Wed, 11 Apr 2012 15:35:37 -0000 Subject: [llvm-commits] [llvm] r154500 - in /llvm/trunk: Makefile.rules lib/Support/Unix/Path.inc lib/Support/Unix/PathV2.inc lib/Support/Unix/Process.inc tools/llvm-shlib/Makefile Message-ID: <20120411153537.8079F2A6C065@llvm.org> Author: sylvestre Date: Wed Apr 11 10:35:36 2012 New Revision: 154500 URL: http://llvm.org/viewvc/llvm-project?rev=154500&view=rev Log: Fix the build under Debian GNU/Hurd. Thanks to Pino Toscano for the patch Modified: llvm/trunk/Makefile.rules llvm/trunk/lib/Support/Unix/Path.inc llvm/trunk/lib/Support/Unix/PathV2.inc llvm/trunk/lib/Support/Unix/Process.inc llvm/trunk/tools/llvm-shlib/Makefile Modified: llvm/trunk/Makefile.rules URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/Makefile.rules?rev=154500&r1=154499&r2=154500&view=diff ============================================================================== --- llvm/trunk/Makefile.rules (original) +++ llvm/trunk/Makefile.rules Wed Apr 11 10:35:36 2012 @@ -1432,7 +1432,7 @@ endif endif -ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD GNU)) ifneq ($(ARCH), Mips) LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map endif Modified: llvm/trunk/lib/Support/Unix/Path.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Unix/Path.inc?rev=154500&r1=154499&r2=154500&view=diff ============================================================================== --- llvm/trunk/lib/Support/Unix/Path.inc (original) +++ 
llvm/trunk/lib/Support/Unix/Path.inc Wed Apr 11 10:35:36 2012 @@ -60,6 +60,11 @@ #include #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(MAXPATHLEN) +# define MAXPATHLEN 4096 +#endif + // Put in a hack for Cygwin which falsely reports that the mkdtemp function // is available when it is not. #ifdef __CYGWIN__ Modified: llvm/trunk/lib/Support/Unix/PathV2.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Unix/PathV2.inc?rev=154500&r1=154499&r2=154500&view=diff ============================================================================== --- llvm/trunk/lib/Support/Unix/PathV2.inc (original) +++ llvm/trunk/lib/Support/Unix/PathV2.inc Wed Apr 11 10:35:36 2012 @@ -46,6 +46,11 @@ #include #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(PATH_MAX) +# define PATH_MAX 4096 +#endif + using namespace llvm; namespace { @@ -96,7 +101,12 @@ namespace fs { error_code current_path(SmallVectorImpl &result) { +#ifdef MAXPATHLEN result.reserve(MAXPATHLEN); +#else +// For GNU Hurd + result.reserve(1024); +#endif while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { Modified: llvm/trunk/lib/Support/Unix/Process.inc URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Unix/Process.inc?rev=154500&r1=154499&r2=154500&view=diff ============================================================================== --- llvm/trunk/lib/Support/Unix/Process.inc (original) +++ llvm/trunk/lib/Support/Unix/Process.inc Wed Apr 11 10:35:36 2012 @@ -136,7 +136,7 @@ return getgid(); } -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) #include #endif @@ -150,7 +150,7 @@ setrlimit(RLIMIT_CORE, &rlim); #endif -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) // Disable crash reporting on Mac OS X 10.0-10.4 // get information about the original set of exception ports for the task Modified: llvm/trunk/tools/llvm-shlib/Makefile URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-shlib/Makefile?rev=154500&r1=154499&r2=154500&view=diff ============================================================================== --- llvm/trunk/tools/llvm-shlib/Makefile (original) +++ llvm/trunk/tools/llvm-shlib/Makefile Wed Apr 11 10:35:36 2012 @@ -63,7 +63,7 @@ endif endif -ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD GNU)) # Include everything from the .a's into the shared library. LLVMLibsOptions := -Wl,--whole-archive $(LLVMLibsOptions) \ -Wl,--no-whole-archive @@ -71,7 +71,7 @@ LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME)$(SHLIBEXT) endif -ifeq ($(HOST_OS),Linux) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU)) # Don't allow unresolved symbols. LLVMLibsOptions += -Wl,--no-undefined endif From chandlerc at google.com Wed Apr 11 10:44:52 2012 From: chandlerc at google.com (Chandler Carruth) Date: Wed, 11 Apr 2012 16:44:52 +0100 Subject: [llvm-commits] [llvm] r154497 - in /llvm/trunk: include/llvm/Metadata.h lib/VMCore/LLVMContextImpl.h lib/VMCore/Metadata.cpp In-Reply-To: <949A1FE0-C967-44DF-8E6F-67D74312DCE8@googlemail.com> References: <20120411140654.BD9F82A6C066@llvm.org> <7188425B-8D6E-4480-A9E0-1E956E674BA7@2pi.dk> <949A1FE0-C967-44DF-8E6F-67D74312DCE8@googlemail.com> Message-ID: On Wed, Apr 11, 2012 at 4:32 PM, Benjamin Kramer wrote: > > On 11.04.2012, at 17:20, Jakob Stoklund Olesen wrote: > > > > > On Apr 11, 2012, at 7:06 AM, Benjamin Kramer wrote: > > > >> Author: d0k > >> Date: Wed Apr 11 09:06:54 2012 > >> New Revision: 154497 > >> > >> URL: http://llvm.org/viewvc/llvm-project?rev=154497&view=rev > >> Log: > >> Cache the hash value of the operands in the MDNode. > >> > >> FoldingSet is implemented as a chained hash table. 
When there is a hash > >> collision during insertion, which is common as we fill the table until a > >> load factor of 2.0 is hit, we walk the chained elements, comparing every > >> operand with the new element's operands. This can be very expensive if > the > >> MDNode has many operands. > >> > >> We sacrifice a word of space in MDNode to cache the full hash value, > reducing > >> compares on collision to a minimum. MDNode grows from 28 to 32 bytes + > operands > >> on x86. On x86_64 the new bits fit nicely into existing padding, not > growing > >> the struct at all. > >> > >> The actual speedup depends a lot on the test case and is typically > between > >> 1% and 2% for C++ code with clang -c -O0 -g. > > > > Neat! > > > > I would suggest one tweak: > > > > All the nodes in a hash table chain are going to have identical low bits > in the hash value. If you compute a 64-bit hash value and store the high 32 > bits in the node while using the low bits to index the hash table, you can > lower the probability of collisions even further. > > I thought about that, our hashing infrastructure computes a size_t hash so > we could easily take the upper half of it on x86_64. > > OTOH we can't reuse the cached hash value when the FoldingSet grows if we > store the upper bits, which would probably eat away any speedup from the > saved collisions :/ > Collisions across the entire 32-bit key are really quite rare. I wouldn't stress about this. It is nice to avoid re-computing the hash as the set grows. I think it would be good to look into doing similar tricks for other large foldingsets with large keys that would be slow to compare. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/dd19b211/attachment.html From chandlerc at gmail.com Wed Apr 11 11:09:05 2012 From: chandlerc at gmail.com (Chandler Carruth) Date: Wed, 11 Apr 2012 17:09:05 +0100 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: References: <20120331124242.93B692A6C065@llvm.org> Message-ID: On Wed, Apr 11, 2012 at 1:56 AM, Chandler Carruth wrote: > On Wed, Apr 11, 2012 at 12:31 AM, Chandler Carruth wrote: > >> I'll start looking for smoking guns right away though. > > > This looks very much like the previous cases where inliner changes caused > compile-time regressions. > > Looking at x86-64 of sqlite3, the profile with the trunk clang shows only > 3.5% of all the time in the inline cost analysis. That's a bit higher than > I would like (I've got some ideas to shrink it on two fronts that I will > implement right away), it's not likely responsible for the near 10% > regression your seeing; this function wasn't even free before. > > However, I'm seeing time spread pretty well between: JumpThreading, the > RA, CorrelatedValueProp, GVN, and InstCombine. This looks like increased > kicking in of the host of scalar optimizations giving us a broad slight > slowdown. > > I'm still working on doing before/after profile comparisons and other > things to see if I can tease out the culprit here. > > I also see several places where we can recoup a few percent in all > likelihood; I'll try to tackle those if I can. > Ok, thanks to Chad for helping me get set up to look at this. I've implemented the type of optimization that *should* help matters if the inline cost computation were the problem. I've attached the patch. It reduces the number of inline cost computations by 25% for sqlite3. 
I have a plan for how to make even more invasive changes to the inliner that could potentially save another 10% or so, but the alarming thing is that this patch has *zero* impact on the -O3 compile time of the sqlite3 bitcode. =/ However, if I tweak the inline cost computation to simple return higher costs, or to reject a larger percentage of the functions, I can immediately recoup all 9% regressions and a lot more. As far as I can tell, this is a symptom of the new inline cost metric exposing more (good) inlining opportunities, and the scalar optimizations in LLVM taking advantage of them, and chewing on the code more. The unfortunate thing is that we're not getting any significant runtime improvements out of this (or are we?). I think the only real solution is to work on making the various scalar optimizations less expensive. I see a few opportunities for this already after staring at the profile for a while. I'll try to look into those as I have time. =/ I wish I had a better answer here. Other ideas? Thoughts? I've attached the patch which caches some inline cost queries. As I said, it caches about 25% of them, at least on this test case. Even so, I'm not sure we should do it because it adds complexity and ugliness to the code. Let me know. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/ef41390b/attachment-0001.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: cache-callercallers.diff Type: application/octet-stream Size: 5161 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/ef41390b/attachment-0001.obj From mcrosier at apple.com Wed Apr 11 11:40:56 2012 From: mcrosier at apple.com (Chad Rosier) Date: Wed, 11 Apr 2012 09:40:56 -0700 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: References: <20120331124242.93B692A6C065@llvm.org> Message-ID: <838533BF-FCE3-4C77-B6EA-3062A125F12F@apple.com> On Apr 11, 2012, at 9:09 AM, Chandler Carruth wrote: > On Wed, Apr 11, 2012 at 1:56 AM, Chandler Carruth wrote: > On Wed, Apr 11, 2012 at 12:31 AM, Chandler Carruth wrote: > I'll start looking for smoking guns right away though. > > This looks very much like the previous cases where inliner changes caused compile-time regressions. > > Looking at x86-64 of sqlite3, the profile with the trunk clang shows only 3.5% of all the time in the inline cost analysis. That's a bit higher than I would like (I've got some ideas to shrink it on two fronts that I will implement right away), it's not likely responsible for the near 10% regression your seeing; this function wasn't even free before. > > However, I'm seeing time spread pretty well between: JumpThreading, the RA, CorrelatedValueProp, GVN, and InstCombine. This looks like increased kicking in of the host of scalar optimizations giving us a broad slight slowdown. > > I'm still working on doing before/after profile comparisons and other things to see if I can tease out the culprit here. > > I also see several places where we can recoup a few percent in all likelihood; I'll try to tackle those if I can. > > Ok, thanks to Chad for helping me get set up to look at this. I've implemented the type of optimization that *should* help matters if the inline cost computation were the problem. I've attached the patch. 
It reduces the number of inline cost computations by 25% for sqlite3. I have a plan for how to make even more invasive changes to the inliner that could potentially save another 10% or so, but the alarming thing is that this patch has *zero* impact on the -O3 compile time of the sqlite3 bitcode. =/ However, if I tweak the inline cost computation to simple return higher costs, or to reject a larger percentage of the functions, I can immediately recoup all 9% regressions and a lot more. > > As far as I can tell, this is a symptom of the new inline cost metric exposing more (good) inlining opportunities, and the scalar optimizations in LLVM taking advantage of them, and chewing on the code more. The unfortunate thing is that we're not getting any significant runtime improvements out of this (or are we?). I saw improvements and regressions in both compile-time and execution-time. Either Daniel or I will send you more comprehensive numbers off-list. > > I think the only real solution is to work on making the various scalar optimizations less expensive. I see a few opportunities for this already after staring at the profile for a while. I'll try to look into those as I have time. =/ I wish I had a better answer here. Other ideas? Thoughts? I completely agree. No great ideas here, but feel free to file bug reports as you see opportunities for improvement.. > > I've attached the patch which caches some inline cost queries. As I said, it caches about 25% of them, at least on this test case. Even so, I'm not sure we should do it because it adds complexity and ugliness to the code. Let me know. > I'm fine with leaving the code as is and focusing on the other scalar optimizations. Chad -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/45ce163c/attachment.html From matthewbg at google.com Wed Apr 11 11:50:15 2012 From: matthewbg at google.com (Matt Beaumont-Gay) Date: Wed, 11 Apr 2012 09:50:15 -0700 Subject: [llvm-commits] [llvm] r154492 - in /llvm/trunk/lib: Analysis/InlineCost.cpp Transforms/IPO/Inliner.cpp In-Reply-To: <20120411101510.B10842A6C065@llvm.org> References: <20120411101510.B10842A6C065@llvm.org> Message-ID: On Wed, Apr 11, 2012 at 03:15, Chandler Carruth wrote: > Author: chandlerc > Date: Wed Apr 11 05:15:10 2012 > New Revision: 154492 > > URL: http://llvm.org/viewvc/llvm-project?rev=154492&view=rev > Log: > Add two statistics to help track how we are computing the inline cost. > > Yea, 'NumCallerCallersAnalyzed' isn't a great name, suggestions welcome. NumParentCallersAnalyzed (or NumCallerParentsAnalyzed)? From grosbach at apple.com Wed Apr 11 11:53:25 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 11 Apr 2012 16:53:25 -0000 Subject: [llvm-commits] [llvm] r154505 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-shuffle-encoding.s Message-ID: <20120411165325.750C02A6C065@llvm.org> Author: grosbach Date: Wed Apr 11 11:53:25 2012 New Revision: 154505 URL: http://llvm.org/viewvc/llvm-project?rev=154505&view=rev Log: ARM 'vzip.32 Dd, Dm' is a pseudo-instruction. While there is an encoding for it in VZIP, the result of that is undefined, so we should avoid it. Define the instruction as a pseudo for VTRN.32 instead, as the ARM ARM indicates. 
rdar://11221911 Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=154505&r1=154504&r2=154505&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Apr 11 11:53:25 2012 @@ -2825,7 +2825,8 @@ case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VZIPd32; break; + // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154505&r1=154504&r2=154505&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr 11 11:53:25 2012 @@ -5388,7 +5388,9 @@ def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; Modified: llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s?rev=154505&r1=154504&r2=154505&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s Wed Apr 11 11:53:25 2012 @@ -59,6 +59,7 @@ vzip.8 q9, q8 vzip.16 q9, q8 vzip.32 q9, q8 + vzip.32 d2, d3 @ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3] @ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3] @@ -70,6 +71,7 @@ @ CHECK: vzip.8 q9, q8 @ encoding: [0xe0,0x21,0xf2,0xf3] @ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3] @ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3] +@ CHECK: vtrn.32 d2, d3 @ encoding: [0x83,0x20,0xba,0xf3] @ VTRN alternate size suffices From evan.cheng at apple.com Wed Apr 11 12:21:19 2012 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 11 Apr 2012 10:21:19 -0700 Subject: [llvm-commits] [llvm] r154480 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMS In-Reply-To: References: Message-ID: <5B4AB141-CB24-4D80-8C11-498DE74529F0@apple.com> On Apr 11, 2012, at 4:40 AM, Anton Korobeynikov wrote: > Hello Evan, > >> Clean up ARM fused multiply + add/sub support some more: rename some isel >> predicates. >> Also remove NEON2 since it's not really useful and it is confusing. If >> NEON + VFP4 implies NEON2 but NEON2 doesn't imply NEON + VFP4, what does it >> really mean? > There was the discussion about this in ML, consider checking it. 
I only saw this: > Here is an updated patch for the ARM fused multiply add/sub feature with the > relevant suggestions incorporated. > > In this update: > - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2. Previously hasNEON2 is both a subtarget feature as well as a isel predicate: 'return HasNEON2 || (HasNEON && HasVFPv4);'. This is wrong because it would break the assembler and disassembler when you pass -mattr=+vfp4 (because HasNEON2 would still be false). Furthermore, since (HasNEON && HasVFPv4) implies HasNEON2, it means NEON2 is equal or is a subset of the features implied by NEON && HasVFPv4. Since currently there are no instructions that are predicated on HasVFPv4 should not be available for a target with only NEON2, there is really no point in having this subtarget feature. Also note: > - Regarding Neon2 flag, Code Sourcery GCC seems to support only these flags: > -mfpu=vfpv4 > -mfpu=vfpv4-d16 > -mfpu=neon-vfpv4 > -mfpu=vfpv3-d16-fp16 > -mfpu=vfpv3-fp16 There isn't a -mfpu=neon2 flag. Evan > > -- > With best regards, Anton Korobeynikov > Faculty of Mathematics and Mechanics, Saint Petersburg State University From atrick at apple.com Wed Apr 11 12:35:27 2012 From: atrick at apple.com (Andrew Trick) Date: Wed, 11 Apr 2012 17:35:27 -0000 Subject: [llvm-commits] [llvm] r154510 - /llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Message-ID: <20120411173527.2A26A2A6C065@llvm.org> Author: atrick Date: Wed Apr 11 12:35:26 2012 New Revision: 154510 URL: http://llvm.org/viewvc/llvm-project?rev=154510&view=rev Log: TableGen'd regpressure: register unit set pruning. The pruning is more complete if it is not done incrementally. The code is also a tad less convluted. 
Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.cpp?rev=154510&r1=154509&r2=154510&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenRegisters.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Wed Apr 11 12:35:26 2012 @@ -1167,8 +1167,7 @@ assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets"); // Form an equivalence class of UnitSets with no significant difference. - // Populate PrunedUnitSets with each equivalence class's superset. - std::vector PrunedUnitSets; + std::vector SuperSetIDs; for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size(); SubIdx != EndIdx; ++SubIdx) { const RegUnitSet &SubSet = RegUnitSets[SubIdx]; @@ -1176,25 +1175,22 @@ for (; SuperIdx != EndIdx; ++SuperIdx) { if (SuperIdx == SubIdx) continue; - const RegUnitSet *SuperSet = 0; - if (SuperIdx > SubIdx) - SuperSet = &RegUnitSets[SuperIdx]; - else { - // Compare with already-pruned sets. - if (SuperIdx >= PrunedUnitSets.size()) - continue; - SuperSet = &PrunedUnitSets[SuperIdx]; - } - if (isRegUnitSubSet(SubSet.Units, SuperSet->Units) - && (SubSet.Units.size() + 3 > SuperSet->Units.size())) { + + const RegUnitSet &SuperSet = RegUnitSets[SuperIdx]; + if (isRegUnitSubSet(SubSet.Units, SuperSet.Units) + && (SubSet.Units.size() + 3 > SuperSet.Units.size())) { break; } } - if (SuperIdx != EndIdx) - continue; - PrunedUnitSets.resize(PrunedUnitSets.size()+1); - PrunedUnitSets.back().Name = RegUnitSets[SubIdx].Name; - PrunedUnitSets.back().Units.swap(RegUnitSets[SubIdx].Units); + if (SuperIdx == EndIdx) + SuperSetIDs.push_back(SubIdx); + } + // Populate PrunedUnitSets with each equivalence class's superset. 
+ std::vector PrunedUnitSets(SuperSetIDs.size()); + for (unsigned i = 0, e = SuperSetIDs.size(); i != e; ++i) { + unsigned SuperIdx = SuperSetIDs[i]; + PrunedUnitSets[i].Name = RegUnitSets[SuperIdx].Name; + PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units); } RegUnitSets.swap(PrunedUnitSets); } From grosbach at apple.com Wed Apr 11 12:40:18 2012 From: grosbach at apple.com (Jim Grosbach) Date: Wed, 11 Apr 2012 17:40:18 -0000 Subject: [llvm-commits] [llvm] r154511 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMInstrNEON.td test/MC/ARM/neon-shuffle-encoding.s Message-ID: <20120411174018.DFF4C2A6C065@llvm.org> Author: grosbach Date: Wed Apr 11 12:40:18 2012 New Revision: 154511 URL: http://llvm.org/viewvc/llvm-project?rev=154511&view=rev Log: ARM 'vuzp.32 Dd, Dm' is a pseudo-instruction. While there is an encoding for it in VUZP, the result of that is undefined, so we should avoid it. Define the instruction as a pseudo for VTRN.32 instead, as the ARM ARM indicates. rdar://11222366 Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=154511&r1=154510&r2=154511&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Apr 11 12:40:18 2012 @@ -2845,7 +2845,8 @@ case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VUZPd32; break; + // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
+ case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=154511&r1=154510&r2=154511&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr 11 12:40:18 2012 @@ -5378,7 +5378,9 @@ def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; Modified: llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s?rev=154511&r1=154510&r2=154511&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s (original) +++ llvm/trunk/test/MC/ARM/neon-shuffle-encoding.s Wed Apr 11 12:40:18 2012 @@ -60,6 +60,7 @@ vzip.16 q9, q8 vzip.32 q9, q8 vzip.32 d2, d3 + vuzp.32 d2, d3 @ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3] @ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3] @@ -72,6 +73,7 @@ @ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3] @ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3] @ CHECK: vtrn.32 d2, d3 @ encoding: [0x83,0x20,0xba,0xf3] +@ CHECK: vtrn.32 d2, d3 @ encoding: [0x83,0x20,0xba,0xf3] @ VTRN alternate size suffices From apazos at codeaurora.org Wed Apr 11 13:13:47 2012 From: apazos at codeaurora.org (Ana Pazos) 
Date: Wed, 11 Apr 2012 11:13:47 -0700 Subject: [llvm-commits] [llvm] r154480 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMS In-Reply-To: <5B4AB141-CB24-4D80-8C11-498DE74529F0@apple.com> References: <5B4AB141-CB24-4D80-8C11-498DE74529F0@apple.com> Message-ID: <03ce01cd180e$d6b928c0$842b7a40$@codeaurora.org> Hi Evan, There was a long discussion on this mailing list about Neon2 when I submitted the patch. You can find all of the messages by searching the archive. See some of the emails attached. Btw, you can add Neon2 support to clang instead of reusing the neon-vfp4 flag, which the reviewers did not like. Thanks, Ana. -----Original Message----- From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Evan Cheng Sent: Wednesday, April 11, 2012 10:21 AM To: Anton Korobeynikov Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] [llvm] r154480 - in /llvm/trunk: lib/Target/ARM/ARM.td lib/Target/ARM/ARMAsmPrinter.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/ARMSubtarget.cpp lib/Target/ARM/ARMS On Apr 11, 2012, at 4:40 AM, Anton Korobeynikov wrote: > Hello Evan, > >> Clean up ARM fused multiply + add/sub support some more: rename some >> isel predicates. >> Also remove NEON2 since it's not really useful and it is confusing. >> If NEON + VFP4 implies NEON2 but NEON2 doesn't imply NEON + VFP4, >> what does it really mean? > There was a discussion about this on the ML, consider checking it. I only saw this: > Here is an updated patch for the ARM fused multiply add/sub feature > with the relevant suggestions incorporated. > > In this update: > - I assumed neon2 does not imply vfpv4, but neon and vfpv4 imply neon2.
Previously hasNEON2 was both a subtarget feature and an isel predicate: 'return HasNEON2 || (HasNEON && HasVFPv4);'. This is wrong because it would break the assembler and disassembler when you pass -mattr=+vfp4 (because HasNEON2 would still be false). Furthermore, since (HasNEON && HasVFPv4) implies HasNEON2, it means NEON2 is equal to or a subset of the features implied by NEON && HasVFPv4. Since there are currently no instructions predicated on HasVFPv4 that should not be available for a target with only NEON2, there is really no point in having this subtarget feature. Also note: > - Regarding Neon2 flag, Code Sourcery GCC seems to support only these flags: > -mfpu=vfpv4 > -mfpu=vfpv4-d16 > -mfpu=neon-vfpv4 > -mfpu=vfpv3-d16-fp16 > -mfpu=vfpv3-fp16 There isn't a -mfpu=neon2 flag. Evan > > -- > With best regards, Anton Korobeynikov > Faculty of Mathematics and Mechanics, Saint Petersburg State > University _______________________________________________ llvm-commits mailing list llvm-commits at cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An embedded message was scrubbed... From: "James Molloy" Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Tue, 24 Jan 2012 02:01:15 -0700 Size: 3332 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment.mht -------------- next part -------------- An embedded message was scrubbed... From: "James Molloy" Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Tue, 24 Jan 2012 02:33:19 -0700 Size: 3037 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment-0001.mht -------------- next part -------------- An embedded message was scrubbed...
From: "Anton Korobeynikov" Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Tue, 24 Jan 2012 03:39:12 -0700 Size: 3460 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment-0002.mht -------------- next part -------------- An embedded message was scrubbed... From: "James Molloy" Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Tue, 24 Jan 2012 03:47:19 -0700 Size: 4103 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment-0003.mht -------------- next part -------------- An embedded message was scrubbed... From: "James Molloy" Subject: RE: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Wed, 25 Jan 2012 02:27:18 -0700 Size: 3690 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment-0004.mht -------------- next part -------------- An embedded message was scrubbed... From: "Anton Korobeynikov" Subject: Re: [llvm-commits] LLVM patch to support ARM fused multiply add/subtract instructions Date: Wed, 25 Jan 2012 06:42:05 -0700 Size: 2588 Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120411/e6b91e8f/attachment-0005.mht From atrick at apple.com Wed Apr 11 13:16:28 2012 From: atrick at apple.com (Andrew Trick) Date: Wed, 11 Apr 2012 18:16:28 -0000 Subject: [llvm-commits] [llvm] r154518 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h utils/TableGen/CodeGenRegisters.cpp utils/TableGen/CodeGenRegisters.h utils/TableGen/RegisterInfoEmitter.cpp Message-ID: <20120411181628.DA99C2A6C065@llvm.org> Author: atrick Date: Wed Apr 11 13:16:28 2012 New Revision: 154518 URL: http://llvm.org/viewvc/llvm-project?rev=154518&view=rev Log: TableGen's regpressure: emit per-registerclass weight limits. 
Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/utils/TableGen/CodeGenRegisters.cpp llvm/trunk/utils/TableGen/CodeGenRegisters.h llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=154518&r1=154517&r2=154518&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Wed Apr 11 13:16:28 2012 @@ -202,6 +202,13 @@ bool inAllocatableClass; // Register belongs to an allocatable regclass. }; +/// Each TargetRegisterClass has a per register weight, and weight +/// limit which must be less than the limits of its pressure sets. +struct RegClassWeight { + unsigned RegWeigt; + unsigned WeightLimit; +}; + /// TargetRegisterInfo base class - We assume that the target defines a static /// array of TargetRegisterDesc objects that represent all of the machine /// registers that the target has. As such, we simply have to track a pointer @@ -509,7 +516,8 @@ } /// Get the weight in units of pressure for this register class. - virtual unsigned getRegClassWeight(const TargetRegisterClass *RC) const = 0; + virtual const RegClassWeight &getRegClassWeight( + const TargetRegisterClass *RC) const = 0; /// Get the number of dimensions of register pressure. 
virtual unsigned getNumRegPressureSets() const = 0; Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.cpp?rev=154518&r1=154517&r2=154518&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenRegisters.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenRegisters.cpp Wed Apr 11 13:16:28 2012 @@ -722,6 +722,16 @@ Out.set((*I)->EnumValue); } +// Populate a unique sorted list of units from a register set. +void CodeGenRegisterClass::buildRegUnitSet( + std::vector &RegUnits) const { + std::vector TmpUnits; + for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI) + TmpUnits.push_back(*UnitI); + std::sort(TmpUnits.begin(), TmpUnits.end()); + std::unique_copy(TmpUnits.begin(), TmpUnits.end(), + std::back_inserter(RegUnits)); +} //===----------------------------------------------------------------------===// // CodeGenRegBank @@ -1130,17 +1140,6 @@ } } -// Populate a unique sorted list of units from a register set. -static void buildRegUnitSet(const CodeGenRegister::Set &Regs, - std::vector &RegUnits) { - std::vector TmpUnits; - for (RegUnitIterator UnitI(Regs); UnitI.isValid(); ++UnitI) - TmpUnits.push_back(*UnitI); - std::sort(TmpUnits.begin(), TmpUnits.end()); - std::unique_copy(TmpUnits.begin(), TmpUnits.end(), - std::back_inserter(RegUnits)); -} - // Find a set in UniqueSets with the same elements as Set. // Return an iterator into UniqueSets. static std::vector::const_iterator @@ -1216,7 +1215,7 @@ RegUnitSets.back().Name = RegClasses[RCIdx]->getName(); // Compute a sorted list of units in this class. - buildRegUnitSet(RegClasses[RCIdx]->getMembers(), RegUnitSets.back().Units); + RegClasses[RCIdx]->buildRegUnitSet(RegUnitSets.back().Units); // Find an existing RegUnitSet. std::vector::const_iterator SetI = @@ -1279,7 +1278,7 @@ // Recompute the sorted list of units in this class. 
std::vector RegUnits; - buildRegUnitSet(RegClasses[RCIdx]->getMembers(), RegUnits); + RegClasses[RCIdx]->buildRegUnitSet(RegUnits); // Don't increase pressure for unallocatable regclasses. if (RegUnits.empty()) Modified: llvm/trunk/utils/TableGen/CodeGenRegisters.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenRegisters.h?rev=154518&r1=154517&r2=154518&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenRegisters.h (original) +++ llvm/trunk/utils/TableGen/CodeGenRegisters.h Wed Apr 11 13:16:28 2012 @@ -279,6 +279,9 @@ // getOrder(0). const CodeGenRegister::Set &getMembers() const { return Members; } + // Populate a unique sorted list of units from a register set. + void buildRegUnitSet(std::vector &RegUnits) const; + CodeGenRegisterClass(CodeGenRegBank&, Record *R); // A key representing the parts of a register class used for forming @@ -449,6 +452,15 @@ return RegUnitWeights[RUID]; } + // Get the sum of unit weights. + unsigned getRegUnitSetWeight(const std::vector &Units) const { + unsigned Weight = 0; + for (std::vector::const_iterator + I = Units.begin(), E = Units.end(); I != E; ++I) + Weight += getRegUnitWeight(*I); + return Weight; + } + // Increase a RegUnitWeight. 
void increaseRegUnitWeight(unsigned RUID, unsigned Inc) { RegUnitWeights[RUID] += Inc; Modified: llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp?rev=154518&r1=154517&r2=154518&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/RegisterInfoEmitter.cpp Wed Apr 11 13:16:28 2012 @@ -125,19 +125,23 @@ unsigned NumSets = RegBank.getNumRegPressureSets(); OS << "/// Get the weight in units of pressure for this register class.\n" - << "unsigned " << ClassName << "::\n" + << "const RegClassWeight &" << ClassName << "::\n" << "getRegClassWeight(const TargetRegisterClass *RC) const {\n" - << " static const unsigned RCWeightTable[] = {\n"; + << " static const RegClassWeight RCWeightTable[] = {\n"; for (unsigned i = 0, e = NumRCs; i != e; ++i) { const CodeGenRegisterClass &RC = *RegBank.getRegClasses()[i]; const CodeGenRegister::Set &Regs = RC.getMembers(); if (Regs.empty()) - OS << " 0"; - else - OS << " " << (*Regs.begin())->getWeight(RegBank); - OS << ", \t// " << RC.getName() << "\n"; + OS << " {0, 0"; + else { + std::vector RegUnits; + RC.buildRegUnitSet(RegUnits); + OS << " {" << (*Regs.begin())->getWeight(RegBank) + << ", " << RegBank.getRegUnitSetWeight(RegUnits); + } + OS << "}, \t// " << RC.getName() << "\n"; } - OS << " 0 };\n" + OS << " {0, 0} };\n" << " return RCWeightTable[RC->getID()];\n" << "}\n\n"; @@ -153,12 +157,7 @@ << " static const unsigned PressureLimitTable[] = {\n"; for (unsigned i = 0; i < NumSets; ++i ) { const RegUnitSet &RegUnits = RegBank.getRegPressureSet(i); - unsigned Weight = 0; - for (RegUnitSet::iterator - I = RegUnits.Units.begin(), E = RegUnits.Units.end(); I != E; ++I) { - Weight += RegBank.getRegUnitWeight(*I); - } - OS << " " << Weight + OS << " " << RegBank.getRegUnitSetWeight(RegUnits.Units) << ", \t// " << i << ": 
" << RegBank.getRegPressureSet(i).Name << "\n"; } OS << " 0 };\n" @@ -668,7 +667,8 @@ << " const TargetRegisterClass *getMatchingSuperRegClass(" "const TargetRegisterClass*, const TargetRegisterClass*, " "unsigned) const;\n" - << " unsigned getRegClassWeight(const TargetRegisterClass *RC) const;\n" + << " const RegClassWeight &getRegClassWeight(" + << "const TargetRegisterClass *RC) const;\n" << " unsigned getNumRegPressureSets() const;\n" << " unsigned getRegPressureSetLimit(unsigned Idx) const;\n" << " const int *getRegClassPressureSets(" From eli.friedman at gmail.com Wed Apr 11 13:33:44 2012 From: eli.friedman at gmail.com (Eli Friedman) Date: Wed, 11 Apr 2012 11:33:44 -0700 Subject: [llvm-commits] [llvm] r153812 - in /llvm/trunk: include/llvm/Analysis/ include/llvm/Transforms/IPO/ lib/Analysis/ lib/Transforms/IPO/ test/Transforms/Inline/ In-Reply-To: References: <20120331124242.93B692A6C065@llvm.org> Message-ID: On Wed, Apr 11, 2012 at 9:09 AM, Chandler Carruth wrote: > On Wed, Apr 11, 2012 at 1:56 AM, Chandler Carruth > wrote: >> >> On Wed, Apr 11, 2012 at 12:31 AM, Chandler Carruth >> wrote: >>> >>> I'll start looking for smoking guns right away though. >> >> >> This looks very much like the previous cases where inliner changes caused >> compile-time regressions. >> >> Looking at x86-64 of sqlite3, the profile with the trunk clang shows only >> 3.5% of all the time in the inline cost analysis. That's a bit higher than I >> would like (I've got some ideas to shrink it on two fronts that I will >> implement right away), it's not likely responsible for the near 10% >> regression your seeing; this function wasn't even free before. >> >> However, I'm seeing time spread pretty well between: JumpThreading, the >> RA, CorrelatedValueProp, GVN, and InstCombine. This looks like increased >> kicking in of the host of scalar optimizations giving us a broad slight >> slowdown. 
>> >> I'm still working on doing before/after profile comparisons and other >> things to see if I can tease out the culprit here. >> >> I also see several places where we can recoup a few percent in all >> likelihood; I'll try to tackle those if I can. > > > Ok, thanks to Chad for helping me get set up to look at this. I've > implemented the type of optimization that *should* help matters if the > inline cost computation were the problem. I've attached the patch. It > reduces the number of inline cost computations by 25% for sqlite3. I have a > plan for how to make even more invasive changes to the inliner that could > potentially save another 10% or so, but the alarming thing is that this > patch has *zero* impact on the -O3 compile time of the sqlite3 bitcode. =/ > However, if I tweak the inline cost computation to simply return higher > costs, or to reject a larger percentage of the functions, I can immediately > recoup all 9% regressions and a lot more. If your patch consistently leads to lower computed inlining costs, perhaps we should lower the inlining threshold? -Eli From elena.demikhovsky at intel.com Wed Apr 11 14:12:21 2012 From: elena.demikhovsky at intel.com (Demikhovsky, Elena) Date: Wed, 11 Apr 2012 19:12:21 +0000 Subject: [llvm-commits] Review Request: VPERM optimization for AVX2 In-Reply-To: References: Message-ID: Craig, thank you for the review. CL means cross-lane. - Elena From: Craig Topper [mailto:craig.topper at gmail.com] Sent: Wednesday, April 11, 2012 08:52 To: Demikhovsky, Elena Cc: llvm-commits at cs.uiuc.edu Subject: Re: [llvm-commits] Review Request: VPERM optimization for AVX2 Functionality-wise, this looks fine. Some style comments below. This comment is stale + // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate + // independently on 128-bit lanes. + unsigned NumElts = VT.getVectorNumElements(); 80 columns + SDValue res = DAG.getNode(VT.isInteger()?
X86ISD::VPERMD : X86ISD::VPERMPS, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, &permclMask[0], 8), V1); + return res; + } + if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64)) { + return getTargetShuffleNode(VT.isInteger()? X86ISD::VPERMQ : X86ISD::VPERMPD, dl, VT, V1, + getShuffleCLImmediate(SVOp), DAG); + } Dangling space after the else. What is the 0x80 value? The instruction only uses the lower 3-bits of each value. Probably cleaner codewise to make the value part conditional and not repeat the the push_back and getConstant calls twice. + if (M[i] < 0) + permclMask.push_back(DAG.getConstant(0x80, MVT::i32)); + else + permclMask.push_back(DAG.getConstant(M[i], MVT::i32)); + } What does the "CL" here stand for? +/// getShuffleCLImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. +/// Handles 256-bit. +static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { Either align the SDTShuff2Opl part across all 5 rows or remove the extra spaces from VPERMD/VPERMPS/VPERMQ def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; +def X86VPermd : SDNode<"X86ISD::VPERMD", SDTShuff2Op>; +def X86VPermps : SDNode<"X86ISD::VPERMPS", SDTShuff2Op>; +def X86VPermq : SDNode<"X86ISD::VPERMQ", SDTShuff2OpI>; +def X86VPermpd : SDNode<"X86ISD::VPERMPD", SDTShuff2OpI>; ~Craig On Tue, Apr 10, 2012 at 4:22 A