From craig.topper at gmail.com Mon Nov 21 00:57:39 2011 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 21 Nov 2011 06:57:39 -0000 Subject: [llvm-commits] [llvm] r145026 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86ISelLowering.h X86InstrFragmentsSIMD.td X86InstrSSE.td Message-ID: <20111121065740.1055B2A6C134@llvm.org> Author: ctopper Date: Mon Nov 21 00:57:39 2011 New Revision: 145026 URL: http://llvm.org/viewvc/llvm-project?rev=145026&view=rev Log: Add support for lowering 256-bit shuffles to VPUNPCKL/H for i16, i32, i64 if AVX2 is enabled. Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrSSE.td Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145026&r1=145025&r2=145026&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 21 00:57:39 2011 @@ -2851,6 +2851,9 @@ case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -2859,6 +2862,9 @@ case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: case X86ISD::VPERMILPS: case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: @@ -2932,6 +2938,9 @@ case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -2940,6 +2949,9 @@ case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: case 
X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -3550,13 +3562,14 @@ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || NumElts != 16)) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3590,22 +3603,23 @@ return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector M; N->getMask(M); - return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. static bool isUNPCKHMask(const SmallVectorImpl &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || NumElts != 16)) return false; // Handle 128 and 256-bit vector lengths. 
AVX defines UNPCK* to operate @@ -3637,10 +3651,10 @@ return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector M; N->getMask(M); - return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form @@ -4625,6 +4639,9 @@ case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: DecodePUNPCKHMask(NumElems, ShuffleMask); break; case X86ISD::UNPCKHPS: @@ -4637,6 +4654,9 @@ case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: DecodePUNPCKLMask(VT, ShuffleMask); break; case X86ISD::UNPCKLPS: @@ -6558,36 +6578,46 @@ X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; case MVT::v4f32: return X86ISD::UNPCKLPS; case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8i32: + if (HasAVX2) return X86ISD::VPUNPCKLDQY; + // else use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKLPSY; - case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4i64: + if (HasAVX2) return X86ISD::VPUNPCKLQDQY; + // else use fp unit for int unpack. 
case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; + case MVT::v16i16: return X86ISD::VPUNPCKLWDY; default: llvm_unreachable("Unknown type for unpckl"); } return 0; } -static inline unsigned getUNPCKHOpcode(EVT VT) { +static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKHDQ; case MVT::v2i64: return X86ISD::PUNPCKHQDQ; case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v8i32: // Use fp unit for int unpack. + case MVT::v8i32: + if (HasAVX2) return X86ISD::VPUNPCKHDQY; + // else use fp unit for int unpack. case MVT::v8f32: return X86ISD::VUNPCKHPSY; - case MVT::v4i64: // Use fp unit for int unpack. + case MVT::v4i64: + if (HasAVX2) return X86ISD::VPUNPCKHQDQY; + // else use fp unit for int unpack. case MVT::v4f64: return X86ISD::VUNPCKHPDY; case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; + case MVT::v16i16: return X86ISD::VPUNPCKHWDY; default: llvm_unreachable("Unknown type for unpckh"); } @@ -6688,6 +6718,7 @@ bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); @@ -6717,9 +6748,11 @@ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isMOVDDUPMask(SVOp) && (Subtarget->hasSSE3() || Subtarget->hasAVX()) && @@ -6730,9 +6763,10 @@ return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2()) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6779,7 +6813,8 @@ } // FIXME: fold these into legal mask. 
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + if (X86::isMOVLHPSMask(SVOp) && + !X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) @@ -6832,11 +6867,13 @@ return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); - if (X86::isUNPCKHMask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6845,9 +6882,9 @@ SDValue NewMask = NormalizeMask(SVOp, DAG); ShuffleVectorSDNode *NSVOp = cast(NewMask); if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { + if (X86::isUNPCKLMask(NSVOp, Subtarget->hasAVX2(), true)) { return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { + } else if (X86::isUNPCKHMask(NSVOp, Subtarget->hasAVX2(), true)) { return NewMask; } } @@ -6859,11 +6896,13 @@ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast(NewOp); - if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); - - if (X86::isUNPCKHMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); + + if (X86::isUNPCKHMask(NewSVOp, Subtarget->hasAVX2())) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); } // Normalize the node to match x86 shuffle ops if needed @@ -6904,9 +6943,11 @@ X86::getShuffleSHUFImmediate(SVOp), DAG); if 
(X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -11221,6 +11262,7 @@ case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; @@ -11228,10 +11270,16 @@ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY"; + case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY"; + case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY"; case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY"; + case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY"; + case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; @@ -11356,8 +11404,8 @@ isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || - isUNPCKLMask(M, VT) || - 
isUNPCKHMask(M, VT) || + isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || + isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || isUNPCKL_v_undef_Mask(M, VT) || isUNPCKH_v_undef_Mask(M, VT)); } @@ -14819,6 +14867,9 @@ case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHDQY: + case X86ISD::VPUNPCKHQDQY: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: @@ -14827,6 +14878,9 @@ case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLDQY: + case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: case X86ISD::VUNPCKLPSY: Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=145026&r1=145025&r2=145026&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Nov 21 00:57:39 2011 @@ -285,10 +285,16 @@ PUNPCKLWD, PUNPCKLDQ, PUNPCKLQDQ, + VPUNPCKLWDY, + VPUNPCKLDQY, + VPUNPCKLQDQY, PUNPCKHBW, PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPUNPCKHWDY, + VPUNPCKHDQY, + VPUNPCKHQDQY, VPERMILPS, VPERMILPSY, VPERMILPD, @@ -414,11 +420,13 @@ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. 
- bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=145026&r1=145025&r2=145026&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Nov 21 00:57:39 2011 @@ -144,11 +144,17 @@ def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; +def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>; +def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>; +def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>; def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>; +def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>; +def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>; def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; @@ -423,12 +429,12 @@ def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast(N)); + return X86::isUNPCKLMask(cast(N), Subtarget->hasAVX2()); }]>; def unpckh : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ 
- return X86::isUNPCKHMask(cast(N)); + return X86::isUNPCKHMask(cast(N), Subtarget->hasAVX2()); }]>; def pshufd : PatFrag<(ops node:$lhs, node:$rhs), Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145026&r1=145025&r2=145026&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Nov 21 00:57:39 2011 @@ -4242,9 +4242,9 @@ let Predicates = [HasAVX2] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy, bc_v8i32>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen @@ -4252,19 +4252,19 @@ def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; + [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, + VR256:$src2)))]>, VEX_4V; def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, (memopv4i64 addr:$src2))))]>, VEX_4V; defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy, bc_v16i16>, VEX_4V; - 
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy, bc_v8i32>, VEX_4V; /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen @@ -4272,12 +4272,12 @@ def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; + [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, + VR256:$src2)))]>, VEX_4V; def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, (memopv4i64 addr:$src2))))]>, VEX_4V; } From craig.topper at gmail.com Mon Nov 21 00:58:09 2011 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 21 Nov 2011 06:58:09 -0000 Subject: [llvm-commits] [llvm] r145027 - /llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Message-ID: <20111121065809.EB5232A6C134@llvm.org> Author: ctopper Date: Mon Nov 21 00:58:09 2011 New Revision: 145027 URL: http://llvm.org/viewvc/llvm-project?rev=145027&view=rev Log: Test case for r145026 Added: llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Added: llvm/trunk/test/CodeGen/X86/avx2-unpack.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-unpack.ll?rev=145027&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-unpack.ll (added) +++ llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Mon Nov 21 00:58:09 2011 @@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s + +; CHECK: vpunpckhdq +define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x 
i32> %src1, <8 x i32> %src2, <8 x i32> + ret <8 x i32> %shuffle.i +} + +; CHECK: vpunpckhqdq +define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> + ret <4 x i64> %shuffle.i +} + +; CHECK: vpunpckldq +define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> + ret <8 x i32> %shuffle.i +} + +; CHECK: vpunpcklqdq +define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> + ret <4 x i64> %shuffle.i +} + +; CHECK: vpunpckhwd +define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> + ret <16 x i16> %shuffle.i +} + +; CHECK: vpunpcklwd +define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> + ret <16 x i16> %shuffle.i +} From craig.topper at gmail.com Mon Nov 21 02:26:50 2011 From: craig.topper at gmail.com (Craig Topper) Date: Mon, 21 Nov 2011 08:26:50 -0000 Subject: [llvm-commits] [llvm] r145028 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-unpack.ll Message-ID: <20111121082650.D32552A6C134@llvm.org> Author: ctopper Date: Mon Nov 21 02:26:50 2011 New Revision: 145028 URL: http://llvm.org/viewvc/llvm-project?rev=145028&view=rev Log: Lowering for v32i8 to VPUNPCKLBW/VPUNPCKHBW when AVX2 is enabled. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td llvm/trunk/lib/Target/X86/X86InstrSSE.td llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145028&r1=145027&r2=145028&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 21 02:26:50 2011 @@ -2852,6 +2852,7 @@ case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2863,6 +2864,7 @@ case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: case X86ISD::VPERMILPS: @@ -2939,6 +2941,7 @@ case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2950,6 +2953,7 @@ case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: return DAG.getNode(Opc, dl, VT, V1, V2); @@ -3569,7 +3573,7 @@ "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. 
AVX defines UNPCK* to operate @@ -3619,7 +3623,7 @@ "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -4639,6 +4643,7 @@ case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -4654,6 +4659,7 @@ case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: @@ -6595,6 +6601,7 @@ case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; case MVT::v16i16: return X86ISD::VPUNPCKLWDY; + case MVT::v32i8: return X86ISD::VPUNPCKLBWY; default: llvm_unreachable("Unknown type for unpckl"); } @@ -6618,6 +6625,7 @@ case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; case MVT::v16i16: return X86ISD::VPUNPCKHWDY; + case MVT::v32i8: return X86ISD::VPUNPCKHBWY; default: llvm_unreachable("Unknown type for unpckh"); } @@ -11270,6 +11278,7 @@ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::VPUNPCKLBWY: return "X86ISD::VPUNPCKLBWY"; case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY"; case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY"; case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY"; @@ -11277,6 +11286,7 @@ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VPUNPCKHBWY: return "X86ISD::VPUNPCKHBWY"; case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY"; case X86ISD::VPUNPCKHDQY: return 
"X86ISD::VPUNPCKHDQY"; case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY"; @@ -14867,6 +14877,7 @@ case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -14878,6 +14889,7 @@ case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=145028&r1=145027&r2=145028&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Mon Nov 21 02:26:50 2011 @@ -285,6 +285,7 @@ PUNPCKLWD, PUNPCKLDQ, PUNPCKLQDQ, + VPUNPCKLBWY, VPUNPCKLWDY, VPUNPCKLDQY, VPUNPCKLQDQY, @@ -292,6 +293,7 @@ PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPUNPCKHBWY, VPUNPCKHWDY, VPUNPCKHDQY, VPUNPCKHQDQY, Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=145028&r1=145027&r2=145028&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Nov 21 02:26:50 2011 @@ -144,6 +144,7 @@ def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; +def X86Punpcklbwy : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>; def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>; def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>; def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>; @@ -152,6 +153,7 
@@ def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckhbwy : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>; def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>; def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>; def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>; Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=145028&r1=145027&r2=145028&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Nov 21 02:26:50 2011 @@ -4204,19 +4204,8 @@ bc_v8i16, 0>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64, 0>, VEX_4V; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, bc_v16i8, 0>, VEX_4V; @@ -4224,99 +4213,40 @@ bc_v8i16, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy, bc_v32i8>, VEX_4V; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy, bc_v16i16>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy, bc_v8i32>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", 
v4i64, X86Punpcklqdqy, + bc_v4i64>, VEX_4V; - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; - - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy, bc_v32i8>, VEX_4V; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy, bc_v16i16>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy, + bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>; defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>; } } // ExeDomain = SSEPackedInt Modified: llvm/trunk/test/CodeGen/X86/avx2-unpack.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-unpack.ll?rev=145028&r1=145027&r2=145028&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-unpack.ll (original) +++ llvm/trunk/test/CodeGen/X86/avx2-unpack.ll Mon Nov 21 02:26:50 2011 @@ -41,3 +41,17 @@ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> ret <16 x i16> %shuffle.i } + +; CHECK: vpunpckhbw +define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> + ret <32 x i8> %shuffle.i +} + +; CHECK: vpunpcklbw +define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> + ret <32 x i8> %shuffle.i +} From hfinkel at anl.gov Mon Nov 21 11:55:08 2011 From: hfinkel at anl.gov (Hal Finkel) Date: Mon, 21 Nov 2011 11:55:08 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <4EC504B5.2020408@grosser.es> References: <1319909412.23036.851.camel@sapling> <1319914924.23036.852.camel@sapling> <1319919418.23036.881.camel@sapling> <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> 
<1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> Message-ID: <1321898108.2507.36.camel@sapling> Tobias, I've attached an updated patch. It contains a few bug fixes and many (refactoring and coding-convention) changes inspired by your comments. I'm currently trying to fix the bug responsible for causing a compile failure when compiling test-suite/MultiSource/Applications/obsequi/toggle_move.c; after the pass begins to fuse instructions in a basic block in this file, the aliasing analysis starts producing different (more pessimistic) query answers. I've never before seen it do this, and I'm not sure why it is happening. Also odd, at the same time, the numeric labels that are assigned to otherwise unnamed instructions, change. I don't think I've seen this happen either (they generally seem to stay fixed once assigned). I don't know if these two things are related, but if anyone can provide some insight, I'd appreciate it. In any case, this version of the patch should be much more suitable for your (or anyone else's) further review. Thanks again, Hal On Thu, 2011-11-17 at 13:57 +0100, Tobias Grosser wrote: > On 11/17/2011 12:38 AM, Hal Finkel wrote: > > Tobias, et al., > > > > Attached is the my autovectorization pass. > > Very nice. Will you be at the developer summit? Maybe we could discuss > the integration there? > > Here a first review of the source code. 
> > > > diff --git a/docs/Passes.html b/docs/Passes.html > > index 5c42f3f..076effa 100644 > > --- a/docs/Passes.html > > +++ b/docs/Passes.html > > @@ -126,6 +126,7 @@ perl -e '$/ = undef; for (split(/\n/,<>)) { s:^ *///? ?::; print "

\n" if ! > > -adceAggressive Dead Code Elimination > > -always-inlineInliner for always_inline functions > > -argpromotionPromote 'by reference' arguments to scalars > > +-bb-vectorizeCombine instructions to vectorize within basic blocks > > Maybe 'Combine instructions to vector instructions within basic blocks' > > > diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h > > new file mode 100644 > > index 0000000..497518a > > --- /dev/null > > +++ b/include/llvm-c/Transforms/Vectorize.h > > @@ -0,0 +1,36 @@ > > +/*===---------------------------Vectorize.h ------------------- -*- C++ -*-===*\ > > +|*===----------- Vectorization Transformation Library C Interface ---------===*| > > +|* *| > > +|* The LLVM Compiler Infrastructure *| > > +|* *| > > +|* This file is distributed under the University of Illinois Open Source *| > > +|* License. See LICENSE.TXT for details. *| > > +|* *| > > +|*===----------------------------------------------------------------------===*| > > +|* *| > > +|* This header declares the C interface to libLLVMScalarOpts.a, which *| > > +|* implements various scalar transformations of the LLVM IR. *| > > +|* *| > > +|* Many exotic languages can interoperate with C code but have a harder time *| > > +|* with C++ due to name mangling. So in addition to C, this interface enables *| > > +|* tools written in such languages. *| > > +|* *| > > +\*===----------------------------------------------------------------------===*/ > > This comment does not match the content of the file. 
> > > > > +static cl::opt > > +RunVectorization("vectorize", cl::desc("Run vectorization passes")); > > + > > PassManagerBuilder::PassManagerBuilder() { > > OptLevel = 2; > > SizeLevel = 0; > > @@ -38,6 +43,7 @@ PassManagerBuilder::PassManagerBuilder() { > > DisableSimplifyLibCalls = false; > > DisableUnitAtATime = false; > > DisableUnrollLoops = false; > > + Vectorize = RunVectorization; > > Integrating vectorization like this seems to work for now. However, it > does not seem to be 100% clean. I wonder if we could follow the other > flags here and set Vectorize = false in the constructor and add the > flags which enable this to the tools that use the PassManagerBuilder. > You may even have had this or something similar before, because I remember > for your earlier patches -mllvm did not work for you in clang. If we > require the tools to explicitly set Vectorize, we would need to add a > clang specific flag that could drive the vectorizer. I am not sure if we > want to do this at this stage or even at all. > > For now I would keep it like this. I don't have a better solution, but > wanted to write down my thoughts. > > > > } > > > > PassManagerBuilder::~PassManagerBuilder() { > > @@ -170,6 +176,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase&MPM) { > > > > addExtensionsToPM(EP_ScalarOptimizerLate, MPM); > > > > + if (Vectorize) { > > + MPM.add(createBBVectorizePass()); > > + if (OptLevel> 1) { > > + MPM.add(createInstructionCombiningPass()); > > + MPM.add(createGVNPass()); // Remove redundancies > > + } > > + } > > + > > What is the reason you will not enable this at -O1? LLVM runs even at > -O1 a number of instcombine and GVN passes. I don't see a strong reason > why we would not want to do this after vectorization (In case it is > known to be useful in general). 
> > > > +++ b/lib/Transforms/Vectorize/BBVectorize.cpp > > @@ -0,0 +1,1338 @@ > > > + typedef std::pair value_pair; > > + typedef std::pair vp_pair; > > + typedef std::pair::iterator, > > + std::multimap::iterator> vp_iterator_pair; > > + typedef std::pair::iterator, > > + std::multimap::iterator> > > + vpp_iterator_pair; > > http://llvm.org/docs/CodingStandards.html#ll_naming > > "Type names (including classes, structs, enums, typedefs, etc) should be > nouns and start with an upper-case letter (e.g. TextFileReader)." > > > > + void getCandPairs(unsigned vBits, BasicBlock&BB, > > + std::multimap &candPairs, > > + std::vector &pairableInsts); > > "Variable names should be nouns (as they represent state). The name > should be camel case, and start with an upper case letter (e.g. Leader > or Boats)." > > This happens at several places. Can you check your code for this. > > > + void replValues(BasicBlock&BB, > > + std::vector &pairableInsts, > > + DenseMap& chosenPairs); > 'repl' is no obvious abbreviation for me. Can you use the full word or > equivalent shorter one? > > > + > > + virtual bool runOnBasicBlock(BasicBlock&BB) { > > + // const TargetData *TD = getAnalysisIfAvailable(); > > + > > + bool changed = false; > > + // Iterate a sufficient number of times to merge types of > > + // size 1, then 2, then 4, etc. up to half of the > > + // target vector width of the target vector register. > Any reason you don't use the 80 columns? > > Also, why are you iterating here? > > > > + for (unsigned vBits = 2, n = 1; vBits<= VectorBits&& > > + (!MaxIter || n<= MaxIter); vBits *= 2, ++n) { > > + DEBUG(dbgs()<< "BBV: fusing loop #"<< n<< "...\n"); > > + if (vectorizePairs(VectorBits, BB)) { > > + changed = true; > > + } > > + else { > > + break; > > + } > > In general you should use '} else {'. Here braces are not needed at all. 
> > > + static inline VectorType *getVecType(Type *iType) { > > + if (iType->isVectorTy()) { > > + unsigned numElem = cast(iType)->getNumElements(); > > + return VectorType::get(iType->getScalarType(), numElem*2); > > + } > > + else { > > + return VectorType::get(iType, 2); > > + } > > + } > > + > > + // Note: when this function returns 0, the > > + // resulting instructions are not actually fused. > > Use all 80 columns. What is a depth factor? Can you explain in the > comment what this function calculates and not what happens when it is > used. If you want to keep the use case, just put is an example. > > > + static inline size_t depthFactor(Value *v) { > > + if (isa(v) || isa(v)) { > > + return 0; > > + } > No '{}' needed. > > > + > > + return 1; > > + } > > + > > + // Returns 1 if J accesses the memory directly after I; -1 if I accesses > > + // the memory directly after J; and 0 otherwise. > > + int getPairPtrInfo(Instruction *I, Instruction *J, const TargetData&TD, > > + Value *&Iptr, Value *&Jptr, unsigned&Ialign, unsigned&Jalign) { > > Also getPairPtrInfo() does not sound like a very descriptive name. What > about getOffset(). > > I am not sure about returning 0 for the remaining cases. I think > an offset of 0 is actually a very useful case, which can be implemented > as a scalar load plus a splat. What about returning the offset as a > reference value and return a boolean if it was successfully computed. > The other alternative I see is to use an enum that specifies the known > offsets and also includes an UNKNOWN value. > > > + if (isa(I)) { > > + Iptr = cast(I)->getPointerOperand(); > > + Jptr = cast(J)->getPointerOperand(); > > + Ialign = cast(I)->getAlignment(); > > + Jalign = cast(J)->getAlignment(); > > + } > > + else { > > + Iptr = cast(I)->getPointerOperand(); > > + Jptr = cast(J)->getPointerOperand(); > > + Ialign = cast(I)->getAlignment(); > > + Jalign = cast(J)->getAlignment(); > > + } > > Use '} else {'. 
Also you are checking just the type of one > instruction. You may want to check both and add a call to > llvm_unreachable in case the types do not match. > > > + ScalarEvolution&SE = getAnalysis(); > > + const SCEV *IptrSCEV = SE.getSCEV(Iptr); > > + const SCEV *JptrSCEV = SE.getSCEV(Jptr); > > + > > + // If this is a trivial offset, then we'll get something > > + // like 1*sizeof(type). With target data, which we need > > + // anyway, this will get constant folded into a number. > Why don't you use the full 80 columns? > > > > + const SCEV *RelOffSCEV = SE.getMinusSCEV(JptrSCEV, IptrSCEV); > > What about just calling this 'Offset'? Those abbreviations are hard to > read. > > > + if (const SCEVConstant *ConstOffSCEV = > > + dyn_cast(RelOffSCEV)) { > > + ConstantInt *IntOff = ConstOffSCEV->getValue(); > > + int64_t off = IntOff->getSExtValue(); > > + > > + Type *VTy = cast(Iptr->getType())->getElementType(); > > You should assert that the types of both pointers are identical. > Also you should document that this function only works for vector types. > I actually expected it to just work for scalar types. Vector types look > a little bit weird here and I am actually not even sure if it is correct > for vector types. > > > + int64_t VTy_tss = (int64_t) TD.getTypeStoreSize(VTy); > > + > > + if (off == VTy_tss) { > > + return 1; > > + } else if (-off == VTy_tss) { > > + return -1; > > + } > Braces not needed. > > > + } > Did you think of using SE.getSizeOfExpr()? > > const SCEV *ElementSize = SE.getSizeofExpr(Iprt->getAllocType()) > const SCEV *ElementOffset = SE.getUDivExpr(RelOffSCEV, ElementSize); > > if (const SCEVConstant *ConstOffset = > dyn_cast(ElementOffset)) > return ConstOffset->getValue(); > > else > return "Unknown offset" > > > + bool BBVectorize::vectorizePairs(unsigned vBits, BasicBlock&BB) { > > + std::vector pairableInsts; > > + std::multimap candPairs; > > Variables should start with Uppercase letters. 
> > > + getCandPairs(vBits, BB, candPairs, pairableInsts); > > + if (!pairableInsts.size()) return false; > > + > > + // Now we have a map of all of the pairable instructions and we need to > > + // select the best possible pairing. A good pairing is one such that the > > + // users of the pair are also paired. This defines a (directed) forest > > + // over the pairs such that two pairs are connected iff the second pair > > + // uses the first. > > + > > + // Note that it only matters that both members of the second pair use some > > + // element of the first pair (to allow for splatting). > > + > > + std::multimap connPairs; > > + computeConnPairs(candPairs, pairableInsts, connPairs); > > + if (!connPairs.size()) return false; > > + > > + // Build the pairable-instruction dependency map > > + DenseSet pairableInstUsers; > > + buildDepMap(BB, candPairs, pairableInsts, pairableInstUsers); > > + > > + // There is now a graph of the connected pairs. For each variable, pick the > > + // pairing with the largest tree meeting the depth requirement on at least > > + // one branch. Then select all pairings that are part of that tree and > > + // remove them from the list of available parings and pairable variables. > > + > > + DenseMap chosenPairs; > > + choosePairs(candPairs, pairableInsts, connPairs, > > + pairableInstUsers, chosenPairs); > > + > > + if (!chosenPairs.size()) return false; > > + NumFusedOps += chosenPairs.size(); > > + > > + // A set of chosen pairs has now been selected. It is now necessary to > > + // replace the paired functions with vector functions. For this procedure > instructions with vector instructions. > > > + // each argument much be replaced with a vector argument. This vector > must > > > + // is formed by using build_vector on the old arguments. The replaced > > + // values are then replaced with a vector_extract on the result. > > + // Subsequent optimization passes should coalesce the build/extract > > + // combinations. 
> > + > > + replValues(BB, pairableInsts, chosenPairs); > > + > > + return true; > > + } > > + > > + void BBVectorize::getCandPairs(unsigned vBits, BasicBlock&BB, > > + std::multimap &candPairs, > > + std::vector &pairableInsts) { > This function is too big. It does not even fit on my large screen. Can > you extract subfunctions, e.g. isValidInst()? > > > + AliasAnalysis&AA = getAnalysis(); > > + BasicBlock::iterator E = BB.end(); > > + for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { > > The common pattern is: > > for (BasicBlock::iterator I = BB.getFirstInsertionPt(), E = BB.end(); I != E; > ++I) { > > BTW, why don't you start at BB.begin()? > > > + bool isGoodIntr = false; > > + if (isa(I)) { > > + if (Function *F = cast(I)->getCalledFunction()) { > > if (CallInst *CallInst = dyn_cast(I)) { > if (Function *F = CallInst->getCalledFunction()) { > > > + if (unsigned IID = F->getIntrinsicID()) { > > + switch(IID) { > > + case Intrinsic::sqrt: > > + case Intrinsic::powi: > > + case Intrinsic::sin: > > + case Intrinsic::cos: > > + case Intrinsic::log: > > + case Intrinsic::log2: > > + case Intrinsic::log10: > > + case Intrinsic::exp: > > + case Intrinsic::exp2: > > + case Intrinsic::pow: > > + isGoodIntr = !NoMath; > Is the fallthrough intended here? > > > + case Intrinsic::fma: > > + isGoodIntr = !NoFMA; > I would also put a break here. > > What happens in the default case, or do you cover all intrinsics? > > + } > > + } > > + } > Most of these '{}' are not needed. > > > + } > > + > > + // Vectorize simple loads and stores if possbile: > > + bool isLdStr = false; > > IsSimpleLoad? > > > + if (isa(I)) { > > + isLdStr = cast(I)->isSimple(); > > + } else if (isa(I)) { > > + isLdStr = cast(I)->isSimple(); > > + } > > if (LoadInst *Load = dyn_cast(I)) { > isLdStr = Load->isSimple(); > } else if (StoreInst *Store = dyn_cast(I)) { > isLdStr = Store->isSimple(); > } > > > + > > + // We can vectorize casts, but not casts of pointer types, etc. 
> > + bool isCast = false; > > + if (I->isCast()) { > > + isCast = true; > > + if (!cast(I)->getSrcTy()->isSingleValueType()) { > > + isCast = false; > > + } else if (!cast(I)->getDestTy()->isSingleValueType()) { > > + isCast = false; > > + } else if (cast(I)->getSrcTy()->isPointerTy()) { > > + isCast = false; > > + } else if (cast(I)->getDestTy()->isPointerTy()) { > > + isCast = false; > > + } > > + } > > + > > + if (!(I->isBinaryOp() || isa(I) || > > + isa(I) || isa(I) || > > + (!NoCasts&& isCast) || isGoodIntr || > > + (!NoMemOps&& isLdStr))) { > > + continue; > > + } > > + > > + // We can't vectorize memory operations without target data > > + if (AA.getTargetData() == 0&& isLdStr) { > > + continue; > > + } > > + > > + Type *T1, *T2; > > + if (isa(I)) { > > + // For stores, it is the value type, not the pointer type that matters > > + // because the value is what will come from a vector register. > > + > > + Value *Ival = cast(I)->getValueOperand(); > > + T1 = Ival->getType(); > > + } > > + else { > > + T1 = I->getType(); > > + } > > + > > + if (I->isCast()) { > > + T2 = cast(I)->getSrcTy(); > > + } > > + else { > > + T2 = T1; > > + } > > + > > + // Not every type can be vectorized... > > + if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || > > + !(VectorType::isValidElementType(T2) || T2->isVectorTy())) { > > + continue; > > + } > > + > > + if (NoInts&& (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) { > > + continue; > > + } > > + > > + if (NoFloats&& (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) { > > + continue; > > + } > > + > > + if (T1->getPrimitiveSizeInBits()> vBits/2 || > > + T2->getPrimitiveSizeInBits()> vBits/2) { > > + continue; > > + } > > + > > + // Look for an instruction with which to pair instruction *I... > This could be a separate function, no? 
> > > + DenseSet users; > > + AliasSetTracker writes(AA); > > + BasicBlock::iterator J = I; ++J; > > + for (unsigned ss = 0; J != E&& ss<= SearchLimit; ++J, ++ss) { > > + // Determine if J uses I, if so, exit the loop. > > This loop is way too big. You can extract here also a couple of > subfunctions. I will review this in detail after it is more readable. > > + bool usesI = false; > > + for (User::op_iterator i = J->op_begin(), e = J->op_end(); > > + i != e; ++i) { > > + Value *v = *i; > > + if (I == v || > > + (!FastDep&& users.count(v))) { > > Why a newline here? It should not break the 80 col limit. > > > + usesI = true; break; > One instruction per line, please. > > > + } > > + } > > + if (!usesI&& J->mayReadFromMemory()) { > > + for (AliasSetTracker::iterator i = writes.begin(), e = writes.end(); > > + i != e; ++i) { > > + for (AliasSet::iterator j = i->begin(), e2 = i->end(); > > + j != e2; ++j) { > > + AliasAnalysis::Location ptrLoc(j->getValue(), j->getSize(), > > + j->getTBAAInfo()); > > + if (AA.getModRefInfo(J, ptrLoc) != AliasAnalysis::NoModRef) { > > + usesI = true; break; > One instruction per line. > > > + } > > + } > > + if (usesI) break; > > + } > > + } > Many of these braces are not necessarily needed. In case this function > would be smaller they would also not be required for readability. > > > + if (FastDep) { > > + // Note: For this heuristic to be effective, independent operations > > + // must tend to be intermixed. This is likely to be true from some > > + // kinds of loop unrolling (but not the generic LLVM pass), > > + // but otherwise may require some kind of reordering pass. > > What does it mean that independent operations must be intermixed? Can > you give an example? 
> > > + > > + // When using fast dependency analysis, > > + // stop searching after first use: > > + if (usesI) break; > > + } > > + else { > > + if (usesI) { > > } else if (usesI) { > > > + if (J->mayWriteToMemory()) writes.add(J); > > + users.insert(J); > > + continue; > > + } > > + } > > + > > + // J does not use I, and comes before the first use of I, so it can be > > + // merged with I if the instructions are compatible. > > + bool isCompat = J->isSameOperationAs(I); > > + // FIXME: handle addsub-type operations! > > + > > + // Only merge two shuffles if they're both constant > > + // or both not constant. > > + if (isCompat&& isa(I)) { > > + isCompat = isa(I->getOperand(2))&& > > + isa(J->getOperand(2)); > > + // FIXME: We may want to vectorize non-constant shuffles also. > > + } > > + > > + // Loads and stores can be merged if they have different alignments, > > + // but are otherwise the same. > > + if (!isCompat&& isa(I)&& isa(J)) { > > + if (I->getType() == J->getType()) { > > + if (cast(I)->getPointerOperand()->getType() == > > + cast(J)->getPointerOperand()->getType()&& > > + cast(I)->isVolatile() == > > + cast(J)->isVolatile()&& > > + cast(I)->getOrdering() == > > + cast(J)->getOrdering()&& > > + cast(I)->getSynchScope() == > > + cast(J)->getSynchScope() > > + ) { > > + isCompat = true; > > + } > > + } > > + } else if (!isCompat&& isa(I)&& isa(J)) { > > + if (cast(I)->getValueOperand()->getType() == > > + cast(J)->getValueOperand()->getType()&& > > + cast(I)->getPointerOperand()->getType() == > > + cast(J)->getPointerOperand()->getType()&& > > + cast(I)->isVolatile() == > > + cast(J)->isVolatile()&& > > + cast(I)->getOrdering() == > > + cast(J)->getOrdering()&& > > + cast(I)->getSynchScope() == > > + cast(J)->getSynchScope() > > + ) { > > + isCompat = true; > > + } > > + } > > + > > + if (isCompat&& isLdStr) { > > + const TargetData&TD = *AA.getTargetData(); > > + > > + Value *Iptr, *Jptr; > > + unsigned Ialign, Jalign; > > + int rel = 
getPairPtrInfo(I, J, TD, Iptr, Jptr, Ialign, Jalign); > > Use uppercase letters. What does 'rel' mean? Can you use a more > descriptive name? > > > + > > + if (rel != 0) { > > + if (AlignedOnly) { > > + Type *aType = isa(I) ? > > + cast(I)->getValueOperand()->getType() : I->getType(); > > + // An aligned load or store is possible only if the instruction > > + // with the lower offset has an alignment suitable for the > > + // vector type. > > + > > + unsigned balign = Ialign; > > Uppercase letter. > > > + if (rel< 0) balign = Jalign; > > + > > + Type *vType = getVecType(aType); > > + unsigned vecalign = TD.getPrefTypeAlignment(vType); > > Uppercase. > > > + if (balign< vecalign) { > > + isCompat = false; > > This could just be a continue (or a return false if extracted to a > separate function). Also no braces needed. > > > + } > > + } > > + } > > + else { > > } else { > > > + isCompat = false; > This could just be a continue (or a return false if extracted to a > separate function). > > > > + } > > + } > > + > > + if (!isCompat) continue; > > + > > + // J is a candidate for merging with I. > > + if (!pairableInsts.size() || > > + pairableInsts[pairableInsts.size()-1] != I) { > > + pairableInsts.push_back(I); > > + } > > No braces needed. > > > + candPairs.insert(value_pair(I, J)); > > + DEBUG(dbgs()<< "BBV: candidate pair "<< *I<< > > + "<-> "<< *J<< "\n"); > > + } > > + } > > + > > + DEBUG(dbgs()<< "BBV: found "<< pairableInsts.size() > > +<< " instructions with candidate pairs\n"); > > + } > > + > > + void BBVectorize::computeConnPairs( > > + std::multimap &candPairs, > > + std::vector &pairableInsts, > > + std::multimap &connPairs) { > Uppercase names for arguments. Also, the function is way too long. Can > you extract smaller helper functions? I will review this then. 
> > > + > > + for (std::vector::iterator i = pairableInsts.begin(), > > + e = pairableInsts.end(); i != e; ++i) { > > + vp_iterator_pair choiceRange = candPairs.equal_range(*i); > > + > > + for (std::multimap::iterator j = choiceRange.first; > > + j != choiceRange.second; ++j) { > > + > > + // For each possible pairing for this variable, look at the uses of > > + // the first value... > > + for (Value::use_iterator I = j->first->use_begin(), > > + E = j->first->use_end(); I != E; ++I) { > > + vp_iterator_pair iPairRange = candPairs.equal_range(*I); > > + > > + // For each use of the first variable, look for uses of the second > > + // variable... > > + for (Value::use_iterator J = j->second->use_begin(), > > + E2 = j->second->use_end(); J != E2; ++J) { > > + vp_iterator_pair jPairRange = candPairs.equal_range(*J); > > + > > + // Look for: > > + for (std::multimap::iterator k = iPairRange.first; > > + k != iPairRange.second; ++k) { > > + if (k->second == *J) { > > + connPairs.insert(vp_pair(*j, value_pair(*I, *J))); > > + break; > > + } > > + } > > + // Look for: > > + for (std::multimap::iterator k = jPairRange.first; > > + k != jPairRange.second; ++k) { > > + if (k->second == *I) { > > + connPairs.insert(vp_pair(*j, value_pair(*J, *I))); > > + break; > > + } > > + } > > + } > > + > > + // Look for cases where just the first value in the pair is used by > > + // both members of another pair (splatting). > > + Value::use_iterator J = j->first->use_begin(); > > + if (!SplatBreaksChain) for (; J != E; ++J) { > > + for (std::multimap::iterator k = iPairRange.first; > > + k != iPairRange.second; ++k) { > > + if (k->second == *J) { > > + connPairs.insert(vp_pair(*j, value_pair(*I, *J))); > > + break; > > + } > > + } > > + } > > + } > > + > > + // Look for cases where just the second value in the pair is used by > > + // both members of another pair (splatting). 
> > + if (!SplatBreaksChain) > > + for (Value::use_iterator I = j->second->use_begin(), > > + E = j->second->use_end(); I != E; ++I) { > > + vp_iterator_pair iPairRange = candPairs.equal_range(*I); > > + > > + Value::use_iterator J = j->second->use_begin(); > > + for (; J != E; ++J) { > > + for (std::multimap::iterator k = iPairRange.first; > > + k != iPairRange.second; ++k) { > > + if (k->second == *J) { > > + connPairs.insert(vp_pair(*j, value_pair(*I, *J))); > > + break; > > + } > > + } > > + } > > + } > > + } > > + } > > + > > + DEBUG(dbgs()<< "BBV: found "<< connPairs.size() > > +<< " pair connections.\n"); > > + } > > > > + void BBVectorize::buildDepMap( > > + BasicBlock&BB, > > + std::multimap &candPairs, > > + std::vector &pairableInsts, > > + DenseSet &pairableInstUsers) { > Uppercase variable names. > > > + DenseSet isInPair; > > + for (std::multimap::iterator i = candPairs.begin(), > > + e = candPairs.end(); i != e; ++i) { > > Uppercase letters are used in LLVM for iterators. > > > + isInPair.insert(i->first); isInPair.insert(i->second); > > + } > > No braces needed. > > > + > > + // Iterate through the basic block, recording all users of each > > + // pairable instruction. > > + > > + AliasAnalysis&AA = getAnalysis(); > > + BasicBlock::iterator E = BB.end(); > > + for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { > > + if (isInPair.find(I) == isInPair.end()) continue; > > + > > + DenseSet users; > > + AliasSetTracker writes(AA); > > + BasicBlock::iterator J = I; ++J; > > + for (; J != E; ++J) { > > + // Determine if J uses I, if so, exit the loop. > Just extract this into a small helper function. No need to keep it here. 
> > > + bool usesI = false; > > + for (User::op_iterator i = J->op_begin(), e = J->op_end(); > > + i != e; ++i) { > > + Value *v = *i; > > + if (I == v || users.count(v)) { > > + usesI = true; break; > > + } > > + } > > + if (!usesI&& J->mayReadFromMemory()) { > > And this one as well > > + for (AliasSetTracker::iterator i = writes.begin(), e = writes.end(); > > + i != e; ++i) { > > + for (AliasSet::iterator j = i->begin(), e2 = i->end(); > > + j != e2; ++j) { > > + AliasAnalysis::Location ptrLoc(j->getValue(), j->getSize(), > > + j->getTBAAInfo()); > > + if (AA.getModRefInfo(J, ptrLoc) != AliasAnalysis::NoModRef) { > > + usesI = true; break; > > + } > > + } > > + if (usesI) break; > > + } > > + } > > + if (usesI) { > > + if (J->mayWriteToMemory()) writes.add(J); > > + users.insert(J); > > + } > > > + } > The resulting loop should be very simple. > > > + > > + for (DenseSet::iterator i = users.begin(), e = users.end(); > > + i != e; ++i) { > > + pairableInstUsers.insert(value_pair(I, *i)); > > + } > > + } > > + } > > + > > What does it mean pairs are in conflict? What about adding a comment > about what this actually means. > > > + bool BBVectorize::pairsConflict(value_pair i, value_pair j, > > + DenseSet &pairableInstUsers) { > > + // Two pairs are in conflict if they are mutual users of eachother. 
> > + bool jui = pairableInstUsers.count(value_pair(i.first, j.first)) || > > + pairableInstUsers.count(value_pair(i.first, j.second)) || > > + pairableInstUsers.count(value_pair(i.second, j.first)) || > > + pairableInstUsers.count(value_pair(i.second, j.second)); > > + bool iuj = pairableInstUsers.count(value_pair(j.first, i.first)) || > > + pairableInstUsers.count(value_pair(j.first, i.second)) || > > + pairableInstUsers.count(value_pair(j.second, i.first)) || > > + pairableInstUsers.count(value_pair(j.second, i.second)); > > + return (jui&& iuj); > > + } > > + > > + void BBVectorize::choosePairs( > > + std::multimap &candPairs, > > + std::vector &pairableInsts, > > + std::multimap &connPairs, > > + DenseSet &pairableInstUsers, > > + DenseMap& chosenPairs) { > > This one is again way too large. Can you please extract subfunctions? > > I need to stop here, as I have run out of time for today. I hope you already > get an impression of what kind of improvements I suggest. If you agree, > please go ahead and integrate them. Especially the use of smaller > subfunctions will help me a lot when reviewing this again. Most probably > many of your inline comments could become the descriptions of some of > those extracted functions. > > Cheers > Tobi > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory -------------- next part -------------- A non-text attachment was scrubbed... 
Name: llvm_bb_vectorize-20111121.diff Type: text/x-patch Size: 94105 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111121/15e8fc19/attachment-0001.bin From nicholas at mxc.ca Mon Nov 21 12:32:21 2011 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 21 Nov 2011 18:32:21 -0000 Subject: [llvm-commits] [llvm] r145044 - in /llvm/trunk: include/llvm/Analysis/CaptureTracking.h lib/Analysis/CaptureTracking.cpp Message-ID: <20111121183221.48F2E1BE001@llvm.org> Author: nicholas Date: Mon Nov 21 12:32:21 2011 New Revision: 145044 URL: http://llvm.org/viewvc/llvm-project?rev=145044&view=rev Log: Add virtual destructor. Whoops! Modified: llvm/trunk/include/llvm/Analysis/CaptureTracking.h llvm/trunk/lib/Analysis/CaptureTracking.cpp Modified: llvm/trunk/include/llvm/Analysis/CaptureTracking.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/CaptureTracking.h?rev=145044&r1=145043&r2=145044&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/CaptureTracking.h (original) +++ llvm/trunk/include/llvm/Analysis/CaptureTracking.h Mon Nov 21 12:32:21 2011 @@ -37,6 +37,8 @@ /// addition to the interface here, you'll need to provide your own getters /// to see whether anything was captured. struct CaptureTracker { + virtual ~CaptureTracker(); + /// tooManyUses - The depth of traversal has breached a limit. There may be /// capturing instructions that will not be passed into captured(). 
virtual void tooManyUses() = 0; Modified: llvm/trunk/lib/Analysis/CaptureTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CaptureTracking.cpp?rev=145044&r1=145043&r2=145044&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/CaptureTracking.cpp (original) +++ llvm/trunk/lib/Analysis/CaptureTracking.cpp Mon Nov 21 12:32:21 2011 @@ -19,6 +19,8 @@ #include "llvm/Analysis/CaptureTracking.h" using namespace llvm; +CaptureTracker::~CaptureTracker() {} + namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) From nicholas at mxc.ca Mon Nov 21 13:42:56 2011 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 21 Nov 2011 19:42:56 -0000 Subject: [llvm-commits] [llvm] r145047 - in /llvm/trunk: lib/Analysis/CaptureTracking.cpp lib/Analysis/MemoryDependenceAnalysis.cpp test/Transforms/GVN/rle.ll Message-ID: <20111121194256.836791BE001@llvm.org> Author: nicholas Date: Mon Nov 21 13:42:56 2011 New Revision: 145047 URL: http://llvm.org/viewvc/llvm-project?rev=145047&view=rev Log: Fix crasher in GVN due to my recent capture tracking changes. Modified: llvm/trunk/lib/Analysis/CaptureTracking.cpp llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp llvm/trunk/test/Transforms/GVN/rle.ll Modified: llvm/trunk/lib/Analysis/CaptureTracking.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/CaptureTracking.cpp?rev=145047&r1=145046&r2=145047&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/CaptureTracking.cpp (original) +++ llvm/trunk/lib/Analysis/CaptureTracking.cpp Mon Nov 21 13:42:56 2011 @@ -53,6 +53,9 @@ /// counts as capturing it or not. 
bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { + assert(!isa(V) && + "It doesn't make sense to ask whether a global is captured."); + // TODO: If StoreCaptures is not true, we could do Fancy analysis // to determine whether this store is not actually an escape point. // In that case, BasicAliasAnalysis should be updated as well to Modified: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=145047&r1=145046&r2=145047&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp (original) +++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp Mon Nov 21 13:42:56 2011 @@ -375,7 +375,7 @@ // with a smarter AA in place, this test is just wasting compile time. if (!DT) return AliasAnalysis::ModRef; const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); - if (!isIdentifiedObject(Object) || isa(Object)) + if (!isIdentifiedObject(Object) || isa(Object)) return AliasAnalysis::ModRef; ImmutableCallSite CS(Inst); if (!CS.getInstruction()) return AliasAnalysis::ModRef; Modified: llvm/trunk/test/Transforms/GVN/rle.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle.ll?rev=145047&r1=145046&r2=145047&view=diff ============================================================================== --- llvm/trunk/test/Transforms/GVN/rle.ll (original) +++ llvm/trunk/test/Transforms/GVN/rle.ll Mon Nov 21 13:42:56 2011 @@ -26,6 +26,15 @@ ret i8 %Y } +;; No PR filed, crashed in CaptureTracker. 
+declare void @helper() +define void @crash1() { + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind + %tmp = load i8* bitcast (void ()* @helper to i8*) + %x = icmp eq i8 %tmp, 15 + ret void +} + ;;===----------------------------------------------------------------------===;; ;; Store -> Load and Load -> Load forwarding where src and dst are different From spop at codeaurora.org Mon Nov 21 14:46:55 2011 From: spop at codeaurora.org (Sebastian Pop) Date: Mon, 21 Nov 2011 20:46:55 -0000 Subject: [llvm-commits] [llvm] r145048 - /llvm/trunk/include/llvm/Support/Host.h Message-ID: <20111121204655.BDEDE1BE001@llvm.org> Author: spop Date: Mon Nov 21 14:46:55 2011 New Revision: 145048 URL: http://llvm.org/viewvc/llvm-project?rev=145048&view=rev Log: fix typo in comment Modified: llvm/trunk/include/llvm/Support/Host.h Modified: llvm/trunk/include/llvm/Support/Host.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Host.h?rev=145048&r1=145047&r2=145048&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/Host.h (original) +++ llvm/trunk/include/llvm/Support/Host.h Mon Nov 21 14:46:55 2011 @@ -33,8 +33,8 @@ return !isLittleEndianHost(); } - /// getDefaultTargetTriple() - Return the target triple of the running - /// system. + /// getDefaultTargetTriple() - Return the default target triple the compiler + /// has been configured to produce code for. /// /// The target triple is a string in the format of: /// CPU_TYPE-VENDOR-OPERATING_SYSTEM From nlewycky at google.com Mon Nov 21 17:38:54 2011 From: nlewycky at google.com (Nick Lewycky) Date: Mon, 21 Nov 2011 15:38:54 -0800 Subject: [llvm-commits] Proposal/patch: Enable bitcode streaming In-Reply-To: References: Message-ID: I don't have many comments since not only does this patch work, I've also reviewed it before. I didn't notice a single logic error. 
:) Regardless I do have a few simple comments. +class BitstreamBytes { +public: + BitstreamBytes() { } + + virtual ~BitstreamBytes() { } This is in a header file. Please move the implementation of the virtual destructor to BitcodeReader.cpp, or else copies will end up in every .o file that includes this header. Same with MemoryBitstreamBytes and LazyBitstreamBytes. More general question, any reason to put MemoryBitstreamBytes and LazyBitstreamBytes implementations inside the header? You could create lib/Bitcode/Reader/BitstreamReader.cpp. +private: + BitstreamBytes(const BitstreamBytes&); // NOT IMPLEMENTED + void operator=(const BitstreamBytes&); // NOT IMPLEMENTED We use "DO NOT IMPLEMENT" elsewhere. It makes it clearer that it's not a todo. Also on the subclasses. + // fetch enough bytes such that Pos can be read or EOF is reached + // (i.e. BytesRead > Pos). Return true if Pos can be read. + // Unlike most of the functions in BitcodeReader, returns true on success + bool fetchToPos(size_t Pos) { Missing period on "success". + virtual size_t getEndPos() { + if (BitcodeSize) return BitcodeSize; + size_t pos = BytesRead + kChunkSize; + // keep fetching until we run out of bytes + while(fetchToPos(pos)) pos += kChunkSize; + return BitcodeSize; + } Space after "while", like there is on "if". http://llvm.org/docs/CodingStandards.html#micro_spaceparen +BitcodeStreamer* getBitcodeFileStreamer(const std::string &Filename, + std::string *Err); Inconsistent placement of & and * (the rest of your patch put them before the space). 
+/// FindFunctionInStream - Find the function body in the bitcode stream +bool BitcodeReader::FindFunctionInStream(Function *F, + DenseMap::iterator DeferredFunctionInfoIterator) { + while (DeferredFunctionInfoIterator->second == 0) { + if (Stream.AtEndOfStream()) + return Error("Could not find Function in stream"); + // ParseModule will parse the next body in the stream and set its + // position in the DeferredFunctionInfo map + if(ParseModule(true)) return true; Space after "if". +Module *llvm::getStreamedBitcodeModule(const std::string& name, + BitcodeStreamer* streamer, + LLVMContext& Context, + std::string *ErrMsg) { This file is following space-then-sigil format. Please update three occurrences there. "BitcodeStreamer* streamer" --> "BitcodeStreamer *streamer" +// This file implements BitcodeStreamer, which fetches bytes of bitcode from +// a stream source. It provides support for streaming (lazy reading) of +// bitcode. TODO(dschuff) describe which final implementations are included Did you intend to fix the TODO before committing or did you want to leave that in? +// * BitstreamBytes doesn't care about complexities like using +// threads/async callbacks to actuall overlap download+compile Typo: actuall --> actually +// Very simple stream backed by a file. Mostly useful for stdin and debugging; +// actual file access is probably still best done with mmap +class BitcodeFileStreamer : public BitcodeStreamer { + int Fd; +public: + BitcodeFileStreamer() : Fd(0) {} It looks like this class has 1-space indent? Please use two. + if (LazyBitcode) { + std::string StrError; + BitcodeStreamer* streamer = getBitcodeFileStreamer(InputFilename,&StrError); Star on the right. + if (LazyBitcode) { + FunctionPassManager* P = static_cast(PM.get()); Star on the right! One other thing. The behaviour when LazyBitcode is true could be really different from when it is off on the same .bc file on disk.
It'd be good to add comments pointing out why (module-level assembly comes to mind, anything else?). This looks really good, thanks for working on it! Because of the potential for slowdown, I'd prefer if Chris could give it a thumbs-up before it lands. Nick On 9 November 2011 16:57, Derek Schuff wrote: > Hello all, > The following is a proposal (and a prototype patch) to enable bitcode > streaming. The overall goal is to be able to overlap bitcode > reading/download with compilation, a functionality useful obviously for > pnacl and renderscript but also potentially for any situation where the > interface between the frontend and backend is something other than a file. > > In the current state of the world, at a high level, there are 2 things > keeping this from happening. The first is that BitcodeReader construction > takes a MemoryBuffer which it expects to be filled with bitcode, and inside > BitcodeReader, the BitstreamCursor (which is the primary interface to the > bitcode itself) gets pointers to the bitcode in memory, and does all of its > magic with pointer arithmetic. The second issue is that in > BitcodeReader::ParseModule (which is run right after the Module and > BitcodeReader objects are created), the reader makes a pass over the entire > bitcode file. This step does everything except read the function bodies, > but it records the bit locations of each function for future > materialization. > > High-level change description: > This patch creates a class called BitcodeStream, which is a very simple > interface with one method (GetBytes), which fetches the requested number of > bytes from the stream, and writes them into the requested destination. This > method may block the calling thread if there are not yet enough bytes > available in the stream buffer (similarly to a stdin or socket read). > > The first issue above is addressed by introducing the BitstreamVector, an > abstraction that wraps the bitcode in memory.
Instead of using pointers, > the BitstreamCursor uses indices and gets bitcode bytes by indexing (i.e. > operator[] ) the BitstreamVector. When streaming is not used, the > BitstreamVector itself keeps pointers to the start and end of the backing > MemoryBuffer and the indexing operator is just a pointer dereference. For > streaming use, the BitstreamVector has a BitcodeStream object. If a byte is > requested that has not yet been fetched, it calls GetBytes to get more, > until it has enough to return the requested byte. > This model of allowing any byte to be requested and blocking the caller > has the advantage that there is no structural/architectural change required > at this lowest level, nor at the high level (A FunctionPassManager is used > to iterate over all the functions and compile each one). > > The second issue is solved by 2 simple changes. The first is in > ParseModule. Instead of a single pass over all the bitcode, ParseModule > becomes resumable. ParseModule will do its normal handling for top-level > records, type table blocks, metadata, etc, but if streaming is in use, it > will save its state and return as soon as a function subblock is > encountered (rather than saving its location and skipping over it). Each > subsequent time it is called, it bookmarks and skips one function block. > Later, when a function needs to be materialized, if the function body has > been seen already, then materialization is the same as before. Otherwise, > Materialize will keep calling ParseModule (each time bookmarking and > skipping one function body) until the requested function is found. The one > other change required to make this work simply is that the bitcode writer > writes function bodies as the last subblock (currently the attachment > metadata and value symbol table are written after the function bodies). > > The prototype patch is attached and can also be viewed online at > http://codereview.chromium.org/8393017/ . 
Feedback is welcome, as well as > guidance from the relevant code owners/reviewers regarding what the next > step needs to be toward committing this. > > Thanks, > -Derek > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111121/5804dcf1/attachment.html From craig.topper at gmail.com Mon Nov 21 18:44:42 2011 From: craig.topper at gmail.com (Craig Topper) Date: Tue, 22 Nov 2011 00:44:42 -0000 Subject: [llvm-commits] [llvm] r145053 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86Subtarget.h Message-ID: <20111122004442.4B9591BE001@llvm.org> Author: ctopper Date: Mon Nov 21 18:44:41 2011 New Revision: 145053 URL: http://llvm.org/viewvc/llvm-project?rev=145053&view=rev Log: Add methods for querying minimum SSE version along with AVX. Simplifies all the places that had to check a version of SSE and AVX. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86Subtarget.h Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145053&r1=145052&r2=145053&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 21 18:44:41 2011 @@ -908,7 +908,7 @@ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -980,7 +980,7 @@ } } - if (Subtarget->hasSSE42() || Subtarget->hasAVX()) + if (Subtarget->hasSSE42orAVX()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { @@ -3970,7 +3970,7 @@ /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -3998,7 +3998,7 @@ /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -5509,7 +5509,7 @@ return LD; // For SSE 4.1, use insertps to put the high elements into the low element. 
- if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) { + if (getSubtarget()->hasSSE41orAVX()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -5680,7 +5680,7 @@ // quads, disable the next transformation since it does not help SSSE3. bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -5753,7 +5753,7 @@ // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { SmallVector pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -5821,8 +5821,7 @@ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFLWImmediate(NewV.getNode()), @@ -5850,8 +5849,7 @@ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFHWImmediate(NewV.getNode()), @@ -5917,7 +5915,7 @@ } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. 
- if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) { + if (TLI.getSubtarget()->hasSSSE3orAVX()) { SmallVector pshufbMask; // If all result elements are from one input vector, then only translate @@ -6762,8 +6760,7 @@ return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, DAG); - if (X86::isMOVDDUPMask(SVOp) && - (Subtarget->hasSSE3() || Subtarget->hasAVX()) && + if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); @@ -6771,7 +6768,7 @@ return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2()) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, DAG); @@ -6796,8 +6793,7 @@ bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasXMMInt() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = HasXMMInt && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -6821,8 +6817,7 @@ } // FIXME: fold these into legal mask. 
- if (X86::isMOVLHPSMask(SVOp) && - !X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp, HasAVX2)) return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) @@ -6875,11 +6870,11 @@ return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp, Subtarget->hasAVX2())) + if (X86::isUNPCKLMask(SVOp, HasAVX2)) return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, DAG); - if (X86::isUNPCKHMask(SVOp, Subtarget->hasAVX2())) + if (X86::isUNPCKHMask(SVOp, HasAVX2)) return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, DAG); @@ -6890,9 +6885,9 @@ SDValue NewMask = NormalizeMask(SVOp, DAG); ShuffleVectorSDNode *NSVOp = cast(NewMask); if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, Subtarget->hasAVX2(), true)) { + if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) { return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, Subtarget->hasAVX2(), true)) { + } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) { return NewMask; } } @@ -6904,11 +6899,11 @@ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast(NewOp); - if (X86::isUNPCKLMask(NewSVOp, Subtarget->hasAVX2())) + if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, DAG); - if (X86::isUNPCKHMask(NewSVOp, Subtarget->hasAVX2())) + if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, DAG); } @@ -6923,7 +6918,7 @@ SmallVector M; SVOp->getMask(M); - if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, X86::getShufflePALIGNRImmediate(SVOp), DAG); @@ -7109,7 +7104,7 @@ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { 
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -7251,7 +7246,7 @@ return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) + if (Subtarget->hasSSE41orAVX()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -8741,9 +8736,9 @@ // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). - if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42orAVX()) return SDValue(); - if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41orAVX()) return SDValue(); // Since SSE has no unsigned integer comparisons, we need to flip the sign @@ -11403,7 +11398,7 @@ EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()); + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()); // FIXME: pshufb, blends, shifts. 
return (VT.getVectorNumElements() == 2 || @@ -11413,7 +11408,7 @@ isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()) || isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || isUNPCKL_v_undef_Mask(M, VT) || @@ -11822,7 +11817,7 @@ MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && + assert(Subtarget->hasSSE42orAVX() && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); @@ -13982,7 +13977,7 @@ // look for psign/blend if (VT == MVT::v2i64 || VT == MVT::v4i64) { - if (!(Subtarget->hasSSSE3() || Subtarget->hasAVX()) || + if (!Subtarget->hasSSSE3orAVX() || (VT == MVT::v4i64 && !Subtarget->hasAVX2())) return SDValue(); @@ -14052,7 +14047,7 @@ return DAG.getNode(ISD::BITCAST, DL, VT, Sign); } // PBLENDVB only available on SSE 4.1 - if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) + if (!Subtarget->hasSSE41orAVX()) return SDValue(); EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; @@ -14577,8 +14572,7 @@ SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14592,8 +14586,7 @@ SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. 
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14797,8 +14790,7 @@ SDValue Op1 = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if ((Subtarget->hasSSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v8i16 || VT == MVT::v4i32) && + if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1); @@ -14830,8 +14822,7 @@ // Try to synthesize horizontal adds from adds of shuffles. EVT VT = N->getValueType(0); - if ((Subtarget->hasSSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v8i16 || VT == MVT::v4i32) && + if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1); Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=145053&r1=145052&r2=145053&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original) +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Mon Nov 21 18:44:41 2011 @@ -190,6 +190,10 @@ bool hasAVX2() const { return HasAVX2; } bool hasXMM() const { return hasSSE1() || hasAVX(); } bool hasXMMInt() const { return hasSSE2() || hasAVX(); } + bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } + bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } + bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } + bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { 
return HasFMA3; } From craig.topper at gmail.com Mon Nov 21 19:57:35 2011 From: craig.topper at gmail.com (Craig Topper) Date: Tue, 22 Nov 2011 01:57:35 -0000 Subject: [llvm-commits] [llvm] r145055 - in /llvm/trunk/lib/Target/X86: InstPrinter/X86InstComments.cpp Utils/X86ShuffleDecode.cpp Utils/X86ShuffleDecode.h X86ISelLowering.cpp Message-ID: <20111122015735.7C5101BE001@llvm.org> Author: ctopper Date: Mon Nov 21 19:57:35 2011 New Revision: 145055 URL: http://llvm.org/viewvc/llvm-project?rev=145055&view=rev Log: Fix shuffle decoding logic to handle UNPCKLPS/UNPCKLPD on 256-bit vectors correctly. Add support for decoding UNPCKHPS/UNPCKHPD for AVX 128-bit and 256-bit forms. Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=145055&r1=145054&r2=145055&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original) +++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Mon Nov 21 19:57:35 2011 @@ -197,16 +197,44 @@ Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(2, ShuffleMask); + DecodeUNPCKHPDMask(2, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPDrm: + DecodeUNPCKHPDMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKHPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VUNPCKHPDYrm: + DecodeUNPCKLPDMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(4, ShuffleMask); + DecodeUNPCKHPSMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSrm: + DecodeUNPCKHPSMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKHPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSYrm: + DecodeUNPCKHPSMask(8, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::VPERMILPSri: DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), ShuffleMask); Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=145055&r1=145054&r2=145055&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original) +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Mon Nov 21 19:57:35 2011 @@ -142,11 +142,32 @@ } } -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); // Reads from dest - ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src +void DecodeUNPCKHPSMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodeUNPCKHPDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} + +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl &ShuffleMask) { + unsigned NumElts = 
VT.getVectorNumElements(); + + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts + NumLaneElts/2; + unsigned End = s * NumLaneElts + NumLaneElts; + for (unsigned i = Start; i != End; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + } } } @@ -163,8 +184,7 @@ /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl &ShuffleMask) { +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -173,16 +193,13 @@ if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - unsigned Start = 0; - unsigned End = NumLaneElts / 2; for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts; + unsigned End = s * NumLaneElts + NumLaneElts/2; for (unsigned i = Start; i != End; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2 + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 } - // Process the next 128 bits. 
- Start += NumLaneElts; - End += NumLaneElts; } } Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=145055&r1=145054&r2=145055&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original) +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Mon Nov 21 19:57:35 2011 @@ -67,8 +67,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl &ShuffleMask); +void DecodeUNPCKHPSMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodeUNPCKHPDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl &ShuffleMask); void DecodeUNPCKLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); @@ -79,8 +87,7 @@ /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
-void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl &ShuffleMask); +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl &ShuffleMask); // DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145055&r1=145054&r2=145055&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Nov 21 19:57:35 2011 @@ -4653,7 +4653,7 @@ case X86ISD::UNPCKHPD: case X86ISD::VUNPCKHPSY: case X86ISD::VUNPCKHPDY: - DecodeUNPCKHPMask(NumElems, ShuffleMask); + DecodeUNPCKHPMask(VT, ShuffleMask); break; case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: From hfinkel at anl.gov Mon Nov 21 21:22:41 2011 From: hfinkel at anl.gov (Hal Finkel) Date: Mon, 21 Nov 2011 21:22:41 -0600 Subject: [llvm-commits] [LLVMdev] [PATCH] BasicBlock Autovectorization Pass In-Reply-To: <1321898108.2507.36.camel@sapling> References: <1319909412.23036.851.camel@sapling> <1319914924.23036.852.camel@sapling> <1319919418.23036.881.camel@sapling> <1319928991.23036.957.camel@sapling> <1320108633.23036.1266.camel@sapling> <1320172356.23036.1298.camel@sapling> <4EB0462C.5010209@grosser.es> <1320184739.23036.1334.camel@sapling> <1320191694.23036.1497.camel@sapling> <1320749109.19359.76.camel@sapling> <4EB90E98.4010805@grosser.es> <1320762963.19359.117.camel@sapling> <4EB98207.2070807@grosser.es> <1320791390.19359.262.camel@sapling> <4EBC4B0F.6010609@grosser.es> <1321050998.19359.539.camel@sapling> <4EBDA7F9.9080709@grosser.es> <1321053083.19359.550.camel@sapling> <4EBDB1BF.7090006@grosser.es> <1321400339.19359.782.camel@sapling> <1321486739.19359.1067.camel@sapling> <4EC504B5.2020408@grosser.es> <1321898108.2507.36.camel@sapling> Message-ID: <1321932161.2507.101.camel@sapling> On Mon, 2011-11-21 at 11:55 -0600, Hal Finkel wrote: 
> Tobias, > > I've attached an updated patch. It contains a few bug fixes and many > (refactoring and coding-convention) changes inspired by your comments. > > I'm currently trying to fix the bug responsible for causing a compile > failure when compiling > test-suite/MultiSource/Applications/obsequi/toggle_move.c; after the > pass begins to fuse instructions in a basic block in this file, the > aliasing analysis starts producing different (more pessimistic) query > answers. I've never before seen it do this, and I'm not sure why it is > happening. Also odd, at the same time, the numeric labels that are > assigned to otherwise unnamed instructions change. I don't think I've > seen this happen either (they generally seem to stay fixed once > assigned). I don't know if these two things are related, but if anyone > can provide some insight, I'd appreciate it. I think that I see what is happening in this case (please someone tell me if this does not make sense). In the problematic basic block, there are some loads and stores that are independent. The default aliasing analysis can tell that these loads and stores don't reference the same memory region. Then, some of the inputs to the getelementptr instructions used for these loads and stores are fused by the vectorization. After this happens, the aliasing analysis loses its ability to tell that the loads and stores that make use of those vector-calculated indices are independent. -Hal > > In any case, this version of the patch should be much more suitable for > your (or anyone else's) further review. > > Thanks again, > Hal > > On Thu, 2011-11-17 at 13:57 +0100, Tobias Grosser wrote: > > On 11/17/2011 12:38 AM, Hal Finkel wrote: > > > Tobias, et al., > > > > > > Attached is my autovectorization pass. > > > > Very nice. Will you be at the developer summit? Maybe we could discuss > > the integration there? > > > > Here is a first review of the source code.
> > -- Hal Finkel Postdoctoral Appointee Leadership Computing Facility Argonne National Laboratory From rafael.espindola at gmail.com Mon Nov 21 22:27:31 2011 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Mon, 21 Nov 2011 23:27:31 -0500 Subject: [llvm-commits] [patch] Fix pr11415 Message-ID: It looks like the fast register allocator thinks that early clobbers are defined a bit earlier than they really are. The problem is that when a register is both a tied use and an early clobber of an asm statement, we process the early clobber first and then think that the register is already set, which causes the missing copy to RBP in llvm.org/pr11415. This is one case I am not sure how to test. I can include the testcase that is attached to the PR, but it is fairly brittle. Cheers, Rafael -------------- next part -------------- A non-text attachment was scrubbed... Name: pr11415.patch Type: application/octet-stream Size: 1827 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111121/93a33c17/attachment.obj From stoklund at 2pi.dk Mon Nov 21 23:12:14 2011 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 21 Nov 2011 21:12:14 -0800 Subject: [llvm-commits] [patch] Fix pr11415 In-Reply-To: References: Message-ID: On Nov 21, 2011, at 8:27 PM, Rafael Espíndola wrote: > It looks like the fast register allocator thinks that early clobbers > are defined a bit earlier than they really are. > > The problem is that when a register is both a tied use and an early > clobber of an asm statement, we process the early clobber first and > then think that the register is already set, which causes the missing > copy to RBP in llvm.org/pr11415. > > This is one case I am not sure how to test. I can include the testcase > that is attached to the PR, but it is fairly brittle. The patch looks good. I am not sure why you can't test it. Isn't it enough with a "&=r" inline asm constraint?
/jakob From rafael.espindola at gmail.com Tue Nov 22 00:27:18 2011 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Tue, 22 Nov 2011 06:27:18 -0000 Subject: [llvm-commits] [llvm] r145056 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/X86/pr11415.ll Message-ID: <20111122062718.F3F6E1BE001@llvm.org> Author: rafael Date: Tue Nov 22 00:27:18 2011 New Revision: 145056 URL: http://llvm.org/viewvc/llvm-project?rev=145056&view=rev Log: If a register is both an early clobber and part of a tied use, handle the use before the clobber so that we copy the value if needed. Fixes pr11415. Added: llvm/trunk/test/CodeGen/X86/pr11415.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=145056&r1=145055&r2=145056&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue Nov 22 00:27:18 2011 @@ -682,7 +682,7 @@ } SmallVector PartialDefs; - DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -704,15 +704,24 @@ // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); PartialDefs.push_back(LRI->second.PhysReg); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. 
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - if (setPhysReg(MI, i, PhysReg)) - VirtDead.push_back(Reg); } } + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { Added: llvm/trunk/test/CodeGen/X86/pr11415.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11415.ll?rev=145056&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/pr11415.ll (added) +++ llvm/trunk/test/CodeGen/X86/pr11415.ll Tue Nov 22 00:27:18 2011 @@ -0,0 +1,23 @@ +; RUN: llc %s -o - -regalloc=fast | FileCheck %s + +; We used to consider the early clobber in the second asm statement as +; defining %0 before it was read. 
This caused us to omit the +; movq -8(%rsp), %rdx + +; CHECK: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: movq %rax, -8(%rsp) +; CHECK-NEXT: movq -8(%rsp), %rdx +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rdx, -8(%rsp) +; CHECK-NEXT: ret + +define i64 @foo() { +entry: + %0 = tail call i64 asm "", "={cx}"() nounwind + %1 = tail call i64 asm "", "=&r,0,r,~{rax}"(i64 %0, i64 %0) nounwind + ret i64 %1 +} From rafael.espindola at gmail.com Tue Nov 22 00:36:25 2011 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Tue, 22 Nov 2011 06:36:25 -0000 Subject: [llvm-commits] [llvm] r145057 - /llvm/trunk/test/CodeGen/X86/pr11415.ll Message-ID: <20111122063626.0E28B1BE001@llvm.org> Author: rafael Date: Tue Nov 22 00:36:25 2011 New Revision: 145057 URL: http://llvm.org/viewvc/llvm-project?rev=145057&view=rev Log: Add triple to the test. Modified: llvm/trunk/test/CodeGen/X86/pr11415.ll Modified: llvm/trunk/test/CodeGen/X86/pr11415.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11415.ll?rev=145057&r1=145056&r2=145057&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/pr11415.ll (original) +++ llvm/trunk/test/CodeGen/X86/pr11415.ll Tue Nov 22 00:36:25 2011 @@ -1,4 +1,4 @@ -; RUN: llc %s -o - -regalloc=fast | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s ; We used to consider the early clobber in the second asm statement as ; defining %0 before it was read. This caused us to omit the From stpworld at narod.ru Tue Nov 22 05:01:16 2011 From: stpworld at narod.ru (Stepan Dyatkovskiy) Date: Tue, 22 Nov 2011 15:01:16 +0400 Subject: [llvm-commits] [LLVM, loop-unswitch] Potential bug in RewriteLoopBodyWithConditionConstant Message-ID: <4ECB80FC.3070307@narod.ru> Hi all. 
It seems that the code inside the LoopUnswitch::RewriteLoopBodyWithConditionConstant contains potential bugs. UseList is changed inside the loop that goes through its items: LoopUnswitch.cpp, line #905: for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); UI != E; ++UI) { ...and then inside the loop body UseList is changed implicitly ( LoopUnswitch.cpp, line #910): U->replaceUsesOfWith(LIC, Replacement); It seems that after editing the UseList the loop may produce unpredictable results. But we have been lucky so far :-) I propose to collect everything to be changed first and then make the changes? If so, please find the patch attached for review. -Stepan -------------- next part -------------- A non-text attachment was scrubbed... Name: loop-unswitch-uselist.patch Type: text/x-patch Size: 1014 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/fc49ed4e/attachment.bin From baldrick at free.fr Tue Nov 22 05:15:03 2011 From: baldrick at free.fr (Duncan Sands) Date: Tue, 22 Nov 2011 12:15:03 +0100 Subject: [llvm-commits] [LLVM, loop-unswitch] Potential bug in RewriteLoopBodyWithConditionConstant In-Reply-To: <4ECB80FC.3070307@narod.ru> References: <4ECB80FC.3070307@narod.ru> Message-ID: <4ECB8437.9010708@free.fr> Hi Stepan, > It seems that the code inside the > LoopUnswitch::RewriteLoopBodyWithConditionConstant contains potential bugs. > UseList is changed inside the loop that goes through its items: > > LoopUnswitch.cpp, line #905: for (Value::use_iterator UI = LIC->use_begin(), E > = LIC->use_end(); UI != E; ++UI) { > > ...and then inside the loop body UseList is changed implicitly ( > LoopUnswitch.cpp, line #910): > U->replaceUsesOfWith(LIC, Replacement); > > It seems that after editing the UseList the loop may produce unpredictable results. But > we have been lucky so far :-) > > I propose to collect everything to be changed first and then make the changes? > If so, please find the patch attached for review.
can you just do something like this instead? Also, do you have a testcase? Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), !cast(Val)->getZExtValue()); for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); - UI != E; ++UI) { - Instruction *U = dyn_cast(*UI); + UI != E; ) { + Instruction *U = dyn_cast(*UI++); if (!U || !L->contains(U)) continue; U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); } Ciao, Duncan. From benny.kra at googlemail.com Tue Nov 22 05:37:11 2011 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Tue, 22 Nov 2011 11:37:11 -0000 Subject: [llvm-commits] [llvm] r145059 - /llvm/trunk/lib/Support/MemoryBuffer.cpp Message-ID: <20111122113712.020621BE001@llvm.org> Author: d0k Date: Tue Nov 22 05:37:11 2011 New Revision: 145059 URL: http://llvm.org/viewvc/llvm-project?rev=145059&view=rev Log: Turn error recovery into an assert. This was put in because in a certain version of DragonFlyBSD stat(2) lied about the size of some files. This was fixed a long time ago so we can remove the workaround. Modified: llvm/trunk/lib/Support/MemoryBuffer.cpp Modified: llvm/trunk/lib/Support/MemoryBuffer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/MemoryBuffer.cpp?rev=145059&r1=145058&r2=145059&view=diff ============================================================================== --- llvm/trunk/lib/Support/MemoryBuffer.cpp (original) +++ llvm/trunk/lib/Support/MemoryBuffer.cpp Tue Nov 22 05:37:11 2011 @@ -330,13 +330,8 @@ continue; // Error while reading. return error_code(errno, posix_category()); - } else if (NumRead == 0) { - // We hit EOF early, truncate and terminate buffer. 
- Buf->BufferEnd = BufPtr; - *BufPtr = 0; - result.swap(SB); - return success; } + assert(NumRead != 0 && "fstat reported an invalid file size."); BytesLeft -= NumRead; BufPtr += NumRead; } From chandlerc at gmail.com Tue Nov 22 05:37:46 2011 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 22 Nov 2011 11:37:46 -0000 Subject: [llvm-commits] [llvm] r145060 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Message-ID: <20111122113746.C92B31BE001@llvm.org> Author: chandlerc Date: Tue Nov 22 05:37:46 2011 New Revision: 145060 URL: http://llvm.org/viewvc/llvm-project?rev=145060&view=rev Log: Fix an obvious omission in the SelectionDAGBuilder where we were dropping weights on the floor for invokes. This was impeding my writing further test cases for invoke when interacting with probabilities and block placement. No test case as there doesn't appear to be a way to test this stuff. =/ Suggestions for a test case of course welcome. I hope to be able to add test cases that indirectly cover this eventually by adding probabilities to the exceptional edge and reordering blocks as a result. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=145060&r1=145059&r2=145060&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Nov 22 05:37:46 2011 @@ -1810,8 +1810,8 @@ CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. 
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), From anton at korobeynikov.info Tue Nov 22 06:23:24 2011 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Tue, 22 Nov 2011 15:23:24 +0300 Subject: [llvm-commits] [LLVM, loop-unswitch] Potential bug in RewriteLoopBodyWithConditionConstant In-Reply-To: <4ECB80FC.3070307@narod.ru> References: <4ECB80FC.3070307@narod.ru> Message-ID: Hi Stepan, > I propose to collect all to be changed and then do some changes? They are already collected inside WorkList. Why do you need something same but different? -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From benny.kra at googlemail.com Tue Nov 22 06:31:54 2011 From: benny.kra at googlemail.com (Benjamin Kramer) Date: Tue, 22 Nov 2011 12:31:54 -0000 Subject: [llvm-commits] [llvm] r145061 - in /llvm/trunk: autoconf/configure.ac cmake/config-ix.cmake configure include/llvm/Config/config.h.cmake include/llvm/Config/config.h.in lib/Support/MemoryBuffer.cpp Message-ID: <20111122123154.33AEA1BE001@llvm.org> Author: d0k Date: Tue Nov 22 06:31:53 2011 New Revision: 145061 URL: http://llvm.org/viewvc/llvm-project?rev=145061&view=rev Log: Add configure checking for pread(2) and use it to save a syscall when reading files. 
Modified: llvm/trunk/autoconf/configure.ac llvm/trunk/cmake/config-ix.cmake llvm/trunk/configure llvm/trunk/include/llvm/Config/config.h.cmake llvm/trunk/include/llvm/Config/config.h.in llvm/trunk/lib/Support/MemoryBuffer.cpp Modified: llvm/trunk/autoconf/configure.ac URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/autoconf/configure.ac (original) +++ llvm/trunk/autoconf/configure.ac Tue Nov 22 06:31:53 2011 @@ -1293,7 +1293,7 @@ AC_CHECK_FUNCS([powf fmodf strtof round ]) AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ]) AC_CHECK_FUNCS([isatty mkdtemp mkstemp ]) -AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ]) +AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit strdup ]) AC_CHECK_FUNCS([strerror strerror_r setenv ]) AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ]) AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev]) Modified: llvm/trunk/cmake/config-ix.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/cmake/config-ix.cmake?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/cmake/config-ix.cmake (original) +++ llvm/trunk/cmake/config-ix.cmake Tue Nov 22 06:31:53 2011 @@ -126,6 +126,8 @@ check_symbol_exists(getcwd unistd.h HAVE_GETCWD) check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY) check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT) +check_symbol_exists(posix_spawn spawn.h HAVE_POSIX_SPAWN) +check_symbol_exists(pread unistd.h HAVE_PREAD) check_symbol_exists(rindex strings.h HAVE_RINDEX) check_symbol_exists(strchr string.h HAVE_STRCHR) check_symbol_exists(strcmp string.h HAVE_STRCMP) Modified: llvm/trunk/configure URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/configure (original) +++ llvm/trunk/configure Tue Nov 22 06:31:53 2011 @@ -17151,7 +17151,8 @@ -for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup + +for ac_func in mktemp posix_spawn pread realpath sbrk setrlimit strdup do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` { echo "$as_me:$LINENO: checking for $ac_func" >&5 Modified: llvm/trunk/include/llvm/Config/config.h.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Config/config.h.cmake?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/include/llvm/Config/config.h.cmake (original) +++ llvm/trunk/include/llvm/Config/config.h.cmake Tue Nov 22 06:31:53 2011 @@ -297,6 +297,9 @@ /* Define to 1 if you have the `powf' function. */ #cmakedefine HAVE_POWF ${HAVE_POWF} +/* Define to 1 if you have the `pread' function. */ +#cmakedefine HAVE_PREAD ${HAVE_PREAD} + /* Define if libtool can extract symbol lists from object files. */ #undef HAVE_PRELOADED_SYMBOLS Modified: llvm/trunk/include/llvm/Config/config.h.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Config/config.h.in?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/include/llvm/Config/config.h.in (original) +++ llvm/trunk/include/llvm/Config/config.h.in Tue Nov 22 06:31:53 2011 @@ -295,6 +295,9 @@ /* Define to 1 if you have the `powf' function. */ #undef HAVE_POWF +/* Define to 1 if you have the `pread' function. */ +#undef HAVE_PREAD + /* Define if libtool can extract symbol lists from object files. 
*/ #undef HAVE_PRELOADED_SYMBOLS Modified: llvm/trunk/lib/Support/MemoryBuffer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/MemoryBuffer.cpp?rev=145061&r1=145060&r2=145061&view=diff ============================================================================== --- llvm/trunk/lib/Support/MemoryBuffer.cpp (original) +++ llvm/trunk/lib/Support/MemoryBuffer.cpp Tue Nov 22 06:31:53 2011 @@ -14,6 +14,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Errno.h" #include "llvm/Support/Path.h" @@ -320,11 +321,17 @@ char *BufPtr = const_cast(SB->getBufferStart()); size_t BytesLeft = MapSize; +#ifndef HAVE_PREAD if (lseek(FD, Offset, SEEK_SET) == -1) return error_code(errno, posix_category()); +#endif while (BytesLeft) { +#ifdef HAVE_PREAD + ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset); +#else ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); +#endif if (NumRead == -1) { if (errno == EINTR) continue; From stpworld at narod.ru Tue Nov 22 06:37:26 2011 From: stpworld at narod.ru (Stepan Dyatkovskiy) Date: Tue, 22 Nov 2011 16:37:26 +0400 Subject: [llvm-commits] [LLVM, loop-unswitch] Potential bug in RewriteLoopBodyWithConditionConstant In-Reply-To: <4ECB8437.9010708@free.fr> References: <4ECB80FC.3070307@narod.ru> <4ECB8437.9010708@free.fr> Message-ID: <4ECB9786.9070104@narod.ru> Hi Duncan. You are right. Please find the fixed patch. It is pretty difficult to create a test case. For now I can only say that after replaceUsesOfWith was invoked we implicitly proceed to enumerate Replacement's UseList instead of LIC's. But we try to unswitch instructions that use both LIC and Replacement. That is why everything looks fine. I also attached dumped instructions enumerated before and after fix (for first invocation of LoopUnswitch::RewriteLoopBodyWithConditionConstant with IsEqual = true).
The .ll file that was processed is also attached. -Stepan. Duncan Sands wrote: > Hi Stepan, > >> It seems that the code inside the >> LoopUnswitch::RewriteLoopBodyWithConditionConstant contains potential bugs. >> UseList is changed inside the loop that goes through its items: >> >> LoopUnswitch.cpp, line #905: for (Value::use_iterator UI = LIC->use_begin(), E >> = LIC->use_end(); UI != E; ++UI) { >> >> ...and then inside the loop body UseList is changed implicitly ( >> LoopUnswitch.cpp, line #910): >> U->replaceUsesOfWith(LIC, Replacement); >> >> It seems that after editing the UseList the loop may produce unpredictable results. But >> we have been lucky so far :-) >> >> I propose to collect everything to be changed first and then make the changes? >> If so, please find the patch attached for review. > > can you just do something like this instead? Also, do you have a testcase? > > Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), > !cast(Val)->getZExtValue()); > > for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); > - UI != E; ++UI) { > - Instruction *U = dyn_cast(*UI); > + UI != E; ) { > + Instruction *U = dyn_cast(*UI++); > if (!U || !L->contains(U)) > continue; > U->replaceUsesOfWith(LIC, Replacement); > Worklist.push_back(U); > } > > Ciao, Duncan. > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- A non-text attachment was scrubbed... Name: loop-unswitch-uselist.patch Type: text/x-patch Size: 667 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/33c59d6f/attachment.bin -------------- next part -------------- An embedded and charset-unspecified text was scrubbed...
Name: uselist-afterfix.txt Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/33c59d6f/attachment.txt -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: loop-case-3.ll Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/33c59d6f/attachment.pl -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: uselist-beforefix.txt Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/33c59d6f/attachment-0001.txt From geek4civic at gmail.com Tue Nov 22 06:41:25 2011 From: geek4civic at gmail.com (NAKAMURA Takumi) Date: Tue, 22 Nov 2011 21:41:25 +0900 Subject: [llvm-commits] [PATCH] lit/TestRunner.py: [Win32] Introduce WinWaitReleased(f), to wait for file handles released. Message-ID: When wait() has finished, opened handles (especially one writing stdout to a file) might not be released immediately. To wait for them to be released, poll by attempting a rename. --- utils/lit/lit/TestRunner.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 files changed, 40 insertions(+), 0 deletions(-) -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-lit-TestRunner.py-Win32-Introduce-WinWaitRelease.patch.txt Type: text/x-patch Size: 2305 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20111122/1466ee32/attachment.bin From chandlerc at gmail.com Tue Nov 22 07:13:17 2011 From: chandlerc at gmail.com (Chandler Carruth) Date: Tue, 22 Nov 2011 13:13:17 -0000 Subject: [llvm-commits] [llvm] r145062 - in /llvm/trunk: lib/CodeGen/MachineBasicBlock.cpp test/CodeGen/X86/block-placement.ll Message-ID: <20111122131317.1E6302A6C134@llvm.org> Author: chandlerc Date: Tue Nov 22 07:13:16 2011 New Revision: 145062 URL: http://llvm.org/viewvc/llvm-project?rev=145062&view=rev Log: Fix a devilish miscompile exposed by block placement.
The updateTerminator code didn't correctly handle EH terminators in one very specific case. AnalyzeBranch would find no terminator instruction, and so the fallback in updateTerminator is to assume fallthrough. This is correct, but the destination of the fallthrough was assumed to be the first successor. This is *almost always* true, but in certain cases the loop transformations will cause the landing pad to be the first successor! Instead of this brittle logic, actually look through the successors for a non-landing-pad successor, and assert if more than one is found. This will hopefully fix some (if not all) of the self host miscompiles with block placement. Thanks to Benjamin Kramer for reporting, Nick Lewycky for an initial stab at a reduction, and Duncan for endless advice on EH (which I know nothing about) as well as reviewing the actual fix. Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp llvm/trunk/test/CodeGen/X86/block-placement.ll Modified: llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp?rev=145062&r1=145061&r2=145062&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineBasicBlock.cpp Tue Nov 22 07:13:16 2011 @@ -297,8 +297,14 @@ TII->RemoveBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *succ_begin(); + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, 0, Cond, dl); } Modified: llvm/trunk/test/CodeGen/X86/block-placement.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/block-placement.ll?rev=145062&r1=145061&r2=145062&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/block-placement.ll (original) +++ llvm/trunk/test/CodeGen/X86/block-placement.ll Tue Nov 22 07:13:16 2011 @@ -475,3 +475,31 @@ } !2 = metadata !{metadata !"branch_weights", i32 3, i32 1} + +declare i32 @__gxx_personality_v0(...) + +define void @test_eh_lpad_successor() { +; Some times the landing pad ends up as the first successor of an invoke block. +; When this happens, a strange result used to fall out of updateTerminators: we +; didn't correctly locate the fallthrough successor, assuming blindly that the +; first one was the fallthrough successor. As a result, we would add an +; erroneous jump to the landing pad thinking *that* was the default successor. 
+; CHECK: test_eh_lpad_successor +; CHECK: %entry +; CHECK-NOT: jmp +; CHECK: %loop + +entry: + invoke i32 @f() to label %preheader unwind label %lpad + +preheader: + br label %loop + +lpad: + %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + resume { i8*, i32 } %lpad.val + +loop: + br label %loop +} From craig.topper at gmail.com Tue Nov 22 08:27:57 2011 From: craig.topper at gmail.com (Craig Topper) Date: Tue, 22 Nov 2011 14:27:57 -0000 Subject: [llvm-commits] [llvm] r145063 - in /llvm/trunk/lib/Target/X86: InstPrinter/X86InstComments.cpp Utils/X86ShuffleDecode.cpp Utils/X86ShuffleDecode.h Message-ID: <20111122142757.CFEA72A6C134@llvm.org> Author: ctopper Date: Tue Nov 22 08:27:57 2011 New Revision: 145063 URL: http://llvm.org/viewvc/llvm-project?rev=145063&view=rev Log: More fixes to the X86InstComments for shuffle instructions. In particular add AVX flavors of many instructions and fix the destination operand for some of the existing AVX entries. 
Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=145063&r1=145062&r2=145063&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original) +++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Tue Nov 22 08:27:57 2011 @@ -34,6 +34,12 @@ switch (MI->getOpcode()) { case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + case X86::VINSERTPSrr: + DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg()); DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); @@ -44,34 +50,52 @@ Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVLHPSMask(2, ShuffleMask); break; + case X86::VMOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; case X86::MOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::VMOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; case X86::PSHUFDri: + case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
case X86::PSHUFDmi: + case X86::VPSHUFDmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFHWri: + case X86::VPSHUFHWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFHWmi: + case X86::VPSHUFHWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFLWri: + case X86::VPSHUFLWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFLWmi: + case X86::VPSHUFLWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -142,6 +166,14 @@ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPDrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPDrmi: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::SHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -150,90 +182,106 @@ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::UNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::UNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDYrm: - DecodeUNPCKLPDMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSYrm: - DecodeUNPCKLPSMask(8, ShuffleMask); + DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::UNPCKHPDrm: - DecodeUNPCKHPDMask(2, ShuffleMask); + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPDrm: - DecodeUNPCKHPDMask(2, ShuffleMask); + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPDYrm: - DecodeUNPCKLPDMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPSMask(4, ShuffleMask); + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPSrm: - DecodeUNPCKHPSMask(4, ShuffleMask); + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPSYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::VUNPCKHPSYrm: - DecodeUNPCKHPSMask(8, ShuffleMask); + DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSri: DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=145063&r1=145062&r2=145063&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original) +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Tue Nov 22 08:27:57 2011 @@ -142,16 +142,6 @@ } } -void DecodeUNPCKHPSMask(unsigned NElts, - SmallVectorImpl &ShuffleMask) { - DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} - -void DecodeUNPCKHPDMask(unsigned NElts, - SmallVectorImpl &ShuffleMask) { - DecodeUNPCKHPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); -} - void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); @@ -171,16 +161,6 @@ } } -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} - -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); -} - /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=145063&r1=145062&r2=145063&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original) +++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Tue Nov 22 08:27:57 2011 @@ -67,23 +67,11 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodeUNPCKHPSMask(unsigned NElts, - SmallVectorImpl &ShuffleMask); - -void DecodeUNPCKHPDMask(unsigned NElts, - SmallVectorImpl &ShuffleMask); - /// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl &ShuffleMask); -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl &ShuffleMask); - -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl &ShuffleMask); - /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
From hfinkel at anl.gov Tue Nov 22 10:21:05 2011 From: hfinkel at anl.gov (Hal Finkel) Date: Tue, 22 Nov 2011 16:21:05 -0000 Subject: [llvm-commits] [llvm] r145065 - in /llvm/trunk: lib/Target/PowerPC/PPCISelLowering.cpp lib/Target/PowerPC/PPCInstrInfo.cpp lib/Target/PowerPC/PPCRegisterInfo.cpp lib/Target/PowerPC/PPCRegisterInfo.h test/CodeGen/PowerPC/ppc32-vaarg.ll Message-ID: <20111122162105.6A1642A6C134@llvm.org> Author: hfinkel Date: Tue Nov 22 10:21:04 2011 New Revision: 145065 URL: http://llvm.org/viewvc/llvm-project?rev=145065&view=rev Log: add basic PPC register-pressure feedback; adjust the vaarg test to match the new register-allocation pattern Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h llvm/trunk/test/CodeGen/PowerPC/ppc32-vaarg.ll Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=145065&r1=145064&r2=145065&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Nov 22 10:21:04 2011 @@ -407,6 +407,8 @@ setInsertFencesForAtomic(true); + setSchedulingPreference(Sched::Hybrid); + computeRegisterProperties(); } Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=145065&r1=145064&r2=145065&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Tue Nov 22 10:21:04 2011 @@ -56,11 +56,8 @@ unsigned Directive = TM->getSubtarget().getDarwinDirective(); if (Directive == PPC::DIR_440) { - // Disable the hazard 
recognizer for now, as it doesn't support - // bottom-up scheduling. - //const InstrItineraryData *II = TM->getInstrItineraryData(); - //return new PPCHazardRecognizer440(II, DAG); - return new ScheduleHazardRecognizer(); + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new PPCHazardRecognizer440(II, DAG); } else { // Disable the hazard recognizer for now, as it doesn't support Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=145065&r1=145064&r2=145065&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Nov 22 10:21:04 2011 @@ -273,6 +273,27 @@ return Reserved; } +unsigned +PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const unsigned DefaultSafety = 1; + + switch (RC->getID()) { + default: + return 0; + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 
1 : 0; + return 32 - FP - DefaultSafety; + } + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return 32 - DefaultSafety; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h?rev=145065&r1=145064&r2=145065&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.h Tue Nov 22 10:21:04 2011 @@ -37,6 +37,9 @@ /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; Modified: llvm/trunk/test/CodeGen/PowerPC/ppc32-vaarg.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc32-vaarg.ll?rev=145065&r1=145064&r2=145065&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc32-vaarg.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/ppc32-vaarg.ll Tue Nov 22 10:21:04 2011 @@ -12,10 +12,9 @@ define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind { entry: %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6 -; CHECK: lbz 4, 0(3) -; CHECK-NEXT: rlwinm 5, 4, 0, 31, 31 -; CHECK-NEXT: cmplwi 0, 5, 0 -; CHECK-NEXT: addi 5, 4, 1 +; CHECK: addi 5, 4, 1 +; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31 +; CHECK-NEXT: cmplwi 0, 6, 0 ; CHECK-NEXT: stw 3, -4(1) ; CHECK-NEXT: stw 5, -8(1) ; CHECK-NEXT: stw 4, -12(1) @@ -25,138 +24,137 @@ ; CHECK-NEXT: stw 3, -8(1) ; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: lwz 3, -8(1) -; CHECK-NEXT: slwi 4, 3, 2 +; CHECK-NEXT: addi 4, 3, 2 ; CHECK-NEXT: lwz 5, -4(1) ; CHECK-NEXT: lwz 6, 4(5) ; CHECK-NEXT: lwz 7, 8(5) -; CHECK-NEXT: add 4, 7, 4 +; CHECK-NEXT: stb 4, 0(5) ; CHECK-NEXT: cmpwi 0, 3, 8 +; CHECK-NEXT: addi 4, 6, 4 +; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: stw 7, -16(1) +; CHECK-NEXT: stw 4, -20(1) +; CHECK-NEXT: stw 3, -24(1) +; CHECK-NEXT: stw 8, -28(1) +; CHECK-NEXT: stw 6, -32(1) ; CHECK-NEXT: mfcr 0 # cr0 -; CHECK-NEXT: stw 0, -16(1) -; CHECK-NEXT: stw 3, -20(1) -; CHECK-NEXT: stw 4, -24(1) -; CHECK-NEXT: stw 6, -28(1) +; CHECK-NEXT: stw 0, -36(1) ; CHECK-NEXT: blt 0, .LBB0_4 ; CHECK-NEXT: # BB#3: # %entry -; CHECK-NEXT: lwz 3, -28(1) -; CHECK-NEXT: stw 3, -24(1) +; CHECK-NEXT: lwz 3, -20(1) +; CHECK-NEXT: stw 3, -28(1) ; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lwz 3, -28(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) + store i64 %x, i64* @var1, align 8 ; CHECK-NEXT: lwz 3, -24(1) -; CHECK-NEXT: lwz 4, -28(1) -; 
CHECK-NEXT: addi 5, 4, 4 -; CHECK-NEXT: lwz 0, -16(1) +; CHECK-NEXT: slwi 5, 3, 2 +; CHECK-NEXT: lwz 6, -16(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: lwz 0, -36(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 4, -32(1) -; CHECK-NEXT: stw 5, -36(1) -; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: stw 5, -40(1) ; CHECK-NEXT: blt 0, .LBB0_6 ; CHECK-NEXT: # BB#5: # %entry -; CHECK-NEXT: lwz 3, -36(1) -; CHECK-NEXT: stw 3, -32(1) -; CHECK-NEXT: .LBB0_6: # %entry ; CHECK-NEXT: lwz 3, -32(1) -; CHECK-NEXT: lwz 4, -20(1) -; CHECK-NEXT: addi 5, 4, 2 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 0(6) -; CHECK-NEXT: stw 3, 4(6) - store i64 %x, i64* @var1, align 8 +; CHECK-NEXT: stw 3, -40(1) +; CHECK-NEXT: .LBB0_6: # %entry ; CHECK-NEXT: lwz 3, -40(1) -; CHECK-NEXT: lwz 5, 0(3) -; CHECK-NEXT: lwz 7, 4(3) -; CHECK-NEXT: lis 8, var1 at ha -; CHECK-NEXT: la 9, var1 at l(8) -; CHECK-NEXT: stw 7, 4(9) -; CHECK-NEXT: stw 5, var1 at l(8) +; CHECK-NEXT: lwz 4, 0(3) +; CHECK-NEXT: lwz 3, 4(3) +; CHECK-NEXT: lis 5, var1 at ha +; CHECK-NEXT: la 6, var1 at l(5) +; CHECK-NEXT: stw 3, 4(6) +; CHECK-NEXT: stw 4, var1 at l(5) +; CHECK-NEXT: lwz 3, -4(1) %y = va_arg %struct.__va_list_tag* %ap, double; From f1 -; CHECK-NEXT: lbz 5, 1(6) -; CHECK-NEXT: lwz 7, 4(6) -; CHECK-NEXT: lwz 8, 8(6) -; CHECK-NEXT: slwi 9, 5, 3 -; CHECK-NEXT: add 8, 8, 9 -; CHECK-NEXT: cmpwi 0, 5, 8 -; CHECK-NEXT: addi 9, 7, 8 -; CHECK-NEXT: mr 10, 7 -; CHECK-NEXT: stw 9, -44(1) +; CHECK-NEXT: lbz 4, 1(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: lwz 6, 8(3) +; CHECK-NEXT: addi 7, 4, 1 +; CHECK-NEXT: stb 7, 1(3) +; CHECK-NEXT: cmpwi 0, 4, 8 +; CHECK-NEXT: addi 7, 5, 8 +; CHECK-NEXT: mr 8, 5 +; CHECK-NEXT: stw 5, -44(1) ; CHECK-NEXT: stw 7, -48(1) +; CHECK-NEXT: stw 4, -52(1) +; CHECK-NEXT: stw 6, -56(1) +; CHECK-NEXT: stw 8, -60(1) ; CHECK-NEXT: mfcr 0 # cr0 -; CHECK-NEXT: stw 0, -52(1) -; CHECK-NEXT: stw 5, -56(1) -; CHECK-NEXT: stw 10, -60(1) -; CHECK-NEXT: stw 8, -64(1) +; CHECK-NEXT: stw 0, -64(1) ; CHECK-NEXT: 
blt 0, .LBB0_8 ; CHECK-NEXT: # BB#7: # %entry -; CHECK-NEXT: lwz 3, -44(1) +; CHECK-NEXT: lwz 3, -48(1) ; CHECK-NEXT: stw 3, -60(1) ; CHECK-NEXT: .LBB0_8: # %entry ; CHECK-NEXT: lwz 3, -60(1) -; CHECK-NEXT: lwz 4, -64(1) -; CHECK-NEXT: addi 4, 4, 32 -; CHECK-NEXT: lwz 0, -52(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) +; CHECK-NEXT: lwz 3, -52(1) +; CHECK-NEXT: slwi 5, 3, 3 +; CHECK-NEXT: lwz 6, -56(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: addi 5, 5, 32 +; CHECK-NEXT: lwz 0, -64(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 4, -68(1) -; CHECK-NEXT: stw 3, -72(1) +; CHECK-NEXT: stw 5, -68(1) ; CHECK-NEXT: blt 0, .LBB0_10 ; CHECK-NEXT: # BB#9: # %entry -; CHECK-NEXT: lwz 3, -48(1) +; CHECK-NEXT: lwz 3, -44(1) ; CHECK-NEXT: stw 3, -68(1) ; CHECK-NEXT: .LBB0_10: # %entry ; CHECK-NEXT: lwz 3, -68(1) -; CHECK-NEXT: lwz 4, -56(1) -; CHECK-NEXT: addi 5, 4, 1 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 1(6) -; CHECK-NEXT: lwz 5, -72(1) -; CHECK-NEXT: stw 5, 4(6) ; CHECK-NEXT: lfd 0, 0(3) store double %y, double* @var2, align 8 ; CHECK-NEXT: lis 3, var2 at ha ; CHECK-NEXT: stfd 0, var2 at l(3) %z = va_arg %struct.__va_list_tag* %ap, i32; From r7 -; CHECK-NEXT: lbz 3, 0(6) -; CHECK-NEXT: lwz 5, 4(6) -; CHECK-NEXT: lwz 7, 8(6) -; CHECK-NEXT: slwi 8, 3, 2 -; CHECK-NEXT: add 7, 7, 8 -; CHECK-NEXT: cmpwi 0, 3, 8 -; CHECK-NEXT: addi 8, 5, 4 -; CHECK-NEXT: mr 9, 5 -; CHECK-NEXT: stw 3, -76(1) -; CHECK-NEXT: stw 7, -80(1) -; CHECK-NEXT: stw 8, -84(1) -; CHECK-NEXT: stw 5, -88(1) -; CHECK-NEXT: stw 9, -92(1) +; CHECK-NEXT: lwz 3, -4(1) +; CHECK-NEXT: lbz 4, 0(3) +; CHECK-NEXT: lwz 5, 4(3) +; CHECK-NEXT: lwz 6, 8(3) +; CHECK-NEXT: addi 7, 4, 1 +; CHECK-NEXT: stb 7, 0(3) +; CHECK-NEXT: cmpwi 0, 4, 8 +; CHECK-NEXT: addi 7, 5, 4 +; CHECK-NEXT: mr 8, 5 +; CHECK-NEXT: stw 4, -72(1) +; CHECK-NEXT: stw 6, -76(1) ; CHECK-NEXT: mfcr 0 # cr0 -; CHECK-NEXT: stw 0, -96(1) +; CHECK-NEXT: stw 0, -80(1) +; CHECK-NEXT: stw 5, -84(1) +; CHECK-NEXT: stw 8, -88(1) +; 
CHECK-NEXT: stw 7, -92(1) ; CHECK-NEXT: blt 0, .LBB0_12 ; CHECK-NEXT: # BB#11: # %entry -; CHECK-NEXT: lwz 3, -84(1) -; CHECK-NEXT: stw 3, -92(1) -; CHECK-NEXT: .LBB0_12: # %entry ; CHECK-NEXT: lwz 3, -92(1) -; CHECK-NEXT: lwz 4, -80(1) -; CHECK-NEXT: lwz 0, -96(1) +; CHECK-NEXT: stw 3, -88(1) +; CHECK-NEXT: .LBB0_12: # %entry +; CHECK-NEXT: lwz 3, -88(1) +; CHECK-NEXT: lwz 4, -4(1) +; CHECK-NEXT: stw 3, 4(4) +; CHECK-NEXT: lwz 3, -72(1) +; CHECK-NEXT: slwi 5, 3, 2 +; CHECK-NEXT: lwz 6, -76(1) +; CHECK-NEXT: add 5, 6, 5 +; CHECK-NEXT: lwz 0, -80(1) ; CHECK-NEXT: mtcrf 128, 0 -; CHECK-NEXT: stw 3, -100(1) -; CHECK-NEXT: stw 4, -104(1) +; CHECK-NEXT: stw 5, -96(1) ; CHECK-NEXT: blt 0, .LBB0_14 ; CHECK-NEXT: # BB#13: # %entry -; CHECK-NEXT: lwz 3, -88(1) -; CHECK-NEXT: stw 3, -104(1) +; CHECK-NEXT: lwz 3, -84(1) +; CHECK-NEXT: stw 3, -96(1) ; CHECK-NEXT: .LBB0_14: # %entry -; CHECK-NEXT: lwz 3, -104(1) -; CHECK-NEXT: lwz 4, -76(1) -; CHECK-NEXT: addi 5, 4, 1 -; CHECK-NEXT: lwz 6, -4(1) -; CHECK-NEXT: stb 5, 0(6) -; CHECK-NEXT: lwz 5, -100(1) -; CHECK-NEXT: stw 5, 4(6) +; CHECK-NEXT: lwz 3, -96(1) ; CHECK-NEXT: lwz 3, 0(3) store i32 %z, i32* @var3, align 4 -; CHECK-NEXT: lis 5, var3 at ha -; CHECK-NEXT: stw 3, var3 at l(5) +; CHECK-NEXT: lis 4, var3 at ha +; CHECK-NEXT: stw 3, var3 at l(4) +; CHECK-NEXT: lwz 3, -4(1) ret void -; CHECK-NEXT: stw 6, -108(1) +; CHECK-NEXT: stw 3, -100(1) ; CHECK-NEXT: blr } From rafael.espindola at gmail.com Tue Nov 22 10:28:34 2011 From: rafael.espindola at gmail.com (=?UTF-8?Q?Rafael_Esp=C3=ADndola?=) Date: Tue, 22 Nov 2011 11:28:34 -0500 Subject: [llvm-commits] arm mc: fix relocation of an alias to an alias In-Reply-To: References: <8AE16CFC-D9D4-431B-9716-C8493971AEF0@apple.com> <4E9F0948.9060808@gmail.com> <4EC8079B.6050405@gmail.com> Message-ID: > The bug was the crasher. This test at least checks that llc does not crash. 
> I'm fine to write .s -> .o test if you can point me to such a test case in > the tree (I didn't see them yet, sorry) Something like test/MC/ELF/alias.s is what I had in mind. Cheers, Rafael From grosser at fim.uni-passau.de Tue Nov 22 13:40:19 2011 From: grosser at fim.uni-passau.de (Tobias Grosser) Date: Tue, 22 Nov 2011 19:40:19 -0000 Subject: [llvm-commits] [polly] r145071 - in /polly/trunk: lib/Analysis/ScopDetection.cpp lib/Pocc.cpp lib/RegisterPasses.cpp lib/ScheduleOptimizer.cpp test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll www/documentation/passes.html Message-ID: <20111122194019.EA6EF2A6C134@llvm.org> Author: grosser Date: Tue Nov 22 13:40:19 2011 New Revision: 145071 URL: http://llvm.org/viewvc/llvm-project?rev=145071&view=rev Log: Register Passes: Use -polly-optimizer=(isl|pocc) to switch optimizers This replaces the old option -polly-use-pocc. Also call the passes uniformly -polly-opt-pocc and -polly-opt-isl. Modified: polly/trunk/lib/Analysis/ScopDetection.cpp polly/trunk/lib/Pocc.cpp polly/trunk/lib/RegisterPasses.cpp polly/trunk/lib/ScheduleOptimizer.cpp polly/trunk/test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll polly/trunk/www/documentation/passes.html Modified: polly/trunk/lib/Analysis/ScopDetection.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopDetection.cpp?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/lib/Analysis/ScopDetection.cpp (original) +++ polly/trunk/lib/Analysis/ScopDetection.cpp Tue Nov 22 13:40:19 2011 @@ -268,7 +268,8 @@ // disable this check to not cause irrelevant verification failures. 
if (!AS.isMustAlias() && !IgnoreAliasing) INVALID_NOVERIFY(Alias, - "Possible aliasing found for value: " << *BaseValue); + "Possible aliasing for value: " << BaseValue->getName() + << "\n"); return true; } Modified: polly/trunk/lib/Pocc.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Pocc.cpp?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/lib/Pocc.cpp (original) +++ polly/trunk/lib/Pocc.cpp Tue Nov 22 13:40:19 2011 @@ -274,11 +274,11 @@ AU.addRequired(); } -INITIALIZE_PASS_BEGIN(Pocc, "polly-optimize", - "Polly - Optimize the scop using pocc", false, false) +INITIALIZE_PASS_BEGIN(Pocc, "polly-opt-pocc", + "Polly - Optimize the scop using pocc", false, false) INITIALIZE_PASS_DEPENDENCY(Dependences) INITIALIZE_PASS_DEPENDENCY(ScopInfo) -INITIALIZE_PASS_END(Pocc, "polly-optimize", +INITIALIZE_PASS_END(Pocc, "polly-opt-pocc", "Polly - Optimize the scop using pocc", false, false) Pass* polly::createPoccPass() { Modified: polly/trunk/lib/RegisterPasses.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/RegisterPasses.cpp?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/lib/RegisterPasses.cpp (original) +++ polly/trunk/lib/RegisterPasses.cpp Tue Nov 22 13:40:19 2011 @@ -26,6 +26,8 @@ #include "polly/ScopInfo.h" #include "polly/TempScopInfo.h" +#include <string> + using namespace llvm; static cl::opt @@ -40,10 +42,11 @@ DisableCodegen("polly-no-codegen", cl::desc("Disable Polly Code Generation"), cl::Hidden, cl::init(false)); -static cl::opt<bool> -UsePocc("polly-use-pocc", - cl::desc("Use the PoCC optimizer instead of the one in isl"), cl::Hidden, - cl::init(false)); +static cl::opt<std::string> +Optimizer("polly-optimizer", + cl::desc("Select the scheduling optimizer. 
" + "Either isl (default) or pocc."), + cl::Hidden, cl::init("isl")); static cl::opt ImportJScop("polly-run-import-jscop", cl::desc("Export the JScop description of the detected Scops"), @@ -113,10 +116,6 @@ PollyEnabled = true; if (!PollyEnabled) { - if (UsePocc) - errs() << "The option -polly-use-pocc has no effect. " - "Polly was not enabled\n"; - if (DisableCodegen) errs() << "The option -polly-no-codegen has no effect. " "Polly was not enabled\n"; @@ -183,16 +182,20 @@ PM.add(polly::createJSONImporterPass()); if (RunScheduler) { - if (UsePocc) { + if (Optimizer == "pocc") { #ifdef SCOPLIB_FOUND PM.add(polly::createPoccPass()); #else errs() << "Polly is compiled without scoplib support. As scoplib is " - << "required to run PoCC, PoCC is also not available. Falling " - << "back to the isl optimizer.\n"; + "required to run PoCC, PoCC is also not available. Falling " + "back to the isl optimizer.\n"; PM.add(polly::createIslScheduleOptimizerPass()); #endif + } else if (Optimizer == "isl") { + PM.add(polly::createIslScheduleOptimizerPass()); } else { + errs() << "Invalid optimizer. Only 'isl' and 'pocc' allowed. 
" + "Falling back to 'isl'.\n"; PM.add(polly::createIslScheduleOptimizerPass()); } } Modified: polly/trunk/lib/ScheduleOptimizer.cpp URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/ScheduleOptimizer.cpp?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/lib/ScheduleOptimizer.cpp (original) +++ polly/trunk/lib/ScheduleOptimizer.cpp Tue Nov 22 13:40:19 2011 @@ -32,7 +32,7 @@ #include "isl/schedule.h" #include "isl/band.h" -#define DEBUG_TYPE "polly-optimize-isl" +#define DEBUG_TYPE "polly-opt-isl" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" @@ -432,11 +432,11 @@ AU.addRequired(); } -INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-optimize-isl", +INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl", "Polly - Optimize schedule of SCoP", false, false) INITIALIZE_PASS_DEPENDENCY(Dependences) INITIALIZE_PASS_DEPENDENCY(ScopInfo) -INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-optimize-isl", +INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl", "Polly - Optimize schedule of SCoP", false, false) Pass* polly::createIslScheduleOptimizerPass() { Modified: polly/trunk/test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll (original) +++ polly/trunk/test/ScheduleOptimizer/2011-08-25-crash_in_vectorizer.ll Tue Nov 22 13:40:19 2011 @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-optimize-isl -polly-cloog -analyze %s -S | FileCheck %s -; RUN: opt %loadPolly -polly-optimize-isl -polly-cloog -analyze -enable-polly-vector %s -S | FileCheck %s -check-prefix=VECTOR +; RUN: opt %loadPolly -polly-opt-isl -polly-cloog -analyze %s -S | FileCheck %s +; RUN: 
opt %loadPolly -polly-opt-isl -polly-cloog -analyze -enable-polly-vector %s -S | FileCheck %s -check-prefix=VECTOR target datalayout = Modified: polly/trunk/www/documentation/passes.html URL: http://llvm.org/viewvc/llvm-project/polly/trunk/www/documentation/passes.html?rev=145071&r1=145070&r2=145071&view=diff ============================================================================== --- polly/trunk/www/documentation/passes.html (original) +++ polly/trunk/www/documentation/passes.html Tue Nov 22 13:40:19 2011 @@ -29,8 +29,8 @@

Middle End